diff options
author | Vivien Kraus <vivien@planete-kraus.eu> | 2020-11-28 09:51:15 +0100 |
---|---|---|
committer | Vivien Kraus <vivien@planete-kraus.eu> | 2021-05-11 00:30:12 +0200 |
commit | c4479afbd1159b921e1b9c3155e992e32806b712 (patch) | |
tree | d13da0e91c6271edee5087032752c14ef288c13b | |
parent | 814ab9feab59c499d3221971b0524972b0d161a4 (diff) |
Use a web cache on the file system
-rw-r--r-- | NEWS | 3 | ||||
-rw-r--r-- | doc/manual.html | 48 | ||||
-rw-r--r-- | guix/vkraus/packages/webid-oidc.scm | 4 | ||||
-rw-r--r-- | src/scm/webid-oidc/Makefile.am | 6 | ||||
-rw-r--r-- | src/scm/webid-oidc/cache.scm | 278 | ||||
-rw-r--r-- | src/scm/webid-oidc/stubs.scm | 20 | ||||
-rw-r--r-- | tests/Makefile.am | 4 | ||||
-rw-r--r-- | tests/cache-revalidate.scm | 42 | ||||
-rw-r--r-- | tests/cache-valid.scm | 58 |
9 files changed, 459 insertions, 4 deletions
@@ -23,6 +23,9 @@ The function =sign= creates a signature with a known JWA, and =verify= verifies the signature. ** Encode and decode a JWS The decoding function lets you fetch a key for validation. +** Web cache +Since DPoP and OIDC fetch a lot of things from the internet, it is in +our interest to add a web cache. # Local Variables: # mode: org # End: diff --git a/doc/manual.html b/doc/manual.html index 49350d3..9a14e39 100644 --- a/doc/manual.html +++ b/doc/manual.html @@ -150,6 +150,54 @@ Encode the JWT and sign it with <info:var>key</info:var>. </p> </info:deffn> + <h1>Caching on server side</h1> + <p> + Both the identity provider and the resource server need to cache + things. The identity provider will cache application webids, and + the resource server will cache the identity provider keys, for + instance. + </p> + <p> + The solution is to use a file-system cache. Every response + (except those that have a cache-control policy of no-store) are + stored to a sub-directory of <emph>XDG_CACHE_HOME</emph>. Each + store has a 5% chance of triggering a cleanup of the cache. When + a cleanup occurs, each cached response has a 5% chance of being + dropped, including responses that are indicated as valid. This + way, a malicious cache response that has a maliciously long + validity will not stay too long in the cache. A log line will + indicate which items are dropped. + </p> + <p> + The <emph>(webid-oidc cache)</emph> module exports two + functions to deal with the cache. + </p> + <info:deffn type="function" name="clean-cache" arguments="[#percents] [#dir]"> + <p> + Drop <info:var>percents</info:var>% of the cache right now, in + <info:var>dir</info:var> (defaults to some place within + <emph>XDG_CACHE_HOME</emph>). + </p> + </info:deffn> + <info:deffn type="function" name="with-cache" arguments="[#current-time] [#http-get] [#dir]"> + <p> + Return a function acting as <emph>http-get</emph> from + <emph>(web client)</emph> (takes an URI as the first + parameter, and an optional <info:var>#:headers</info:var> set, + and returns 2 values, the response and its body). + </p> + <p> + The cache will be read and written in <info:var>dir</info:var> + (defaults to some place within <emph>XDG_CACHE_HOME</emph>), + and the <info:var>current-time</info:var> number of seconds, + SRFI-19 time or date, or time-returning thunk will be used to + check for the validity of responses. + </p> + <p> + The back-end function, <info:var>http-get</info:var>, defaults + to that of <emph>(web client)</emph>. + </p> + </info:deffn> <h1>What if something goes wrong?</h1> <p> The library will raise an exception whenever something fishy diff --git a/guix/vkraus/packages/webid-oidc.scm b/guix/vkraus/packages/webid-oidc.scm index 0d971e8..8ed0d65 100644 --- a/guix/vkraus/packages/webid-oidc.scm +++ b/guix/vkraus/packages/webid-oidc.scm @@ -16,7 +16,9 @@ #:use-module (gnu packages gettext) #:use-module (gnu packages man) #:use-module (gnu packages tls) - #:use-module (gnu packages xml)) + #:use-module (gnu packages xml) + #:use-module (gnu packages emacs) + #:use-module (gnu packages emacs-xyz)) (define-public webid-oidc-snapshot (package diff --git a/src/scm/webid-oidc/Makefile.am b/src/scm/webid-oidc/Makefile.am index aca5f0c..91dff23 100644 --- a/src/scm/webid-oidc/Makefile.am +++ b/src/scm/webid-oidc/Makefile.am @@ -3,10 +3,12 @@ dist_webidoidcmod_DATA += \ %reldir%/stubs.scm \ %reldir%/testing.scm \ %reldir%/jwk.scm \ - %reldir%/jws.scm + %reldir%/jws.scm \ + %reldir%/cache.scm webidoidcgo_DATA += \ %reldir%/errors.go \ %reldir%/stubs.go \ %reldir%/testing.go \ %reldir%/jwk.go \ - %reldir%/jws.go + %reldir%/jws.go \ + %reldir%/cache.go diff --git a/src/scm/webid-oidc/cache.scm b/src/scm/webid-oidc/cache.scm new file mode 100644 index 0000000..8e28c17 --- /dev/null +++ b/src/scm/webid-oidc/cache.scm @@ -0,0 +1,278 @@ +(define-module (webid-oidc cache) + #:use-module ((webid-oidc stubs) #:prefix stubs:) + #:use-module (web client) + #:use-module (web request) + #:use-module (web response) + #:use-module (web uri) + #:use-module (ice-9 ftw) + #:use-module (ice-9 receive) + #:use-module (ice-9 optargs) + #:use-module (srfi srfi-19) + #:use-module (rnrs bytevectors)) + +;; The cache follows the recommendations of +;; https://developer.mozilla.org/en-US/docs/Web/HTTP/Caching + +;; In a directory, we dump files, indexed by the hash of the URI, and +;; containing the request, response and response body. + +;; We keep only one version of the resource, even with varying +;; headers. If the cache validation fails due to varying headers, we +;; simply drop the old variant and replace it with the new. For things +;; like user agents or accepted language or encoding, you’re not +;; likely to alternate quickly between two values anyway. + +;; We’re not very concerned by the cache growing. We provide a cleanup +;; function that will kill a few percents of the cached entries at +;; random, and that you should call with a cron job regularly. It will +;; sometimes drop valid cached entries, but life goes on. By default, +;; each cache update has a 5% chance to trigger the elimination of 5% +;; of the cache. + +;; To update the cache, a file named $HASH~ is created, flocked, +;; populated, renamed to $HASH, and unflocked. $HASH~ might be +;; destroyed by the cache cleaner, so the renaming might fail. It’s +;; better that way, because if an app crashes before the rename +;; operation, then we’ll have dangling ~ files. + +(define (default-cache-dir) + (let ((xdg-cache-home + (or (getenv "XDG_CACHE_HOME") + (format #f "~a/.cache")))) + (format #f "~a/webid-oidc" xdg-cache-home))) + +(define (event? percents) + (<= (* (random:uniform) 100) + percents)) + +(define*-public (clean-cache + #:key + (percents 5) + (dir default-cache-dir)) + (define (survives?) + (not (event? percents))) + (define (enter? name stat result) + #t) + (define (leaf name stat result) + (unless (survives?) + (unless + (false-if-exception + (begin + (format (current-error-port) "Dropping cache item ~a.~%" name) + (delete-file name))) + (format (current-error-port) "Could not clean file ~a.~%" name))) + result) + (define (down name stat result) result) + (define (up name stat result) result) + (define (skip name stat result) result) + (define (error name stat errno result) + (format (current-error-port) "While cleaning the cache: ~a: ~a~%" + name (strerror errno)) + result) + (when (thunk? dir) + (set! dir (dir))) + (file-system-fold enter? leaf down up skip error 0 + (string-append dir "/web-cache"))) + +(define (maybe-clean-cache + pc-happen + pc-cleaned + dir) + (when (event? pc-happen) + (clean-cache #:percents pc-cleaned #:dir dir))) + +(define (file-name uri dir) + (when (thunk? dir) + (set! dir (dir))) + (when (string? uri) + (set! uri (string->uri uri))) + (string-append dir + "/web-cache/" + (stubs:hash 'SHA-256 (uri->string uri)))) + +(define (remove-uncacheable-headers response) + (let ((headers (response-headers response))) + (let ((filtered + (filter + (lambda (h) + (case (car h) + ((connection keep-alive transfer-encoding) + #f) + (else #t))) + headers))) + (build-response + #:code (response-code response) + #:reason-phrase (response-reason-phrase response) + #:headers filtered + #:port #f)))) + +(define*-public (add request response response-body + #:key (dir default-cache-dir)) + ;; Don’t store it if there’s a cache-control no-store + (unless + (let ((cc (response-cache-control response '()))) + (assq-ref cc 'no-store)) + (set! response (remove-uncacheable-headers response)) + (let ((final-file-name (file-name (request-uri request) dir))) + (maybe-clean-cache 5 5 dir) + (stubs:call-with-output-file* + (string-append final-file-name "~") + (lambda (port) + (flock port LOCK_EX) + (write-request request port) + (let ((file-response + (write-response response port))) + (when (string? response-body) + (set! response-body (string->utf8 response-body))) + (when response-body + (write-response-body file-response response-body)) + (force-output port) + (false-if-exception + (rename-file (string-append final-file-name "~") + final-file-name)) + (flock port LOCK_UN) + (close-port port))))))) + +(define (the-current-time) + (time-utc->date + (current-time))) + +(define*-public (read uri + #:key + (dir default-cache-dir)) + (let ((final-file-name (file-name uri dir))) + (catch 'system-error + (lambda () + (call-with-input-file final-file-name + (lambda (port) + (let ((request (read-request port))) + (let ((response (read-response port))) + (values request response (read-response-body response))))))) + (lambda error + (format (current-error-port) "Cache miss for ~a: ~s~%" + (uri->string uri) + error) + (values #f #f #f))))) + +(define (varies-header? request-a request-b header) + (let ((a (assq-ref (request-headers request-a) header)) + (b (assq-ref (request-headers request-b) header))) + (not (equal? a b)))) + +(define (varies-any-header? request-a request-b headers) + (and (not (null? headers)) + (or (varies-header? request-a request-b (car headers)) + (varies-any-header? request-a request-b (cdr headers))))) + +(define-public (varies? request-a request-b response) + (let ((vary (response-vary response))) + (or (eq? vary '*) + (varies-any-header? request-a request-b vary)))) + +(define*-public (valid? response + #:key + (current-time the-current-time)) + (when (thunk? current-time) + (set! current-time (current-time))) + (when (integer? current-time) + (set! current-time + (make-time time-utc 0 current-time))) + (when (time? current-time) + (set! current-time (time-utc->date current-time))) + (set! current-time + (date->time-utc current-time)) + (set! current-time + (time-second current-time)) + (let ((cc (response-cache-control response #f)) + (date (response-date response + (time-utc->date + (make-time time-utc 0 current-time)))) + (last-modified (response-last-modified response))) + (set! date (date->time-utc date)) + (set! date (time-second date)) + (when last-modified + (set! last-modified (date->time-utc last-modified)) + (set! last-modified (time-second last-modified))) + (if cc + ;; Use cache-control + (let ((cc-no-cache (assq-ref cc 'no-cache)) + (cc-no-store (assq-ref cc 'no-store)) + (cc-max-age + (or (assq-ref cc 'max-age) + ;; Heuristic freshness + (and last-modified + (/ (- date last-modified) 10))))) + (and (not cc-no-cache) + (not cc-no-store) + cc-max-age + (>= (+ date cc-max-age) current-time))) + ;; Use expires + (let ((exp (response-expires response))) + (when exp + (set! exp (date->time-utc exp)) + (set! exp (time-second exp))) + (and exp + (>= exp current-time)))))) + +(define*-public (revalidate uri response body + #:key + (headers '()) + (http-get http-get)) + (define (keep-header? h) + (case (car h) + ((if-none-match if-unmodified-since) #f) + (else #t))) + (let ((etag (response-etag response))) + (if etag + (receive (new-response new-response-body) + (http-get uri + #:headers + (acons 'if-none-match (list etag) + (filter keep-header? headers))) + (if (eqv? (response-code new-response) 304) + (values + (build-response + #:headers + (append + (response-headers new-response) + (filter + (lambda (h) + (case (car h) + ((cache-control content-location date etag expires vary) + ;; These are overriden by new-response + #f) + (else #t))) + (response-headers response)))) + body) + (values new-response new-response-body))) + (http-get uri #:headers headers)))) + +(define*-public (with-cache + #:key + (current-time the-current-time) + (http-get http-get) + (dir default-cache-dir)) + (lambda* (uri #:key (headers '())) + (when (string? uri) + (set! uri (string->uri uri))) + (let ((dir (if (thunk? dir) (dir) dir)) + (request (build-request uri #:headers headers))) + (receive (stored-request stored-response body) + (read uri #:dir dir) + (if stored-response + (let ((valid (valid? stored-response #:current-time the-current-time)) + (invariant (not (varies? request stored-request stored-response)))) + (unless invariant + (format (current-error-port) "Cache entry for ~a varies.\n" (uri->string uri))) + (if (and valid invariant) + (values stored-response body) + (receive (final-response final-body) + (revalidate uri stored-response body + #:headers headers + #:http-get http-get) + (add request final-response final-body #:dir dir) + (values final-response final-body)))) + (receive (final-response final-body) + (http-get uri #:headers headers) + (add request final-response final-body #:dir dir) + (values final-response final-body))))))) diff --git a/src/scm/webid-oidc/stubs.scm b/src/scm/webid-oidc/stubs.scm index 4ed9000..ee24724 100644 --- a/src/scm/webid-oidc/stubs.scm +++ b/src/scm/webid-oidc/stubs.scm @@ -120,3 +120,23 @@ (define fixed:scm->json scm->json) (export (fixed:scm->json . scm->json)) + +(define (mkdir-p name) + (catch 'system-error + (lambda () + (mkdir name)) + (lambda (key subr message args rest) + (case (car rest) + ((17) ;; file exists + #t) + ((2) ;; parent does not exist + (let ((parent (dirname name))) + (unless (equal? parent name) + (mkdir-p parent)) + (mkdir name))) + (else + (throw key subr message args rest)))))) + +(define-public (call-with-output-file* filename . args) + (mkdir-p (dirname filename)) + (apply call-with-output-file filename args)) diff --git a/tests/Makefile.am b/tests/Makefile.am index 6dcb042..d9aea96 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -11,7 +11,9 @@ TESTS = %reldir%/load-library.scm \ %reldir%/jkt.scm \ %reldir%/verify.scm \ %reldir%/verification-failed.scm \ - %reldir%/jws.scm + %reldir%/jws.scm \ + %reldir%/cache-valid.scm \ + %reldir%/cache-revalidate.scm EXTRA_DIST += $(TESTS) diff --git a/tests/cache-revalidate.scm b/tests/cache-revalidate.scm new file mode 100644 index 0000000..29cc038 --- /dev/null +++ b/tests/cache-revalidate.scm @@ -0,0 +1,42 @@ +(use-modules (webid-oidc cache) + (webid-oidc testing) + (web uri) + (web request) + (web response) + (srfi srfi-19) + (ice-9 optargs) + (ice-9 receive)) + +(with-test-environment + "cache-revalidate" + (lambda () + (define original-response + (build-response #:headers `((etag . ("xxx" . #t)) + (content-type text/plain) + (date . ,(time-utc->date (make-time time-utc 0 0)))))) + (define* (backend uri #:key (headers '())) + (unless (equal? uri (string->uri "https://example.com")) + (exit 1)) + (unless (equal? (assq-ref headers 'if-none-match) + '(("xxx" . #t))) + (exit 2)) + (unless (equal? (assq-ref headers 'user-agent) "Testbed") + (exit 3)) + (unless (eqv? (length headers) 2) + (exit 4)) + (values + (build-response #:code 304 #:reason-phrase "Not Modified" + #:headers `((date . ,(time-utc->date (make-time time-utc 0 10))))) + #f)) + (receive (response response-body) + (revalidate (string->uri "https://example.com") original-response "hello" + #:headers `((if-none-match . ("yyy" . #t)) + (if-unmodified-since . ,(time-utc->date (make-time time-utc 0 42))) + (user-agent . "Testbed")) + #:http-get backend) + (unless (eqv? (response-code response) 200) + (exit 5)) + (unless (equal? (response-headers response) + `((date . ,(time-utc->date (make-time time-utc 0 10))) + (content-type text/plain))) + (exit 6))))) diff --git a/tests/cache-valid.scm b/tests/cache-valid.scm new file mode 100644 index 0000000..eda831e --- /dev/null +++ b/tests/cache-valid.scm @@ -0,0 +1,58 @@ +(use-modules (webid-oidc cache) + (webid-oidc testing) + (web uri) + (web request) + (web response) + (srfi srfi-19) + (ice-9 optargs) + (ice-9 receive)) + +(with-test-environment + "cache-valid" + (lambda () + (define response-not-stored + (build-response #:headers `((cache-control . (no-store))))) + (define response-not-cached + (build-response #:headers `((cache-control . (no-cache))))) + (define response-with-expires + (build-response #:headers `((expires . ,(time-utc->date (make-time time-utc 0 120)))))) + (define response-with-overriden-expires + (build-response #:headers `((expires . ,(time-utc->date (make-time time-utc 0 120))) + (cache-control . (private (max-age . 100))) + (date . ,(time-utc->date (make-time time-utc 0 10)))))) + (define response-without-max-age + (build-response #:headers `((cache-control . (private)) + (date . ,(time-utc->date (make-time time-utc 0 10)))))) + (define response-with-heuristic-max-age + (build-response #:headers `((cache-control . (private)) + (last-modified . ,(time-utc->date (make-time time-utc 0 10))) + (date . ,(time-utc->date (make-time time-utc 0 30)))))) + ;; response-not-stored: never valid. + (when (valid? response-not-stored #:current-time 0) + (exit 1)) + (when (valid? response-not-stored #:current-time 100) + (exit 2)) + ;; response-not-cached: never valid. + (when (valid? response-not-cached #:current-time 0) + (exit 3)) + (when (valid? response-not-cached #:current-time 100) + (exit 4)) + ;; response-with-expires: valid at 110, invalid at 130. + (unless (valid? response-with-expires #:current-time 110) + (exit 5)) + (when (valid? response-with-expires #:current-time 130) + (exit 6)) + ;; response-with-overriden-expires: valid at 105, invalid at 115 + (unless (valid? response-with-overriden-expires #:current-time 105) + (exit 7)) + (when (valid? response-with-overriden-expires #:current-time 115) + (exit 8)) + ;; response-without-max-age: not valid, cannot get a heuristic + (when (valid? response-without-max-age #:current-time 10) + (exit 9)) + ;; response-with-heuristic-max-age: the heuristic max age is 2, so + ;; it is valid at 31 but not at 33. + (unless (valid? response-with-heuristic-max-age #:current-time 31) + (exit 10)) + (when (valid? response-with-heuristic-max-age #:current-time 33) + (exit 11)))) |