;; disfluid, implementation of the Solid specification ;; Copyright (C) 2020, 2021 Vivien Kraus ;; This program is free software: you can redistribute it and/or modify ;; it under the terms of the GNU Affero General Public License as ;; published by the Free Software Foundation, either version 3 of the ;; License, or (at your option) any later version. ;; This program is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU Affero General Public License for more details. ;; You should have received a copy of the GNU Affero General Public License ;; along with this program. If not, see . (define-module (webid-oidc cache) #:use-module ((webid-oidc stubs) #:prefix stubs:) #:use-module ((webid-oidc parameters) #:prefix p:) #:use-module (web client) #:use-module (web request) #:use-module (web response) #:use-module (web uri) #:use-module (ice-9 ftw) #:use-module (ice-9 receive) #:use-module (ice-9 optargs) #:use-module (srfi srfi-19) #:use-module (rnrs bytevectors) #:export ( clean-cache add read varies? valid? revalidate with-cache )) ;; The cache follows the recommendations of ;; https://developer.mozilla.org/en-US/docs/Web/HTTP/Caching ;; In a directory, we dump files, indexed by the hash of the URI, and ;; containing the request, response and response body. ;; We keep only one version of the resource, even with varying ;; headers. If the cache validation fails due to varying headers, we ;; simply drop the old variant and replace it with the new. For things ;; like user agents or accepted language or encoding, you’re not ;; likely to alternate quickly between two values anyway. ;; We’re not very concerned by the cache growing. We provide a cleanup ;; function that will kill a few percents of the cached entries at ;; random, and that you should call with a cron job regularly. It will ;; sometimes drop valid cached entries, but life goes on. By default, ;; each cache update has a 5% chance to trigger the elimination of 5% ;; of the cache. ;; There is a global lock file at the root of the cache, which serves ;; for region locking. Do not remove it! (define (web-cache-dir) (string-append (p:cache-home) "/web-cache/")) (define (file-name uri) (when (string? uri) (set! uri (string->uri uri))) (string-append (web-cache-dir) (stubs:hash 'SHA-256 (uri->string uri)))) (define (lock-file-name) (string-append (web-cache-dir) ".lock")) (define (event? percents) (<= (* (random:uniform) 100) percents)) (define* (clean-cache #:key (percents 5)) (define lock-file (lock-file-name)) (define (survives?) (not (event? percents))) (define (enter? name stat result) #t) (define (leaf name stat result) (unless (or (string-suffix? ".lock" name) (survives?)) (unless (false-if-exception (begin (format (current-error-port) "Dropping cache item ~a.~%" name) (stubs:atomically-update-file name lock-file (lambda (whatever) #f)))) (format (current-error-port) "Could not clean file ~a.~%" name))) result) (define (down name stat result) result) (define (up name stat result) result) (define (skip name stat result) result) (define (error name stat errno result) (format (current-error-port) "While cleaning the cache: ~a: ~a~%" name (strerror errno)) result) (file-system-fold enter? leaf down up skip error 0 (web-cache-dir))) (define (maybe-clean-cache pc-happen pc-cleaned) (when (event? pc-happen) (clean-cache #:percents pc-cleaned))) (define (remove-uncacheable-headers response) (let ((headers (response-headers response))) (let ((filtered (filter (lambda (h) (case (car h) ((connection keep-alive transfer-encoding) #f) (else #t))) headers))) (build-response #:code (response-code response) #:reason-phrase (response-reason-phrase response) #:headers filtered #:port #f)))) (define (add request response response-body) ;; Don’t store it if there’s a cache-control no-store (unless (let ((cc (response-cache-control response '()))) (assq-ref cc 'no-store)) (set! response (remove-uncacheable-headers response)) (let ((final-file-name (file-name (request-uri request))) (lock-file (lock-file-name))) (maybe-clean-cache 5 5) (stubs:atomically-update-file final-file-name lock-file (lambda (port) (write-request request port) (let ((file-response (write-response response port))) (when (string? response-body) (set! response-body (string->utf8 response-body))) (when response-body (write-response-body file-response response-body)) #t)))))) (define (read uri) (let ((final-file-name (file-name uri))) (catch 'system-error (lambda () (call-with-input-file final-file-name (lambda (port) (let ((request (read-request port))) (let ((response (read-response port))) (values request response (read-response-body response))))))) (lambda error (format (current-error-port) "Cache miss for ~a: ~s~%" (uri->string uri) error) (values #f #f #f))))) (define (varies-header? request-a request-b header) (let ((a (assq-ref (request-headers request-a) header)) (b (assq-ref (request-headers request-b) header))) (not (equal? a b)))) (define (varies-any-header? request-a request-b headers) (and (not (null? headers)) (or (varies-header? request-a request-b (car headers)) (varies-any-header? request-a request-b (cdr headers))))) (define (varies? request-a request-b response) (let ((vary (response-vary response))) (or (eq? vary '*) (varies-any-header? request-a request-b vary)))) (define (valid? response) ;; current-date is a thunk parameter (let* ((current-date ((p:current-date))) (current-time (time-second (date->time-utc current-date)))) (let ((cc (response-cache-control response #f)) (date (time-second (date->time-utc (response-date response current-date)))) (last-modified (let ((as-date (response-last-modified response))) (and as-date (time-second (date->time-utc as-date)))))) (if cc ;; Use cache-control (let ((cc-no-cache (assq-ref cc 'no-cache)) (cc-no-store (assq-ref cc 'no-store)) (cc-max-age (or (assq-ref cc 'max-age) ;; Heuristic freshness (and last-modified (/ (- date last-modified) 10))))) (and (not cc-no-cache) (not cc-no-store) cc-max-age (>= (+ date cc-max-age) current-time))) ;; Use expires (let ((exp (let ((as-date (response-expires response))) (and as-date (time-second (date->time-utc as-date)))))) (and exp (>= exp current-time))))))) (define* (revalidate uri response body #:key (headers '()) (http-get http-get)) (define (keep-header? h) (case (car h) ((if-none-match if-unmodified-since) #f) (else #t))) (let ((etag (response-etag response))) (if etag (receive (new-response new-response-body) (http-get uri #:headers (acons 'if-none-match (list etag) (filter keep-header? headers))) (if (eqv? (response-code new-response) 304) (values (build-response #:headers (append (response-headers new-response) (filter (lambda (h) (case (car h) ((cache-control content-location date etag expires vary) ;; These are overriden by new-response #f) (else #t))) (response-headers response)))) body) (values new-response new-response-body))) (http-get uri #:headers headers)))) (define* (with-cache #:key (http-get http-get)) (lambda* (uri #:key (headers '())) (when (string? uri) (set! uri (string->uri uri))) (let ((request (build-request uri #:headers headers))) (receive (stored-request stored-response body) (read uri) (if stored-response (let ((valid (valid? stored-response)) (invariant (not (varies? request stored-request stored-response)))) (unless invariant (format (current-error-port) "Cache entry for ~a varies.\n" (uri->string uri))) (if (and valid invariant) (values stored-response body) (receive (final-response final-body) (revalidate uri stored-response body #:headers headers #:http-get http-get) (add request final-response final-body) (values final-response final-body)))) (receive (final-response final-body) (http-get uri #:headers headers) (add request final-response final-body) (values final-response final-body)))))))