;; disfluid, implementation of the Solid specification ;; Copyright (C) 2020, 2021 Vivien Kraus ;; This program is free software: you can redistribute it and/or modify ;; it under the terms of the GNU Affero General Public License as ;; published by the Free Software Foundation, either version 3 of the ;; License, or (at your option) any later version. ;; This program is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU Affero General Public License for more details. ;; You should have received a copy of the GNU Affero General Public License ;; along with this program. If not, see . (define-module (webid-oidc cache) #:use-module ((webid-oidc stubs) #:prefix stubs:) #:use-module (web client) #:use-module (web request) #:use-module (web response) #:use-module (web uri) #:use-module (ice-9 ftw) #:use-module (ice-9 receive) #:use-module (ice-9 optargs) #:use-module (srfi srfi-19) #:use-module (rnrs bytevectors)) ;; The cache follows the recommendations of ;; https://developer.mozilla.org/en-US/docs/Web/HTTP/Caching ;; In a directory, we dump files, indexed by the hash of the URI, and ;; containing the request, response and response body. ;; We keep only one version of the resource, even with varying ;; headers. If the cache validation fails due to varying headers, we ;; simply drop the old variant and replace it with the new. For things ;; like user agents or accepted language or encoding, you’re not ;; likely to alternate quickly between two values anyway. ;; We’re not very concerned by the cache growing. We provide a cleanup ;; function that will kill a few percents of the cached entries at ;; random, and that you should call with a cron job regularly. It will ;; sometimes drop valid cached entries, but life goes on. By default, ;; each cache update has a 5% chance to trigger the elimination of 5% ;; of the cache. ;; There is a global lock file at the root of the cache, which serves ;; for region locking. Do not remove it! (define (default-cache-dir) (let ((xdg-cache-home (or (getenv "XDG_CACHE_HOME") (format #f "~a/.cache" (getenv "HOME"))))) (format #f "~a/disfluid" xdg-cache-home))) (define (web-cache-dir dir) (when (thunk? dir) (set! dir (dir))) (string-append dir "/web-cache/")) (define (file-name uri dir) (when (string? uri) (set! uri (string->uri uri))) (string-append (web-cache-dir dir) (stubs:hash 'SHA-256 (uri->string uri)))) (define (lock-file-name dir) (string-append (web-cache-dir dir) ".lock")) (define (event? percents) (<= (* (random:uniform) 100) percents)) (define*-public (clean-cache #:key (percents 5) (dir default-cache-dir)) (define lock-file (lock-file-name dir)) (define (survives?) (not (event? percents))) (define (enter? name stat result) #t) (define (leaf name stat result) (unless (or (string-suffix? ".lock" name) (survives?)) (unless (false-if-exception (begin (format (current-error-port) "Dropping cache item ~a.~%" name) (stubs:atomically-update-file name lock-file (lambda (whatever) #f)))) (format (current-error-port) "Could not clean file ~a.~%" name))) result) (define (down name stat result) result) (define (up name stat result) result) (define (skip name stat result) result) (define (error name stat errno result) (format (current-error-port) "While cleaning the cache: ~a: ~a~%" name (strerror errno)) result) (file-system-fold enter? leaf down up skip error 0 (web-cache-dir dir))) (define (maybe-clean-cache pc-happen pc-cleaned dir) (when (event? pc-happen) (clean-cache #:percents pc-cleaned #:dir dir))) (define (remove-uncacheable-headers response) (let ((headers (response-headers response))) (let ((filtered (filter (lambda (h) (case (car h) ((connection keep-alive transfer-encoding) #f) (else #t))) headers))) (build-response #:code (response-code response) #:reason-phrase (response-reason-phrase response) #:headers filtered #:port #f)))) (define*-public (add request response response-body #:key (dir default-cache-dir)) ;; Don’t store it if there’s a cache-control no-store (unless (let ((cc (response-cache-control response '()))) (assq-ref cc 'no-store)) (set! response (remove-uncacheable-headers response)) (let ((final-file-name (file-name (request-uri request) dir)) (lock-file (lock-file-name dir))) (maybe-clean-cache 5 5 dir) (stubs:atomically-update-file final-file-name lock-file (lambda (port) (write-request request port) (let ((file-response (write-response response port))) (when (string? response-body) (set! response-body (string->utf8 response-body))) (when response-body (write-response-body file-response response-body)) #t)))))) (define (the-current-time) (time-utc->date (current-time))) (define*-public (read uri #:key (dir default-cache-dir)) (let ((final-file-name (file-name uri dir))) (catch 'system-error (lambda () (call-with-input-file final-file-name (lambda (port) (let ((request (read-request port))) (let ((response (read-response port))) (values request response (read-response-body response))))))) (lambda error (format (current-error-port) "Cache miss for ~a: ~s~%" (uri->string uri) error) (values #f #f #f))))) (define (varies-header? request-a request-b header) (let ((a (assq-ref (request-headers request-a) header)) (b (assq-ref (request-headers request-b) header))) (not (equal? a b)))) (define (varies-any-header? request-a request-b headers) (and (not (null? headers)) (or (varies-header? request-a request-b (car headers)) (varies-any-header? request-a request-b (cdr headers))))) (define-public (varies? request-a request-b response) (let ((vary (response-vary response))) (or (eq? vary '*) (varies-any-header? request-a request-b vary)))) (define*-public (valid? response #:key (current-time the-current-time)) (when (thunk? current-time) (set! current-time (current-time))) (when (integer? current-time) (set! current-time (make-time time-utc 0 current-time))) (when (time? current-time) (set! current-time (time-utc->date current-time))) (set! current-time (date->time-utc current-time)) (set! current-time (time-second current-time)) (let ((cc (response-cache-control response #f)) (date (response-date response (time-utc->date (make-time time-utc 0 current-time)))) (last-modified (response-last-modified response))) (set! date (date->time-utc date)) (set! date (time-second date)) (when last-modified (set! last-modified (date->time-utc last-modified)) (set! last-modified (time-second last-modified))) (if cc ;; Use cache-control (let ((cc-no-cache (assq-ref cc 'no-cache)) (cc-no-store (assq-ref cc 'no-store)) (cc-max-age (or (assq-ref cc 'max-age) ;; Heuristic freshness (and last-modified (/ (- date last-modified) 10))))) (and (not cc-no-cache) (not cc-no-store) cc-max-age (>= (+ date cc-max-age) current-time))) ;; Use expires (let ((exp (response-expires response))) (when exp (set! exp (date->time-utc exp)) (set! exp (time-second exp))) (and exp (>= exp current-time)))))) (define*-public (revalidate uri response body #:key (headers '()) (http-get http-get)) (define (keep-header? h) (case (car h) ((if-none-match if-unmodified-since) #f) (else #t))) (let ((etag (response-etag response))) (if etag (receive (new-response new-response-body) (http-get uri #:headers (acons 'if-none-match (list etag) (filter keep-header? headers))) (if (eqv? (response-code new-response) 304) (values (build-response #:headers (append (response-headers new-response) (filter (lambda (h) (case (car h) ((cache-control content-location date etag expires vary) ;; These are overriden by new-response #f) (else #t))) (response-headers response)))) body) (values new-response new-response-body))) (http-get uri #:headers headers)))) (define*-public (with-cache #:key (current-time the-current-time) (http-get http-get) (dir default-cache-dir)) (lambda* (uri #:key (headers '())) (when (string? uri) (set! uri (string->uri uri))) (let ((dir (if (thunk? dir) (dir) dir)) (request (build-request uri #:headers headers))) (receive (stored-request stored-response body) (read uri #:dir dir) (if stored-response (let ((valid (valid? stored-response #:current-time the-current-time)) (invariant (not (varies? request stored-request stored-response)))) (unless invariant (format (current-error-port) "Cache entry for ~a varies.\n" (uri->string uri))) (if (and valid invariant) (values stored-response body) (receive (final-response final-body) (revalidate uri stored-response body #:headers headers #:http-get http-get) (add request final-response final-body #:dir dir) (values final-response final-body)))) (receive (final-response final-body) (http-get uri #:headers headers) (add request final-response final-body #:dir dir) (values final-response final-body)))))))