From 4372c7c8fe46a7612ef5e4f15695d23118d025eb Mon Sep 17 00:00:00 2001 From: Vivien Kraus Date: Sun, 21 Feb 2021 15:53:03 +0100 Subject: Add tests --- Makefile.am | 3 + web-client-with-cache.org | 369 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 367 insertions(+), 5 deletions(-) diff --git a/Makefile.am b/Makefile.am index bea440d..0671829 100644 --- a/Makefile.am +++ b/Makefile.am @@ -44,3 +44,6 @@ $(srcdir)/doc/web-client-with-cache.texi: web-client-with-cache.org --eval "(setq geiser-scheme-implementation 'guile)" \ -f org-texinfo-export-to-texinfo @mv web-client-with-cache.texi doc/web-client-with-cache.texi + +check-local: pre-inst-env web/client/with-cache.scm + ./pre-inst-env guile -c "(begin (use-modules (web client with-cache)) (run-tests))" diff --git a/web-client-with-cache.org b/web-client-with-cache.org index 17d3f4f..0639768 100644 --- a/web-client-with-cache.org +++ b/web-client-with-cache.org @@ -325,6 +325,9 @@ The expiration date can be: - the value of the Expires header; - =#f= if the others failed. +If the max-age is unreasonably large, the response will overstay. So, +we will cap the max-age with the value of one day. + This is what [[fn-estimate-response-expires]] does. #+name: fn-estimate-response-expires @@ -334,8 +337,11 @@ This is what [[fn-estimate-response-expires]] does. (let ((cache-control (response-cache-control response '())) (expires (response-expires response)) (date (response-date response))) - (let ((max-age (assoc-ref cache-control 'max-age))) + (let ((max-age (assoc-ref cache-control 'max-age)) + (reasonable-max-age (* 1 24 60 60))) (cond (max-age + (when (>= max-age reasonable-max-age) + (set! max-age reasonable-max-age)) (date-add date max-age)) (expires expires) (else #f))))) @@ -359,6 +365,26 @@ function [[fn-merge-headers]]. (hash-table->alist table)))) #+end_src +** Detect useless responses +If we just cache all responses for ever, the cache will grow +indefinitely and eat up all the memory. However, some responses are +useless, and we should not keep them for too long. Useless responses +are responses that have expired some time ago. Since the Date header +is always refreshed when the response is refreshed with ETags, this +means that the ETag responses have not been used for a day. + +Function [[fn-useful-p]] tests whether a response is useful. + +#+name: fn-useful-p +#+caption: Check whether a response is useful +#+begin_src scheme :eval no + (define (useful? current-time response response-body) + (let ((end (date-add + (or (estimate-response-expires response) + (response-date response)) + (* 1 24 60 60)))) + (not (date>=? current-time end)))) +#+end_src ** Wrap =http-fetch= so as to store the response Now we can start using the =covoiturage= function. The inner function will call =http-get=, but between the moment we have the response and @@ -366,7 +392,8 @@ response body and the moment we return them, we add them to a web cache. The function [[fn-with-cache-add-to-box]] atomically adds the response and -response-body to the cache, while ensuring there is a Date header. +response-body to the cache, while ensuring there is a Date header. It +also filters the other entries so as to remove the useless responses. #+name: fn-with-cache-add-to-box #+caption: Add a response and its body to the cache @@ -377,7 +404,8 @@ response-body to the cache, while ensuring there is a Date header. (new (acons key (cons response body) (filter (lambda (cell) - (not (equal? key (car cell)))) + (and (not (equal? key (car cell))) + (useful? current-time (cadr cell) (cddr cell)))) old))) (discarded (atomic-box-compare-and-swap! box old new))) (if (eq? discarded old) @@ -441,9 +469,9 @@ function [[fn-with-cache-query]]. ;; The response is cached but expired (let ((etag (response-etag response)) (last-modified (response-last-modified response))) - (when etag + (when (and etag (cdr etag)) (set! request-headers - (acons 'if-none-match (list etag) request-headers))) + (acons 'if-none-match (list (car etag)) request-headers))) (when last-modified (set! request-headers (acons 'if-modified-since last-modified request-headers))) @@ -522,6 +550,7 @@ The final function is [[fn-with-cache]]. It takes its docstring from <> <> <> + <> <> <> (define smart-http-get @@ -529,8 +558,338 @@ The final function is [[fn-with-cache]]. It takes its docstring from <> (lambda (uri . args) (apply with-cache-query box (current-time) smart-http-get http-get uri args)))) + + (define-public (run-tests) + <>) +#+end_src + +* Test cases +We provide a few test cases to ensure that the caching works. First, +function [[fn-run-test-case]] is defined. It takes a list of request URIs +with time, a list of expected requests with their headers, and a list +of responses. It will process each request in order, check that the +back-end receive the expected requests, and respond the responses. + +#+name: fn-run-test-case +#+caption: Run a test case +#+begin_src scheme :eval no + (define (headers-equal? x y) + (equal? (hash-table->alist (alist->hash-table x)) + (hash-table->alist (alist->hash-table y)))) + + (define (response-equal? x y) + (and (equal? (response-version x) (response-version y)) + (eq? (response-code x) (response-code y)) + (equal? (response-reason-phrase x) + (response-reason-phrase y)) + (headers-equal? (response-headers x) (response-headers y)))) + + (define (run-test-case) + (define current-time (make-time time-utc 0 0)) + (define backend-request-uri #f) + (define backend-request-headers #f) + (define backend-response #f) + (define backend-response-body #f) + (define cache (make-atomic-box '())) + (let ((http-get + (with-cache + #:box + cache + #:current-time + (lambda () current-time) + #:http-get + (lambda* (uri #:key (headers '())) + (unless (and backend-request-uri + backend-request-headers + backend-response) + (format (current-error-port) + "Test failed: the backend did not expect to have to respond to ~s ~s.\n" + uri headers) + (format (current-error-port) "The cache is currently ~s.\n" (atomic-box-ref cache)) + (exit 1)) + (unless (and (equal? uri backend-request-uri) + (headers-equal? headers backend-request-headers)) + (format (current-error-port) + "Test failed: the backend expected to respond to ~s ~s, not ~s ~s.\n" + backend-request-uri backend-request-headers + uri headers) + (format (current-error-port) "The cache is currently ~s.\n" (atomic-box-ref cache)) + (exit 2)) + (let ((ret-response backend-response) + (ret-body backend-response-body)) + (set! backend-request-uri #f) + (set! backend-request-headers #f) + (set! backend-response #f) + (set! backend-response-body #f) + (values ret-response ret-body)))))) + (lambda (time + uri headers + request-uri request-headers + response response-body + expected-response expected-response-body) + (set! current-time + (make-time time-utc 0 time)) + (set! backend-request-uri request-uri) + (set! backend-request-headers request-headers) + (set! backend-response response) + (set! backend-response-body response-body) + (receive (true-response true-response-body) + (http-get uri #:headers headers) + (when (or backend-request-uri backend-request-headers backend-response) + (format (current-error-port) + "Test failed: there should have been a backend call to ~s ~s for which we would have responded ~s ~s.\n" backend-request-uri backend-request-headers + backend-response backend-response-body) + (format (current-error-port) "The cache is currently ~s.\n" (atomic-box-ref cache)) + (exit 3)) + (unless (and (response-equal? true-response expected-response) + (equal? true-response-body expected-response-body)) + (format (current-error-port) + "Test failed: the response should be ~s / ~s, but it is ~s / ~s.\n" + expected-response expected-response-body true-response true-response-body) + (format (current-error-port) "The cache is currently ~s.\n" (atomic-box-ref cache)) + (exit 4)))))) +#+end_src + +Let's see immediately an example, program [[test-1]]. + +#+name: test-1 +#+caption: Check that responses with a max-age cache control are cached, until the expiration date +#+begin_src scheme :eval no + (let ((script (run-test-case))) + ;; At time 0, we request example.com, which is cached for 10 + ;; seconds. Since the cache is empty, it translates directly to a + ;; call to the back-end with no headers. + (let ((response-at-0 + (build-response + #:code 200 + #:reason-phrase "OK" + #:headers `((date . ,(time-utc->date (make-time time-utc 0 0))) + (cache-control . (public (max-age . 10))) + (content-type . (application/json))))) + (response-body-at-0 "{}")) + (script 0 "https://example.com" '() + "https://example.com" '() + response-at-0 response-body-at-0 + response-at-0 response-body-at-0) + ;; At time 5, we request example.com again, and we don't expect + ;; the back-end to generate a call. + (script 5 "https://example.com" '() + #f #f + #f #f + response-at-0 response-body-at-0) + ;; At time 15, we request example.com again, and we expect the + ;; cache to be invalid. + (let ((response-at-15 + (build-response + #:code 200 + #:reason-phrase "OK" + #:headers `((date . ,(time-utc->date (make-time time-utc 0 15))) + (cache-control . (public (max-age . 10))) + (content-type . (application/json))))) + (response-body-at-15 "{\"updated\": \"yes!\"}")) + (script 15 "https://example.com" '() + "https://example.com" '() + response-at-15 response-body-at-15 + response-at-15 response-body-at-15)))) #+end_src +#+name: test-2 +#+caption: Check that responses with a max-age cache control are cached, until the expiration date +#+begin_src scheme :eval no + (let ((script (run-test-case))) + ;; At time 0, we request example.com, which is set to expire at time + ;; 10. Since the cache is empty, it translates directly to a call to + ;; the back-end with no headers. + (let ((response-at-0 + (build-response + #:code 200 + #:reason-phrase "OK" + #:headers `((date . ,(time-utc->date (make-time time-utc 0 0))) + (expires . ,(time-utc->date (make-time time-utc 0 10))) + (content-type . (application/json))))) + (response-body-at-0 "{}")) + (script 0 "https://example.com" '() + "https://example.com" '() + response-at-0 response-body-at-0 + response-at-0 response-body-at-0) + ;; At time 5, we request example.com again, and we don't expect + ;; the back-end to generate a call. + (script 5 "https://example.com" '() + #f #f + #f #f + response-at-0 response-body-at-0) + ;; At time 15, we request example.com again, and we expect the + ;; cache to be invalid. + (let ((response-at-15 + (build-response + #:code 200 + #:reason-phrase "OK" + #:headers `((date . ,(time-utc->date (make-time time-utc 0 15))) + (expires . ,(time-utc->date (make-time time-utc 0 25))) + (content-type . (application/json))))) + (response-body-at-15 "{\"updated\": \"yes!\"}")) + (script 15 "https://example.com" '() + "https://example.com" '() + response-at-15 response-body-at-15 + response-at-15 response-body-at-15)))) +#+end_src + +#+name: test-3 +#+caption: Check that responses with an ETag that are used once per day stay +#+begin_src scheme :eval no + (let ((script (run-test-case))) + ;; At time 0, we request example.com, which sets an ETag. + (let ((response-at-0 + (build-response + #:code 200 + #:reason-phrase "OK" + #:headers `((date . ,(time-utc->date (make-time time-utc 0 0))) + (etag . ("Version_0.0.0" . #t)) + (content-type . (application/json))))) + (response-body-at-0 "{}")) + (script 0 "https://example.com" '() + "https://example.com" '() + response-at-0 response-body-at-0 + response-at-0 response-body-at-0)) + ;; 20 hours later, we request that again. + (let ((response-20-hours-later + (build-response + #:code 304 + #:reason-phrase "Not Modified" + #:headers `((date . ,(time-utc->date (make-time time-utc 0 (* 20 60 60)))))))) + (script (* 20 60 60) "https://example.com" '() + "https://example.com" '((if-none-match "Version_0.0.0")) + response-20-hours-later #f + (build-response + #:code 200 + #:reason-phrase "OK" + #:headers `((date . ,(time-utc->date (make-time time-utc 0 (* 20 60 60)))) + (etag . ("Version_0.0.0" . #t)) + (content-type . (application/json)))) + "{}")) + ;; 30 hours later, it should have been cleaned. We first trigger a + ;; cleaning: + (let ((cleaning-response + (build-response + #:code 200 + #:reason-phrase "OK"))) + (script (* 50 60 60) "https://example.com/cleanup" '() + "https://example.com/cleanup" '() + cleaning-response #f + (build-response + #:code 200 + #:reason-phrase "OK" + ;; The date is automatically added: + #:headers `((date . ,(time-utc->date (make-time time-utc 0 (* 50 60 60)))))) + #f)) + ;; Then we try the primary URL again: + (script (* 50 60 60) "https://example.com" '() + "https://example.com" '() + (build-response + #:code 200 + #:reason-phrase "OK" + #:headers `((date . ,(time-utc->date (make-time time-utc 0 (* 50 60 60)))) + (etag . ("Version_0.1.0" . #t)) + (content-type . (application/json)))) + "{\"version\": \"0.1.0\"}" + (build-response + #:code 200 + #:reason-phrase "OK" + #:headers `((date . ,(time-utc->date (make-time time-utc 0 (* 50 60 60)))) + (etag . ("Version_0.1.0" . #t)) + (content-type . (application/json)))) + "{\"version\": \"0.1.0\"}")) +#+end_src + +#+name: test-4 +#+caption: Check that responses with a max-age of one day and an ETag are still present 1.5 days later +#+begin_src scheme :eval no + (let ((script (run-test-case))) + ;; At time 0, we request example.com, which sets an ETag and has a max-age of 1 day. + (let ((response-at-0 + (build-response + #:code 200 + #:reason-phrase "OK" + #:headers `((date . ,(time-utc->date (make-time time-utc 0 0))) + (etag . ("Version_0.0.0" . #t)) + (cache-control . (public (max-age . ,(* 24 60 60)))) + (content-type . (application/json))))) + (response-body-at-0 "{}")) + (script 0 "https://example.com" '() + "https://example.com" '() + response-at-0 response-body-at-0 + response-at-0 response-body-at-0)) + ;; 1.5 days later, we request that again. The response has not been + ;; expired for a full day yet, so it should still be present. + (let ((response-later + (build-response + #:code 304 + #:reason-phrase "Not Modified" + #:headers `((date . ,(time-utc->date (make-time time-utc 0 (* 30 60 60)))))))) + (script (* 30 60 60) "https://example.com" '() + "https://example.com" '((if-none-match "Version_0.0.0")) + response-later #f + (build-response + #:code 200 + #:reason-phrase "OK" + #:headers `((date . ,(time-utc->date (make-time time-utc 0 (* 30 60 60)))) + (etag . ("Version_0.0.0" . #t)) + (cache-control . (public (max-age . ,(* 24 60 60)))) + (content-type . (application/json)))) + "{}"))) +#+end_src + +#+name: test-5 +#+caption: Check that responses with a max-age of a week disappear after one day +#+begin_src scheme :eval no + (let ((script (run-test-case))) + ;; At time 0, we request example.com, which sets a max-age of 10 days. + (let ((response-at-0 + (build-response + #:code 200 + #:reason-phrase "OK" + #:headers `((date . ,(time-utc->date (make-time time-utc 0 0))) + (cache-control . (public (max-age . ,(* 10 24 60 60)))) + (content-type . (application/json))))) + (response-body-at-0 "{}")) + (script 0 "https://example.com" '() + "https://example.com" '() + response-at-0 response-body-at-0 + response-at-0 response-body-at-0)) + ;; After 30 hours, clean + (let ((cleaning-response + (build-response + #:code 200 + #:reason-phrase "OK" + #:headers `((date . ,(time-utc->date (make-time time-utc 0 (* 30 60 60)))))))) + (script (* 30 60 60) "https://example.com/cleanup" '() + "https://example.com/cleanup" '() + cleaning-response #f + cleaning-response #f)) + ;; So now the response should not be cached anymore. + (let ((response-next-day + (build-response + #:code 200 + #:reason-phrase "OK" + #:headers `((date . ,(time-utc->date (make-time time-utc 0 (* 30 60 60)))) + (content-type . (application/json))))) + (response-body-next-day "{\"updated\": \"yes!\"}")) + (script (* 30 60 60) "https://example.com" '() + "https://example.com" '() + response-next-day response-body-next-day + response-next-day response-body-next-day))) +#+end_src + +** All the tests :noexport: +#+name: tests +#+begin_src scheme :eval no :noweb yes + <> + <> + <> + <> + <> + <> +#+end_src * GNU Free Documentation License :PROPERTIES: :APPENDIX: t -- cgit v1.2.3