From aaaf9ad6e2895d7ae85fb1b41f49f726b9324d18 Mon Sep 17 00:00:00 2001 From: Vivien Kraus Date: Thu, 27 May 2021 20:33:47 +0200 Subject: Easier interface to query an RDF dataset --- src/scm/webid-oidc/ChangeLog | 6 ++ src/scm/webid-oidc/Makefile.am | 6 +- src/scm/webid-oidc/rdf-index.scm | 135 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 145 insertions(+), 2 deletions(-) create mode 100644 src/scm/webid-oidc/rdf-index.scm diff --git a/src/scm/webid-oidc/ChangeLog b/src/scm/webid-oidc/ChangeLog index 2f6025a..04d357f 100644 --- a/src/scm/webid-oidc/ChangeLog +++ b/src/scm/webid-oidc/ChangeLog @@ -1,3 +1,9 @@ +2021-06-18 Vivien Kraus + + * rdf-index.scm (build-index): Normalize the URIs when building + the index, so that we can’t disguise triples. + (rdf-match): Normalize the URIs of the query too. + 2021-05-07 Vivien Kraus * token-endpoint.scm (make-token-endpoint): The token endpoint diff --git a/src/scm/webid-oidc/Makefile.am b/src/scm/webid-oidc/Makefile.am index 6aeadfc..af8746f 100644 --- a/src/scm/webid-oidc/Makefile.am +++ b/src/scm/webid-oidc/Makefile.am @@ -24,7 +24,8 @@ dist_webidoidcmod_DATA += \ %reldir%/hello-world.scm \ %reldir%/reverse-proxy.scm \ %reldir%/client.scm \ - %reldir%/example-app.scm + %reldir%/example-app.scm \ + %reldir%/rdf-index.scm webidoidcgo_DATA += \ %reldir%/errors.go \ @@ -52,6 +53,7 @@ webidoidcgo_DATA += \ %reldir%/hello-world.go \ %reldir%/reverse-proxy.go \ %reldir%/client.go \ - %reldir%/example-app.go + %reldir%/example-app.go \ + %reldir%/rdf-index.go EXTRA_DIST += %reldir%/ChangeLog diff --git a/src/scm/webid-oidc/rdf-index.scm b/src/scm/webid-oidc/rdf-index.scm new file mode 100644 index 0000000..f756afa --- /dev/null +++ b/src/scm/webid-oidc/rdf-index.scm @@ -0,0 +1,135 @@ +(define-module (webid-oidc rdf-index) + #:use-module (oop goops) + #:use-module (rdf rdf) + #:use-module (web uri) + #:export + ( + with-index + )) + +(define (normalize uri) + ;; It is possible to hide triples by percent-escaping + ;; some characters, so that match will fail to see + ;; them. With normalization, it should be impossible. + (when (string? uri) + (set! uri (string->uri uri))) + (let ((scheme (uri-scheme uri)) + (userinfo (uri-userinfo uri)) + (host (uri-host uri)) + (port (uri-port uri)) + (path (uri-path uri)) + (query (uri-query uri)) + (fragment (uri-fragment uri))) + (let ((normalized-scheme scheme) + (normalized-userinfo userinfo) + (normalized-host host) + (normalized-port port) + (normalized-path + (let ((path-ends-in-slash? (string-suffix? "/" path))) + (string-append + "/" + (encode-and-join-uri-path + (split-and-decode-uri-path path)) + (if (and (not (equal? path "/")) + path-ends-in-slash?) + "/" + "")))) + (normalized-query + (and query + (uri-encode (uri-decode query)))) + (normalized-fragment + (and fragment + (uri-encode (uri-decode fragment))))) + (build-uri normalized-scheme + #:userinfo normalized-userinfo + #:host normalized-host + #:port normalized-port + #:path normalized-path + #:query normalized-query + #:fragment normalized-fragment)))) + +(define (normalize-object object) + (if (string? object) + (uri->string (normalize object)) + object)) + +(define-class () + (triples #:init-keyword #:triples #:getter triples) + (subject-index #:init-thunk (lambda () (make-hash-table)) + #:getter subject-index) + (predicate-index #:init-thunk (lambda () (make-hash-table)) + #:getter predicate-index) + (object-index #:init-thunk (lambda () (make-hash-table)) + #:getter object-index)) + +(define (build-index triples) + (let ((ret (make #:triples (list->vector triples)))) + (define (do-index n triples) + (unless (null? triples) + (let ((first (car triples)) + (rest (cdr triples))) + (let ((s (normalize-object (rdf-triple-subject first))) + (p (normalize-object (rdf-triple-predicate first))) + (o (normalize-object (rdf-triple-object first)))) + (let ((other-s (hash-ref (subject-index ret) s '())) + (other-p (hash-ref (predicate-index ret) p '())) + (other-o (hash-ref (object-index ret) o '())) + (i (- n 1))) + (hash-set! (subject-index ret) s (cons i other-s)) + (hash-set! (predicate-index ret) p (cons i other-p)) + (hash-set! (object-index ret) o (cons i other-o)))) + (do-index (- n 1) rest)))) + (do-index (length triples) (reverse triples)) + ret)) + +(define (intersection-2 a b) + (cond + ((not a) b) + ((not b) a) + ((or (null? a) (null? b)) + '()) + ((< (car a) (car b)) + (intersection-2 (cdr a) b)) + ((> (car a) (car b)) + (intersection-2 a (cdr b))) + (else + (cons (car a) (intersection-2 (cdr a) (cdr b)))))) + +(define (intersection a . rest) + (if (null? rest) + a + (let ((b (car rest)) + (true-rest (cdr rest))) + (apply intersection (intersection-2 a b) true-rest)))) + +(define (rdf-match index subject predicate object) + (let ((by-subject + (and subject + (hash-ref (subject-index index) + (normalize-object subject) + '()))) + (by-predicate + (and predicate + (hash-ref (predicate-index index) + (normalize-object predicate) + '()))) + (by-object + (and object + (hash-ref (object-index index) + (normalize-object object) + '())))) + (let ((indices (intersection by-subject by-predicate by-object))) + (define (accumulate-triples acc i) + (if (null? i) + (reverse acc) + (let ((t (vector-ref (triples index) (car i)))) + (accumulate-triples (cons t acc) (cdr i))))) + (if indices + (accumulate-triples '() indices) + (vector->list (triples index)))))) + + +(define (with-index graph f) + (let ((index (build-index graph))) + (f (lambda (s p o) + (rdf-match index s p o))))) -- cgit v1.2.3