summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVivien Kraus <vivien@planete-kraus.eu>2021-05-27 20:33:47 +0200
committerVivien Kraus <vivien@planete-kraus.eu>2021-06-18 16:52:28 +0200
commit9a992e293ac9455b406d9fdf7546e863857d8570 (patch)
tree4c970a36ff2d6017e07ec838ac1dc7ef18c91508
parent79cb9e14597dc56ca83898b0207fbe60c3e5af90 (diff)
Easier interface to query an RDF dataset
-rw-r--r--src/scm/webid-oidc/ChangeLog6
-rw-r--r--src/scm/webid-oidc/Makefile.am6
-rw-r--r--src/scm/webid-oidc/rdf-index.scm135
3 files changed, 145 insertions, 2 deletions
diff --git a/src/scm/webid-oidc/ChangeLog b/src/scm/webid-oidc/ChangeLog
index 2f6025a..04d357f 100644
--- a/src/scm/webid-oidc/ChangeLog
+++ b/src/scm/webid-oidc/ChangeLog
@@ -1,3 +1,9 @@
+2021-06-18 Vivien Kraus <vivien@planete-kraus.eu>
+
+ * rdf-index.scm (build-index): Normalize the URIs when building
+ the index, so that we can’t disguise triples.
+ (rdf-match): Normalize the URIs of the query too.
+
2021-05-07 Vivien Kraus <vivien@planete-kraus.eu>
* token-endpoint.scm (make-token-endpoint): The token endpoint
diff --git a/src/scm/webid-oidc/Makefile.am b/src/scm/webid-oidc/Makefile.am
index 6aeadfc..af8746f 100644
--- a/src/scm/webid-oidc/Makefile.am
+++ b/src/scm/webid-oidc/Makefile.am
@@ -24,7 +24,8 @@ dist_webidoidcmod_DATA += \
%reldir%/hello-world.scm \
%reldir%/reverse-proxy.scm \
%reldir%/client.scm \
- %reldir%/example-app.scm
+ %reldir%/example-app.scm \
+ %reldir%/rdf-index.scm
webidoidcgo_DATA += \
%reldir%/errors.go \
@@ -52,6 +53,7 @@ webidoidcgo_DATA += \
%reldir%/hello-world.go \
%reldir%/reverse-proxy.go \
%reldir%/client.go \
- %reldir%/example-app.go
+ %reldir%/example-app.go \
+ %reldir%/rdf-index.go
EXTRA_DIST += %reldir%/ChangeLog
diff --git a/src/scm/webid-oidc/rdf-index.scm b/src/scm/webid-oidc/rdf-index.scm
new file mode 100644
index 0000000..f756afa
--- /dev/null
+++ b/src/scm/webid-oidc/rdf-index.scm
@@ -0,0 +1,135 @@
+(define-module (webid-oidc rdf-index)
+ #:use-module (oop goops)
+ #:use-module (rdf rdf)
+ #:use-module (web uri)
+ #:export
+ (
+ with-index
+ ))
+
+(define (normalize uri)
+ ;; It is possible to hide triples by percent-escaping
+ ;; some characters, so that match will fail to see
+ ;; them. With normalization, it should be impossible.
+ (when (string? uri)
+ (set! uri (string->uri uri)))
+ (let ((scheme (uri-scheme uri))
+ (userinfo (uri-userinfo uri))
+ (host (uri-host uri))
+ (port (uri-port uri))
+ (path (uri-path uri))
+ (query (uri-query uri))
+ (fragment (uri-fragment uri)))
+ (let ((normalized-scheme scheme)
+ (normalized-userinfo userinfo)
+ (normalized-host host)
+ (normalized-port port)
+ (normalized-path
+ (let ((path-ends-in-slash? (string-suffix? "/" path)))
+ (string-append
+ "/"
+ (encode-and-join-uri-path
+ (split-and-decode-uri-path path))
+ (if (and (not (equal? path "/"))
+ path-ends-in-slash?)
+ "/"
+ ""))))
+ (normalized-query
+ (and query
+ (uri-encode (uri-decode query))))
+ (normalized-fragment
+ (and fragment
+ (uri-encode (uri-decode fragment)))))
+ (build-uri normalized-scheme
+ #:userinfo normalized-userinfo
+ #:host normalized-host
+ #:port normalized-port
+ #:path normalized-path
+ #:query normalized-query
+ #:fragment normalized-fragment))))
+
+(define (normalize-object object)
+ (if (string? object)
+ (uri->string (normalize object))
+ object))
+
+(define-class <rdf-index> ()
+ (triples #:init-keyword #:triples #:getter triples)
+ (subject-index #:init-thunk (lambda () (make-hash-table))
+ #:getter subject-index)
+ (predicate-index #:init-thunk (lambda () (make-hash-table))
+ #:getter predicate-index)
+ (object-index #:init-thunk (lambda () (make-hash-table))
+ #:getter object-index))
+
+(define (build-index triples)
+ (let ((ret (make <rdf-index> #:triples (list->vector triples))))
+ (define (do-index n triples)
+ (unless (null? triples)
+ (let ((first (car triples))
+ (rest (cdr triples)))
+ (let ((s (normalize-object (rdf-triple-subject first)))
+ (p (normalize-object (rdf-triple-predicate first)))
+ (o (normalize-object (rdf-triple-object first))))
+ (let ((other-s (hash-ref (subject-index ret) s '()))
+ (other-p (hash-ref (predicate-index ret) p '()))
+ (other-o (hash-ref (object-index ret) o '()))
+ (i (- n 1)))
+ (hash-set! (subject-index ret) s (cons i other-s))
+ (hash-set! (predicate-index ret) p (cons i other-p))
+ (hash-set! (object-index ret) o (cons i other-o))))
+ (do-index (- n 1) rest))))
+ (do-index (length triples) (reverse triples))
+ ret))
+
+(define (intersection-2 a b)
+ (cond
+ ((not a) b)
+ ((not b) a)
+ ((or (null? a) (null? b))
+ '())
+ ((< (car a) (car b))
+ (intersection-2 (cdr a) b))
+ ((> (car a) (car b))
+ (intersection-2 a (cdr b)))
+ (else
+ (cons (car a) (intersection-2 (cdr a) (cdr b))))))
+
+(define (intersection a . rest)
+ (if (null? rest)
+ a
+ (let ((b (car rest))
+ (true-rest (cdr rest)))
+ (apply intersection (intersection-2 a b) true-rest))))
+
+(define (rdf-match index subject predicate object)
+ (let ((by-subject
+ (and subject
+ (hash-ref (subject-index index)
+ (normalize-object subject)
+ '())))
+ (by-predicate
+ (and predicate
+ (hash-ref (predicate-index index)
+ (normalize-object predicate)
+ '())))
+ (by-object
+ (and object
+ (hash-ref (object-index index)
+ (normalize-object object)
+ '()))))
+ (let ((indices (intersection by-subject by-predicate by-object)))
+ (define (accumulate-triples acc i)
+ (if (null? i)
+ (reverse acc)
+ (let ((t (vector-ref (triples index) (car i))))
+ (accumulate-triples (cons t acc) (cdr i)))))
+ (if indices
+ (accumulate-triples '() indices)
+ (vector->list (triples index))))))
+
+
+(define (with-index graph f)
+ (let ((index (build-index graph)))
+ (f (lambda (s p o)
+ (rdf-match index s p o)))))