diff options
Diffstat (limited to 'guix/serialization.scm')
-rw-r--r-- | guix/serialization.scm | 44 |
1 file changed, 29 insertions, 15 deletions
diff --git a/guix/serialization.scm b/guix/serialization.scm
index 4f82c06862..a99f53ee0b 100644
--- a/guix/serialization.scm
+++ b/guix/serialization.scm
@@ -29,7 +29,8 @@
   #:export (write-int read-int
             write-long-long read-long-long
             write-padding
-            write-string read-string read-latin1-string
+            write-string
+            read-string read-latin1-string read-maybe-utf8-string
             write-string-list read-string-list
             write-string-pairs
             write-store-path read-store-path
@@ -109,28 +110,41 @@
       (bytevector-copy! s 0 b 8 l)
       (put-bytevector p b)))
 
-(define (read-string p)
+(define (read-byte-string p)
   (let* ((len (read-int p))
          (m (modulo len 8))
-         (bv (get-bytevector-n* p len))
-         (str (utf8->string bv)))
+         (bv (get-bytevector-n* p len)))
     (or (zero? m)
         (get-bytevector-n* p (- 8 m)))
-    str))
+    bv))
 
-(define (read-latin1-string p)
-  (let* ((len (read-int p))
-         (m (modulo len 8))
-         ;; Note: do not use 'get-string-n' to work around Guile bug
-         ;; <http://bugs.gnu.org/19621>. See <http://bugs.gnu.org/19610> for
-         ;; a discussion.
-         (str (get-bytevector-n* p len)))
-    (or (zero? m)
-        (get-bytevector-n* p (- 8 m)))
+(define (read-string p)
+  (utf8->string (read-byte-string p)))
 
+(define (read-latin1-string p)
+  "Read an ISO-8859-1 string from P."
+  ;; Note: do not use 'get-string-n' to work around Guile bug
+  ;; <http://bugs.gnu.org/19621>. See <http://bugs.gnu.org/19610> for
+  ;; a discussion.
+  (let ((bv (read-byte-string p)))
     ;; XXX: Rewrite using (ice-9 iconv) when the minimum requirement is
     ;; upgraded to Guile >= 2.0.9.
-    (list->string (map integer->char (bytevector->u8-list str)))))
+    (list->string (map integer->char (bytevector->u8-list bv)))))
+
+(define (read-maybe-utf8-string p)
+  "Read a serialized string from port P. Attempt to decode it as UTF-8 and
+substitute invalid byte sequences with question marks. This is a
+\"permissive\" UTF-8 decoder."
+  ;; XXX: We rely on the port's decoding mechanism to do permissive decoding
+  ;; and substitute invalid byte sequences with question marks, but this is
+  ;; not very efficient. Eventually Guile may provide a lightweight
+  ;; permissive UTF-8 decoder.
+  (let* ((bv (read-byte-string p))
+         (port (with-fluids ((%default-port-encoding "UTF-8")
+                             (%default-port-conversion-strategy
+                              'substitute))
+                 (open-bytevector-input-port bv))))
+    (get-string-all port)))
 
 (define (write-string-list l p)
   (write-int (length l) p)