summaryrefslogtreecommitdiff
path: root/guix/import
diff options
context:
space:
mode:
authorMaxim Cournoyer <maxim.cournoyer@gmail.com>2021-03-21 23:53:21 -0400
committerMaxim Cournoyer <maxim.cournoyer@gmail.com>2021-04-09 22:41:31 -0400
commit6aee902eaf9e38d5f41f568ef787fa0cc5203318 (patch)
tree706c5c64d6a3796a85bef9c99e0d80126408ef2f /guix/import
parent2446a112dfb79e851449b832006c1160bf818504 (diff)
import: go: Improve synopsis and description parsing.
* guix/import/go.scm (%strict-tokenizer?): Set parameter to #t. (go-path-escape): Redefine to prevent inlining. (http-get*): Replace by ... (http-fetch*): this ... (json-fetch*): New procedure. (go.pkg.dev-info): Use http-fetch*. (go-package-licenses): Rewrite in terms of go.pkg.dev-info. (go-package-description): Likewise. (go-package-synopsis): Likewise. (fetch-go.mod): Use the memoized http-fetch*. (parse-go.mod): Adjust to receive content as a string. (fetch-module-meta-data): Adjust to use http-fetch*. (go-module->guix-package): Adjust to the modified fetch-go.mod return value. [inputs]: Use propagated inputs, which is the most common situations for Go libraries. [description]: Beautify description. [licenses]: Do no check for #f. The result of the license parsing is always a list. * tests/go.scm: Adjust following above changes.
Diffstat (limited to 'guix/import')
-rw-r--r--guix/import/go.scm213
-rw-r--r--guix/import/utils.scm4
2 files changed, 135 insertions, 82 deletions
diff --git a/guix/import/go.scm b/guix/import/go.scm
index 6c0231e113..8c8f20b109 100644
--- a/guix/import/go.scm
+++ b/guix/import/go.scm
@@ -33,7 +33,7 @@
#:use-module (guix http-client)
#:use-module ((guix licenses) #:prefix license:)
#:use-module (guix memoization)
- #:autoload (htmlprag) (html->sxml) ;from Guile-Lib
+ #:use-module (htmlprag) ;from Guile-Lib
#:autoload (guix git) (update-cached-checkout)
#:autoload (gcrypt hash) (open-hash-port hash-algorithm sha256)
#:autoload (guix serialization) (write-file)
@@ -43,20 +43,28 @@
#:use-module (ice-9 rdelim)
#:use-module (ice-9 receive)
#:use-module (ice-9 regex)
+ #:use-module (ice-9 textual-ports)
#:use-module ((rnrs io ports) #:select (call-with-port))
#:use-module (srfi srfi-1)
+ #:use-module (srfi srfi-2)
#:use-module (srfi srfi-9)
#:use-module (srfi srfi-11)
#:use-module (srfi srfi-26)
- #:use-module (sxml xpath)
+ #:use-module (sxml match)
+ #:use-module ((sxml xpath) #:renamer (lambda (s)
+ (if (eq? 'filter s)
+ 'xfilter
+ s)))
#:use-module (web client)
#:use-module (web response)
#:use-module (web uri)
- #:export (go-path-escape
- go-module->guix-package
+ #:export (go-module->guix-package
go-module-recursive-import))
+;;; Parameterize htmlprag to parse valid HTML more reliably.
+(%strict-tokenizer? #t)
+
;;; Commentary:
;;;
;;; (guix import go) attempts to make it easier to create Guix package
@@ -90,6 +98,14 @@
;;; Code:
+(define http-fetch*
+ ;; Like http-fetch, but memoized and returning the body as a string.
+ (memoize (lambda args
+ (call-with-port (apply http-fetch args) get-string-all))))
+
+(define json-fetch*
+ (memoize json-fetch))
+
(define (go-path-escape path)
"Escape a module path by replacing every uppercase letter with an
exclamation mark followed with its lowercase equivalent, as per the module
@@ -99,54 +115,73 @@ https://godoc.org/golang.org/x/mod/module#hdr-Escaped_Paths)."
(string-append "!" (string-downcase (match:substring occurrence))))
(regexp-substitute/global #f "[A-Z]" path 'pre escape 'post))
+;; Prevent inlining of this procedure, which is accessed by unit tests.
+(set! go-path-escape go-path-escape)
+
+(define (go.pkg.dev-info name)
+ (http-fetch* (string-append "https://pkg.go.dev/" name)))
+
(define (go-module-latest-version goproxy-url module-path)
"Fetch the version number of the latest version for MODULE-PATH from the
given GOPROXY-URL server."
- (assoc-ref (json-fetch (format #f "~a/~a/@latest" goproxy-url
- (go-path-escape module-path)))
+ (assoc-ref (json-fetch* (format #f "~a/~a/@latest" goproxy-url
+ (go-path-escape module-path)))
"Version"))
-
(define (go-package-licenses name)
"Retrieve the list of licenses that apply to NAME, a Go package or module
-name (e.g. \"github.com/golang/protobuf/proto\"). The data is scraped from
-the https://pkg.go.dev/ web site."
- (let*-values (((url) (string-append "https://pkg.go.dev/" name
- "?tab=licenses"))
- ((response body) (http-get url))
- ;; Extract the text contained in a h2 child node of any
- ;; element marked with a "License" class attribute.
- ((select) (sxpath `(// (* (@ (equal? (class "License"))))
- h2 // *text*))))
- (and (eq? (response-code response) 200)
- (match (select (html->sxml body))
- (() #f) ;nothing selected
- (licenses licenses)))))
-
-(define (go.pkg.dev-info name)
- (http-get (string-append "https://pkg.go.dev/" name)))
-(define go.pkg.dev-info*
- (memoize go.pkg.dev-info))
+name (e.g. \"github.com/golang/protobuf/proto\")."
+ (let* ((body (go.pkg.dev-info (string-append name "?tab=licenses")))
+ ;; Extract the text contained in a h2 child node of any
+ ;; element marked with a "License" class attribute.
+ (select (sxpath `(// (* (@ (equal? (class "License"))))
+ h2 // *text*))))
+ (select (html->sxml body))))
+
+(define (sxml->texi sxml-node)
+ "A very basic SXML to Texinfo converter which attempts to preserve HTML
+formatting and links as text."
+ (sxml-match sxml-node
+ ((strong ,text)
+ (format #f "@strong{~a}" text))
+ ((a (@ (href ,url)) ,text)
+ (format #f "@url{~a,~a}" url text))
+ ((code ,text)
+ (format #f "@code{~a}" text))
+ (,something-else something-else)))
(define (go-package-description name)
"Retrieve a short description for NAME, a Go package name,
-e.g. \"google.golang.org/protobuf/proto\". The data is scraped from the
-https://pkg.go.dev/ web site."
- (let*-values (((response body) (go.pkg.dev-info* name))
- ;; Extract the text contained in a h2 child node of any
- ;; element marked with a "License" class attribute.
- ((select) (sxpath
- `(// (section
- (@ (equal? (class "Documentation-overview"))))
- (p 1)))))
- (and (eq? (response-code response) 200)
- (match (select (html->sxml body))
- (() #f) ;nothing selected
- (((p . strings))
- ;; The paragraph text is returned as a list of strings embedding
- ;; newline characters. Join them and strip the newline
- ;; characters.
- (string-delete #\newline (string-join strings)))))))
+e.g. \"google.golang.org/protobuf/proto\"."
+ (let* ((body (go.pkg.dev-info name))
+ (sxml (html->sxml body))
+ (overview ((sxpath
+ `(//
+ (* (@ (equal? (class "Documentation-overview"))))
+ (p 1))) sxml))
+ ;; Sometimes, the first paragraph just contains images/links that
+ ;; has only "\n" for text. The following filter is designed to
+ ;; omit it.
+ (contains-text? (lambda (node)
+ (remove string-null?
+ (map string-trim-both
+ (filter (node-typeof? '*text*)
+ (cdr node))))))
+ (select-content (sxpath
+ `(//
+ (* (@ (equal? (class "UnitReadme-content"))))
+ div // p ,(xfilter contains-text?))))
+ ;; Fall-back to use content; this is less desirable as it is more
+ ;; verbose, but not every page has an overview.
+ (description (if (not (null? overview))
+ overview
+ (select-content sxml)))
+ (description* (and (not (null? description))
+ (first description))))
+ (match description*
+ (() #f) ;nothing selected
+ ((p elements ...)
+ (apply string-append (filter string? (map sxml->texi elements)))))))
(define (go-package-synopsis module-name)
"Retrieve a short synopsis for a Go module named MODULE-NAME,
@@ -154,17 +189,17 @@ e.g. \"google.golang.org/protobuf\". The data is scraped from
the https://pkg.go.dev/ web site."
;; Note: Only the *module* (rather than package) page has the README title
;; used as a synopsis on the https://pkg.go.dev web site.
- (let*-values (((response body) (go.pkg.dev-info* module-name))
- ;; Extract the text contained in a h2 child node of any
- ;; element marked with a "License" class attribute.
- ((select) (sxpath
- `(// (div (@ (equal? (class "UnitReadme-content"))))
- // h3 *text*))))
- (and (eq? (response-code response) 200)
- (match (select (html->sxml body))
- (() #f) ;nothing selected
- ((title more ...) ;title is the first string of the list
- (string-trim-both title))))))
+ (let* ((url (string-append "https://pkg.go.dev/" module-name))
+ (body (http-fetch* url))
+ ;; Extract the text contained in a h2 child node of any
+ ;; element marked with a "License" class attribute.
+ (select-title (sxpath
+ `(// (div (@ (equal? (class "UnitReadme-content"))))
+ // h3 *text*))))
+ (match (select-title (html->sxml body))
+ (() #f) ;nothing selected
+ ((title more ...) ;title is the first string of the list
+ (string-trim-both title)))))
(define (list->licenses licenses)
"Given a list of LICENSES mostly following the SPDX conventions, return the
@@ -189,13 +224,13 @@ corresponding Guix license or 'unknown-license!"
'unknown-license!)))
licenses))
-(define (fetch-go.mod goproxy-url module-path version)
- "Fetches go.mod from the given GOPROXY-URL server for the given MODULE-PATH
-and VERSION."
- (let ((url (format #f "~a/~a/@v/~a.mod" goproxy-url
+(define (fetch-go.mod goproxy module-path version)
+ "Fetch go.mod from the given GOPROXY server for the given MODULE-PATH
+and VERSION and return an input port."
+ (let ((url (format #f "~a/~a/@v/~a.mod" goproxy
(go-path-escape module-path)
(go-path-escape version))))
- (http-fetch url)))
+ (http-fetch* url)))
(define %go.mod-require-directive-rx
;; A line in a require directive is composed of a module path and
@@ -216,9 +251,8 @@ and VERSION."
"[[:blank:]]+" "=>" "[[:blank:]]+"
"([^[:blank:]]+)([[:blank:]]+([^[:blank:]]+))?")))
-(define (parse-go.mod port)
- "Parse the go.mod file accessible via the input PORT, returning a list of
-requirements."
+(define (parse-go.mod content)
+ "Parse the go.mod file CONTENT, returning a list of requirements."
(define-record-type <results>
(make-results requirements replacements)
results?
@@ -229,7 +263,7 @@ requirements."
(define (toplevel results)
"Main parser, RESULTS is a pair of alist serving as accumulator for
all encountered requirements and replacements."
- (let ((line (read-line port)))
+ (let ((line (read-line)))
(cond
((eof-object? line)
;; parsing ended, give back the result
@@ -255,7 +289,7 @@ requirements."
(toplevel results)))))
(define (in-require results)
- (let ((line (read-line port)))
+ (let ((line (read-line)))
(cond
((eof-object? line)
;; this should never happen here but we ignore silently
@@ -267,7 +301,7 @@ requirements."
(in-require (require-directive results line))))))
(define (in-replace results)
- (let ((line (read-line port)))
+ (let ((line (read-line)))
(cond
((eof-object? line)
;; this should never happen here but we ignore silently
@@ -306,7 +340,9 @@ requirements."
(($ <results> requirements replaced)
(make-results (alist-cons module-path version requirements) replaced)))))
- (let ((results (toplevel (make-results '() '()))))
+ (let ((results (with-input-from-string content
+ (lambda _
+ (toplevel (make-results '() '()))))))
(match results
(($ <results> requirements replaced)
;; At last we remove replaced modules from the requirements list
@@ -325,8 +361,10 @@ requirements."
(url-prefix vcs-url-prefix)
(root-regex vcs-root-regex)
(type vcs-type))
+
(define (make-vcs prefix regexp type)
- (%make-vcs prefix (make-regexp regexp) type))
+ (%make-vcs prefix (make-regexp regexp) type))
+
(define known-vcs
;; See the following URL for the official Go equivalent:
;; https://github.com/golang/go/blob/846dce9d05f19a1f53465e62a304dea21b99f910/src/cmd/go/internal/vcs/vcs.go#L1026-L1087
@@ -387,6 +425,14 @@ hence the need to derive this information."
"/" "-")
"_" "-"))))
+(define (strip-.git-suffix/maybe repo-url)
+ "Strip a repository URL '.git' suffix from REPO-URL if hosted at GitHub."
+ (match repo-url
+ ((and (? (cut string-prefix? "https://github.com" <>))
+ (? (cut string-suffix? ".git" <>)))
+ (string-drop-right repo-url 4))
+ (_ repo-url)))
+
(define-record-type <module-meta>
(make-module-meta import-prefix vcs repo-root)
module-meta?
@@ -399,21 +445,22 @@ hence the need to derive this information."
because goproxy servers don't currently provide all the information needed to
build a package."
;; <meta name="go-import" content="import-prefix vcs repo-root">
- (let* ((port (http-fetch (format #f "https://~a?go-get=1" module-path)))
+ (let* ((meta-data (http-fetch* (format #f "https://~a?go-get=1" module-path)))
(select (sxpath `(// head (meta (@ (equal? (name "go-import"))))
// content))))
- (match (select (call-with-port port html->sxml))
- (() #f) ;nothing selected
+ (match (select (html->sxml meta-data))
+ (() #f) ;nothing selected
(((content content-text))
(match (string-split content-text #\space)
((root-path vcs repo-url)
- (make-module-meta root-path (string->symbol vcs) repo-url)))))))
+ (make-module-meta root-path (string->symbol vcs)
+ (strip-.git-suffix/maybe repo-url))))))))
-(define (module-meta-data-repo-url meta-data goproxy-url)
+(define (module-meta-data-repo-url meta-data goproxy)
"Return the URL where the fetcher which will be used can download the
source."
(if (member (module-meta-vcs meta-data) '(fossil mod))
- goproxy-url
+ goproxy
(module-meta-repo-root meta-data)))
;; XXX: Copied from (guix scripts hash).
@@ -466,6 +513,9 @@ control system is being used."
(method git-fetch)
(uri (git-reference
(url ,vcs-repo-url)
+ ;; This is done because the version field of the package,
+ ;; which the generated quoted expression refers to, has been
+ ;; stripped of any 'v' prefixed.
(commit ,(if (and plain-version? v-prefixed?)
'(string-append "v" version)
'(go-version->git-ref version)))))
@@ -505,8 +555,8 @@ control system is being used."
(define* (go-module->guix-package module-path #:key
(goproxy-url "https://proxy.golang.org"))
(let* ((latest-version (go-module-latest-version goproxy-url module-path))
- (port (fetch-go.mod goproxy-url module-path latest-version))
- (dependencies (map car (call-with-port port parse-go.mod)))
+ (content (fetch-go.mod goproxy-url module-path latest-version))
+ (dependencies (map car (parse-go.mod content)))
(guix-name (go-module->guix-package-name module-path))
(root-module-path (module-path->repository-root module-path))
;; The VCS type and URL are not included in goproxy information. For
@@ -527,14 +577,17 @@ control system is being used."
(build-system go-build-system)
(arguments
'(#:import-path ,root-module-path))
- ,@(maybe-inputs (map go-module->guix-package-name dependencies))
+ ,@(maybe-propagated-inputs
+ (map go-module->guix-package-name dependencies))
(home-page ,(format #f "https://~a" root-module-path))
(synopsis ,synopsis)
- (description ,description)
- (license ,(match (and=> licenses list->licenses)
- ((license) license)
- ((licenses ...) `(list ,@licenses))
- (x x))))
+ (description ,(and=> description beautify-description))
+ (license ,(match (list->licenses licenses)
+ (() #f) ;unknown license
+ ((license) ;a single license
+ license)
+ ((license ...) ;a list of licenses
+ `(list ,@license)))))
dependencies)))
(define go-module->guix-package* (memoize go-module->guix-package))
diff --git a/guix/import/utils.scm b/guix/import/utils.scm
index c2db5a323b..adf90f84d7 100644
--- a/guix/import/utils.scm
+++ b/guix/import/utils.scm
@@ -446,8 +446,8 @@ obtain a node's uniquely identifying \"key\"."
"Return a list of package expressions for PACKAGE-NAME and all its
dependencies, sorted in topological order. For each package,
call (REPO->GUIX-PACKAGE NAME :KEYS version repo), which should return a
-package expression and a list of dependencies; call (GUIX-NAME NAME) to
-obtain the Guix package name corresponding to the upstream name."
+package expression and a list of dependencies; call (GUIX-NAME PACKAGE-NAME)
+to obtain the Guix package name corresponding to the upstream name."
(define-record-type <node>
(make-node name version package dependencies)
node?