summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Julien Lepiller <julien@lepiller.eu> 2022-07-04 21:42:02 +0200
committer Maxim Cournoyer <maxim.cournoyer@gmail.com> 2023-04-14 15:12:10 -0400
commit c16251be058c221100a6acfd46e41929b613df9d (patch)
tree 52652ad954f4c91e091b17cf329d677130ce07fe
parent 04688c56e1817fc036273f2bd13fa9fad4371d51 (diff)
gnu: Add mecab-unidic.
* gnu/packages/language.scm (mecab-unidic): New variable.
-rw-r--r-- gnu/packages/language.scm 25
1 files changed, 25 insertions, 0 deletions
diff --git a/gnu/packages/language.scm b/gnu/packages/language.scm
index f643136873..208fab4f5b 100644
--- a/gnu/packages/language.scm
+++ b/gnu/packages/language.scm
@@ -1006,3 +1006,28 @@ Corporation. The engine is independent of any language, dictionary or corpus.")
(description "This package contains dictionnary data derived from
ipadic for use with MeCab.")
(license (license:non-copyleft "mecab-ipadic/COPYING"))))
+
+(define-public mecab-unidic ; UniDic dictionary data packaged for use with MeCab
+ (package
+ (name "mecab-unidic")
+ (version "3.1.0")
+ (source (origin
+ (method url-fetch)
+ (uri (string-append "https://clrd.ninjal.ac.jp/unidic_archive/cwj/"
+ version "/unidic-cwj-" version ".zip")) ; version is used in both the directory and the file name
+ (sha256
+ (base32
+ "1z132p2q3bgchiw529j2d7dari21kn0fhkgrj3vcl0ncg2m521il"))))
+ (build-system copy-build-system) ; files are copied according to the install plan below
+ (arguments
+ `(#:install-plan
+ '(("." "lib/mecab/dic" ; from the source root into lib/mecab/dic
+ #:include-regexp ("\\.bin$" "\\.def$" "\\.dic$" "dicrc"))))) ; only files matching these regexps
+ (native-inputs (list unzip)) ; presumably needed to unpack the .zip source -- confirm
+ (home-page "https://clrd.ninjal.ac.jp/unidic/en/")
+ (synopsis "Dictionary data for MeCab")
+ (description "UniDic for morphological analysis is a dictionary for
+analysis with the morphological analyser MeCab, where the short units exported
+from the database are used as entries (heading terms).")
+ ;; Triple-licensed: the user may choose any one of the licenses listed below.
+ (license (list license:gpl2+ license:lgpl2.1 license:bsd-3))))