From d0abaf8960ef0fb8a85f43c399a7ca9281c56142 Mon Sep 17 00:00:00 2001 From: Hartmut Goebel Date: Mon, 15 May 2017 11:41:54 +0200 Subject: gnu: Add catdoc. * gnu/packages/textutils.scm (catdoc): New variable. --- gnu/packages/textutils.scm | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'gnu/packages/textutils.scm') diff --git a/gnu/packages/textutils.scm b/gnu/packages/textutils.scm index dbd71c2e8f..30e2116688 100644 --- a/gnu/packages/textutils.scm +++ b/gnu/packages/textutils.scm @@ -10,6 +10,7 @@ ;;; Copyright © 2016 Marius Bakke ;;; Copyright © 2017 Eric Bavier ;;; Copyright © 2017 Rene Saavedra +;;; Copyright © 2017 Hartmut Goebel ;;; ;;; This file is part of GNU Guix. ;;; @@ -368,6 +369,45 @@ (define-public antiword runs Word\".") (license license:gpl2+))) +(define-public catdoc + (package + (name "catdoc") + (version "0.95") + (source (origin + (method url-fetch) + (uri (string-append "http://ftp.wagner.pp.ru/pub/catdoc/" + "catdoc-" version ".tar.gz")) + (sha256 + (base32 + "15h7v3bmwfk4z8r78xs5ih6vd0pskn0rj90xghvbzdjj0cc88jji")))) + (build-system gnu-build-system) + ;; TODO: Also build `wordview` which requires `tk` – make a separate + ;; package for this. + (arguments + '(#:tests? #f ; There are no tests + #:configure-flags '("--disable-wordview") + #:phases + (modify-phases %standard-phases + (add-before 'install 'fix-install + (lambda* (#:key outputs #:allow-other-keys) + (let ((out (assoc-ref outputs "out"))) + (mkdir-p (string-append out "/share/man/man1")))))))) + (home-page "http://www.wagner.pp.ru/~vitus/software/catdoc/") + (synopsis "MS-Word to TeX or plain text converter") + (description "@command{catdoc} extracts text from MS-Word files, trying to +preserve as many special printable characters as possible. It supports +everything up to Word-97. Also supported are MS Write documents and RTF files. 
+ +@command{catdoc} does not preserve complex word formatting, but it can +translate some non-ASCII characters into TeX escape codes. Its goal is to +extract plain text and allow you to read it and, probably, reformat with TeX, +according to TeXnical rules. + +This package also provides @command{xls2csv}, which extracts data from Excel +spreadsheets and outputs it in comma-separated-value format, and +@command{catppt}, which extracts data from PowerPoint presentations.") + (license license:gpl2+))) + (define-public utfcpp (package (name "utfcpp") -- cgit v1.2.3