Codebase list archmage / af469764-b964-499d-acb5-684ef3b00274/upstream/master
Import upstream version 0.4.2.1+git20210913.1.a8f632d Debian Janitor 2 years ago
14 changed file(s) with 58 addition(s) and 42 deletion(s). Raw diff Collapse all Expand all
00 Copyright (c) 2003 Eugeny Korekin <az@ftc.ru>
11 Copyright (c) 2005-2009 Basil Shubin <basil.shubin@gmail.com>
2 Copyright (c) 2015,2019 Mikhail Gusarov <dottedmag@dottedmag.net>
2 Copyright (c) 2015-2020 Misha Gusarov <dottedmag@dottedmag.net>
0 Metadata-Version: 1.2
0 Metadata-Version: 2.1
11 Name: archmage
22 Version: 0.4.2.1
33 Summary: CHM decompressor
44 Home-page: https://github.com/dottedmag/archmage
5 Maintainer: Mikhail Gusarov
5 Maintainer: Misha Gusarov
66 Maintainer-email: dottedmag@dottedmag.net
77 License: GPLv2+
8 Description: arCHMage is a reader and decompressor for CHM format
98 Keywords: chm,HTML Help,Compiled HTML,Compressed HTML
109 Platform: UNKNOWN
1110 Classifier: Development Status :: 5 - Production/Stable
1413 Classifier: Intended Audience :: End Users/Desktop
1514 Classifier: License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)
1615 Classifier: Topic :: Text Processing :: Filters
16 License-File: COPYING
17 License-File: AUTHORS
18
19 arCHMage is a reader and decompressor for CHM format
20
3232 Installation
3333 ============
3434
35 Archmage uses PyCHM, which depends on the CHMlib C library. After CHMlib is installed, run
36
3537 pip install archmage
3638
3739 Requirements
3941
4042 arCHMage has the following dependencies:
4143
42 * Python 3.5+
44 * Python 3.6+
4345 * PyCHM
4446 * BeautifulSoup4
4547
22 # archmage -- CHM decompressor
33 # Copyright (c) 2003 Eugeny Korekin <aaaz@users.sourceforge.net>
44 # Copyright (c) 2005-2009 Basil Shubin <bashu@users.sourceforge.net>
5 # Copyright (c) 2015,2019 Mikhail Gusarov <dottedmag@dottedmag.net>
5 # Copyright (c) 2015-2020 Misha Gusarov <dottedmag@dottedmag.net>
66 #
77 # This program is free software; you can redistribute it and/or modify it under
88 # the terms of the GNU General Public License as published by the Free Software
2828 import tempfile
2929 import os.path
3030 from enum import Enum
31 from typing import List, Union
3132
3233 import archmage
3334
3536
3637 # import PyCHM bindings
3738 try:
38 from chm import chmlib
39 from chm import chmlib # type: ignore
3940 except ImportError as msg:
4041 sys.exit(
4142 "ImportError: %s\nPlease check README file for system requirements."
6970 out.append(path)
7071 return chmlib.CHM_ENUMERATOR_CONTINUE
7172
72 out = []
73 out: List[str] = []
7374 if (
7475 chmlib.chm_enumerate(
7576 self._chm, chmlib.CHM_ENUMERATE_ALL, get_name, out
122123 self.cache = {}
123124 # Name of source directory with CHM content
124125 if os.path.isdir(name):
125 self.source = DirSource(name)
126 self.source: Union[DirSource, FileSource] = DirSource(name)
126127 else:
127128 self.source = FileSource(name)
128129 self.sourcename = name
176177 return self.cache["image_urls"]
177178
178179 def _image_urls(self):
179 out = []
180 out: List[str] = []
180181 image_catcher = ImageCatcher()
181182 for file in self.html_files():
183 # Use latin-1, as it will accept any byte sequence
182184 image_catcher.feed(
183185 Entry(
184186 self.source, file, self.filename_case, self.restore_framing
185 ).correct()
187 ).correct().decode("latin-1")
186188 )
187189 for image_url in image_catcher.imgurls:
188190 if not out.count(image_url):
272274
273275 def _toclevels(self):
274276 counter = TOCCounter()
275 counter.feed(self.topicstree)
277 # Use latin-1, as it will accept any byte sequence
278 counter.feed(self.topicstree.decode("latin-1"))
276279 if counter.count > self.maxtoclvl:
277280 return self.maxtoclvl
278281 else:
431434 self.extract_entry(
432435 entry=key, output_file=key.lower(), destdir=tempdir
433436 )
434 htmldoc(files, self.htmldoc_exec, options, self.toclevels, output)
437 htmldoc(files, self.htmldoc_exec, options, self.toclevels(), output)
435438 # Remove temporary files
436439 shutil.rmtree(path=tempdir)
437440
492495 data = self.lower_links(data)
493496
494497 # Delete unwanted HTML elements.
495 data = re.sub("<div .*teamlib\\.gif.*\\/div>", "", data)
496 data = re.sub("<a href.*>\\[ Team LiB \\]<\\/a>", "", data)
498 data = re.sub(b"<div .*teamlib\\.gif.*\\/div>", b"", data)
499 data = re.sub(b"<a href.*>\\[ Team LiB \\]<\\/a>", b"", data)
497500 data = re.sub(
498 "<table.*larrow\\.gif.*rarrow\\.gif.*<\\/table>", "", data
499 )
500 data = re.sub("<a href.*next\\.gif[^>]*><\\/a>", "", data)
501 data = re.sub("<a href.*previous\\.gif[^>]*><\\/a>", "", data)
502 data = re.sub("<a href.*prev\\.gif[^>]*><\\/a>", "", data)
503 data = re.sub('"[^"]*previous\\.gif"', '""', data)
504 data = re.sub('"[^"]*prev\\.gif"', '""', data)
505 data = re.sub('"[^"]*next\\.gif"', '""', data)
501 b"<table.*larrow\\.gif.*rarrow\\.gif.*<\\/table>", b"", data
502 )
503 data = re.sub(b"<a href.*next\\.gif[^>]*><\\/a>", b"", data)
504 data = re.sub(b"<a href.*previous\\.gif[^>]*><\\/a>", b"", data)
505 data = re.sub(b"<a href.*prev\\.gif[^>]*><\\/a>", b"", data)
506 data = re.sub(b'"[^"]*previous\\.gif"', b'""', data)
507 data = re.sub(b'"[^"]*prev\\.gif"', b'""', data)
508 data = re.sub(b'"[^"]*next\\.gif"', b'""', data)
506509 if data is not None:
507510 return data
508511 else:
509 return ""
512 return b""
510513
511514 def get(self):
512515 """Get CHM entry content"""
523526 if data is not None:
524527 return data
525528 else:
526 return ""
529 return b""
11 #
22 # archmage -- CHM decompressor
33 # Copyright (c) 2009 Basil Shubin <bashu@users.sourceforge.net>
4 # Copyright (c) 2015,2019 Mikhail Gusarov <dottedmag@dottedmag.net>
4 # Copyright (c) 2015-2020 Misha Gusarov <dottedmag@dottedmag.net>
55 #
66 # This program is free software; you can redistribute it and/or modify it under
77 # the terms of the GNU General Public License as published by the Free Software
2020
2121 import re
2222 import mimetypes
23 import sgmllib, urllib.request, urllib.error, urllib.parse
24
25 from bs4 import BeautifulSoup, UnicodeDammit
23 import sgmllib # type: ignore
24 import urllib.request, urllib.error, urllib.parse
25
26 from bs4 import BeautifulSoup, UnicodeDammit # type: ignore
2627 from html.parser import HTMLParser
2728 from urllib.parse import urlparse
2829
22 # archmage -- CHM decompressor
33 # Copyright (c) 2003 Eugeny Korekin <aaaz@users.sourceforge.net>
44 # Copyright (c) 2005-2009 Basil Shubin <bashu@users.sourceforge.net>
5 # Copyright (c) 2015,2019 Mikhail Gusarov <dottedmag@dottedmag.net>
5 # Copyright (c) 2015-2020 Misha Gusarov <dottedmag@dottedmag.net>
66 #
77 # This program is free software; you can redistribute it and/or modify it under
88 # the terms of the GNU General Public License as published by the Free Software
5555
5656 # CHM2PDF conversion. Use the following command to convert CHM content to a single
5757 # PDF file. Make sure that htmldoc is available on your system.
58 chmtopdf = '-t pdf14 -f "%(output)s" --book %(toc)s --no-numbered --toctitle "Table of Contents" --title --textcolor "#000000" --linkcolor "#0000ff" --linkstyle plain --size Universal --left 1.00in --right 0.50in --top 0.50in --bottom 0.50in --header .t. --header1 ... --footer h.1 --nup 1 --tocheader .t. --tocfooter ..i --portrait --color --no-pscommands --no-xrxcomments --compression=1 --jpeg=0 --fontsize 11.0 --fontspacing 1.2 --headingfont Helvetica --bodyfont Times --headfootsize 11.0 --headfootfont Helvetica --charset iso-8859-1 --links --embedfonts --pagemode outline --pagelayout single --firstpage c1 --pageeffect none --pageduration 10 --effectduration 1.0 --no-encryption --permissions all --owner-password "" --user-password "" --browserwidth 680 --no-strict --no-overflow --quiet'
58 chmtopdf = '-t pdf14 -f "%(output)s" --webpage %(toc)s --no-title --no-numbered --toctitle "Table of Contents" --textcolor "#000000" --linkcolor "#0000ff" --linkstyle plain --size Universal --left 1.00in --right 0.50in --top 0.50in --bottom 0.50in --header .t. --header1 ... --footer h.1 --nup 1 --tocheader .t. --tocfooter ..i --portrait --color --no-pscommands --no-xrxcomments --compression=1 --jpeg=0 --fontsize 11.0 --fontspacing 1.2 --headingfont Helvetica --bodyfont Times --headfootsize 11.0 --headfootfont Helvetica --charset iso-8859-1 --links --embedfonts --pagemode outline --pagelayout single --firstpage c1 --pageeffect none --pageduration 10 --effectduration 1.0 --no-encryption --permissions all --owner-password "" --user-password "" --browserwidth 680 --no-strict --no-overflow --quiet'
5959
6060 # Maximum Table of Contents levels for the htmldoc utility.
6161 #
22 # archmage -- CHM decompressor
33 # Copyright (c) 2003 Eugeny Korekin <aaaz@users.sourceforge.net>
44 # Copyright (c) 2005-2009 Basil Shubin <bashu@users.sourceforge.net>
5 # Copyright (c) 2015,2019 Mikhail Gusarov <dottedmag@dottedmag.net>
5 # Copyright (c) 2015-2020 Misha Gusarov <dottedmag@dottedmag.net>
66 #
77 # This program is free software; you can redistribute it and/or modify it under
88 # the terms of the GNU General Public License as published by the Free Software
2020 """Generic converter function"""
2121
2222 import os
23 import string
2423 import tempfile
2524 import subprocess
2625
4140 options = options % {"output": output, "toc": toc}
4241 if input:
4342 # Create a htmldoc file for batch processing
44 f = tempfile.NamedTemporaryFile(delete=False)
45 f.write("#HTMLDOC 1.8.27\n")
46 f.write(options + "\n")
47 f.write(string.join(input, "\n"))
43 f = tempfile.NamedTemporaryFile(mode="wb", delete=False)
44 f.write(b"#HTMLDOC 1.8.27\n")
45 f.write(options.encode("utf-8") + b"\n")
46 f.write(b'\n'.join(f.encode('utf-8') for f in input))
4847 f.close()
4948 # Prepare command line to execute
5049 command = "%s --batch %s" % (cmd, f.name)
0 Metadata-Version: 1.2
0 Metadata-Version: 2.1
11 Name: archmage
22 Version: 0.4.2.1
33 Summary: CHM decompressor
44 Home-page: https://github.com/dottedmag/archmage
5 Maintainer: Mikhail Gusarov
5 Maintainer: Misha Gusarov
66 Maintainer-email: dottedmag@dottedmag.net
77 License: GPLv2+
8 Description: arCHMage is a reader and decompressor for CHM format
98 Keywords: chm,HTML Help,Compiled HTML,Compressed HTML
109 Platform: UNKNOWN
1110 Classifier: Development Status :: 5 - Production/Stable
1413 Classifier: Intended Audience :: End Users/Desktop
1514 Classifier: License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)
1615 Classifier: Topic :: Text Processing :: Filters
16 License-File: COPYING
17 License-File: AUTHORS
18
19 arCHMage is a reader and decompressor for CHM format
20
33 NEWS
44 README.md
55 archmage.1
6 pyproject.toml
67 setup.py
78 archmage/CHM.py
89 archmage/CHMParser.py
0 beautifulsoup4
01 pychm
1 beautifulsoup4
22 sgmllib3k
0 [tool.black]
1 line-length = 80
1717 name="archmage",
1818 version="0.4.2.1",
1919 description="CHM decompressor",
20 maintainer="Mikhail Gusarov",
20 maintainer="Misha Gusarov",
2121 maintainer_email="dottedmag@dottedmag.net",
2222 url="https://github.com/dottedmag/archmage",
2323 license="GPLv2+",