Commit af469764-b964-499d-acb5-684ef3b00274/upstream/master - archmage

+1

-1

AUTHORS less more

0	0	Copyright (c) 2003 Eugeny Korekin <az@ftc.ru>
1	1	Copyright (c) 2005-2009 Basil Shubin <basil.shubin@gmail.com>
2		Copyright (c) 2015,2019 Mikhail Gusarov <dottedmag@dottedmag.net>
	2	Copyright (c) 2015-2020 Misha Gusarov <dottedmag@dottedmag.net>

+7

-3

PKG-INFO less more

0		Metadata-Version: 1.2
	0	Metadata-Version: 2.1
1	1	Name: archmage
2	2	Version: 0.4.2.1
3	3	Summary: CHM decompressor
4	4	Home-page: https://github.com/dottedmag/archmage
5		Maintainer: Mikhail Gusarov
	5	Maintainer: Misha Gusarov
6	6	Maintainer-email: dottedmag@dottedmag.net
7	7	License: GPLv2+
8		Description: arCHMage is a reader and decompressor for CHM format
9	8	Keywords: chm,HTML Help,Compiled HTML,Compressed HTML
10	9	Platform: UNKNOWN
11	10	Classifier: Development Status :: 5 - Production/Stable

14	13	Classifier: Intended Audience :: End Users/Desktop
15	14	Classifier: License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)
16	15	Classifier: Topic :: Text Processing :: Filters
	16	License-File: COPYING
	17	License-File: AUTHORS
	18
	19	arCHMage is a reader and decompressor for CHM format
	20

+3

-1

README.md less more

32	32	Installation
33	33	============
34	34
	35	arCHMage uses PyCHM, which depends on the CHMlib C library. After CHMlib is installed, run:
	36
35	37	pip install archmage
36	38
37	39	Requirements

39	41
40	42	arCHMage has the following dependencies:
41	43
42		* Python 3.5+
	44	* Python 3.6+
43	45	* PyCHM
44	46	* BeautifulSoup4
45	47

+23

-20

archmage/CHM.py less more

2	2	# archmage -- CHM decompressor
3	3	# Copyright (c) 2003 Eugeny Korekin <aaaz@users.sourceforge.net>
4	4	# Copyright (c) 2005-2009 Basil Shubin <bashu@users.sourceforge.net>
5		# Copyright (c) 2015,2019 Mikhail Gusarov <dottedmag@dottedmag.net>
	5	# Copyright (c) 2015-2020 Misha Gusarov <dottedmag@dottedmag.net>
6	6	#
7	7	# This program is free software; you can redistribute it and/or modify it under
8	8	# the terms of the GNU General Public License as published by the Free Software

28	28	import tempfile
29	29	import os.path
30	30	from enum import Enum
	31	from typing import List, Union
31	32
32	33	import archmage
33	34

35	36
36	37	# import PyCHM bindings
37	38	try:
38		from chm import chmlib
	39	from chm import chmlib # type: ignore
39	40	except ImportError as msg:
40	41	sys.exit(
41	42	"ImportError: %s\nPlease check README file for system requirements."

69	70	out.append(path)
70	71	return chmlib.CHM_ENUMERATOR_CONTINUE
71	72
72		out = []
	73	out: List[str] = []
73	74	if (
74	75	chmlib.chm_enumerate(
75	76	self._chm, chmlib.CHM_ENUMERATE_ALL, get_name, out

122	123	self.cache = {}
123	124	# Name of source directory with CHM content
124	125	if os.path.isdir(name):
125		self.source = DirSource(name)
	126	self.source: Union[DirSource, FileSource] = DirSource(name)
126	127	else:
127	128	self.source = FileSource(name)
128	129	self.sourcename = name

176	177	return self.cache["image_urls"]
177	178
178	179	def _image_urls(self):
179		out = []
	180	out: List[str] = []
180	181	image_catcher = ImageCatcher()
181	182	for file in self.html_files():
	183	# Use latin-1, as it will accept any byte sequences
182	184	image_catcher.feed(
183	185	Entry(
184	186	self.source, file, self.filename_case, self.restore_framing
185		).correct()
	187	).correct().decode("latin-1")
186	188	)
187	189	for image_url in image_catcher.imgurls:
188	190	if not out.count(image_url):

272	274
273	275	def _toclevels(self):
274	276	counter = TOCCounter()
275		counter.feed(self.topicstree)
	277	# Use latin-1, as it will accept any byte sequences
	278	counter.feed(self.topicstree.decode("latin-1"))
276	279	if counter.count > self.maxtoclvl:
277	280	return self.maxtoclvl
278	281	else:

431	434	self.extract_entry(
432	435	entry=key, output_file=key.lower(), destdir=tempdir
433	436	)
434		htmldoc(files, self.htmldoc_exec, options, self.toclevels, output)
	437	htmldoc(files, self.htmldoc_exec, options, self.toclevels(), output)
435	438	# Remove temporary files
436	439	shutil.rmtree(path=tempdir)
437	440

492	495	data = self.lower_links(data)
493	496
494	497	# Delete unwanted HTML elements.
495		data = re.sub("<div .teamlib\\.gif.\\/div>", "", data)
496		data = re.sub("<a href.*>\\[ Team LiB \\]<\\/a>", "", data)
	498	data = re.sub(b"<div .teamlib\\.gif.\\/div>", b"", data)
	499	data = re.sub(b"<a href.*>\\[ Team LiB \\]<\\/a>", b"", data)
497	500	data = re.sub(
498		"<table.larrow\\.gif.rarrow\\.gif.*<\\/table>", "", data
499		)
500		data = re.sub("<a href.next\\.gif[^>]><\\/a>", "", data)
501		data = re.sub("<a href.previous\\.gif[^>]><\\/a>", "", data)
502		data = re.sub("<a href.prev\\.gif[^>]><\\/a>", "", data)
503		data = re.sub('"[^"]*previous\\.gif"', '""', data)
504		data = re.sub('"[^"]*prev\\.gif"', '""', data)
505		data = re.sub('"[^"]*next\\.gif"', '""', data)
	501	b"<table.larrow\\.gif.rarrow\\.gif.*<\\/table>", b"", data
	502	)
	503	data = re.sub(b"<a href.next\\.gif[^>]><\\/a>", b"", data)
	504	data = re.sub(b"<a href.previous\\.gif[^>]><\\/a>", b"", data)
	505	data = re.sub(b"<a href.prev\\.gif[^>]><\\/a>", b"", data)
	506	data = re.sub(b'"[^"]*previous\\.gif"', b'""', data)
	507	data = re.sub(b'"[^"]*prev\\.gif"', b'""', data)
	508	data = re.sub(b'"[^"]*next\\.gif"', b'""', data)
506	509	if data is not None:
507	510	return data
508	511	else:
509		return ""
	512	return b""
510	513
511	514	def get(self):
512	515	"""Get CHM entry content"""

523	526	if data is not None:
524	527	return data
525	528	else:
526		return ""
	529	return b""

+5

-4

archmage/CHMParser.py less more

1	1	#
2	2	# archmage -- CHM decompressor
3	3	# Copyright (c) 2009 Basil Shubin <bashu@users.sourceforge.net>
4		# Copyright (c) 2015,2019 Mikhail Gusarov <dottedmag@dottedmag.net>
	4	# Copyright (c) 2015-2020 Misha Gusarov <dottedmag@dottedmag.net>
5	5	#
6	6	# This program is free software; you can redistribute it and/or modify it under
7	7	# the terms of the GNU General Public License as published by the Free Software

20	20
21	21	import re
22	22	import mimetypes
23		import sgmllib, urllib.request, urllib.error, urllib.parse
24
25		from bs4 import BeautifulSoup, UnicodeDammit
	23	import sgmllib # type: ignore
	24	import urllib.request, urllib.error, urllib.parse
	25
	26	from bs4 import BeautifulSoup, UnicodeDammit # type: ignore
26	27	from html.parser import HTMLParser
27	28	from urllib.parse import urlparse
28	29

+1

-1

archmage/__init__.py less more

2	2	# archmage -- CHM decompressor
3	3	# Copyright (c) 2003 Eugeny Korekin <aaaz@users.sourceforge.net>
4	4	# Copyright (c) 2005-2009 Basil Shubin <bashu@users.sourceforge.net>
5		# Copyright (c) 2015,2019 Mikhail Gusarov <dottedmag@dottedmag.net>
	5	# Copyright (c) 2015-2020 Misha Gusarov <dottedmag@dottedmag.net>
6	6	#
7	7	# This program is free software; you can redistribute it and/or modify it under
8	8	# the terms of the GNU General Public License as published by the Free Software

+1

-1

archmage/arch.conf less more

55	55
56	56	# CHM2PDF conversion. Use the following command to convert CHM content to a single
57	57	# PDF file. Make sure that htmldoc is available on your system.
58		chmtopdf = '-t pdf14 -f "%(output)s" --book %(toc)s --no-numbered --toctitle "Table of Contents" --title --textcolor "#000000" --linkcolor "#0000ff" --linkstyle plain --size Universal --left 1.00in --right 0.50in --top 0.50in --bottom 0.50in --header .t. --header1 ... --footer h.1 --nup 1 --tocheader .t. --tocfooter ..i --portrait --color --no-pscommands --no-xrxcomments --compression=1 --jpeg=0 --fontsize 11.0 --fontspacing 1.2 --headingfont Helvetica --bodyfont Times --headfootsize 11.0 --headfootfont Helvetica --charset iso-8859-1 --links --embedfonts --pagemode outline --pagelayout single --firstpage c1 --pageeffect none --pageduration 10 --effectduration 1.0 --no-encryption --permissions all --owner-password "" --user-password "" --browserwidth 680 --no-strict --no-overflow --quiet'
	58	chmtopdf = '-t pdf14 -f "%(output)s" --webpage %(toc)s --no-title --no-numbered --toctitle "Table of Contents" --textcolor "#000000" --linkcolor "#0000ff" --linkstyle plain --size Universal --left 1.00in --right 0.50in --top 0.50in --bottom 0.50in --header .t. --header1 ... --footer h.1 --nup 1 --tocheader .t. --tocfooter ..i --portrait --color --no-pscommands --no-xrxcomments --compression=1 --jpeg=0 --fontsize 11.0 --fontspacing 1.2 --headingfont Helvetica --bodyfont Times --headfootsize 11.0 --headfootfont Helvetica --charset iso-8859-1 --links --embedfonts --pagemode outline --pagelayout single --firstpage c1 --pageeffect none --pageduration 10 --effectduration 1.0 --no-encryption --permissions all --owner-password "" --user-password "" --browserwidth 680 --no-strict --no-overflow --quiet'
59	59
60	60	# Maximum Table of Content levels for htmldoc utility.
61	61	#

+1

-1

archmage/cli.py less more

2	2	# archmage -- CHM decompressor
3	3	# Copyright (c) 2003 Eugeny Korekin <aaaz@users.sourceforge.net>
4	4	# Copyright (c) 2005-2009 Basil Shubin <bashu@users.sourceforge.net>
5		# Copyright (c) 2015,2019 Mikhail Gusarov <dottedmag@dottedmag.net>
	5	# Copyright (c) 2015-2020 Misha Gusarov <dottedmag@dottedmag.net>
6	6	#
7	7	# This program is free software; you can redistribute it and/or modify it under
8	8	# the terms of the GNU General Public License as published by the Free Software

+4

-5

archmage/htmldoc.py less more

20	20	"""Generic converter function"""
21	21
22	22	import os
23		import string
24	23	import tempfile
25	24	import subprocess
26	25

41	40	options = options % {"output": output, "toc": toc}
42	41	if input:
43	42	# Create a htmldoc file for batch processing
44		f = tempfile.NamedTemporaryFile(delete=False)
45		f.write("#HTMLDOC 1.8.27\n")
46		f.write(options + "\n")
47		f.write(string.join(input, "\n"))
	43	f = tempfile.NamedTemporaryFile(mode="wb", delete=False)
	44	f.write(b"#HTMLDOC 1.8.27\n")
	45	f.write(options.encode("utf-8") + b"\n")
	46	f.write(b'\n'.join(f.encode('utf-8') for f in input))
48	47	f.close()
49	48	# Prepare command line to execute
50	49	command = "%s --batch %s" % (cmd, f.name)

+7

-3

archmage.egg-info/PKG-INFO less more

0		Metadata-Version: 1.2
	0	Metadata-Version: 2.1
1	1	Name: archmage
2	2	Version: 0.4.2.1
3	3	Summary: CHM decompressor
4	4	Home-page: https://github.com/dottedmag/archmage
5		Maintainer: Mikhail Gusarov
	5	Maintainer: Misha Gusarov
6	6	Maintainer-email: dottedmag@dottedmag.net
7	7	License: GPLv2+
8		Description: arCHMage is a reader and decompressor for CHM format
9	8	Keywords: chm,HTML Help,Compiled HTML,Compressed HTML
10	9	Platform: UNKNOWN
11	10	Classifier: Development Status :: 5 - Production/Stable

14	13	Classifier: Intended Audience :: End Users/Desktop
15	14	Classifier: License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)
16	15	Classifier: Topic :: Text Processing :: Filters
	16	License-File: COPYING
	17	License-File: AUTHORS
	18
	19	arCHMage is a reader and decompressor for CHM format
	20

+1

-0

archmage.egg-info/SOURCES.txt less more

3	3	NEWS
4	4	README.md
5	5	archmage.1
	6	pyproject.toml
6	7	setup.py
7	8	archmage/CHM.py
8	9	archmage/CHMParser.py

+1

-1

archmage.egg-info/requires.txt less more

	0	beautifulsoup4
0	1	pychm
1		beautifulsoup4
2	2	sgmllib3k

+2

-0

pyproject.toml less more

	0	[tool.black]
	1	line-length = 80

+1

-1

setup.py less more

17	17	name="archmage",
18	18	version="0.4.2.1",
19	19	description="CHM decompressor",
20		maintainer="Mikhail Gusarov",
	20	maintainer="Misha Gusarov",
21	21	maintainer_email="dottedmag@dottedmag.net",
22	22	url="https://github.com/dottedmag/archmage",
23	23	license="GPLv2+",