Commit 64c98074ea8fbe3f43f5b64f42223443f0cff54b - archmage

+1

-1

AUTHORS less more

0	0	Copyright (c) 2003 Eugeny Korekin <az@ftc.ru>
1	1	Copyright (c) 2005-2009 Basil Shubin <basil.shubin@gmail.com>
2		Copyright (c) 2015 Mikhail Gusarov <dottedmag@dottedmag.net>
	2	Copyright (c) 2015,2019 Mikhail Gusarov <dottedmag@dottedmag.net>

+3

-4

MANIFEST.in less more

0		include archmod/arch.conf
1		recursive-include archmod/templates .html .css *.gif
2		include AUTHORS COPYING INSTALL NEWS README archmage.1
3		include RELEASE-VERSION version.py
	0	include archmage/arch.conf
	1	recursive-include archmage/templates *.html *.css *.gif
	2	include AUTHORS COPYING NEWS README.md archmage.1

+15

-0

NEWS less more

	0	arCHMage 0.4
	1	============
	2	Changes:
	3
	4	* Works with Python 3.5+ (#10).
	5
	6	Bugfixes:
	7
	8	* Fix HTML conversion under Windows (#6).
	9
	10	Removals:
	11
	12	* mod_chm and option -p were removed. Extract CHM files to the filesystem and
	13	use a real HTTP server to serve them.
	14
0	15	arCHMage 0.3.1
1	16	==============
2	17	Bug fixes:

+4

-4

PKG-INFO less more

0		Metadata-Version: 1.1
	0	Metadata-Version: 1.2
1	1	Name: archmage
2		Version: 0.3.1
	2	Version: 0.4.0
3	3	Summary: CHM decompressor
4	4	Home-page: https://github.com/dottedmag/archmage
5		Author: Mikhail Gusarov
6		Author-email: dottedmag@dottedmag.net
	5	Maintainer: Mikhail Gusarov
	6	Maintainer-email: dottedmag@dottedmag.net
7	7	License: GPLv2+
8	8	Description: arCHMage is a reader and decompressor for CHM format
9	9	Keywords: chm,HTML Help,Compiled HTML,Compressed HTML

+52

-0

README.md less more

	0	arCHMage
	1	========
	2
	3	arCHMage converts CHM files to HTML, plain text and PDF. CHM is the format used
	4	by Microsoft HTML Help, also known as Compiled HTML.
	5
	6	[![Latest Version](https://img.shields.io/pypi/v/archmage.svg)](https://pypi.python.org/pypi/archmage/)
	7	[![Downloads](https://img.shields.io/pypi/dm/archmage.svg)](https://pypi.python.org/pypi/archmage/)
	8	[![License](https://img.shields.io/github/license/dottedmag/archmage.svg)](https://pypi.python.org/pypi/archmage/)
	9
	10	Usage
	11	=====
	12
	13	Extract CHM content into directory
	14	----------------------------------
	15
	16	archmage -x <chmfile> [output directory]
	17
	18	Extraction does not overwrite existing directories.
	19
	20	Dump HTML data from CHM
	21	-----------------------
	22
	23	archmage -d <chmfile>
	24
	25	Convert CHM file into another format
	26	------------------------------------
	27
	28	archmage -c (html|text|pdf) <chmfile> [output file]
	29
	30	This feature requires `htmldoc(1)`, and `lynx(1)` or `elinks(1)` installed.
	31
	32	Installation
	33	============
	34
	35	pip install archmage
	36
	37	Requirements
	38	============
	39
	40	arCHMage has the following dependencies:
	41
	42	* Python 3.5+
	43	* PyCHM
	44	* BeautifulSoup4
	45
	46	Optional dependencies:
	47
	48	* htmldoc - converting to plain text, single HTML, PDF formats
	49	(Debian/Ubuntu: `htmldoc`)
	50	* Lynx or ELinks - converting to plain text
	51	(Debian/Ubuntu: `lynx`)

+0

-1

~~RELEASE-VERSION~~ less more

0

0.3.1

+393

-0

archmage/CHM.py less more

	0	# -- coding: utf-8 --
	1	#
	2	# archmage -- CHM decompressor
	3	# Copyright (c) 2003 Eugeny Korekin <aaaz@users.sourceforge.net>
	4	# Copyright (c) 2005-2009 Basil Shubin <bashu@users.sourceforge.net>
	5	# Copyright (c) 2015,2019 Mikhail Gusarov <dottedmag@dottedmag.net>
	6	#
	7	# This program is free software; you can redistribute it and/or modify it under
	8	# the terms of the GNU General Public License as published by the Free Software
	9	# Foundation; either version 2 of the License, or (at your option) any later
	10	# version.
	11	#
	12	# This program is distributed in the hope that it will be useful, but WITHOUT
	13	# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
	14	# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
	15	# details.
	16	#
	17	# You should have received a copy of the GNU General Public License along with
	18	# this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
	19	# Street, Fifth Floor, Boston, MA 02110-1301, USA.
	20	#
	21
	22	import os
	23	import sys
	24	import re
	25	import shutil
	26	import errno
	27	import string
	28	import tempfile
	29	from enum import Enum, auto
	30
	31	import archmage
	32
	33	from archmage.CHMParser import SitemapFile, PageLister, ImageCatcher, TOCCounter#, HeadersCounter
	34
	35	# import PyCHM bindings
	36	try:
	37	from chm import chmlib
	38	except ImportError as msg:
	39	sys.exit('ImportError: %s\nPlease check README file for system requirements.' % msg)
	40
	41	# External file converters
	42	from archmage.chmtotext import chmtotext
	43	from archmage.htmldoc import htmldoc
	44
	45	class Action(Enum):
	46	EXTRACT = auto()
	47	DUMPHTML = auto()
	48	CHM2TXT = auto()
	49	CHM2HTML = auto()
	50	CHM2PDF = auto()
	51
	52	PARENT_RE = re.compile(r'(^\|/\|\\)\.\.(/\|\\\|$)')
	53
	54	class CHMFile:
	55	"""Class that represent CHM content from directory"""
	56
	57	def __init__(self, name):
	58	self.cache = {}
	59	# Name of source directory with CHM content
	60	self.sourcename = name
	61	self._chm = chmlib.chm_open(name)
	62	# Import variables from config file into namespace
	63	exec(compile(open(archmage.config, "rb").read(), archmage.config, 'exec'), self.__dict__)
	64
	65	# build regexp from the list of auxiliary files
	66	self.aux_re = '\|'.join([ re.escape(s) for s in self.auxes ])
	67
	68	# Get and parse 'Table of Contents'
	69	try:
	70	self.topicstree = self.topics()
	71	except AttributeError:
	72	self.topicstree = None
	73	self.contents = SitemapFile(self.topicstree).parse()
	74
	75	def close(self):
	76	chmlib.chm_close(self._chm)
	77
	78	def entries(self):
	79	if 'entries' not in self.cache:
	80	self.cache['entries'] = self._entries()
	81	return self.cache['entries']
	82
	83	def _entries(self):
	84	def get_name(chmfile, ui, out):
	85	path = ui.path.decode('utf-8')
	86	if path != '/':
	87	out.append(path)
	88	return chmlib.CHM_ENUMERATOR_CONTINUE
	89
	90	out = []
	91	if chmlib.chm_enumerate(self._chm, chmlib.CHM_ENUMERATE_ALL, get_name, out) == 0:
	92	sys.exit('UnknownError: CHMLIB or PyCHM bug?')
	93	return out
	94
	95	# retrieves the list of HTML files contained into the CHM file, in order
	96	# (that's the important bit).
	97	# (actually performed by the PageLister class)
	98	def html_files(self):
	99	if 'html_files' not in self.cache:
	100	self.cache['html_files'] = self._html_files()
	101	return self.cache['html_files']
	102
	103	def _html_files(self):
	104	lister = PageLister()
	105	lister.feed(self.topicstree)
	106	return lister.pages
	107
	108	# retrieves the list of images urls contained into the CHM file.
	109	# (actually performed by the ImageCatcher class)
	110	def image_urls(self):
	111	if 'image_urls' not in self.cache:
	112	self.cache['image_urls'] = self._image_urls()
	113	return self.cache['image_urls']
	114
	115	def _image_urls(self):
	116	out = []
	117	image_catcher = ImageCatcher()
	118	for file in self.html_files():
	119	image_catcher.feed(CHMEntry(self, file).correct())
	120	for image_url in image_catcher.imgurls:
	121	if not out.count(image_url):
	122	out.append(image_url)
	123	return out
	124
	125	# retrieves a dictionary of actual file entries and corresponding urls into the CHM file
	126	def image_files(self):
	127	if 'image_files' not in self.cache:
	128	self.cache['image_files'] = self._image_files()
	129	return self.cache['image_files']
	130
	131	def _image_files(self):
	132	out = {}
	133	for image_url in self.image_urls():
	134	for entry in self.entries():
	135	if re.search(image_url, entry.lower()) and entry.lower() not in out:
	136	out.update({entry : image_url})
	137	return out
	138
	139	# Get topics file
	140	def topics(self):
	141	if 'topics' not in self.cache:
	142	self.cache['topics'] = self._topics()
	143	return self.cache['topics']
	144
	145	def _topics(self):
	146	for e in self.entries():
	147	if e.lower().endswith('.hhc'):
	148	return CHMEntry(self, e, frontpage=self.frontpage()).get()
	149
	150	# use first page as deftopic. Note: without heading slash
	151	def deftopic(self):
	152	if 'deftopic' not in self.cache:
	153	self.cache['deftopic'] = self._deftopic()
	154	return self.cache['deftopic']
	155
	156	def _deftopic(self):
	157	if self.html_files()[0].startswith('/'):
	158	return self.html_files()[0].replace('/', '', 1).lower()
	159	return self.html_files()[0].lower()
	160
	161	# Get frontpage name
	162	def frontpage(self):
	163	if 'frontpage' not in self.cache:
	164	self.cache['frontpage'] = self._frontpage()
	165	return self.cache['frontpage']
	166
	167	def _frontpage(self):
	168	frontpage = os.path.join('/', 'index.html')
	169	index = 2 # index2.html and etc.
	170	for filename in self.entries():
	171	if frontpage == filename:
	172	frontpage = os.path.join('/', ('index%s.html' % index))
	173	index += 1
	174	return frontpage
	175
	176	# Get all templates files
	177	def templates(self):
	178	if 'templates' not in self.cache:
	179	self.cache['templates'] = self._templates()
	180	return self.cache['templates']
	181
	182	def _templates(self):
	183	out = []
	184	for file in os.listdir(self.templates_dir):
	185	if os.path.isfile(os.path.join(self.templates_dir, file)):
	186	if os.path.join('/', file) not in self.entries():
	187	out.append(os.path.join('/', file))
	188	return out
	189
	190	# Get ToC levels
	191	def toclevels(self):
	192	if 'toclevels' not in self.cache:
	193	self.cache['toclevels'] = self._toclevels()
	194	return self.cache['toclevels']
	195
	196	def _toclevels(self):
	197	counter = TOCCounter()
	198	counter.feed(self.topicstree)
	199	if counter.count > self.maxtoclvl:
	200	return self.maxtoclvl
	201	else:
	202	return counter.count
	203
	204	def get_template(self, name):
	205	"""Get template file by its name"""
	206	if name == self.frontpage():
	207	tpl = open(os.path.join(self.templates_dir, 'index.html')).read()
	208	else:
	209	tpl = open(os.path.join(self.templates_dir, os.path.basename(name))).read()
	210	params = {
	211	'title': self.title,
	212	'contents': self.contents,
	213	'deftopic': self.deftopic(),
	214	'bcolor': self.bcolor,
	215	'fcolor': self.fcolor,
	216	}
	217	return string.Template(tpl).substitute(params)
	218
	219	def process_templates(self, destdir="."):
	220	"""Process templates"""
	221	for template in self.templates():
	222	open(os.path.join(destdir, os.path.basename(template)), 'w').write(self.get_template(template))
	223	if self.frontpage() not in self.templates():
	224	open(os.path.join(destdir, os.path.basename(self.frontpage())), 'w').write(self.get_template('index.html'))
	225	if not os.path.exists(os.path.join(destdir, 'icons/')):
	226	shutil.copytree(os.path.join(self.icons_dir), os.path.join(destdir, 'icons/'))
	227
	228	def extract_entry(self, entry, output_file, destdir=".", correct=False):
	229	# process output entry, remove first '/' in entry name
	230	fname = output_file.lower().replace('/', '', 1)
	231	# get directory name for file fname if any
	232	dname = os.path.dirname(os.path.join(destdir, fname))
	233	# if dname is a directory and it's not exist, than create it
	234	if dname and not os.path.exists(dname):
	235	os.makedirs(dname)
	236	# otherwise write a file from CHM entry
	237	if not os.path.isdir(os.path.join(destdir, fname)):
	238	# write CHM entry content into the file, corrected or as is
	239	if correct:
	240	open(os.path.join(destdir, fname), 'wb').write(CHMEntry(self, entry).correct())
	241	else:
	242	open(os.path.join(destdir, fname), 'wb').write(CHMEntry(self, entry).get())
	243
	244	def extract_entries(self, entries=[], destdir=".", correct=False):
	245	"""Extract raw CHM entries into the files"""
	246	for e in entries:
	247	# if entry is auxiliary file, than skip it
	248	if re.match(self.aux_re, e):
	249	continue
	250	if PARENT_RE.search(e):
	251	raise RuntimeError('Giving up on malicious name: %s' % e)
	252	self.extract_entry(e, output_file=e, destdir=destdir, correct=correct)
	253
	254	def extract(self, destdir):
	255	"""Extract CHM file content into FS"""
	256	try:
	257	# Create destination directory
	258	os.mkdir(destdir)
	259	# make raw content extraction
	260	self.extract_entries(entries=self.entries(), destdir=destdir)
	261	# process templates
	262	self.process_templates(destdir=destdir)
	263	except OSError as error:
	264	if error.errno == errno.EEXIST:
	265	sys.exit('%s is already exists' % destdir)
	266
	267	def dump_html(self, output=sys.stdout):
	268	"""Dump HTML data from CHM file into standard output"""
	269	for e in self.html_files():
	270	# if entry is auxiliary file, than skip it
	271	if re.match(self.aux_re, e):
	272	continue
	273	print(CHMEntry(self, e).get(), file=output)
	274
	275	def chm2text(self, output=sys.stdout):
	276	"""Convert CHM into Single Text file"""
	277	for e in self.html_files():
	278	# if entry is auxiliary file, than skip it
	279	if re.match(self.aux_re, e):
	280	continue
	281	# to use this function you should have 'lynx' or 'elinks' installed
	282	chmtotext(input=CHMEntry(self, e).get(), cmd=self.chmtotext, output=output)
	283
	284	def htmldoc(self, output, format=Action.CHM2HTML):
	285	"""CHM to other file formats converter using htmldoc"""
	286	# Extract CHM content into temporary directory
	287	output = output.replace(' ', '_')
	288	tempdir = tempfile.mkdtemp(prefix=output.rsplit('.', 1)[0])
	289	self.extract_entries(entries=self.html_files(), destdir=tempdir, correct=True)
	290	# List of temporary files
	291	files = [ os.path.abspath(tempdir + file.lower()) for file in self.html_files() ]
	292	if format == Action.CHM2HTML:
	293	options = self.chmtohtml
	294	# change output from single html file to a directory with html file and images
	295	if self.image_files():
	296	dirname = archmage.file2dir(output)
	297	if os.path.exists(dirname):
	298	sys.exit('%s is already exists' % dirname)
	299	# Extract image files
	300	os.mkdir(dirname)
	301	# Extract all images
	302	for key, value in list(self.image_files().items()):
	303	self.extract_entry(entry=key, output_file=value, destdir=dirname)
	304	# Fix output file name
	305	output = os.path.join(dirname, output)
	306	elif format == Action.CHM2PDF:
	307	options = self.chmtopdf
	308	if self.image_files():
	309	# Extract all images
	310	for key, value in list(self.image_files().items()):
	311	self.extract_entry(entry=key, output_file=key.lower(), destdir=tempdir)
	312	htmldoc(files, self.htmldoc_exec, options, self.toclevels, output)
	313	# Remove temporary files
	314	shutil.rmtree(path=tempdir)
	315
	316	class CHMEntry(object):
	317	"""Class for CHM file entry"""
	318
	319	def __init__(self, parent, name, frontpage='index.html'):
	320	# parent CHM file
	321	self.parent = parent
	322	# object inside CHM file
	323	self.name = name
	324	# frontpage name to substitute
	325	self.frontpage = os.path.basename(frontpage)
	326
	327	def read(self):
	328	"""Read CHM entry content"""
	329	result, ui = chmlib.chm_resolve_object(self.parent._chm, self.name.encode('utf-8'))
	330	if result != chmlib.CHM_RESOLVE_SUCCESS:
	331	return None
	332
	333	size, content = chmlib.chm_retrieve_object(self.parent._chm, ui, 0, ui.length)
	334	if size == 0:
	335	return None
	336	return content
	337
	338	def lower_links(self, text):
	339	"""Links to lower case"""
	340	return re.sub(b'(?i)(href\|src)\s=\s([^\s\|>]+)', lambda m:m.group(0).lower(), text)
	341
	342	def add_restoreframing_js(self, name, text):
	343	name = re.sub('/+', '/', name)
	344	depth = name.count('/')
	345
	346	js = b"""<body><script language="javascript">
	347	if (window.name != "content")
	348	document.write("<center><a href='%s%s?page=%s'>show framing</a></center>")
	349	</script>""" % ( b'../' * depth, self.frontpage.encode('utf8'), name.encode('utf8') )
	350
	351	return re.sub(b'(?i)<\sbody\s>', js, text)
	352
	353	def correct(self):
	354	"""Get correct CHM entry content"""
	355	data = self.read()
	356	# If entry is a html page?
	357	if re.search('(?i)\.html?$', self.name) and data is not None:
	358	# lower-casing links if needed
	359	if self.parent.filename_case:
	360	data = self.lower_links(data)
	361
	362	# Delete unwanted HTML elements.
	363	data = re.sub('<div .teamlib\.gif.\/div>', '', data)
	364	data = re.sub('<a href.*>\[ Team LiB \]<\/a>', '', data)
	365	data = re.sub('<table.larrow\.gif.rarrow\.gif.*<\/table>', '', data)
	366	data = re.sub('<a href.next\.gif[^>]><\/a>', '' ,data)
	367	data = re.sub('<a href.previous\.gif[^>]><\/a>', '', data)
	368	data = re.sub('<a href.prev\.gif[^>]><\/a>', '', data)
	369	data = re.sub('"[^"]*previous\.gif"', '""', data)
	370	data = re.sub('"[^"]*prev\.gif"', '""', data)
	371	data = re.sub('"[^"]*next\.gif"', '""', data)
	372	if data is not None:
	373	return data
	374	else:
	375	return ''
	376
	377	def get(self):
	378	"""Get CHM entry content"""
	379	# read entry content
	380	data = self.read()
	381	# If entry is a html page?
	382	if re.search('(?i)\.html?$', self.name) and data is not None:
	383	# lower-casing links if needed
	384	if self.parent.filename_case:
	385	data = self.lower_links(data)
	386	# restore framing if that option is set in config file
	387	if self.parent.restore_framing:
	388	data = self.add_restoreframing_js(self.name[1:], data)
	389	if data is not None:
	390	return data
	391	else:
	392	return ''

+223

-0

archmage/CHMParser.py less more

	0	# -- coding: utf-8 --
	1	#
	2	# archmage -- CHM decompressor
	3	# Copyright (c) 2009 Basil Shubin <bashu@users.sourceforge.net>
	4	# Copyright (c) 2015,2019 Mikhail Gusarov <dottedmag@dottedmag.net>
	5	#
	6	# This program is free software; you can redistribute it and/or modify it under
	7	# the terms of the GNU General Public License as published by the Free Software
	8	# Foundation; either version 2 of the License, or (at your option) any later
	9	# version.
	10	#
	11	# This program is distributed in the hope that it will be useful, but WITHOUT
	12	# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
	13	# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
	14	# details.
	15	#
	16	# You should have received a copy of the GNU General Public License along with
	17	# this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
	18	# Street, Fifth Floor, Boston, MA 02110-1301, USA.
	19	#
	20
	21	import re
	22	import mimetypes
	23	import sgmllib, urllib.request, urllib.error, urllib.parse
	24
	25	from bs4 import BeautifulSoup, UnicodeDammit
	26	from html.parser import HTMLParser
	27	from urllib.parse import urlparse
	28
	29	START_TAG = '['
	30	END_TAG = ']'
	31
	32
	33	class SitemapFile(object):
	34	"""Sitemap file class"""
	35
	36	def __init__(self, lines):
	37	# XXX: Cooking tasty beautiful soup ;-)
	38	if lines:
	39	soup = BeautifulSoup(lines, 'html.parser')
	40	lines = soup.prettify()
	41	# XXX: Removing empty tags
	42	lines = re.sub(re.compile(r'<ul>\s*</ul>', re.I \| re.M), '', lines)
	43	lines = re.sub(re.compile(r'<li>\s*</li>', re.I \| re.M), '', lines)
	44	self.lines = lines
	45	else:
	46	self.lines = None
	47
	48	def parse(self):
	49	p = SitemapParser()
	50	if self.lines:
	51	p.feed(self.lines)
	52	# parsed text + last bracket
	53	return (p.parsed + '\n' + END_TAG)
	54
	55
	56	class TagStack(list):
	57	"""from book of David Mertz 'Text Processing in Python'"""
	58
	59	def append(self, tag):
	60	# Remove every paragraph-level tag if this is one
	61	if tag.lower() in ('p', 'blockquote'):
	62	self = TagStack([ t for t in super if t not in ('p', 'blockquote') ])
	63	super(TagStack, self).append(tag)
	64
	65	def pop(self, tag):
	66	# 'Pop' by tag from nearest position, not only last item
	67	self.reverse()
	68	try:
	69	pos = self.index(tag)
	70	except ValueError:
	71	raise Error('Tag not on stack')
	72	self[:] = self[pos + 1:]
	73	self.reverse()
	74
	75
class SitemapParser(sgmllib.SGMLParser):
    """Class for parsing files in SiteMap format, such as .hhc

    The result is accumulated in ``self.parsed`` as nested bracketed lists
    (START_TAG / END_TAG) of ``"name", "local", "imagenumber"`` triples.
    """

    def __init__(self):
        # Stack of currently open tags; empty until the first <ul> is seen.
        self.tagstack = TagStack()
        # True while inside an <object type="text/sitemap"> element.
        self.in_obj = False
        # Values collected from the <param> tags of the current object.
        self.name = self.local = self.param = ""
        self.imagenumber = 1
        # Output text built so far.
        self.parsed = ""
        sgmllib.SGMLParser.__init__(self)

    def unknown_starttag(self, tag, attrs):
        """Handle an opening tag; *attrs* is a list of (name, value) pairs."""
        # first ul, start processing from here
        if tag == 'ul' and not self.tagstack:
            self.tagstack.append(tag)
            # First bracket
            self.parsed += '\n' + START_TAG

        # if inside ul
        elif self.tagstack:
            if tag == 'li':
                # append closing bracket if needed
                if self.tagstack[-1] != 'ul':
                    self.parsed += END_TAG
                    self.tagstack.pop('li')
                # one space of indentation per open tag
                indent = ' ' * len(self.tagstack)

                # separate this item from the previous one
                if self.parsed != '\n' + START_TAG:
                    self.parsed += ', '

                self.parsed += '\n' + indent + START_TAG

            if tag == 'object':
                for x, y in attrs:
                    if x.lower() == 'type' and y.lower() == 'text/sitemap':
                        self.in_obj = True

            if tag.lower() == 'param' and self.in_obj:
                for x, y in attrs:
                    if x.lower() == 'name':
                        # remember which parameter the next 'value' belongs to
                        self.param = y.lower()
                    elif x.lower() == 'value':
                        # only the first 'name' parameter of an object is kept
                        if self.param == 'name' and not len(self.name):
                            # XXX: Remove LF and/or CR signs from name
                            self.name = y.replace('\n', '').replace('\r', '')
                            # XXX: Escape double quotes (the name is emitted
                            # inside a double-quoted string)
                            self.name = self.name.replace('"', '\\"')
                        elif self.param == 'local':
                            # XXX: Change incorrect slashes in url
                            # NOTE(review): the second replace() can never
                            # match, because all backslashes were already
                            # converted to '/' - confirm whether leading
                            # '..\' was meant to be stripped.
                            self.local = y.lower().replace('\\', '/').replace('..\\', '')
                        elif self.param == 'imagenumber':
                            self.imagenumber = y
            self.tagstack.append(tag)

    def unknown_endtag(self, tag):
        """Handle a closing tag."""
        # if inside ul
        if self.tagstack:
            if tag == 'ul':
                self.parsed += END_TAG
            if tag == 'object' and self.in_obj:
                # "Link Name", "URL", "Icon"
                self.parsed += "\"%s\", \"%s\", \"%s\"" % (self.name, self.local, self.imagenumber)
                # Set to default values
                self.in_obj = False
                self.name = self.local = ""
                self.imagenumber = 1
            # 'li' is popped when the next <li> starts (see unknown_starttag)
            if tag != 'li':
                self.tagstack.pop(tag)
	144
	145
	146	class PageLister(sgmllib.SGMLParser):
	147	"""
	148	Parser of the chm.chm GetTopicsTree() method that retrieves the URL of the HTML
	149	page embedded in the CHM file.
	150	"""
	151
	152	def reset(self):
	153	sgmllib.SGMLParser.reset(self)
	154	self.pages = []
	155
	156	def feed(self, data):
	157	sgmllib.SGMLParser.feed(self, UnicodeDammit(data).unicode_markup)
	158
	159	def start_param(self, attrs):
	160	urlparam_flag = False
	161	for key, value in attrs:
	162	if key == 'name' and value.lower() == 'local':
	163	urlparam_flag = True
	164	if urlparam_flag and key == 'value':
	165	# Sometime url has incorrect slashes
	166	value = urllib.parse.unquote(urlparse(value.replace('\\', '/')).geturl())
	167	value = '/' + re.sub("#.*$", '', value)
	168	# Avoid duplicates
	169	if not self.pages.count(value):
	170	self.pages.append(value)
	171
	172
	173	class ImageCatcher(sgmllib.SGMLParser):
	174	"""
	175	Finds image urls in the current html page, so to take them out from the chm file.
	176	"""
	177
	178	def reset(self):
	179	sgmllib.SGMLParser.reset(self)
	180	self.imgurls = []
	181
	182	def start_img(self, attrs):
	183	for key, value in attrs:
	184	if key.lower() == 'src':
	185	# Avoid duplicates in the list of image URLs.
	186	if not self.imgurls.count('/' + value):
	187	self.imgurls.append('/' + value)
	188
	189	def start_a(self, attrs):
	190	for key, value in attrs:
	191	if key.lower() == 'href':
	192	url = urlparse(value)
	193	value = urllib.parse.unquote(url.geturl())
	194	# Remove unwanted crap
	195	value = '/' + re.sub("#.*$", '', value)
	196	# Check file's mimetype
	197	type = mimetypes.guess_type(value)[0]
	198	# Avoid duplicates in the list of image URLs.
	199	if not url.scheme and not self.imgurls.count(value) and \
	200	type and re.search('image/.*', type):
	201	self.imgurls.append(value)
	202
	203
	204	class TOCCounter(HTMLParser):
	205	"""Count Table of Contents levels"""
	206
	207	count = 0
	208
	209	def __init__(self):
	210	self.tagstack = TagStack()
	211	HTMLParser.__init__(self)
	212
	213	def handle_starttag(self, tag, attrs):
	214	self.tagstack.append(tag)
	215
	216	def handle_endtag(self, tag):
	217	if self.tagstack:
	218	if tag.lower() == 'object':
	219	if self.count < self.tagstack.count('param'):
	220	self.count = self.tagstack.count('param')
	221	if tag.lower() != 'li':
	222	self.tagstack.pop(tag)

+37

-0

archmage/__init__.py less more

	0	# -- coding: utf-8 --
	1	#
	2	# archmage -- CHM decompressor
	3	# Copyright (c) 2003 Eugeny Korekin <aaaz@users.sourceforge.net>
	4	# Copyright (c) 2005-2009 Basil Shubin <bashu@users.sourceforge.net>
	5	# Copyright (c) 2015,2019 Mikhail Gusarov <dottedmag@dottedmag.net>
	6	#
	7	# This program is free software; you can redistribute it and/or modify it under
	8	# the terms of the GNU General Public License as published by the Free Software
	9	# Foundation; either version 2 of the License, or (at your option) any later
	10	# version.
	11	#
	12	# This program is distributed in the hope that it will be useful, but WITHOUT
	13	# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
	14	# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
	15	# details.
	16	#
	17	# You should have received a copy of the GNU General Public License along with
	18	# this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
	19	# Street, Fifth Floor, Boston, MA 02110-1301, USA.
	20	#
	21	__all__ = ['CHM']
	22	__version__ = '0.4.0'
	23
	24	import sys, os, pkg_resources
	25
	26	# what config file to use - local or a system wide?
	27	user_config = os.path.join(os.path.expanduser('~'), '.arch.conf')
	28	if os.path.exists(user_config):
	29	config = user_config
	30	else:
	31	config = pkg_resources.resource_filename('archmage', 'arch.conf')
	32
	33	def file2dir(filename):
	34	"""Convert file filename.chm to filename_html directory"""
	35	dirname = filename.rsplit('.', 1)[0] + '_' + 'html'
	36	return dirname

+64

-0

archmage/arch.conf less more

	0	from os.path import basename, join
	1	import pkg_resources
	2
	3	# Directory for templates
	4	templates_dir = pkg_resources.resource_filename('archmage', 'templates/')
	5
	6	# Directory with icons
	7	icons_dir = join(templates_dir, 'icons')
	8
	9	# List of auxiliary files, stored inside CHM file.
	10	# Those files would not be extracted.
	11	auxes = ('/#IDXHDR', '/#ITBITS', '/#STRINGS', '/#SYSTEM', '/#TOPICS',
	12	'/#URLSTR', '/#URLTBL', '/#WINDOWS', '/$FIftiMain', '/$OBJINST',
	13	'/$WWAssociativeLinks', '/$WWKeywordLinks', ':')
	14
	15	# Title. That is value, which you want to see in browser title.
	16	# 'sourcename' is the name of source file.
	17	title = basename(sourcename)
	18
	19	# Background and foreground colors for header.
	20	bcolor = '#63baff'
	21	fcolor = 'white'
	22
	23	# Filenames inside chm stored in utf-8, but links can be in some
	24	# national codepage. If you set fs_encoding such links would be
	25	# converted to it.
	26	#
	27	# Default: fs_encoding = 'utf-8'
	28	fs_encoding = 'utf-8'
	29
	30	# If your filesystem is case-sensitive, links in the HTML may differ
	31	# in case from the names of the files they point to. Set
	32	# filename_case to 1 to convert such links to lower case.
	33	#
	34	# Default: filename_case=1
	35	filename_case = 1
	36
	37	# If you want to add JavaScript code that restores framing to every
	38	# page, set restore_framing to 1.
	39	#
	40	# Default: restore_framing=1
	41	restore_framing = 1
	42
	43	# Path to htmldoc executable
	44	#
	45	htmldoc_exec = '/usr/bin/htmldoc'
	46
	47	# CHM2TEXT converting. Use following command to convert CHM content to plain
	48	# text file. Make sure that below apps are available on your system.
	49	#chmtotext = 'lynx -dump -stdin'
	50	chmtotext = '/usr/bin/elinks -dump'
	51
	52	# CHM2HTML converting. Use following command to convert CHM content to a single
	53	# HTML file. Make sure that htmldoc is available on your system.
	54	chmtohtml = '-t html -f "%(output)s" --book %(toc)s --no-numbered --toctitle "Table of Contents" --title --linkstyle underline --fontsize 11.0 --fontspacing 1.2 --headingfont Helvetica --bodyfont Times --headfootsize 11.0 --headfootfont Helvetica --charset iso-8859-1 --browserwidth 680 --no-strict --no-overflow --quiet'
	55
	56	# CHM2PDF converting. Use following command to convert CHM content to a single
	57	# PDF file. Make sure that htmldoc is available on your system.
	58	chmtopdf = '-t pdf14 -f "%(output)s" --book %(toc)s --no-numbered --toctitle "Table of Contents" --title --textcolor "#000000" --linkcolor "#0000ff" --linkstyle plain --size Universal --left 1.00in --right 0.50in --top 0.50in --bottom 0.50in --header .t. --header1 ... --footer h.1 --nup 1 --tocheader .t. --tocfooter ..i --portrait --color --no-pscommands --no-xrxcomments --compression=1 --jpeg=0 --fontsize 11.0 --fontspacing 1.2 --headingfont Helvetica --bodyfont Times --headfootsize 11.0 --headfootfont Helvetica --charset iso-8859-1 --links --embedfonts --pagemode outline --pagelayout single --firstpage c1 --pageeffect none --pageduration 10 --effectduration 1.0 --no-encryption --permissions all --owner-password "" --user-password "" --browserwidth 680 --no-strict --no-overflow --quiet'
	59
	60	# Maximum Table of Content levels for htmldoc utility.
	61	#
	62	# Default: maxtoclvl = 4
	63	maxtoclvl = 4

+34

-0

archmage/chmtotext.py less more

	0	# -- coding: utf-8 --
	1	#
	2	# archmage -- CHM decompressor
	3	# Copyright (c) 2005-2009 Basil Shubin <bashu@users.sourceforge.net>
	4	#
	5	# This program is free software; you can redistribute it and/or modify it under
	6	# the terms of the GNU General Public License as published by the Free Software
	7	# Foundation; either version 2 of the License, or (at your option) any later
	8	# version.
	9	#
	10	# This program is distributed in the hope that it will be useful, but WITHOUT
	11	# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
	12	# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
	13	# details.
	14	#
	15	# You should have received a copy of the GNU General Public License along with
	16	# this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
	17	# Street, Fifth Floor, Boston, MA 02110-1301, USA.
	18	#
	19
	20	"""CHM to Text converter (using external tool: lynx or elinks)"""
	21
	22	import sys
	23	import signal
	24	from subprocess import Popen, PIPE
	25
	26	if sys.platform != "win32":
	27	signal.signal(signal.SIGPIPE, signal.SIG_DFL)
	28
	29	def chmtotext(input, cmd, output=sys.stdout):
	30	"""CHM to Text converter"""
	31	proc = Popen(cmd, stdin=PIPE, stdout=PIPE, shell=True)
	32	proc.stdin.write(input)
	33	print(proc.communicate()[0], file=output)

+191

-0

archmage/cli.py less more

	0	# -- coding: utf-8 --
	1	#
	2	# archmage -- CHM decompressor
	3	# Copyright (c) 2003 Eugeny Korekin <aaaz@users.sourceforge.net>
	4	# Copyright (c) 2005-2009 Basil Shubin <bashu@users.sourceforge.net>
	5	# Copyright (c) 2015,2019 Mikhail Gusarov <dottedmag@dottedmag.net>
	6	#
	7	# This program is free software; you can redistribute it and/or modify it under
	8	# the terms of the GNU General Public License as published by the Free Software
	9	# Foundation; either version 2 of the License, or (at your option) any later
	10	# version.
	11	#
	12	# This program is distributed in the hope that it will be useful, but WITHOUT
	13	# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
	14	# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
	15	# details.
	16	#
	17	# You should have received a copy of the GNU General Public License along with
	18	# this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
	19	# Street, Fifth Floor, Boston, MA 02110-1301, USA.
	20	#
	21
	22	"""arCHMage -- extensible reader and decompiler for files in the CHM format.
	23
	24	Usage: %(program)s [options] <chmfile> [destdir\|destfile]
	25	Where:
	26
	27	-x / --extract
	28	Extracts CHM file into specified directory. If destination
	29	directory is omitted, then a new one will be created based
	30	on the name of the CHM file. This option is the default.
	31
	32	-c format
	33	--convert=format
	34	Convert CHM file into specified file format. If destination
	35	file is omitted, then a new one will be created based
	36	on the name of the CHM file. Available formats:
	37
	38	html - Single HTML file
	39	text - Plain Text file
	40	pdf - Adobe PDF file format
	41
	42	-d / --dump
	43	Dump HTML data from CHM file into standard output.
	44
	45	-V / --version
	46	Print version number and exit.
	47
	48	-h / --help
	49	Print this text and exit.
	50	"""
	51
	52	import os, sys
	53	import getopt
	54
	55	import archmage
	56	from archmage.CHM import CHMFile, Action
	57
	58	# Return codes
	59	OK = 0
	60	ERROR = 1
	61
	62	program = sys.argv[0]
	63
	64	# Miscellaneous auxiliary functions
	65	def message(code=OK, msg=''):
	66	outfp = sys.stdout
	67	if code == ERROR:
	68	outfp = sys.stderr
	69	if msg:
	70	print(msg, file=outfp)
	71
	72	def usage(code=OK, msg=''):
	73	"""Show application usage and quit"""
	74	message(code, __doc__ % globals())
	75	message(code, msg)
	76	sys.exit(code)
	77
	78	def output_format(mode):
	79	if mode == 'text':
	80	return CHM2TXT
	81	elif mode == 'html':
	82	return CHM2HTML
	83	elif mode == 'pdf':
	84	return CHM2PDF
	85	else:
	86	sys.exit('Invalid output file format: %s' % mode)
	87
	88	def output_file(filename, mode):
	89	"""Convert filename.chm to filename.output"""
	90	if mode == CHM2TXT:
	91	file_ext = 'txt'
	92	elif mode == CHM2HTML:
	93	file_ext = 'html'
	94	elif mode == CHM2PDF:
	95	file_ext = 'pdf'
	96	else:
	97	file_ext = 'output'
	98	output_filename = filename.rsplit('.', 1)[0] + '.' + file_ext
	99	return output_filename
	100
	101	def parseargs():
	102	try:
	103	opts, args = getopt.getopt(sys.argv[1:], 'xc:dp:Vh',
	104	['extract', 'convert=', 'dump', 'port=', 'version', 'help'])
	105	except getopt.error as msg:
	106	usage(ERROR, msg)
	107
	108	class Options:
	109	mode = None # EXTRACT or other
	110	chmfile = None # CHM File to view/extract
	111	output = None # Output file or directory
	112
	113	options = Options()
	114
	115	for opt, arg in opts:
	116	if opt in ('-h', '--help'):
	117	usage()
	118	elif opt in ('-V', '--version'):
	119	message(OK, archmage.__version__)
	120	sys.exit(OK)
	121	elif opt in ('-c', '--convert'):
	122	if options.mode is not None:
	123	sys.exit('-x and -c are mutually exclusive')
	124	options.mode = output_format(str(arg))
	125	elif opt in ('-x', '--extract'):
	126	if options.mode is not None:
	127	sys.exit('-x and -c are mutually exclusive')
	128	options.mode = Action.EXTRACT
	129	elif opt in ('-d', '--dump'):
	130	if options.mode is not None:
	131	sys.exit('-d should be used without any other options')
	132	options.mode = Action.DUMPHTML
	133	else:
	134	assert False, (opt, arg)
	135
	136	# Sanity checks
	137	if options.mode is None:
	138	# Set default option
	139	options.mode = Action.EXTRACT
	140
	141	if not args:
	142	sys.exit('No CHM file was specified!')
	143	else:
	144	# Get CHM file name from command line
	145	options.chmfile = args.pop(0)
	146
	147	# if CHM content should be extracted
	148	if options.mode == Action.EXTRACT:
	149	if not args:
	150	options.output = archmage.file2dir(options.chmfile)
	151	else:
	152	# get output directory from command line
	153	options.output = args.pop(0)
	154	# or converted into another file format
	155	elif options.mode in (Action.CHM2TXT, Action.CHM2HTML, Action.CHM2PDF):
	156	if not args:
	157	options.output = output_file(options.chmfile, options.mode)
	158	else:
	159	# get output filename from command line
	160	options.output = args.pop(0)
	161
	162	# Any other arguments are invalid
	163	if args:
	164	sys.exit('Invalid arguments: ' + ', '.join(args))
	165
	166	return options
	167
	168
	169	def main():
	170	options = parseargs()
	171	if not os.path.exists(options.chmfile):
	172	sys.exit('No such file: %s' % options.chmfile)
	173
	174	if os.path.isdir(options.chmfile):
	175	sys.exit('A regular files is expected, got directory: %s' % options.chmfile)
	176
	177	source = CHMFile(options.chmfile)
	178
	179	if options.mode == Action.DUMPHTML:
	180	source.dump_html()
	181	elif options.mode == Action.CHM2TXT:
	182	if os.path.exists(options.output):
	183	sys.exit('%s is already exists' % options.output)
	184	source.chm2text(open(options.output, 'w'))
	185	elif options.mode in (Action.CHM2HTML, Action.CHM2PDF):
	186	source.htmldoc(options.output, options.mode)
	187	elif options.mode == Action.EXTRACT:
	188	source.extract(options.output)
	189
	190	source.close()

+55

-0

archmage/htmldoc.py less more

	0	# -- coding: utf-8 --
	1	#
	2	# archmage -- CHM decompressor
	3	# Copyright (c) 2009 Basil Shubin <bashu@users.sourceforge.net>
	4	#
	5	# This program is free software; you can redistribute it and/or modify it under
	6	# the terms of the GNU General Public License as published by the Free Software
	7	# Foundation; either version 2 of the License, or (at your option) any later
	8	# version.
	9	#
	10	# This program is distributed in the hope that it will be useful, but WITHOUT
	11	# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
	12	# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
	13	# details.
	14	#
	15	# You should have received a copy of the GNU General Public License along with
	16	# this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
	17	# Street, Fifth Floor, Boston, MA 02110-1301, USA.
	18	#
	19
	20	"""Generic converter function"""
	21
	22	import os
	23	import string
	24	import tempfile
	25	import subprocess
	26	import archmage
	27
	28
	29	def htmldoc(input, cmd, options, toclevels, output):
	30	"""CHM to other format converter
	31
	32	input - list of input html files
	33	cmd - full path to htmldoc command
	34	options - htmldoc options from arch.conf
	35	toclevels - number of ToC levels as htmldoc option
	36	output - output file (single html, ps, pdf and etc)
	37	"""
	38	if toclevels:
	39	toc = ('--toclevels %s' % (toclevels))
	40	else:
	41	toc = ('--no-toc')
	42	options = options % {'output' : output, 'toc' : toc}
	43	if input:
	44	# Create a htmldoc file for batch processing
	45	f = tempfile.NamedTemporaryFile(delete=False)
	46	f.write('#HTMLDOC 1.8.27\n')
	47	f.write(options + '\n')
	48	f.write(string.join(input, '\n'))
	49	f.close()
	50	# Prepare command line to execute
	51	command = '%s --batch %s' % (cmd, f.name)
	52	subprocess.call(command, shell=True)
	53	# Unlink temporary htmldoc file
	54	os.unlink(f.name)

+171

-0

archmage/templates/arch_contents.html less more

	0	<html>
	1	<head>
	2	<title>$title</title>
	3	<LINK rel="Stylesheet" type="text/css" href="arch_css.css">
	4	</head>
	5
	6	<body onload="setInterval('getLoc()', 500);">
	7	<script>
	8	var lastDoc;
	9	var contents = $contents;
	10
	11	var w=window,d=document
	12	var icons={'0' : 'icons/0.gif','1' : 'icons/90.gif',
	13	'2' : 'icons/91.gif', '3' : 'icons/92.gif', '4' : 'icons/99.gif',
	14	'18' : 'icons/93.gif', '19' : 'icons/94.gif', '20' : 'icons/97.gif',
	15	'26' : 'icons/95.gif', '27' : 'icons/96.gif', '28' : 'icons/98.gif'}
	16
	17	var dhtml=true
	18	try{if(d.body.innerHTML.length<=0)dhtml=false}
	19	catch(e){dhtml=false;}
	20	var tree=[];
	21
	22	get_element=d.all ?
	23	function(id){return d.all[id]}
	24	:
	25	function(id){return d.getElementById(id)}
	26
	27	function get_img1(){
	28	return icons[((this.childs.length ? 16 : 0)+(this.childs.length && this.opened ? 8 : 0)+(this.is_last()? 1 : 0)+(this.is_first()? 2 : 0)+2)]
	29	}
	30	function get_img2(){
	31	n=this.cnt[2]
	32	if(n<9){
	33	n=(this.opened ? ( n%2 ? parseInt(n)+1 : n ) : ( n%2 ? n : parseInt(n)-1 ))
	34	}
	35	return 'icons/'+n+'.gif'
	36	}
	37	function node(tree,n){
	38	this.ind=tree.ind+1
	39	this.cnt=tree.cnt[n+(this.ind ? 3 : 0)]
	40	if(!this.cnt)return
	41	this.tree=tree.tree
	42	this.parent=tree
	43	this.opened=!dhtml
	44	this.nind=this.tree.nodes.length
	45	this.tree.nodes[this.nind]=this
	46	tree.childs[n]=this
	47	this.childs=[]
	48	for(var i=0;i < this.cnt.length - 2;i++)
	49	new node(this,i)
	50	this.get_img1=get_img1
	51	this.get_img2=get_img2
	52	this.open=open
	53	this.select=select
	54	this.init=init
	55	this.is_last=function(){
	56	return n==this.parent.childs.length - 1
	57	}
	58	this.is_first=function(){
	59	return(this.ind==0)&&(n==0)&&(!this.is_last())
	60	}
	61	}
	62
	63	function open(){
	64	var childs=[]
	65	var el=get_element('divCont'+this.nind)
	66	if(!el)return
	67	if(!dhtml){
	68	d.write(childs.join(''))
	69	for(var i=0;i < this.childs.length;i++){
	70	d.write(this.childs[i].init())
	71	this.childs[i].open()
	72	}
	73	}
	74	else{
	75	if(!el.innerHTML){
	76	for(var i=0;i < this.childs.length;i++)
	77	childs[i]=this.childs[i].init()
	78	el.innerHTML=childs.join('')
	79	}
	80	el.style.display=(this.opened ? 'none' : 'block')
	81	this.opened=!this.opened
	82	var img1=d.images['img1_'+this.nind],img2=d.images['img2_'+this.nind]
	83	if(img1)img1.src=this.get_img1()
	84	if(img2)img2.src=this.get_img2()
	85	}
	86	}
	87
	88
	89	function select(nind){
	90	if(!nind){
	91	var sel=this.tree.sel
	92	this.tree.sel=this
	93	if(sel)sel.select(true)
	94	}
	95	var img2=d.images['img2_'+this.nind]
	96	if(img2)img2.src=this.get_img2()
	97	get_element('el'+this.nind).style.fontWeight=nind ? 'normal' : 'bold'
	98	return Boolean(this.cnt[1])
	99	}
	100
	101	function init(){
	102	var temp=[],par=this.parent
	103	for(var i=this.ind;i>0;i--){
	104	temp[i]='<img src="'+icons[par.is_last()? 0 : 1]+'" border="0" align="absbottom">'
	105	par=par.parent
	106	}
	107	r='<table cellpadding="0" cellspacing="0" border="0">'
	108	r+='<tr><td nowrap>'
	109	r+=temp.join('')
	110	r+=(this.childs.length ?(!dhtml ? '' : '<a href="javascript: tree.toggle('+this.nind+')" >')+'<img src="'+this.get_img1()+'" border="0" align="absbottom" name="img1_'+this.nind+'">'+(!dhtml ? '' : '</a>'): '<img src="'+this.get_img1()+'" border="0" align="absbottom">')
	111	r+='<a href="'+this.cnt[1]+'" target="'+'content'+'"'+' title="'+this.cnt[0]+'" onclick="return tree.select('+this.nind+')" '+(!dhtml ? '' : ' ondblclick="tree.toggle('+this.nind+')"')+' class="small" id="el'+this.nind+'"><img src="'+this.get_img2()+'" border="0" align="absbottom" name="img2_'+this.nind+'"> '+this.cnt[0]+'</a>'
	112	r+='</td></tr></table>'
	113	r+=(this.childs.length ? '<div id="divCont'+this.nind+'" style="display:none"></div>' : '')
	114	return r
	115	}
	116
	117	function draw_contents(cnt){
	118	tree=this;
	119	tree.cnt=cnt;
	120	tree.tree=this;
	121	tree.nodes=[];
	122	tree.sel=null;
	123	tree.ind=-1;
	124
	125	tree.select=function(i){
	126	return tree.nodes[i].select();
	127	};
	128	tree.toggle=function(i){
	129	tree.nodes[i].open()
	130	};
	131	tree.childs=[]
	132	for(var i=0;i<cnt.length;i++){
	133	new node(tree,i)
	134	}
	135	tree.nind=0;
	136
	137	for(var i=0;i < tree.childs.length;i++){
	138	d.write(tree.childs[i].init());
	139	if(!dhtml)tree.childs[i].open();
	140	}
	141	}
	142
	143
	144	function getLoc(){
	145	var doc = ""+parent.frames[1].location;
	146	if(doc != lastDoc){
	147	var keyVals = new Array();
	148	keyVals = doc.split("\/");
	149	var targetPage = ""+keyVals[(keyVals.length-1)];
	150
	151	if(targetPage.indexOf("\#") > 0){
	152	targetPage = targetPage.substr(0,targetPage.indexOf("\#"));
	153	}
	154
	155	nodeCount = 0;
	156	while( (""+tree.nodes[nodeCount].cnt[1]).lastIndexOf(targetPage) < 0){
	157	nodeCount++;
	158	}
	159	parentNode = tree.nodes[nodeCount].parent;
	160	if(parentNode != tree && parentNode.opened == false){
	161	parentNode.open();
	162	}
	163	tree.nodes[nodeCount].select();
	164	lastDoc = doc;
	165	}
	166	}
	167	new draw_contents(contents);
	168	</script>
	169	</body>
	170	</html>

+2

-0

archmage/templates/arch_css.css less more

	0	.small { font-size: x-small; }
	1	.htable { margin: 0; border: none; padding: 0 }⏎

+26

-0

archmage/templates/arch_frameset.html less more

	0	<html>
	1	<head>
	2	<title>$title</title>
	3
	4	<script>
	5	var qs=location.search.substr(1)
	6	var A=qs.split("&")
	7	var B=null
	8	var F="$deftopic"
	9	for(var i=0;i<A.length;i++){B=A[i].split("=");A[i]=[B[0],B[1]]}
	10	for(var j=0;j<A.length;j++){if(A[j][0]=='page'){ F=A[j][1];break}}
	11	</script >
	12	</head>
	13	<script>
	14	document.write('<frameset cols="200,*" bordercolor="$bcolor" frameborder="yes" framespacing="2" >')
	15	document.write('<frame name="toc" src="arch_contents.html">')
	16	document.write('<frame name="content" src="'+F+'" >')
	17	document.write('</frameset>');
	18	</script>
	19	<noscript>
	20	<frameset cols="200,*" bordercolor="$bcolor" frameborder="yes" framespacing="2" >
	21	<frame name="toc" src="arch_contents.html" >
	22	<frame name="content" src="$deftopic">
	23	</frameset>
	24	</noscript>
	25	</html>

+12

-0

archmage/templates/arch_header.html less more

	0	<html>
	1	<head>
	2	<title>$title</title>
	3	<LINK rel="Stylesheet" type="text/css" href="arch_css.css">
	4	</head>
	5	<body bgcolor="$bcolor">
	6	<table class='htable' cellpadding="0" cellspacing="0" width="100%"><td>
	7	<td align="center" width="100%">
	8	<b><font size="large" color="$fcolor">$title</font></b>
	9	</table>
	10	</body>
	11	</html>

archmage/templates/icons/0.gif less more

Binary diff not shown

archmage/templates/icons/1.gif less more

Binary diff not shown

archmage/templates/icons/10.gif less more

Binary diff not shown

archmage/templates/icons/11.gif less more

Binary diff not shown

archmage/templates/icons/12.gif less more

Binary diff not shown

archmage/templates/icons/13.gif less more

Binary diff not shown

archmage/templates/icons/14.gif less more

Binary diff not shown

archmage/templates/icons/15.gif less more

Binary diff not shown

archmage/templates/icons/16.gif less more

Binary diff not shown

archmage/templates/icons/17.gif less more

Binary diff not shown

archmage/templates/icons/18.gif less more

Binary diff not shown

archmage/templates/icons/19.gif less more

Binary diff not shown

archmage/templates/icons/2.gif less more

Binary diff not shown

archmage/templates/icons/20.gif less more

Binary diff not shown

archmage/templates/icons/21.gif less more

Binary diff not shown

archmage/templates/icons/22.gif less more

Binary diff not shown

archmage/templates/icons/23.gif less more

Binary diff not shown

archmage/templates/icons/24.gif less more

Binary diff not shown

archmage/templates/icons/25.gif less more

Binary diff not shown

archmage/templates/icons/26.gif less more

Binary diff not shown

archmage/templates/icons/27.gif less more

Binary diff not shown

archmage/templates/icons/3.gif less more

Binary diff not shown

archmage/templates/icons/35.gif less more

Binary diff not shown

archmage/templates/icons/37.gif less more

Binary diff not shown

archmage/templates/icons/39.gif less more

Binary diff not shown

archmage/templates/icons/4.gif less more

Binary diff not shown

archmage/templates/icons/5.gif less more

Binary diff not shown

archmage/templates/icons/6.gif less more

Binary diff not shown

archmage/templates/icons/7.gif less more

Binary diff not shown

archmage/templates/icons/8.gif less more

Binary diff not shown

archmage/templates/icons/9.gif less more

Binary diff not shown

archmage/templates/icons/90.gif less more

Binary diff not shown

archmage/templates/icons/91.gif less more

Binary diff not shown

archmage/templates/icons/92.gif less more

Binary diff not shown

archmage/templates/icons/93.gif less more

Binary diff not shown

archmage/templates/icons/94.gif less more

Binary diff not shown

archmage/templates/icons/95.gif less more

Binary diff not shown

archmage/templates/icons/96.gif less more

Binary diff not shown

archmage/templates/icons/97.gif less more

Binary diff not shown

archmage/templates/icons/98.gif less more

Binary diff not shown

archmage/templates/icons/99.gif less more

Binary diff not shown

archmage/templates/icons/next.gif less more

Binary diff not shown

archmage/templates/icons/prev.gif less more

Binary diff not shown

+38

-0

archmage/templates/index.html less more

	0	<html>
	1	<head>
	2	<script>var pageid="";</script>
	3
	4	<title>$title</title>
	5
	6	<script>
	7	var qs=location.search.substr(1);
	8	var A=qs.split("&")
	9	var B=null
	10	var F="$deftopic";
	11	for(var i=0;i<A.length;i++){
	12	B=A[i].split("=")
	13	A[i]=[B[0],B[1]]
	14	}
	15	for(var j=0;j<A.length;j++){
	16	if(A[j][0]=='page'){
	17	F=A[j][1]
	18	break
	19	}
	20	}
	21	</script>
	22	</head>
	23
	24	<script>
	25	document.write('<frameset rows="30,*" frameborder="no" framespacing="0" border="0" >')
	26	document.write('<frame name="header" src="arch_header.html" frameborder="no" noresize="yes" scrolling="no" >')
	27	if(F!='')F='?page='+F
	28	document.write('<frame name="main" src="arch_frameset.html'+F+'">')
	29	document.write('</frameset>')
	30	</script>
	31	<noscript>
	32	<frameset rows="30,*" frameborder="no" framespacing="0" border="0" >
	33	<frame name="header" src="arch_header.html" frameborder="no" noresize="yes" scrolling="no">
	34	<frame name="main" src="arch_frameset.html" >
	35	</frameset>
	36	</noscript>
	37	</html>

+1

-32

archmage.1 less more

20	20	.B archmage
21	21	.I chmfile directory
22	22	.br
23		.B archmage
24		\-p port
25		.I chmfile
26	23	.SH DESCRIPTION
27	24	This manual page documents briefly the
28	25	.B archmage

38	35	chmlib from GnoCHM project.
39	36	.SH USAGE
40	37	.PP
41		There is three ways to use arCHMage package now:
42		.PP
43		1) Extract .chm to directory (directory will be created):
	38	Extract .chm to directory (directory will be created):
44	39
45	40	archmage <chmfile> <directory>
46	41	.PP
47		2) Run as http-server, which will publish chm file contents on
48		specified port:
49
50		archmage \-p <port> <chmfile>
51		.PP
52		3) Tune your apache to publish chm file contents if there is trailing
53		slash in request to that file (you will need working mod_python for
54		that):
55
56		Add that lines to your httpd.conf:
57
58		AddHandler python-program .chm
59		.br
60		PythonHandler archmod.mod_chm
61
62		Restart apache.
63		.PP
64		Let's suppose, you have file sample.chm in DocumentRoot of your
65		apache. After that tuning you can receive raw chm file, if you point
66		your browser to
67
68		http://yourserver/sample.chm
69		.PP
70		or you can view chm file on the fly if you point your browser to
71
72		http://yourserver/sample.chm/ (note trailing slash)
73	42	.SH SEE ALSO
74	43	.PP
75	44	arCHMage Home Page: http://archmage.sf.net

+4

-4

archmage.egg-info/PKG-INFO less more

0		Metadata-Version: 1.1
	0	Metadata-Version: 1.2
1	1	Name: archmage
2		Version: 0.3.1
	2	Version: 0.4.0
3	3	Summary: CHM decompressor
4	4	Home-page: https://github.com/dottedmag/archmage
5		Author: Mikhail Gusarov
6		Author-email: dottedmag@dottedmag.net
	5	Maintainer: Mikhail Gusarov
	6	Maintainer-email: dottedmag@dottedmag.net
7	7	License: GPLv2+
8	8	Description: arCHMage is a reader and decompressor for CHM format
9	9	Keywords: chm,HTML Help,Compiled HTML,Compressed HTML

+56

-60

archmage.egg-info/SOURCES.txt less more

1	1	COPYING
2	2	MANIFEST.in
3	3	NEWS
4		RELEASE-VERSION
	4	README.md
5	5	archmage.1
6	6	setup.py
7		version.py
	7	archmage/CHM.py
	8	archmage/CHMParser.py
	9	archmage/__init__.py
	10	archmage/arch.conf
	11	archmage/chmtotext.py
	12	archmage/cli.py
	13	archmage/htmldoc.py
8	14	archmage.egg-info/PKG-INFO
9	15	archmage.egg-info/SOURCES.txt
10	16	archmage.egg-info/dependency_links.txt
11	17	archmage.egg-info/entry_points.txt
12	18	archmage.egg-info/requires.txt
13	19	archmage.egg-info/top_level.txt
14		archmod/CHM.py
15		archmod/CHMParser.py
16		archmod/CHMServer.py
17		archmod/Cached.py
18		archmod/__init__.py
19		archmod/arch.conf
20		archmod/chmtotext.py
21		archmod/cli.py
22		archmod/htmldoc.py
23		archmod/mod_chm.py
24		archmod/templates/arch_contents.html
25		archmod/templates/arch_css.css
26		archmod/templates/arch_frameset.html
27		archmod/templates/arch_header.html
28		archmod/templates/index.html
29		archmod/templates/icons/0.gif
30		archmod/templates/icons/1.gif
31		archmod/templates/icons/10.gif
32		archmod/templates/icons/11.gif
33		archmod/templates/icons/12.gif
34		archmod/templates/icons/13.gif
35		archmod/templates/icons/14.gif
36		archmod/templates/icons/15.gif
37		archmod/templates/icons/16.gif
38		archmod/templates/icons/17.gif
39		archmod/templates/icons/18.gif
40		archmod/templates/icons/19.gif
41		archmod/templates/icons/2.gif
42		archmod/templates/icons/20.gif
43		archmod/templates/icons/21.gif
44		archmod/templates/icons/22.gif
45		archmod/templates/icons/23.gif
46		archmod/templates/icons/24.gif
47		archmod/templates/icons/25.gif
48		archmod/templates/icons/26.gif
49		archmod/templates/icons/27.gif
50		archmod/templates/icons/3.gif
51		archmod/templates/icons/35.gif
52		archmod/templates/icons/37.gif
53		archmod/templates/icons/39.gif
54		archmod/templates/icons/4.gif
55		archmod/templates/icons/5.gif
56		archmod/templates/icons/6.gif
57		archmod/templates/icons/7.gif
58		archmod/templates/icons/8.gif
59		archmod/templates/icons/9.gif
60		archmod/templates/icons/90.gif
61		archmod/templates/icons/91.gif
62		archmod/templates/icons/92.gif
63		archmod/templates/icons/93.gif
64		archmod/templates/icons/94.gif
65		archmod/templates/icons/95.gif
66		archmod/templates/icons/96.gif
67		archmod/templates/icons/97.gif
68		archmod/templates/icons/98.gif
69		archmod/templates/icons/99.gif
70		archmod/templates/icons/next.gif
71		archmod/templates/icons/prev.gif⏎
	20	archmage/templates/arch_contents.html
	21	archmage/templates/arch_css.css
	22	archmage/templates/arch_frameset.html
	23	archmage/templates/arch_header.html
	24	archmage/templates/index.html
	25	archmage/templates/icons/0.gif
	26	archmage/templates/icons/1.gif
	27	archmage/templates/icons/10.gif
	28	archmage/templates/icons/11.gif
	29	archmage/templates/icons/12.gif
	30	archmage/templates/icons/13.gif
	31	archmage/templates/icons/14.gif
	32	archmage/templates/icons/15.gif
	33	archmage/templates/icons/16.gif
	34	archmage/templates/icons/17.gif
	35	archmage/templates/icons/18.gif
	36	archmage/templates/icons/19.gif
	37	archmage/templates/icons/2.gif
	38	archmage/templates/icons/20.gif
	39	archmage/templates/icons/21.gif
	40	archmage/templates/icons/22.gif
	41	archmage/templates/icons/23.gif
	42	archmage/templates/icons/24.gif
	43	archmage/templates/icons/25.gif
	44	archmage/templates/icons/26.gif
	45	archmage/templates/icons/27.gif
	46	archmage/templates/icons/3.gif
	47	archmage/templates/icons/35.gif
	48	archmage/templates/icons/37.gif
	49	archmage/templates/icons/39.gif
	50	archmage/templates/icons/4.gif
	51	archmage/templates/icons/5.gif
	52	archmage/templates/icons/6.gif
	53	archmage/templates/icons/7.gif
	54	archmage/templates/icons/8.gif
	55	archmage/templates/icons/9.gif
	56	archmage/templates/icons/90.gif
	57	archmage/templates/icons/91.gif
	58	archmage/templates/icons/92.gif
	59	archmage/templates/icons/93.gif
	60	archmage/templates/icons/94.gif
	61	archmage/templates/icons/95.gif
	62	archmage/templates/icons/96.gif
	63	archmage/templates/icons/97.gif
	64	archmage/templates/icons/98.gif
	65	archmage/templates/icons/99.gif
	66	archmage/templates/icons/next.gif
	67	archmage/templates/icons/prev.gif⏎

+1

-1

archmage.egg-info/entry_points.txt less more

0	0	[console_scripts]
1		archmage = archmod.cli:main
	1	archmage = archmage.cli:main
2	2

+2

-1

archmage.egg-info/requires.txt less more

0	0	pychm
1		BeautifulSoup
	1	beautifulsoup4
	2	sgmllib3k

+1

-1

archmage.egg-info/top_level.txt less more

0		archmod
	0	archmage

+0

-393

~~archmod/CHM.py~~ less more

0		# -- coding: utf-8 --
1		#
2		# archmage -- CHM decompressor
3		# Copyright (c) 2003 Eugeny Korekin <aaaz@users.sourceforge.net>
4		# Copyright (c) 2005-2009 Basil Shubin <bashu@users.sourceforge.net>
5		#
6		# This program is free software; you can redistribute it and/or modify it under
7		# the terms of the GNU General Public License as published by the Free Software
8		# Foundation; either version 2 of the License, or (at your option) any later
9		# version.
10		#
11		# This program is distributed in the hope that it will be useful, but WITHOUT
12		# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13		# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
14		# details.
15		#
16		# You should have received a copy of the GNU General Public License along with
17		# this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
18		# Street, Fifth Floor, Boston, MA 02110-1301, USA.
19		#
20
21		import os
22		import sys
23		import re
24		import shutil
25		import errno
26		import string
27		import tempfile
28
29		import archmod
30
31		from archmod.CHMParser import SitemapFile, PageLister, ImageCatcher, TOCCounter#, HeadersCounter
32		from archmod.Cached import Cached
33
34		# import PyCHM bindings
35		try:
36		from chm import chmlib
37		except ImportError, msg:
38		sys.exit('ImportError: %s\nPlease check README file for system requirements.' % msg)
39
40		# External file converters
41		from archmod.chmtotext import chmtotext
42		from archmod.htmldoc import htmldoc
43
44		PARENT_RE = re.compile(r'(^\|/\|\\)\.\.(/\|\\\|$)')
45
46		class CHMDir(Cached):
47		"""Class that represent CHM content from directory"""
48
49		def __init__(self, name):
50		# Name of source directory with CHM content
51		self.sourcename = name
52		# Import variables from config file into namespace
53		execfile(archmod.config, self.__dict__)
54
55		# build regexp from the list of auxiliary files
56		self.aux_re = '\|'.join([ re.escape(s) for s in self.auxes ])
57
58		# Get and parse 'Table of Contents'
59		try:
60		self.topicstree = self.get_entry(self.topics)
61		except AttributeError:
62		self.topicstree = None
63		self.contents = SitemapFile(self.topicstree).parse()
64
65		def _getitem(self, name):
66		# Get all entries
67		if name == 'entries':
68		entries = []
69		for fname in archmod.listdir(self.sourcename):
70		name = '/' + fname
71		if os.path.isdir(self.sourcename + name):
72		name += '/'
73		entries.append(name)
74		return entries
75		# retrieves the list of HTML files contained into the CHM file, in order (that's the important bit).
76		# (actually performed by the PageLister class)
77		if name == 'html_files':
78		lister = PageLister()
79		lister.feed(self.topicstree)
80		return lister.pages
81		# retrieves the list of images urls contained into the CHM file.
82		# (actually performed by the ImageCatcher class)
83		if name == 'image_urls':
84		image_urls = []
85		image_catcher = ImageCatcher()
86		for file in self.html_files:
87		image_catcher.feed(CHMEntry(self, file).correct())
88		for image_url in image_catcher.imgurls:
89		if not image_urls.count(image_url):
90		image_urls.append(image_url)
91		return image_urls
92		# retrieves a dictionary of actual file entries and corresponding urls into the CHM file
93		if name == 'image_files':
94		image_files = {}
95		for image_url in self.image_urls:
96		for entry in self.entries:
97		if re.search(image_url, entry.lower()) and not image_files.has_key(entry.lower()):
98		image_files.update({entry : image_url})
99		return image_files
100		# Get topics file
101		if name == 'topics':
102		for e in self.entries:
103		if e.lower().endswith('.hhc'):
104		return e
105		if name == 'deftopic':
106		# use first page as deftopic. Note: without heading slash
107		if self.html_files[0].startswith('/'):
108		return self.html_files[0].replace('/', '', 1).lower()
109		return self.html_files[0].lower()
110		# Get index file
111		if name == 'index':
112		for e in self.entries:
113		if e.lower().endswith('.hhk'):
114		return e
115		# Get frontpage name
116		if name == 'frontpage':
117		frontpage = os.path.join('/', 'index.html')
118		index = 2 # index2.html and etc.
119		for filename in self.entries:
120		if frontpage == filename:
121		frontpage = os.path.join('/', ('index%s.html' % index))
122		index += 1
123		return frontpage
124		# Get all templates files
125		if name == 'templates':
126		templates = []
127		for file in os.listdir(self.templates_dir):
128		if os.path.isfile(os.path.join(self.templates_dir, file)):
129		if os.path.join('/', file) not in self.entries:
130		templates.append(os.path.join('/', file))
131		return templates
132		# Get ToC levels
133		if name == 'toclevels':
134		counter = TOCCounter()
135		counter.feed(self.topicstree)
136		if counter.count > self.maxtoclvl:
137		return self.maxtoclvl
138		else:
139		return counter.count
140		raise AttributeError(name)
141
142		def get_entry(self, name):
143		"""Get CHM entry by name"""
144		# show index page or any other substitute
145		if name == '/':
146		name = self.frontpage
147		if name in self.templates or name == self.frontpage:
148		return self.get_template(name)
149		if name.lower() in [ os.path.join('/icons', icon.lower()) for icon in os.listdir(self.icons_dir) ]:
150		return open(os.path.join(self.icons_dir, os.path.basename(name))).read()
151		for e in self.entries:
152		if e.lower() == name.lower():
153		return CHMEntry(self, e, frontpage=self.frontpage).get()
154		else:
155		archmod.message(archmod.ERROR, 'NameError: There is no %s' % name)
156
157		def sub_mytag(self, re):
158		"""Replacing tagname with attribute"""
159		try:
160		res = eval('self.' + re.group(1))
161		except:
162		try:
163		res = eval(re.group(1))
164		except:
165		res = ''
166		return res
167
168		def get_template(self, name):
169		"""Get template file by it's name"""
170		if name == self.frontpage:
171		tpl = open(os.path.join(self.templates_dir, os.path.basename('index.html'))).read()
172		else:
173		tpl = open(os.path.join(self.templates_dir, os.path.basename(name))).read()
174		return re.sub('\<%(.+?)%\>', self.sub_mytag, tpl)
175
176		def process_templates(self, destdir="."):
177		"""Process templates"""
178		for template in self.templates:
179		open(os.path.join(destdir, os.path.basename(template)), 'w').write(self.get_template(template))
180		if self.frontpage not in self.templates:
181		open(os.path.join(destdir, os.path.basename(self.frontpage)), 'w').write(self.get_template('index.html'))
182		if not os.path.exists(os.path.join(destdir, 'icons/')):
183		shutil.copytree(os.path.join(self.icons_dir), os.path.join(destdir, 'icons/'))
184
185		def extract_entry(self, entry, output_file, destdir=".", correct=False):
186		# process output entry, remove first '/' in entry name
187		fname = string.lower(output_file).replace('/', '', 1)
188		# get directory name for file fname if any
189		dname = os.path.dirname(os.path.join(destdir, fname))
190		# if dname is a directory and it's not exist, than create it
191		if dname and not os.path.exists(dname):
192		os.makedirs(dname)
193		# otherwise write a file from CHM entry
194		if not os.path.isdir(os.path.join(destdir, fname)):
195		# filename encoding conversion
196		if self.fs_encoding:
197		fname = fname.decode('utf-8').encode(self.fs_encoding)
198		# write CHM entry content into the file, corrected or as is
199		if correct:
200		open(os.path.join(destdir, fname), 'w').writelines(CHMEntry(self, entry).correct())
201		else:
202		open(os.path.join(destdir, fname), 'w').writelines(CHMEntry(self, entry).get())
203
204		def extract_entries(self, entries=[], destdir=".", correct=False):
205		"""Extract raw CHM entries into the files"""
206		for e in entries:
207		# if entry is auxiliary file, than skip it
208		if re.match(self.aux_re, e):
209		continue
210		if PARENT_RE.search(e):
211		raise RuntimeError('Giving up on malicious name: %s' % e)
212		self.extract_entry(e, output_file=e, destdir=destdir, correct=correct)
213
def extract(self, destdir):
    """Extract CHM file content into FS.

    destdir - directory to create and fill; it must not exist yet

    Exits the process with a message when ``destdir`` already exists.
    Any other OSError is propagated to the caller instead of being
    silently dropped.
    """
    # Only the mkdir is guarded, so an EEXIST raised deeper inside the
    # extraction is not mistaken for "destination already exists".
    try:
        os.mkdir(destdir)
    except OSError as error:
        if error.errno == errno.EEXIST:
            sys.exit('%s is already exists' % destdir)
        raise
    # make raw content extraction
    self.extract_entries(entries=self.entries, destdir=destdir)
    # process templates
    self.process_templates(destdir=destdir)
226
def dump_html(self, output=sys.stdout):
    """Print the content of every non-auxiliary HTML entry to ``output``."""
    for page in self.html_files:
        is_auxiliary = re.match(self.aux_re, page)
        if not is_auxiliary:
            print >> output, CHMEntry(self, page).get()
234
def chm2text(self, output=sys.stdout):
    """Convert every non-auxiliary HTML entry to text and send it to ``output``.

    The conversion is delegated to chmtotext() with the command stored in
    ``self.chmtotext`` (an external tool such as 'lynx' or 'elinks').
    """
    for page in self.html_files:
        is_auxiliary = re.match(self.aux_re, page)
        if not is_auxiliary:
            html = CHMEntry(self, page).get()
            chmtotext(input=html, cmd=self.chmtotext, output=output)
243
def htmldoc(self, output, format=archmod.CHM2HTML):
    """CHM to other file formats converter using htmldoc.

    output - name of the file to produce; spaces are replaced by '_'
    format - archmod.CHM2HTML or archmod.CHM2PDF

    For CHM2HTML with image files, the directory returned by
    archmod.file2dir(output) is created, the images are extracted into it
    and the HTML file is written inside it; the process exits if that
    directory already exists.

    Raises ValueError for any other ``format``.
    """
    # Reject unknown formats before any work is done; previously this
    # surfaced later as an unbound ``options`` variable.
    if format == archmod.CHM2HTML:
        options = self.chmtohtml
    elif format == archmod.CHM2PDF:
        options = self.chmtopdf
    else:
        raise ValueError('Unsupported output format: %r' % (format,))

    output = output.replace(' ', '_')
    # mkdtemp() joins the prefix to the temp directory, so only the file
    # name (without directory part and extension) may go into it.
    tempdir = tempfile.mkdtemp(prefix=os.path.basename(output).rsplit('.', 1)[0])
    try:
        # Extract CHM content into temporary directory
        self.extract_entries(entries=self.html_files, destdir=tempdir, correct=True)
        # List of temporary files
        files = [os.path.abspath(tempdir + name.lower()) for name in self.html_files]
        if format == archmod.CHM2HTML:
            # change output from single html file to a directory with html file and images
            if self.image_files:
                dirname = archmod.file2dir(output)
                if os.path.exists(dirname):
                    sys.exit('%s is already exists' % dirname)
                os.mkdir(dirname)
                # Extract all images
                for key, value in self.image_files.items():
                    self.extract_entry(entry=key, output_file=value, destdir=dirname)
                # Fix output file name: dirname already carries the directory
                # part of output, so only the file name is appended.
                output = os.path.join(dirname, os.path.basename(output))
        elif self.image_files:
            # PDF: images are extracted next to the temporary HTML files
            for key in self.image_files:
                self.extract_entry(entry=key, output_file=key.lower(), destdir=tempdir)
        # NOTE(review): expected to resolve to the module-level htmldoc()
        # converter function, not to this method - confirm the import.
        htmldoc(files, self.htmldoc_exec, options, self.toclevels, output)
    finally:
        # Remove temporary files even when the conversion exits or fails
        shutil.rmtree(path=tempdir)
275
276
class CHMFile(CHMDir):
    """CHMDir variant that reads its content from a CHM file through chmlib."""

    def _getitem(self, name):
        """Compute the 'entries' and '_handler' attributes; defer the rest to CHMDir."""
        if name == '_handler':
            return chmlib.chm_open(self.sourcename)
        if name == 'entries':
            # every object name reported by chmlib except the root '/'
            return [entry for entry in self._get_names(self._handler) if entry != '/']
        return super(CHMFile, self)._getitem(name)

    def __delattr__(self, name):
        """Close the chmlib handle when the '_handler' attribute is deleted."""
        if name == '_handler':
            chmlib.chm_close(self._handler)
        return super(CHMFile, self).__delattr__(name)

    def _get_names(self, chmfile):
        """Return the paths of all objects chmlib enumerates in ``chmfile``.

        Exits the process when chm_enumerate() returns 0.
        """
        names = []

        def collect(chmfile, ui, content):
            # enumeration callback: remember the path and keep going
            content.append(ui.path)
            return chmlib.CHM_ENUMERATOR_CONTINUE

        status = chmlib.chm_enumerate(chmfile, chmlib.CHM_ENUMERATE_ALL, collect, names)
        if status == 0:
            sys.exit('UnknownError: CHMLIB or PyCHM bug?')
        return names
310
311
class CHMEntry(object):
    """Class for CHM file entry"""

    def __init__(self, parent, name, frontpage='index.html'):
        """Remember where the entry lives.

        parent    - CHMFile or CHMDir instance the entry belongs to
        name      - name of the object inside the CHM source
        frontpage - frontpage name to substitute; only its base name is kept
        """
        self.parent = parent
        self.name = name
        self.frontpage = os.path.basename(frontpage)

    def read(self):
        """Read CHM entry content.

        Returns the raw content, or None when the parent is a CHMFile and
        the object cannot be resolved or is empty.
        """
        if isinstance(self.parent, CHMFile):
            result, ui = chmlib.chm_resolve_object(self.parent._handler, self.name)
            if result != chmlib.CHM_RESOLVE_SUCCESS:
                return None
            size, content = chmlib.chm_retrieve_object(self.parent._handler, ui, 0, ui.length)
            if size == 0:
                return None
            return content
        # Parent is a directory with already extracted content. Binary mode
        # and the with-block keep the bytes intact and close the handle.
        with open(self.parent.sourcename + self.name, 'rb') as source:
            return source.read()

    def lower_links(self, text):
        """Return ``text`` with every href=/src= attribute lower-cased."""
        # optional whitespace around '='; the value runs up to whitespace, '|' or '>'
        return re.sub(r'(?i)(href|src)\s*=\s*([^\s|>]+)',
                      lambda m: m.group(0).lower(), text)

    def add_restoreframing_js(self, name, text):
        """Replace each bare <body> tag by <body> plus a "show framing" script.

        name - entry name without leading '/'; its depth decides how many
               '../' are needed to reach the frontpage
        text - HTML text to patch
        """
        name = re.sub('/+', '/', name)
        depth = name.count('/')

        js = """<body><script language="javascript">
if ((window.name != "content") && (navigator.userAgent.indexOf("Opera") <= -1) )
document.write("<center><a href='%s%s?page=%s'>show framing</a></center>")
</script>""" % ('../' * depth, self.frontpage, name)

        # A callable replacement stops re.sub from interpreting backslashes
        # that may occur in the generated script.
        return re.sub(r'(?i)<\s*body\s*>', lambda m: js, text)

    def _is_html(self):
        """Tell whether the entry name ends in .htm or .html (any case)."""
        return re.search(r'(?i)\.html?$', self.name) is not None

    def correct(self):
        """Get CHM entry content with navigation clutter removed.

        HTML entries get their links lower-cased (if the parent's
        ``filename_case`` is set) and the "Team LiB" banners and
        previous/next arrows stripped. Other entries are returned as read.
        Returns '' when the entry cannot be read.
        """
        data = self.read()
        if data is None:
            return ''
        if self._is_html():
            # lower-casing links if needed
            if self.parent.filename_case:
                data = self.lower_links(data)

            # Delete unwanted HTML elements.
            data = re.sub(r'<div .*teamlib\.gif.*\/div>', '', data)
            data = re.sub(r'<a href.*>\[ Team LiB \]<\/a>', '', data)
            data = re.sub(r'<table.*larrow\.gif.*rarrow\.gif.*<\/table>', '', data)
            data = re.sub(r'<a href.*next\.gif[^>]*><\/a>', '', data)
            data = re.sub(r'<a href.*previous\.gif[^>]*><\/a>', '', data)
            data = re.sub(r'<a href.*prev\.gif[^>]*><\/a>', '', data)
            data = re.sub(r'"[^"]*previous\.gif"', '""', data)
            data = re.sub(r'"[^"]*prev\.gif"', '""', data)
            data = re.sub(r'"[^"]*next\.gif"', '""', data)
        return data

    def get(self):
        """Get CHM entry content.

        HTML entries get their links lower-cased (if the parent's
        ``filename_case`` is set) and the restore-framing script added (if
        the parent's ``restore_framing`` is set). Returns '' when the entry
        cannot be read.
        """
        data = self.read()
        if data is None:
            return ''
        if self._is_html():
            # lower-casing links if needed
            if self.parent.filename_case:
                data = self.lower_links(data)
            # restore framing if that option is set in config file
            if self.parent.restore_framing:
                data = self.add_restoreframing_js(self.name[1:], data)
        return data

+0

-221

~~archmod/CHMParser.py~~ less more

0		# -- coding: utf-8 --
1		#
2		# archmage -- CHM decompressor
3		# Copyright (c) 2009 Basil Shubin <bashu@users.sourceforge.net>
4		#
5		# This program is free software; you can redistribute it and/or modify it under
6		# the terms of the GNU General Public License as published by the Free Software
7		# Foundation; either version 2 of the License, or (at your option) any later
8		# version.
9		#
10		# This program is distributed in the hope that it will be useful, but WITHOUT
11		# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12		# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
13		# details.
14		#
15		# You should have received a copy of the GNU General Public License along with
16		# this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
17		# Street, Fifth Floor, Boston, MA 02110-1301, USA.
18		#
19
20		import re
21		import mimetypes
22		import sgmllib, urllib2
23
24		from BeautifulSoup import BeautifulSoup
25		from HTMLParser import HTMLParser, HTMLParseError
26		from urlparse import urlparse
27
28		from archmod import COMMASPACE, LF, CR
29
30		START_TAG = '['
31		END_TAG = ']'
32
33
class SitemapFile(object):
    """Sitemap file class"""

    # <ul>/<li> elements that contain nothing but whitespace; compiled once
    _EMPTY_UL = re.compile(r'<ul>\s*</ul>', re.I | re.M)
    _EMPTY_LI = re.compile(r'<li>\s*</li>', re.I | re.M)

    def __init__(self, lines):
        """Normalise the sitemap markup.

        lines - sitemap source text; a false value (None, '') leaves
                ``self.lines`` set to None
        """
        if lines:
            # XXX: Cooking tasty beautiful soup ;-)
            soup = BeautifulSoup(lines)
            lines = soup.prettify()
            # XXX: Removing empty tags
            lines = self._EMPTY_UL.sub('', lines)
            lines = self._EMPTY_LI.sub('', lines)
            self.lines = lines
        else:
            self.lines = None

    def parse(self):
        """Feed the markup to a SitemapParser and return its output.

        The returned string is the parser's ``parsed`` text followed by a
        line feed and the closing END_TAG.
        """
        p = SitemapParser()
        if self.lines:
            p.feed(self.lines)
        # parsed text + last bracket
        return (p.parsed + LF + END_TAG)
55
56
class TagStack(list):
    """from book of David Mertz 'Text Processing in Python'"""

    def append(self, tag):
        """Push ``tag``; a paragraph-level tag first evicts earlier ones."""
        # Remove every paragraph-level tag if this is one. The list is
        # modified in place: rebinding ``self`` would not change the stack.
        if tag.lower() in ('p', 'blockquote'):
            self[:] = [t for t in self if t not in ('p', 'blockquote')]
        super(TagStack, self).append(tag)

    def pop(self, tag):
        """Remove the nearest ``tag`` and everything pushed after it.

        Raises HTMLParseError when ``tag`` is not on the stack; the stack is
        left untouched in that case.
        """
        # 'Pop' by tag from nearest position, not only last item
        for pos in range(len(self) - 1, -1, -1):
            if self[pos] == tag:
                del self[pos:]
                return
        raise HTMLParseError('Tag not on stack')
75
76
class SitemapParser(sgmllib.SGMLParser):
    """Class for parsing files in SiteMap format, such as .hhc

    The parser accumulates a bracketed text representation of the sitemap
    in ``self.parsed``: every list item becomes a START_TAG/END_TAG pair
    holding the quoted name, URL and icon number of its sitemap object.
    """

    def __init__(self):
        # stack of currently open tags; empty until the first <ul> is seen
        self.tagstack = TagStack()
        # True while inside an <object type="text/sitemap"> element
        self.in_obj = False
        # values collected from the <param> elements of the current object
        self.name = self.local = self.param = ""
        self.imagenumber = 1
        # output text built so far
        self.parsed = ""
        sgmllib.SGMLParser.__init__(self)

    def unknown_starttag(self, tag, attrs):
        """Handle an opening tag; ``attrs`` is a list of (name, value) pairs."""
        # first ul, start processing from here
        if tag == 'ul' and not self.tagstack:
            self.tagstack.append(tag)
            # First bracket
            self.parsed += LF + START_TAG

        # if inside ul
        elif self.tagstack:
            if tag == 'li':
                # append closing bracket if needed
                if self.tagstack[-1] != 'ul':
                    self.parsed += END_TAG
                    self.tagstack.pop('li')
                # one space of indentation per open tag
                indent = ' ' * len(self.tagstack)

                # separate this item from the previous one
                if self.parsed != LF + START_TAG:
                    self.parsed += COMMASPACE

                self.parsed += LF + indent + START_TAG

            if tag == 'object':
                for x, y in attrs:
                    if x.lower() == 'type' and y.lower() == 'text/sitemap':
                        self.in_obj = True

            if tag.lower() == 'param' and self.in_obj:
                for x, y in attrs:
                    if x.lower() == 'name':
                        # remember which parameter the next 'value' belongs to
                        self.param = y.lower()
                    elif x.lower() == 'value':
                        # only the first name of an object is kept
                        if self.param == 'name' and not len(self.name):
                            # XXX: Remove LF and/or CR signs from name
                            self.name = y.replace(LF, '').replace(CR, '')
                            # XXX: Escaping double quotes with a backslash
                            self.name = self.name.replace('"', '\\"')
                        elif self.param == 'local':
                            # XXX: Change incorrect slashes in url
                            # NOTE(review): the second replace() can never
                            # match, all backslashes are already gone - confirm
                            # whether '..\' was meant to be stripped first.
                            self.local = y.lower().replace('\\', '/').replace('..\\', '')
                        elif self.param == 'imagenumber':
                            self.imagenumber = y
            self.tagstack.append(tag)

    def unknown_endtag(self, tag):
        """Handle a closing tag."""
        # if inside ul
        if self.tagstack:
            if tag == 'ul':
                self.parsed += END_TAG
            if tag == 'object' and self.in_obj:
                # "Link Name", "URL", "Icon"
                self.parsed += "\"%s\", \"%s\", \"%s\"" % (self.name, self.local, self.imagenumber)
                # Set to default values
                self.in_obj = False
                self.name = self.local = ""
                self.imagenumber = 1
            # 'li' is popped when the next item starts (see unknown_starttag)
            if tag != 'li':
                self.tagstack.pop(tag)
145
146
147		class PageLister(sgmllib.SGMLParser):
148		"""
149		Parser of the chm.chm GetTopicsTree() method that retrieves the URL of the HTML
150		page embedded in the CHM file.
151		"""
152
153		def reset(self):
154		sgmllib.SGMLParser.reset(self)
155		self.pages = []
156
157		def start_param(self, attrs):
158		urlparam_flag = False
159		for key, value in attrs:
160		if key == 'name' and value.lower() == 'local':
161		urlparam_flag = True
162		if urlparam_flag and key == 'value':
163		# Sometime url has incorrect slashes
164		value = urllib2.unquote(urlparse(value.replace('\\', '/')).geturl())
165		value = '/' + re.sub("#.*$", '', value)
166		# Avoid duplicates
167		if not self.pages.count(value):
168		self.pages.append(value)
169
170
171		class ImageCatcher(sgmllib.SGMLParser):
172		"""
173		Finds image urls in the current html page, so to take them out from the chm file.
174		"""
175
176		def reset(self):
177		sgmllib.SGMLParser.reset(self)
178		self.imgurls = []
179
180		def start_img(self, attrs):
181		for key, value in attrs:
182		if key.lower() == 'src':
183		# Avoid duplicates in the list of image URLs.
184		if not self.imgurls.count('/' + value):
185		self.imgurls.append('/' + value)
186
187		def start_a(self, attrs):
188		for key, value in attrs:
189		if key.lower() == 'href':
190		url = urlparse(value)
191		value = urllib2.unquote(url.geturl())
192		# Remove unwanted crap
193		value = '/' + re.sub("#.*$", '', value)
194		# Check file's mimetype
195		type = mimetypes.guess_type(value)[0]
196		# Avoid duplicates in the list of image URLs.
197		if not url.scheme and not self.imgurls.count(value) and \
198		type and re.search('image/.*', type):
199		self.imgurls.append(value)
200
201
class TOCCounter(HTMLParser):
    """Count Table of Contents levels"""

    # largest number of 'param' tags seen on the stack when an object closes
    count = 0

    def __init__(self):
        self.tagstack = TagStack()
        HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Push every opening tag on the stack."""
        self.tagstack.append(tag)

    def handle_endtag(self, tag):
        """Update ``count`` on </object> and pop every closing tag except 'li'."""
        if not self.tagstack:
            return
        lowered = tag.lower()
        if lowered == 'object':
            params = self.tagstack.count('param')
            if self.count < params:
                self.count = params
        if lowered != 'li':
            self.tagstack.pop(tag)

+0

-61

~~archmod/CHMServer.py~~ less more

0		# -- coding: utf-8 --
1		#
2		# archmage -- CHM decompressor
3		# Copyright (c) 2003 Eugeny Korekin <aaaz@users.sourceforge.net>
4		# Copyright (c) 2005-2009 Basil Shubin <bashu@users.sourceforge.net>
5		#
6		# This program is free software; you can redistribute it and/or modify it under
7		# the terms of the GNU General Public License as published by the Free Software
8		# Foundation; either version 2 of the License, or (at your option) any later
9		# version.
10		#
11		# This program is distributed in the hope that it will be useful, but WITHOUT
12		# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13		# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
14		# details.
15		#
16		# You should have received a copy of the GNU General Public License along with
17		# this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
18		# Street, Fifth Floor, Boston, MA 02110-1301, USA.
19		#
20
21		import urllib
22		import mimetypes
23
24		from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
25
26		import archmod
27
28
class CHMServer(HTTPServer):
    """HTTP Server that handle Compressed HTML"""

    def __init__(self, CHM, name='', port=8000):
        """Create the wrapped HTTPServer.

        CHM  - object stored on the wrapped server as its ``CHM`` attribute
        name - host name to bind to
        port - TCP port to listen on
        """
        address = (name, port)
        server = HTTPServer(address, CHMRequestHandler)
        server.CHM = CHM
        self.address = address
        self.httpd = server

    def run(self):
        """Serve requests until the process is interrupted."""
        self.httpd.serve_forever()
40
41
class CHMRequestHandler(BaseHTTPRequestHandler):
    """This class handle HTTP request for CHMServer"""

    def do_GET(self):
        """Answer a GET request with the CHM entry named by the request path."""
        # drop the query string, then undo the URL quoting
        pagename = urllib.unquote(self.path.split('?')[0])
        if pagename == '/':
            mimetype = 'text/html'
        else:
            # guess_type() yields None for unknown extensions; fall back to a
            # generic binary type so the header never reads "None"
            mimetype = mimetypes.guess_type(pagename)[0] or 'application/octet-stream'

        self.send_response(200)
        self.send_header('Content-type', mimetype)
        self.end_headers()

        # get html data from CHM instance and write it into output
        try:
            self.wfile.write(self.server.CHM.get_entry(pagename))
        except NameError as msg:
            archmod.message(archmod.ERROR, 'NameError: %s' % msg)

+0

-48

~~archmod/Cached.py~~ less more

0		# -- coding: utf-8 --
1		#
2		# archmage -- CHM decompressor
3		# Copyright (c) 2009 Basil Shubin <bashu@users.sourceforge.net>
4		#
5		# This program is free software; you can redistribute it and/or modify it under
6		# the terms of the GNU General Public License as published by the Free Software
7		# Foundation; either version 2 of the License, or (at your option) any later
8		# version.
9		#
10		# This program is distributed in the hope that it will be useful, but WITHOUT
11		# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12		# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
13		# details.
14		#
15		# You should have received a copy of the GNU General Public License along with
16		# this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
17		# Street, Fifth Floor, Boston, MA 02110-1301, USA.
18		#
19
class Cached(object):
    """Provides caching storage for data access decoration.

    Usage:
        class CachedClass(Cached):
            def _getitem(self, name):
                # implement data getting routine, such as db access

        CachedClass().attribute1 # returns value as if _getitem('attribute1') was called
        CachedClass().attribute2 # returns value as if _getitem('attribute2') was called
        CachedClass().__doc__ # returns real docstring
    """

    def __new__(classtype, *args, **kwargs):
        # Constructor arguments belong to __init__; object.__new__() accepts
        # none, so they are deliberately not forwarded.
        instance = object.__new__(classtype)
        # per-instance storage of the values computed by _getitem()
        instance.cache = {}
        return instance

    # to be implemented by contract in the descendant classes
    def _getitem(self, name):
        """Compute the value of attribute ``name``; subclasses must override."""
        raise NotImplementedError(name)

    def __getattribute__(self, name):
        """Return the real attribute, or the cached result of _getitem(name)."""
        try:
            return object.__getattribute__(self, name)
        except AttributeError:
            # not a real attribute: compute it once and remember it
            if name not in self.cache:
                self.cache[name] = self._getitem(name)
            return self.cache[name]

+0

-100

~~archmod/__init__.py~~ less more

0		# -- coding: utf-8 --
1		#
2		# archmage -- CHM decompressor
3		# Copyright (c) 2003 Eugeny Korekin <aaaz@users.sourceforge.net>
4		# Copyright (c) 2005-2009 Basil Shubin <bashu@users.sourceforge.net>
5		#
6		# This program is free software; you can redistribute it and/or modify it under
7		# the terms of the GNU General Public License as published by the Free Software
8		# Foundation; either version 2 of the License, or (at your option) any later
9		# version.
10		#
11		# This program is distributed in the hope that it will be useful, but WITHOUT
12		# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13		# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
14		# details.
15		#
16		# You should have received a copy of the GNU General Public License along with
17		# this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
18		# Street, Fifth Floor, Boston, MA 02110-1301, USA.
19		#
20		__all__ = ['CHM', 'CHMServer', 'mod_chm']
21		__version__ = '0.2.4'
22
23		import sys, os, pkg_resources
24
# Return codes
OK = 0
ERROR = 1

# Operating modes
EXTRACT = 1 # Extract CHM content
HTTPSERVER = 2 # Act as standalone HTTP server
DUMPHTML = 3 # Dump HTML data from CHM file
CHM2TXT = 4 # Convert CHM file into Single Text file
CHM2HTML = 5 # Convert CHM file into Single HTML file
CHM2PDF = 6 # Convert CHM file into PDF Document
#CHM2PS = 7 # Convert CHM file into PostScript file

# Special characters
COMMASPACE = ', '
LF = '\n'
CR = '\r'

# Which config file to use: the user's own ~/.arch.conf if it exists,
# otherwise the arch.conf shipped inside the package.
user_config = os.path.join(os.path.expanduser('~'), '.arch.conf')
if os.path.exists(user_config):
    config = user_config
else:
    config = pkg_resources.resource_filename('archmod', 'arch.conf')
49
50		# Miscellaneous auxiliary functions
def message(code=OK, msg=''):
    """Print ``msg`` to stderr when ``code`` is ERROR, otherwise to stdout.

    Nothing is printed for an empty message.
    """
    if not msg:
        return
    outfp = sys.stderr if code == ERROR else sys.stdout
    print >> outfp, msg
57
def file2dir(filename):
    """Convert file filename.chm to filename_html directory"""
    # everything before the last '.', or the whole name when there is none
    stem = filename.rsplit('.', 1)[0]
    return '%s_html' % stem
62
def output_format(mode):
    """Map a format name ('text', 'html' or 'pdf') to its CHM2* constant.

    Exits the process with an error message for any other name.
    """
    known_formats = (
        ('text', CHM2TXT),
        ('html', CHM2HTML),
        ('pdf', CHM2PDF),
        # ('ps', CHM2PS),
    )
    for name, constant in known_formats:
        if mode == name:
            return constant
    sys.exit('Invalid output file format: %s' % mode)
74
def output_file(filename, mode):
    """Convert filename.chm to filename.output

    The extension is 'txt', 'html' or 'pdf' for the matching CHM2* mode and
    'output' for every other mode.
    """
    extensions = (
        (CHM2TXT, 'txt'),
        (CHM2HTML, 'html'),
        (CHM2PDF, 'pdf'),
        # (CHM2PS, 'ps'),
    )
    file_ext = 'output'
    for known_mode, extension in extensions:
        if mode == known_mode:
            file_ext = extension
            break
    stem = filename.rsplit('.', 1)[0]
    return stem + '.' + file_ext
89
90		# Our own listdir method :)
def listdir(dir):
    """Return the names of everything below ``dir``, recursively.

    Files and directories are both listed. Names are relative to ``dir``
    and always use '/' as separator, e.g. ['a.txt', 'sub', 'sub/b.txt'].
    The entries of a directory come before the content of its
    subdirectories. Directories that cannot be listed are skipped and
    symbolic links to directories are listed but not descended into.
    """
    res = []

    def visit(path, prefix):
        try:
            names = os.listdir(path)
        except OSError:
            return
        res.extend(prefix + name for name in names)
        for name in names:
            child = os.path.join(path, name)
            # same test as lstat() + S_ISDIR: real directories only
            if os.path.isdir(child) and not os.path.islink(child):
                # The prefix is tracked explicitly instead of being cut out
                # of the path, so absolute and nested ``dir`` values work.
                visit(child, prefix + name + '/')

    visit(dir, '')
    return res

+0

-74

~~archmod/arch.conf~~ less more

0		# Directory for templates. All files in that directory will be parsed
1		# and <%.+%> occurrences will be replaced with values from this
2		# file. For example, <%title%> will be substituted by the value of the
3		# title variable.
4		# There are also some special variables which have default values:
5		# contents - a list which represents the chm file contents, and
6		# deftopic - the name of the default page.
7		from os.path import basename, join
8		import pkg_resources
9
10		templates_dir = pkg_resources.resource_filename('archmod', 'templates/')
11
12		# Directory with icons
13		icons_dir = join(templates_dir, 'icons')
14
15		# List of auxiliary files, stored inside CHM file.
16		# Those files would not be extracted.
17		auxes = ('/#IDXHDR', '/#ITBITS', '/#STRINGS', '/#SYSTEM', '/#TOPICS',
18		'/#URLSTR', '/#URLTBL', '/#WINDOWS', '/$FIftiMain', '/$OBJINST',
19		'/$WWAssociativeLinks', '/$WWKeywordLinks', ':')
20
21		# Title. That is value, which you want to see in browser title.
22		# 'sourcename' is the name of source file.
23		title = basename(sourcename)
24
25		# Background and foreground colors for header.
26		bcolor = '#63baff'
27		fcolor = 'white'
28
29		# Filenames inside a chm are stored in utf-8, but links can be in some
30		# national codepage. If you set fs_encoding, such links will be
31		# converted to it.
32		#
33		# Default: fs_encoding = 'utf-8'
34		fs_encoding = 'utf-8'
35
36		# If your filesystem is case-sensitive, links in the html can point to
37		# files whose names differ from the link only in case. Set
38		# filename_case to 1 in that case :-)
39		#
40		# Default: filename_case=1
41		filename_case = 1
42
43		# If you want to add javascript code that restores framing to every
44		# page, set restore_framing.
45		#
46		# Default: restore_framing=1
47		restore_framing = 1
48
49		# Path to htmldoc executable
50		#
51		htmldoc_exec = '/usr/bin/htmldoc'
52
53		# CHM2TEXT converting. Use following command to convert CHM content to plain
54		# text file. Make sure that below apps are available on your system.
55		#chmtotext = 'lynx -dump -stdin'
56		chmtotext = '/usr/bin/elinks -dump'
57
58		# CHM2HTML converting. Use following command to convert CHM content to a single
59		# HTML file. Make sure that htmldoc is available on your system.
60		chmtohtml = '-t html -f "%(output)s" --book %(toc)s --no-numbered --toctitle "Table of Contents" --title --linkstyle underline --fontsize 11.0 --fontspacing 1.2 --headingfont Helvetica --bodyfont Times --headfootsize 11.0 --headfootfont Helvetica --charset iso-8859-1 --browserwidth 680 --no-strict --no-overflow --quiet'
61
62		# CHM2PDF converting. Use following command to convert CHM content to a single
63		# PDF file. Make sure that htmldoc is available on your system.
64		chmtopdf = '-t pdf14 -f "%(output)s" --book %(toc)s --no-numbered --toctitle "Table of Contents" --title --textcolor "#000000" --linkcolor "#0000ff" --linkstyle plain --size Universal --left 1.00in --right 0.50in --top 0.50in --bottom 0.50in --header .t. --header1 ... --footer h.1 --nup 1 --tocheader .t. --tocfooter ..i --portrait --color --no-pscommands --no-xrxcomments --compression=1 --jpeg=0 --fontsize 11.0 --fontspacing 1.2 --headingfont Helvetica --bodyfont Times --headfootsize 11.0 --headfootfont Helvetica --charset iso-8859-1 --links --embedfonts --pagemode outline --pagelayout single --firstpage c1 --pageeffect none --pageduration 10 --effectduration 1.0 --no-encryption --permissions all --owner-password "" --user-password "" --browserwidth 680 --no-strict --no-overflow --quiet'
65
66		# CHM2PS converting. Use following command to convert CHM content to a single
67		# PostScript file. Make sure that htmldoc is available on your system.
68		#chmtops = '-t ps2 -f "%(output)s" --book %(toc)s --no-numbered --toctitle "Table of Contents" --title --textcolor "#000000" --linkcolor "#0000ff" --linkstyle underline --size A4 --left 1.00in --right 0.50in --top 0.50in --bottom 0.50in --header .t. --header1 ... --footer h.1 --nup 1 --tocheader .t. --tocfooter ..i --portrait --color --no-pscommands --no-xrxcomments --compression=1 --jpeg=0 --fontsize 11.0 --fontspacing 1.2 --headingfont Helvetica --bodyfont Times --headfootsize 11.0 --headfootfont Helvetica --charset iso-8859-1 --browserwidth 680 --no-strict --no-overflow --quiet'
69
70		# Maximum Table of Content levels for htmldoc utility.
71		#
72		# Default: maxtoclvl = 4
73		maxtoclvl = 4

+0

-34

~~archmod/chmtotext.py~~ less more

0		# -- coding: utf-8 --
1		#
2		# archmage -- CHM decompressor
3		# Copyright (c) 2005-2009 Basil Shubin <bashu@users.sourceforge.net>
4		#
5		# This program is free software; you can redistribute it and/or modify it under
6		# the terms of the GNU General Public License as published by the Free Software
7		# Foundation; either version 2 of the License, or (at your option) any later
8		# version.
9		#
10		# This program is distributed in the hope that it will be useful, but WITHOUT
11		# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12		# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
13		# details.
14		#
15		# You should have received a copy of the GNU General Public License along with
16		# this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
17		# Street, Fifth Floor, Boston, MA 02110-1301, USA.
18		#
19
20		"""CHM to Text converter (using external tool: lynx or elinks)"""
21
22		import sys
23		import signal
24		from subprocess import Popen, PIPE
25
# Restore the default SIGPIPE action so that a closed output pipe ends the
# program quietly. The signal does not exist on every platform (e.g.
# Windows), where an unconditional lookup would make the import fail.
if hasattr(signal, 'SIGPIPE'):
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)
27
28
def chmtotext(input, cmd, output=sys.stdout):
    """CHM to Text converter

    input  - HTML text that is fed to the converter's standard input
    cmd    - shell command line of the converter (e.g. lynx or elinks)
    output - file object that receives the converted text
    """
    proc = Popen(cmd, stdin=PIPE, stdout=PIPE, shell=True)
    # communicate() writes the input, closes stdin and drains stdout at the
    # same time. Writing the whole input first can block forever once the
    # child's output pipe fills up.
    text = proc.communicate(input)[0]
    print >> output, text

+0

-174

~~archmod/cli.py~~ less more

0		# -- coding: utf-8 --
1		#
2		# archmage -- CHM decompressor
3		# Copyright (c) 2003 Eugeny Korekin <aaaz@users.sourceforge.net>
4		# Copyright (c) 2005-2009 Basil Shubin <bashu@users.sourceforge.net>
5		#
6		# This program is free software; you can redistribute it and/or modify it under
7		# the terms of the GNU General Public License as published by the Free Software
8		# Foundation; either version 2 of the License, or (at your option) any later
9		# version.
10		#
11		# This program is distributed in the hope that it will be useful, but WITHOUT
12		# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13		# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
14		# details.
15		#
16		# You should have received a copy of the GNU General Public License along with
17		# this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
18		# Street, Fifth Floor, Boston, MA 02110-1301, USA.
19		#
20
21		"""arCHMage -- extensible reader and decompiler for files in the CHM format.
22
23		Usage: %(program)s [options] <chmfile> [destdir\|destfile]
24		Where:
25
26		-x / --extract
27		Extracts CHM file into specified directory. If destination
28		directory is omitted, than the new one will be created based
29		on name of CHM file. This options is by default.
30
31		-c format
32		--convert=format
33		Convert CHM file into specified file format. If destination
34		file is omitted, than the new one will be created based
35		on name of CHM file. Available formats:
36
37		html - Single HTML file
38		text - Plain Text file
39		pdf - Adobe PDF file format
40
41		-p number
42		--port=number
43		Acts as HTTP server on specified port number, so you can read
44		CHM file with your favorite browser. You can specify a directory
45		with decompressed content.
46
47		-d / --dump
48		Dump HTML data from CHM file into standard output.
49
50		-V / --version
51		Print version number and exit.
52
53		-h / --help
54		Print this text and exit.
55		"""
56
57		import os, sys
58		import getopt
59
60		import archmod
61		from archmod.CHM import CHMFile, CHMDir
62		from archmod.CHMServer import CHMServer
63
64
65		program = sys.argv[0]
66
def usage(code=archmod.OK, msg=''):
    """Show application usage and quit

    The module docstring (with names from globals() filled in) and then
    ``msg`` are printed through archmod.message(); the process exits with
    ``code``.
    """
    help_text = __doc__ % globals()
    for text in (help_text, msg):
        archmod.message(code, text)
    sys.exit(code)
72
73
def parseargs():
    """Parse ``sys.argv`` and return an object describing the request.

    The returned object has the attributes ``mode`` (one of the archmod
    mode constants, EXTRACT by default), ``port``, ``chmfile`` and
    ``output``. Invalid options end the process through usage() or
    sys.exit().
    """
    long_options = ['extract', 'convert=', 'dump', 'port=', 'version', 'help']
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'xc:dp:Vh', long_options)
    except getopt.error as msg:
        usage(archmod.ERROR, msg)

    class Options:
        mode = None # EXTRACT or HTTPSERVER or other
        port = None # HTTP port number
        chmfile = None # CHM File to view/extract
        output = None # Output file or directory

    options = Options()
    exclusive = '-x and -p or -c are mutually exclusive'

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
        elif opt in ('-V', '--version'):
            archmod.message(archmod.OK, archmod.__version__)
            sys.exit(archmod.OK)
        elif opt in ('-p', '--port'):
            if options.mode is not None:
                sys.exit(exclusive)
            options.mode = archmod.HTTPSERVER
            try:
                options.port = int(arg)
            except ValueError as msg:
                sys.exit('Invalid port number: %s' % msg)
        elif opt in ('-c', '--convert'):
            if options.mode is not None:
                sys.exit(exclusive)
            options.mode = archmod.output_format(str(arg))
        elif opt in ('-x', '--extract'):
            if options.mode is not None:
                sys.exit(exclusive)
            options.mode = archmod.EXTRACT
        elif opt in ('-d', '--dump'):
            if options.mode is not None:
                sys.exit('-d should be used without any other options')
            options.mode = archmod.DUMPHTML
        else:
            assert False, (opt, arg)

    # Extraction is the default when no mode option was given
    if options.mode is None:
        options.mode = archmod.EXTRACT

    # The first positional argument is the CHM file
    if not args:
        sys.exit('No CHM file was specified!')
    options.chmfile = args.pop(0)

    # The optional second one is the output directory or file; without it
    # the name is derived from the CHM file name
    if options.mode == archmod.EXTRACT:
        if args:
            options.output = args.pop(0)
        else:
            options.output = archmod.file2dir(options.chmfile)
    elif options.mode in (archmod.CHM2TXT, archmod.CHM2HTML, archmod.CHM2PDF):
        if args:
            options.output = args.pop(0)
        else:
            options.output = archmod.output_file(options.chmfile, options.mode)

    # Any other arguments are invalid
    if args:
        sys.exit('Invalid arguments: ' + archmod.COMMASPACE.join(args))

    return options
149
150
def main():
    """Command line entry point: run the action selected by the options."""
    options = parseargs()
    if not os.path.exists(options.chmfile):
        sys.exit('No such file: %s' % options.chmfile)

    # The argument is either a CHM file or a directory with decompressed
    # content. An explicit if/else is used because "cond and a or b" would
    # pick the second operand whenever the first evaluates as false.
    if os.path.isfile(options.chmfile):
        source = CHMFile(options.chmfile)
    else:
        source = CHMDir(options.chmfile)

    if options.mode == archmod.HTTPSERVER:
        CHMServer(source, port=options.port).run()
    elif options.mode == archmod.DUMPHTML:
        source.dump_html()
    elif options.mode == archmod.CHM2TXT:
        if os.path.exists(options.output):
            sys.exit('%s is already exists' % options.output)
        # the with-block flushes and closes the text file when done
        with open(options.output, 'w') as out:
            source.chm2text(out)
    elif options.mode in (archmod.CHM2HTML, archmod.CHM2PDF):
        source.htmldoc(options.output, options.mode)
    elif options.mode == archmod.EXTRACT:
        source.extract(options.output)

+0

-55

~~archmod/htmldoc.py~~ less more

0		# -- coding: utf-8 --
1		#
2		# archmage -- CHM decompressor
3		# Copyright (c) 2009 Basil Shubin <bashu@users.sourceforge.net>
4		#
5		# This program is free software; you can redistribute it and/or modify it under
6		# the terms of the GNU General Public License as published by the Free Software
7		# Foundation; either version 2 of the License, or (at your option) any later
8		# version.
9		#
10		# This program is distributed in the hope that it will be useful, but WITHOUT
11		# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12		# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
13		# details.
14		#
15		# You should have received a copy of the GNU General Public License along with
16		# this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
17		# Street, Fifth Floor, Boston, MA 02110-1301, USA.
18		#
19
20		"""Generic converter function"""
21
22		import os
23		import string
24		import tempfile
25		import subprocess
26		import archmod
27
28
def htmldoc(input, cmd, options, toclevels, output):
    """CHM to other format converter

    Writes a temporary htmldoc batch file and runs htmldoc on it.

    input - list of input html files; nothing is run when it is empty
    cmd - full path to htmldoc command
    options - htmldoc options from arch.conf, a %-format template filled
              with the 'output' and 'toc' keys
    toclevels - number of ToC levels as htmldoc option; a false value
                disables the table of contents
    output - output file (single html, ps, pdf and etc)
    """
    if toclevels:
        toc = '--toclevels %s' % (toclevels)
    else:
        toc = '--no-toc'
    options = options % {'output': output, 'toc': toc}
    if not input:
        return

    # Create a htmldoc file for batch processing. It is opened in text mode
    # because the content is built from str values, which a binary-mode
    # temporary file rejects on Python 3.
    f = tempfile.NamedTemporaryFile(mode='w', delete=False)
    try:
        try:
            f.write('#HTMLDOC 1.8.27' + archmod.LF)
            f.write(options + archmod.LF)
            f.write(archmod.LF.join(input))
        finally:
            f.close()
        # Pass the arguments as a list so that paths containing spaces or
        # shell metacharacters are handed to htmldoc unchanged.
        subprocess.call([cmd, '--batch', f.name])
    finally:
        # Unlink temporary htmldoc file, also when writing or running failed
        os.unlink(f.name)

+0

-66

~~archmod/mod_chm.py~~ less more

0		# -*- coding: utf-8 -*-
1		#
2		# archmage -- CHM decompressor
3		# Copyright (c) 2003 Eugeny Korekin <aaaz@users.sourceforge.net>
4		# Copyright (c) 2005-2009 Basil Shubin <bashu@users.sourceforge.net>
5		#
6		# This program is free software; you can redistribute it and/or modify it under
7		# the terms of the GNU General Public License as published by the Free Software
8		# Foundation; either version 2 of the License, or (at your option) any later
9		# version.
10		#
11		# This program is distributed in the hope that it will be useful, but WITHOUT
12		# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13		# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
14		# details.
15		#
16		# You should have received a copy of the GNU General Public License along with
17		# this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
18		# Street, Fifth Floor, Boston, MA 02110-1301, USA.
19		#
20
21		from mod_python import apache
22		from mimetypes import guess_type
23		from archmod.CHM import CHMFile
24
25		chmfile = None
26		chmname = None
27
28
def handler(req):
    """mod_python handler that serves pages out of a CHM file.

    ``req.filename`` names the CHM file and ``req.path_info`` the entry
    inside it. With a path, the entry is looked up and written with a
    guessed content type; a failed lookup returns ``apache.HTTP_NOT_FOUND``.
    Without a path, the CHM file itself is streamed as 'application/chm'.
    Returns ``apache.OK`` otherwise.
    """
    global chmfile, chmname

    source = req.filename
    pagename = req.path_info

    # Keep the most recently used archive in module globals and reopen it
    # only when a different CHM file is requested.
    if chmname != source:
        chmfile = CHMFile(source)
        chmname = source

    if pagename:
        try:
            page = chmfile.get_entry(pagename)
        except Exception:
            # Any lookup failure is reported as a missing page, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            return apache.HTTP_NOT_FOUND

        if pagename == '/':
            mimetype = 'text/html'
        else:
            mimetype = guess_type(pagename)[0] or 'application/octet-stream'

        req.content_type = mimetype
        req.send_http_header()
        req.write(page)
    else:
        req.content_type = 'application/chm'
        req.send_http_header()
        # Stream the raw file in 4 KiB chunks; the context manager closes
        # the handle even if writing to the client fails.
        with open(source, 'rb') as chm:
            while True:
                chunk = chm.read(4096)
                if not chunk:
                    break
                req.write(chunk)
    return apache.OK

+0

-171

~~archmod/templates/arch_contents.html~~ less more

0		<html>
1		<head>
2		<title><%title%></title>
3		<LINK rel="Stylesheet" type="text/css" href="arch_css.css">
4		</head>
5
6		<body onload="setInterval('getLoc()', 500);">
7		<script>
8		var lastDoc;
9		var contents = <%contents%>;
10
11		var w=window,d=document
12		var icons={'0' : 'icons/0.gif','1' : 'icons/90.gif',
13		'2' : 'icons/91.gif', '3' : 'icons/92.gif', '4' : 'icons/99.gif',
14		'18' : 'icons/93.gif', '19' : 'icons/94.gif', '20' : 'icons/97.gif',
15		'26' : 'icons/95.gif', '27' : 'icons/96.gif', '28' : 'icons/98.gif'}
16
17		var dhtml=true
18		try{if(d.body.innerHTML.length<=0)dhtml=false}
19		catch(e){dhtml=false;}
20		var tree=[];
21
22		get_element=d.all ?
23		function(id){return d.all[id]}
24		:
25		function(id){return d.getElementById(id)}
26
27		function get_img1(){
28		return icons[((this.childs.length ? 16 : 0)+(this.childs.length && this.opened ? 8 : 0)+(this.is_last()? 1 : 0)+(this.is_first()? 2 : 0)+2)]
29		}
// Returns the path of the item icon for this node. Icon numbers below 9
// come in closed/open pairs (odd = closed, even = open), so the number
// stored in cnt[2] is shifted to the variant matching this.opened.
function get_img2(){
    // Declared locally: the bare assignment used to create a global "n".
    var n = this.cnt[2]
    if (n < 9) {
        n = (this.opened
            ? (n % 2 ? parseInt(n, 10) + 1 : n)
            : (n % 2 ? n : parseInt(n, 10) - 1))
    }
    return 'icons/' + n + '.gif'
}
37		function node(tree,n){
38		this.ind=tree.ind+1
39		this.cnt=tree.cnt[n+(this.ind ? 3 : 0)]
40		if(!this.cnt)return
41		this.tree=tree.tree
42		this.parent=tree
43		this.opened=!dhtml
44		this.nind=this.tree.nodes.length
45		this.tree.nodes[this.nind]=this
46		tree.childs[n]=this
47		this.childs=[]
48		for(var i=0;i < this.cnt.length - 2;i++)
49		new node(this,i)
50		this.get_img1=get_img1
51		this.get_img2=get_img2
52		this.open=open
53		this.select=select
54		this.init=init
55		this.is_last=function(){
56		return n==this.parent.childs.length - 1
57		}
58		this.is_first=function(){
59		return(this.ind==0)&&(n==0)&&(!this.is_last())
60		}
61		}
62
63		function open(){
64		var childs=[]
65		var el=get_element('divCont'+this.nind)
66		if(!el)return
67		if(!dhtml){
68		d.write(childs.join(''))
69		for(var i=0;i < this.childs.length;i++){
70		d.write(this.childs[i].init())
71		this.childs[i].open()
72		}
73		}
74		else{
75		if(!el.innerHTML){
76		for(var i=0;i < this.childs.length;i++)
77		childs[i]=this.childs[i].init()
78		el.innerHTML=childs.join('')
79		}
80		el.style.display=(this.opened ? 'none' : 'block')
81		this.opened=!this.opened
82		var img1=d.images['img1_'+this.nind],img2=d.images['img2_'+this.nind]
83		if(img1)img1.src=this.get_img1()
84		if(img2)img2.src=this.get_img2()
85		}
86		}
87
88
89		function select(nind){
90		if(!nind){
91		var sel=this.tree.sel
92		this.tree.sel=this
93		if(sel)sel.select(true)
94		}
95		var img2=d.images['img2_'+this.nind]
96		if(img2)img2.src=this.get_img2()
97		get_element('el'+this.nind).style.fontWeight=nind ? 'normal' : 'bold'
98		return Boolean(this.cnt[1])
99		}
100
// Builds and returns the HTML for one tree row: the connector images for
// each ancestor level, the expand/collapse icon, the linked item icon and
// title, and (for nodes with children) an initially hidden container div.
function init(){
    var temp=[],par=this.parent
    // One connector image per ancestor level, filled from the innermost out.
    for(var i=this.ind;i>0;i--){
        temp[i]='<img src="'+icons[par.is_last()? 0 : 1]+'" border="0" align="absbottom">'
        par=par.parent
    }
    // Declared locally: the bare assignment used to create a global "r".
    var r='<table cellpadding="0" cellspacing="0" border="0">'
    r+='<tr><td nowrap>'
    r+=temp.join('')
    r+=(this.childs.length ?(!dhtml ? '' : '<a href="javascript: tree.toggle('+this.nind+')" >')+'<img src="'+this.get_img1()+'" border="0" align="absbottom" name="img1_'+this.nind+'">'+(!dhtml ? '' : '</a>'): '<img src="'+this.get_img1()+'" border="0" align="absbottom">')
    r+='<a href="'+this.cnt[1]+'" target="'+'content'+'"'+' title="'+this.cnt[0]+'" onclick="return tree.select('+this.nind+')" '+(!dhtml ? '' : ' ondblclick="tree.toggle('+this.nind+')"')+' class="small" id="el'+this.nind+'"><img src="'+this.get_img2()+'" border="0" align="absbottom" name="img2_'+this.nind+'"> '+this.cnt[0]+'</a>'
    r+='</td></tr></table>'
    r+=(this.childs.length ? '<div id="divCont'+this.nind+'" style="display:none"></div>' : '')
    return r
}
116
117		function draw_contents(cnt){
118		tree=this;
119		tree.cnt=cnt;
120		tree.tree=this;
121		tree.nodes=[];
122		tree.sel=null;
123		tree.ind=-1;
124
125		tree.select=function(i){
126		return tree.nodes[i].select();
127		};
128		tree.toggle=function(i){
129		tree.nodes[i].open()
130		};
131		tree.childs=[]
132		for(var i=0;i<cnt.length;i++){
133		new node(tree,i)
134		}
135		tree.nind=0;
136
137		for(var i=0;i < tree.childs.length;i++){
138		d.write(tree.childs[i].init());
139		if(!dhtml)tree.childs[i].open();
140		}
141		}
142
143
// Keeps the contents tree in sync with the content frame: when the frame
// shows a different document than last time, the tree node whose link
// contains that page name is selected and its parent is opened if needed.
function getLoc(){
    var doc = "" + parent.frames[1].location;
    if (doc == lastDoc) {
        return;
    }
    // Remember the location first, so a page without a matching tree entry
    // is not searched for again on every call.
    lastDoc = doc;

    // Last path component of the URL, without any "#fragment".
    var keyVals = doc.split("\/");
    var targetPage = "" + keyVals[(keyVals.length - 1)];
    var hashPos = targetPage.indexOf("\#");
    if (hashPos > 0) {
        targetPage = targetPage.substr(0, hashPos);
    }

    // Bounded search: the previous loop ran past the end of tree.nodes and
    // threw a TypeError whenever the page was not listed in the contents.
    var nodeCount = 0;
    while (nodeCount < tree.nodes.length &&
           ("" + tree.nodes[nodeCount].cnt[1]).lastIndexOf(targetPage) < 0) {
        nodeCount++;
    }
    if (nodeCount >= tree.nodes.length) {
        return;
    }

    var parentNode = tree.nodes[nodeCount].parent;
    if (parentNode != tree && parentNode.opened == false) {
        parentNode.open();
    }
    tree.nodes[nodeCount].select();
}
167		new draw_contents(contents);
168		</script>
169		</body>
170		</html>

+0

-2

~~archmod/templates/arch_css.css~~ less more

0		.small { font-size: x-small; }
1		.htable { margin: 0; border: none; padding: 0 }⏎

+0

-26

~~archmod/templates/arch_frameset.html~~ less more

0		<html>
1		<head>
2		<title><%title%></title>
3
4		<script>
5		var qs=location.search.substr(1)
6		var A=qs.split("&")
7		var B=null
8		var F="<%deftopic%>"
9		for(var i=0;i<A.length;i++){B=A[i].split("=");A[i]=[B[0],B[1]]}
10		for(var j=0;j<A.length;j++){if(A[j][0]=='page'){ F=A[j][1];break}}
11		</script >
12		</head>
13		<script>
14		document.write('<frameset cols="200,*" bordercolor="<%bcolor%>" frameborder="yes" framespacing="2" >')
15		document.write('<frame name="toc" src="arch_contents.html">')
16		document.write('<frame name="content" src="'+F+'" >')
17		document.write('</frameset>');
18		</script>
19		<noscript>
20		<frameset cols="200,*" bordercolor="<%bcolor%>" frameborder="yes" framespacing="2" >
21		<frame name="toc" src="arch_contents.html" >
22		<frame name="content" src="<%deftopic%>">
23		</frameset>
24		</noscript>
25		</html>

+0

-12

~~archmod/templates/arch_header.html~~ less more

0		<html>
1		<head>
2		<title><%title%></title>
3		<LINK rel="Stylesheet" type="text/css" href="arch_css.css">
4		</head>
5		<body bgcolor="<%bcolor%>">
6		<table class='htable' cellpadding="0" cellspacing="0" width="100%"><td>
7		<td align="center" width="100%">
8		<b><font size="large" color="<%fcolor%>"><%title%></font></b>
9		</table>
10		</body>
11		</html>

~~archmod/templates/icons/0.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/1.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/10.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/11.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/12.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/13.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/14.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/15.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/16.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/17.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/18.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/19.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/2.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/20.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/21.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/22.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/23.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/24.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/25.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/26.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/27.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/3.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/35.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/37.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/39.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/4.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/5.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/6.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/7.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/8.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/9.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/90.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/91.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/92.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/93.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/94.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/95.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/96.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/97.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/98.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/99.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/next.gif~~ less more

Binary diff not shown

~~archmod/templates/icons/prev.gif~~ less more

Binary diff not shown

+0

-39

~~archmod/templates/index.html~~ less more

0		<html>
1		<head>
2		<script>var pageid="";</script>
3
4		<title><%title%></title>
5
6		<script>
7		function IsOpera(){return navigator.userAgent.indexOf("Opera")>-1}
8		var qs=location.search.substr(1);
9		var A=qs.split("&")
10		var B=null
11		var F="<%deftopic%>";
12		for(var i=0;i<A.length;i++){
13		B=A[i].split("=")
14		A[i]=[B[0],B[1]]
15		}
16		for(var j=0;j<A.length;j++){
17		if(A[j][0]=='page'){
18		F=A[j][1]
19		break
20		}
21		}
22		if (IsOpera()) F = '';</script>
23		</head>
24
25		<script>
26		document.write('<frameset rows="30,*" frameborder="no" framespacing="0" border="0" >')
27		document.write('<frame name="header" src="arch_header.html" frameborder="no" noresize="yes" scrolling="no" >')
28		if(F!='')F='?page='+F
29		document.write('<frame name="main" src="arch_frameset.html'+F+'">')
30		document.write('</frameset>')
31		</script>
32		<noscript>
33		<frameset rows="30,*" frameborder="no" framespacing="0" border="0" >
34		<frame name="header" src="arch_header.html" frameborder="no" noresize="yes" scrolling="no">
35		<frame name="main" src="arch_frameset.html" >
36		</frameset>
37		</noscript>
38		</html>

+0

-1

setup.cfg less more

0	0	[egg_info]
1	1	tag_build =
2	2	tag_date = 0
3		tag_svn_revision = 0
4	3

+5

-5

setup.py less more

0	0	#!/usr/bin/env python
1	1
2	2	from setuptools import setup, find_packages
3		import version
4	3
5	4	long_desc='''arCHMage is a reader and decompressor for CHM format'''
6	5

15	14
16	15	setup(
17	16	name='archmage',
18		version=version.getVersion(),
	17	version='0.4.0',
19	18	description='CHM decompressor',
20	19	maintainer='Mikhail Gusarov',
21	20	maintainer_email='dottedmag@dottedmag.net',

27	26	packages=find_packages(),
28	27	install_requires=[
29	28	'pychm',
30		'BeautifulSoup',
	29	'beautifulsoup4',
	30	'sgmllib3k',
31	31	],
32	32	entry_points={
33		'console_scripts': ['archmage = archmod.cli:main'],
	33	'console_scripts': ['archmage = archmage.cli:main'],
34	34	},
35	35	package_data={
36		'archmod': ['.conf', 'templates/.html', 'templates/*.css',
	36	'archmage': ['.conf', 'templates/.html', 'templates/*.css',
37	37	'templates/icons/*.gif'],
38	38	}
39	39	)

+0

-123

~~version.py~~ less more

0		# -*- coding: utf-8 -*-
1
2		"""Calculates the current version number.
3
4		If possible, uses output of “git describe” modified to conform to the
5		versioning scheme that setuptools uses (see PEP 386). Releases must be
6		labelled with annotated tags (signed tags are annotated) of the following
7		format:
8
9		v<num>(.<num>)+ [ {a|b|c|rc} <num> (.<num>)* ]
10
11		If “git describe” returns an error (likely because we're in an unpacked copy
12		of a release tarball, rather than a git working copy), or returns a tag that
13		does not match the above format, version is read from RELEASE-VERSION file.
14
15		To use this script, simply import it in your setup.py file, and use the results
16		of getVersion() as your package version:
17
18		import version
19		setup(
20		version=version.getVersion(),
21		.
22		.
23		.
24		)
25
26		This will automatically update the RELEASE-VERSION file. The RELEASE-VERSION
27		file should not be checked into git but it should be included in sdist
28		tarballs (as should version.py file). To do this, run:
29
30		echo include RELEASE-VERSION version.py >>MANIFEST.in
31		echo RELEASE-VERSION >>.gitignore
32
33		With that setup, a new release can be labelled by simply invoking:
34
35		git tag -s v1.0
36		"""
37
__author__ = ('Douglas Creager <dcreager@dcreager.net>',
              'Michal Nazarewicz <mina86@mina86.com>')
__license__ = 'This file is placed into the public domain.'
__maintainer__ = 'Michal Nazarewicz'
__email__ = 'mina86@mina86.com'

# The trailing comma matters: without it the parentheses are just grouping
# and __all__ is the string 'getVersion', which makes "from version import *"
# look up each character as a name and fail.
__all__ = ('getVersion',)
45
46
47		import re
48		import subprocess
49		import sys
50
51
# File, relative to the current directory, that caches the release version.
RELEASE_VERSION_FILE = 'RELEASE-VERSION'

# http://www.python.org/dev/peps/pep-0386/
# N.N[.N]+ optionally followed by a pre-release marker (a, b, c or rc) and
# its number. The alternation must use a bare "|": an escaped "\|" would
# match a literal pipe character and reject every "rc" version.
_PEP386_SHORT_VERSION_RE = r'\d+(?:\.\d+)+(?:(?:[abc]|rc)\d+(?:\.\d+)*)?'
# A short version plus optional ".postN" and ".devN" suffixes.
_PEP386_VERSION_RE = r'^%s(?:\.post\d+)?(?:\.dev\d+)?$' % (
    _PEP386_SHORT_VERSION_RE)
# Output of "git describe --long": <version>-<commits since tag>-g<sha>.
_GIT_DESCRIPTION_RE = r'^(?P<ver>%s)-(?P<commits>\d+)-g(?P<sha>[\da-f]+)$' % (
    _PEP386_SHORT_VERSION_RE)
60
61
def readGitVersion():
    """Return a version derived from ``git describe``, or None.

    Runs ``git describe --long`` restricted to tags that start with a digit
    and contain a dot. Returns None when git cannot be run, exits with an
    error, prints nothing, or prints a description that does not match
    _GIT_DESCRIPTION_RE (a warning is written to stderr in that last case).

    For a commit exactly on a tag the tag's version is returned; otherwise
    the result is '<ver>.post<commits>.dev<sha as decimal integer>'.
    """
    try:
        proc = subprocess.Popen(('git', 'describe', '--long',
                                 '--match', '[0-9]*.*'),
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        data, _ = proc.communicate()
    except (OSError, ValueError):
        # git is not installed or could not be started.
        return None
    if proc.returncode:
        return None

    lines = data.splitlines()
    if not lines:
        return None
    ver = lines[0].strip()
    # On Python 3 the pipe yields bytes, which cannot be matched against
    # the str pattern below.
    if not isinstance(ver, str):
        ver = ver.decode('utf-8', 'replace')
    if not ver:
        return None

    m = re.search(_GIT_DESCRIPTION_RE, ver)
    if not m:
        sys.stderr.write('version: git description (%s) is invalid, '
                         'ignoring\n' % ver)
        return None

    commits = int(m.group('commits'))
    if not commits:
        return m.group('ver')
    return '%s.post%d.dev%d' % (
        m.group('ver'), commits, int(m.group('sha'), 16))
88
89
def readReleaseVersion():
    """Return the version stored in RELEASE_VERSION_FILE, or None.

    Reads the first line of the file and strips surrounding whitespace.
    Returns None when the file cannot be opened or read. A version that
    does not match _PEP386_VERSION_RE is still returned, after a warning
    is written to stderr.
    """
    try:
        with open(RELEASE_VERSION_FILE) as fd:
            ver = fd.readline().strip()
    except (IOError, OSError, UnicodeError):
        # Missing or unreadable file: there is simply no stored version.
        return None
    if not re.search(_PEP386_VERSION_RE, ver):
        sys.stderr.write('version: release version (%s) is invalid, '
                         'will use it anyway\n' % ver)
    return ver
103
104
def writeReleaseVersion(version):
    """Overwrite RELEASE_VERSION_FILE with *version* followed by a newline."""
    # The context manager closes the file even if the write fails.
    with open(RELEASE_VERSION_FILE, 'w') as fd:
        fd.write('%s\n' % version)
109
110
def getVersion():
    """Return the current version, preferring git over the stored file.

    The stored release version is read first, then git is asked; the git
    answer wins when it is available. Raises ValueError when neither
    source yields a version. When the chosen version differs from the
    stored one, the release version file is rewritten with it.
    """
    stored = readReleaseVersion()
    current = readGitVersion()
    if not current:
        current = stored
    if not current:
        raise ValueError('Cannot find the version number')
    if current != stored:
        writeReleaseVersion(current)
    return current
119
120
121		if __name__ == '__main__':
122		print(getVersion())