New upstream version 0.4.0
Mikhail Gusarov
4 years ago
0 | 0 | Copyright (c) 2003 Eugeny Korekin <az@ftc.ru> |
1 | 1 | Copyright (c) 2005-2009 Basil Shubin <basil.shubin@gmail.com> |
2 | Copyright (c) 2015 Mikhail Gusarov <dottedmag@dottedmag.net> | |
2 | Copyright (c) 2015,2019 Mikhail Gusarov <dottedmag@dottedmag.net> |
0 | include archmod/arch.conf | |
1 | recursive-include archmod/templates *.html *.css *.gif | |
2 | include AUTHORS COPYING INSTALL NEWS README archmage.1 | |
3 | include RELEASE-VERSION version.py | |
0 | include archmage/arch.conf | |
1 | recursive-include archmage/templates *.html *.css *.gif | |
2 | include AUTHORS COPYING NEWS README.md archmage.1 |
0 | arCHMage 0.4 | |
1 | ============ | |
2 | Changes: | |
3 | ||
4 | * Works with Python 3.5+ (#10). | |
5 | ||
6 | Bugfixes: | |
7 | ||
8 | * Fix HTML conversion under Windows (#6). | |
9 | ||
10 | Removals: | |
11 | ||
12 | * mod_chm and option -p were removed. Extract CHM files to the filesystem and | |
13 | use a real HTTP server to serve them. | |
14 | ||
0 | 15 | arCHMage 0.3.1 |
1 | 16 | ============== |
2 | 17 | Bug fixes: |
0 | Metadata-Version: 1.1 | |
0 | Metadata-Version: 1.2 | |
1 | 1 | Name: archmage |
2 | Version: 0.3.1 | |
2 | Version: 0.4.0 | |
3 | 3 | Summary: CHM decompressor |
4 | 4 | Home-page: https://github.com/dottedmag/archmage |
5 | Author: Mikhail Gusarov | |
6 | Author-email: dottedmag@dottedmag.net | |
5 | Maintainer: Mikhail Gusarov | |
6 | Maintainer-email: dottedmag@dottedmag.net | |
7 | 7 | License: GPLv2+ |
8 | 8 | Description: arCHMage is a reader and decompressor for CHM format |
9 | 9 | Keywords: chm,HTML Help,Compiled HTML,Compressed HTML |
0 | arCHMage | |
1 | ======== | |
2 | ||
3 | arCHMage converts CHM files to HTML, plain text and PDF. CHM is the format used | |
4 | by Microsoft HTML Help, also known as Compiled HTML. | |
5 | ||
6 | [![Latest Version](https://img.shields.io/pypi/v/archmage.svg)](https://pypi.python.org/pypi/archmage/) | |
7 | [![Downloads](https://img.shields.io/pypi/dm/archmage.svg)](https://pypi.python.org/pypi/archmage/) | |
8 | [![License](https://img.shields.io/github/license/dottedmag/archmage.svg)](https://pypi.python.org/pypi/archmage/) | |
9 | ||
10 | Usage | |
11 | ===== | |
12 | ||
13 | Extract CHM content into directory | |
14 | ---------------------------------- | |
15 | ||
16 | archmage -x <chmfile> [output directory] | |
17 | ||
18 | Extraction does not overwrite existing directories. | |
19 | ||
20 | Dump HTML data from CHM | |
21 | ----------------------- | |
22 | ||
23 | archmage -d <chmfile> | |
24 | ||
25 | Convert CHM file into another format | |
26 | ------------------------------------ | |
27 | ||
28 | archmage -c (html|text|pdf) <chmfile> [output file] | |
29 | ||
30 | This feature requires `htmldoc(1)`, and `lynx(1)` or `elinks(1)` installed. | |
31 | ||
32 | Installation | |
33 | ============ | |
34 | ||
35 | pip install archmage | |
36 | ||
37 | Requirements | |
38 | ============ | |
39 | ||
40 | arCHMage has the following dependencies: | |
41 | ||
42 | * Python 3.5+ | |
43 | * PyCHM | |
44 | * BeautifulSoup4 | |
45 | ||
46 | Optional dependencies: | |
47 | ||
48 | * htmldoc - converting to plain text, single HTML, PDF formats | |
49 | (Debian/Ubuntu: `htmldoc`) | |
50 | * Lynx or ELinks - converting to plain text | |
51 | (Debian/Ubuntu: `lynx`) |
0 | # -*- coding: utf-8 -*- | |
1 | # | |
2 | # archmage -- CHM decompressor | |
3 | # Copyright (c) 2003 Eugeny Korekin <aaaz@users.sourceforge.net> | |
4 | # Copyright (c) 2005-2009 Basil Shubin <bashu@users.sourceforge.net> | |
5 | # Copyright (c) 2015,2019 Mikhail Gusarov <dottedmag@dottedmag.net> | |
6 | # | |
7 | # This program is free software; you can redistribute it and/or modify it under | |
8 | # the terms of the GNU General Public License as published by the Free Software | |
9 | # Foundation; either version 2 of the License, or (at your option) any later | |
10 | # version. | |
11 | # | |
12 | # This program is distributed in the hope that it will be useful, but WITHOUT | |
13 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
14 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more | |
15 | # details. | |
16 | # | |
17 | # You should have received a copy of the GNU General Public License along with | |
18 | # this program; if not, write to the Free Software Foundation, Inc., 51 Franklin | |
19 | # Street, Fifth Floor, Boston, MA 02110-1301, USA. | |
20 | # | |
21 | ||
22 | import os | |
23 | import sys | |
24 | import re | |
25 | import shutil | |
26 | import errno | |
27 | import string | |
28 | import tempfile | |
29 | from enum import Enum, auto | |
30 | ||
31 | import archmage | |
32 | ||
33 | from archmage.CHMParser import SitemapFile, PageLister, ImageCatcher, TOCCounter#, HeadersCounter | |
34 | ||
35 | # import PyCHM bindings | |
36 | try: | |
37 | from chm import chmlib | |
38 | except ImportError as msg: | |
39 | sys.exit('ImportError: %s\nPlease check README file for system requirements.' % msg) | |
40 | ||
41 | # External file converters | |
42 | from archmage.chmtotext import chmtotext | |
43 | from archmage.htmldoc import htmldoc | |
44 | ||
class Action(Enum):
    """Kinds of operation that can be requested for a CHM file.

    ``CHMFile.htmldoc`` selects its converter options from ``CHM2HTML`` and
    ``CHM2PDF``; the numeric values are assigned automatically and carry no
    meaning of their own.
    """

    # Unpack the archive content.
    EXTRACT = auto()
    # Dump the HTML pages.
    DUMPHTML = auto()
    # Convert to a plain text document.
    CHM2TXT = auto()
    # Convert to a single HTML document.
    CHM2HTML = auto()
    # Convert to a PDF document.
    CHM2PDF = auto()
51 | ||
# Matches a ".." path component: ".." preceded by the start of the string,
# "/" or "\" and followed by "/", "\" or the end of the string.  Used to
# reject entry names that would climb out of the destination directory.
PARENT_RE = re.compile(r'(^|/|\\)\.\.(/|\\|$)')
53 | ||
class CHMFile:
    """A CHM file opened through chmlib, together with the archmage settings.

    Derived data (entry list, page list, image map, front page name, ...) is
    computed on first use and memoised in ``self.cache``.
    """

    def __init__(self, name):
        """Open the CHM file *name* and load the configuration.

        The file named by ``archmage.config`` is executed as Python code with
        this instance's ``__dict__`` as its namespace, so every name it
        defines becomes an instance attribute and it can read ``sourcename``.
        """
        self.cache = {}
        # Path of the source CHM file
        self.sourcename = name
        self._chm = chmlib.chm_open(name)
        # Import variables from config file into namespace.
        # NOTE(review): the config file is *executed*; it must be trusted.
        with open(archmage.config, 'rb') as config_file:
            config_code = compile(config_file.read(), archmage.config, 'exec')
        exec(config_code, self.__dict__)

        # build regexp from the list of auxiliary files
        self.aux_re = '|'.join([re.escape(s) for s in self.auxes])

        # Get and parse 'Table of Contents'
        try:
            self.topicstree = self.topics()
        except AttributeError:
            self.topicstree = None
        self.contents = SitemapFile(self.topicstree).parse()

    def close(self):
        """Close the underlying chmlib handle."""
        chmlib.chm_close(self._chm)

    def _cached(self, key, compute):
        """Return ``self.cache[key]``, calling *compute* to fill it on first use."""
        if key not in self.cache:
            self.cache[key] = compute()
        return self.cache[key]

    def _is_aux(self, name):
        """Tell whether *name* starts with one of the configured auxiliary names."""
        return re.match(self.aux_re, name)

    def entries(self):
        """Return the (cached) list of entry paths stored in the CHM file."""
        return self._cached('entries', self._entries)

    def _entries(self):
        def get_name(chmfile, ui, out):
            # chmlib hands over the path as bytes; the root entry is skipped
            path = ui.path.decode('utf-8')
            if path != '/':
                out.append(path)
            return chmlib.CHM_ENUMERATOR_CONTINUE

        out = []
        if chmlib.chm_enumerate(self._chm, chmlib.CHM_ENUMERATE_ALL, get_name, out) == 0:
            sys.exit('UnknownError: CHMLIB or PyCHM bug?')
        return out

    def html_files(self):
        """Return the (cached) list of HTML pages, **in order**.

        The order is the one PageLister produces from the topics tree.
        """
        return self._cached('html_files', self._html_files)

    def _html_files(self):
        lister = PageLister()
        lister.feed(self.topicstree)
        return lister.pages

    def image_urls(self):
        """Return the (cached) list of image URLs found by ImageCatcher in the pages."""
        return self._cached('image_urls', self._image_urls)

    def _image_urls(self):
        out = []
        image_catcher = ImageCatcher()
        for file in self.html_files():
            image_catcher.feed(CHMEntry(self, file).correct())
            for image_url in image_catcher.imgurls:
                if image_url not in out:
                    out.append(image_url)
        return out

    def image_files(self):
        """Return the (cached) dict mapping CHM entry names to the image URL they satisfy."""
        return self._cached('image_files', self._image_files)

    def _image_files(self):
        out = {}
        for image_url in self.image_urls():
            for entry in self.entries():
                # Plain substring test: the URL is text, not a regular
                # expression, so "." or "(" in it must match literally.
                # The first URL matching an entry wins.
                if entry not in out and image_url in entry.lower():
                    out[entry] = image_url
        return out

    def topics(self):
        """Return the (cached) content of the topics (.hhc) entry, or None if there is none."""
        return self._cached('topics', self._topics)

    def _topics(self):
        for e in self.entries():
            if e.lower().endswith('.hhc'):
                return CHMEntry(self, e, frontpage=self.frontpage()).get()

    def deftopic(self):
        """Return the (cached) default topic: the first page, lower-cased, without a leading slash."""
        return self._cached('deftopic', self._deftopic)

    def _deftopic(self):
        first_page = self.html_files()[0]
        if first_page.startswith('/'):
            return first_page.replace('/', '', 1).lower()
        return first_page.lower()

    def frontpage(self):
        """Return the (cached) front page name, chosen not to clash with any CHM entry."""
        return self._cached('frontpage', self._frontpage)

    def _frontpage(self):
        existing = set(self.entries())
        frontpage = os.path.join('/', 'index.html')
        index = 2  # index2.html and etc.
        # Keep trying names until one is free, whatever the entry order is.
        while frontpage in existing:
            frontpage = os.path.join('/', ('index%s.html' % index))
            index += 1
        return frontpage

    def templates(self):
        """Return the (cached) list of template files that do not collide with CHM entries."""
        return self._cached('templates', self._templates)

    def _templates(self):
        existing = set(self.entries())
        out = []
        for file in os.listdir(self.templates_dir):
            if os.path.isfile(os.path.join(self.templates_dir, file)):
                target = os.path.join('/', file)
                if target not in existing:
                    out.append(target)
        return out

    def toclevels(self):
        """Return the (cached) TOCCounter count, capped at the configured ``maxtoclvl``."""
        return self._cached('toclevels', self._toclevels)

    def _toclevels(self):
        counter = TOCCounter()
        counter.feed(self.topicstree)
        return min(counter.count, self.maxtoclvl)

    def get_template(self, name):
        """Get template file by its name, with the placeholders substituted.

        The front page name maps to the ``index.html`` template; any other
        name maps to the template with the same base name.
        """
        if name == self.frontpage():
            template_path = os.path.join(self.templates_dir, 'index.html')
        else:
            template_path = os.path.join(self.templates_dir, os.path.basename(name))
        with open(template_path) as template_file:
            tpl = template_file.read()
        params = {
            'title': self.title,
            'contents': self.contents,
            'deftopic': self.deftopic(),
            'bcolor': self.bcolor,
            'fcolor': self.fcolor,
        }
        return string.Template(tpl).substitute(params)

    def process_templates(self, destdir="."):
        """Write the processed templates, the front page and the icons into *destdir*."""
        for template in self.templates():
            with open(os.path.join(destdir, os.path.basename(template)), 'w') as out:
                out.write(self.get_template(template))
        if self.frontpage() not in self.templates():
            with open(os.path.join(destdir, os.path.basename(self.frontpage())), 'w') as out:
                out.write(self.get_template('index.html'))
        icons_dest = os.path.join(destdir, 'icons/')
        if not os.path.exists(icons_dest):
            shutil.copytree(os.path.join(self.icons_dir), icons_dest)

    def extract_entry(self, entry, output_file, destdir=".", correct=False):
        """Write the CHM entry *entry* to *output_file* below *destdir*.

        The output name is lower-cased and its first '/' is removed.  Missing
        parent directories are created; nothing is written when the target is
        itself a directory.  With *correct* set the cleaned-up content
        (``CHMEntry.correct``) is written instead of ``CHMEntry.get``.
        """
        # process output entry, remove first '/' in entry name
        fname = output_file.lower().replace('/', '', 1)
        target = os.path.join(destdir, fname)
        # get directory name for file fname if any
        dname = os.path.dirname(target)
        # if dname is a directory and it does not exist, then create it
        if dname and not os.path.exists(dname):
            os.makedirs(dname)
        # otherwise write a file from CHM entry
        if not os.path.isdir(target):
            chm_entry = CHMEntry(self, entry)
            data = chm_entry.correct() if correct else chm_entry.get()
            # The file is binary; tolerate a text result for an empty entry.
            if isinstance(data, str):
                data = data.encode('utf-8')
            with open(target, 'wb') as out:
                out.write(data)

    def extract_entries(self, entries=(), destdir=".", correct=False):
        """Extract raw CHM entries into the files.

        Auxiliary entries are skipped.  Raises RuntimeError for a name that
        contains a ``..`` path component.
        """
        for e in entries:
            # if entry is auxiliary file, then skip it
            if self._is_aux(e):
                continue
            if PARENT_RE.search(e):
                raise RuntimeError('Giving up on malicious name: %s' % e)
            self.extract_entry(e, output_file=e, destdir=destdir, correct=correct)

    def extract(self, destdir):
        """Extract CHM file content into FS.

        Exits the program when *destdir* already exists; any other OSError is
        propagated to the caller instead of being silently dropped.
        """
        try:
            # Create destination directory
            os.mkdir(destdir)
            # make raw content extraction
            self.extract_entries(entries=self.entries(), destdir=destdir)
            # process templates
            self.process_templates(destdir=destdir)
        except OSError as error:
            if error.errno == errno.EEXIST:
                sys.exit('%s is already exists' % destdir)
            raise

    def dump_html(self, output=sys.stdout):
        """Dump HTML data from CHM file into standard output"""
        for e in self.html_files():
            # if entry is auxiliary file, then skip it
            if self._is_aux(e):
                continue
            # NOTE(review): the entry content looks like bytes, in which case
            # print() emits its repr (b'...') -- confirm the intended encoding
            # before changing this.
            print(CHMEntry(self, e).get(), file=output)

    def chm2text(self, output=sys.stdout):
        """Convert CHM into Single Text file"""
        for e in self.html_files():
            # if entry is auxiliary file, then skip it
            if self._is_aux(e):
                continue
            # to use this function you should have 'lynx' or 'elinks' installed
            chmtotext(input=CHMEntry(self, e).get(), cmd=self.chmtotext, output=output)

    def htmldoc(self, output, format=Action.CHM2HTML):
        """CHM to other file formats converter using htmldoc.

        *format* must be ``Action.CHM2HTML`` or ``Action.CHM2PDF``; anything
        else raises ValueError.  Pages are extracted into a temporary
        directory that is removed afterwards, also when the conversion fails.
        """
        output = output.replace(' ', '_')
        if format == Action.CHM2HTML:
            options = self.chmtohtml
        elif format == Action.CHM2PDF:
            options = self.chmtopdf
        else:
            raise ValueError('Unsupported htmldoc format: %r' % (format,))

        # Extract CHM content into temporary directory
        tempdir = tempfile.mkdtemp(prefix=output.rsplit('.', 1)[0])
        try:
            self.extract_entries(entries=self.html_files(), destdir=tempdir, correct=True)
            # List of temporary files
            files = [os.path.abspath(tempdir + file.lower()) for file in self.html_files()]
            images = self.image_files()
            if format == Action.CHM2HTML:
                # change output from single html file to a directory with html file and images
                if images:
                    dirname = archmage.file2dir(output)
                    if os.path.exists(dirname):
                        sys.exit('%s is already exists' % dirname)
                    os.mkdir(dirname)
                    # Extract all images
                    for key, value in images.items():
                        self.extract_entry(entry=key, output_file=value, destdir=dirname)
                    # Fix output file name
                    output = os.path.join(dirname, output)
            else:
                # Extract all images next to the temporary pages
                for key in images:
                    self.extract_entry(entry=key, output_file=key.lower(), destdir=tempdir)
            # toclevels is a method: pass its value, not the bound method
            htmldoc(files, self.htmldoc_exec, options, self.toclevels(), output)
        finally:
            # Remove temporary files
            shutil.rmtree(path=tempdir)
315 | ||
class CHMEntry(object):
    """Class for CHM file entry.

    Entry content is handled as bytes throughout: ``read`` returns what
    chmlib retrieves, and ``get``/``correct`` return ``b''`` when the entry
    cannot be read.
    """

    # Matches names ending in .htm/.html, case-insensitively.
    _HTML_NAME_RE = re.compile(r'(?i)\.html?$')

    # Unwanted HTML elements removed by correct(), applied in this order.
    # The patterns are bytes because the content they are applied to is bytes.
    _UNWANTED = (
        (re.compile(rb'<div .*teamlib\.gif.*\/div>'), b''),
        (re.compile(rb'<a href.*>\[ Team LiB \]<\/a>'), b''),
        (re.compile(rb'<table.*larrow\.gif.*rarrow\.gif.*<\/table>'), b''),
        (re.compile(rb'<a href.*next\.gif[^>]*><\/a>'), b''),
        (re.compile(rb'<a href.*previous\.gif[^>]*><\/a>'), b''),
        (re.compile(rb'<a href.*prev\.gif[^>]*><\/a>'), b''),
        (re.compile(rb'"[^"]*previous\.gif"'), b'""'),
        (re.compile(rb'"[^"]*prev\.gif"'), b'""'),
        (re.compile(rb'"[^"]*next\.gif"'), b'""'),
    )

    def __init__(self, parent, name, frontpage='index.html'):
        # parent CHM file
        self.parent = parent
        # object inside CHM file
        self.name = name
        # frontpage name to substitute
        self.frontpage = os.path.basename(frontpage)

    def read(self):
        """Read CHM entry content.

        Returns None when the object cannot be resolved or is empty.
        """
        result, ui = chmlib.chm_resolve_object(self.parent._chm, self.name.encode('utf-8'))
        if result != chmlib.CHM_RESOLVE_SUCCESS:
            return None

        size, content = chmlib.chm_retrieve_object(self.parent._chm, ui, 0, ui.length)
        if size == 0:
            return None
        return content

    def _is_html(self):
        """Tell whether the entry name looks like an HTML page."""
        return self._HTML_NAME_RE.search(self.name) is not None

    def lower_links(self, text):
        """Lower-case every ``href=...`` / ``src=...`` attribute in *text* (bytes)."""
        return re.sub(rb'(?i)(href|src)\s*=\s*([^\s|>]+)',
                      lambda m: m.group(0).lower(), text)

    def add_restoreframing_js(self, name, text):
        """Insert a "show framing" script after each bare ``<body>`` tag of *text*.

        *name* is the page path without its leading slash; the link in the
        script climbs one ``../`` per directory level to reach the front page.
        """
        name = re.sub('/+', '/', name)
        depth = name.count('/')

        js = b"""<body><script language="javascript">
if (window.name != "content")
    document.write("<center><a href='%s%s?page=%s'>show framing</a></center>")
</script>""" % (b'../' * depth, self.frontpage.encode('utf8'), name.encode('utf8'))

        # A callable replacement keeps backslashes in the snippet literal.
        return re.sub(rb'(?i)<\s*body\s*>', lambda m: js, text)

    def correct(self):
        """Get correct CHM entry content.

        For HTML pages the links are lower-cased when the parent's
        ``filename_case`` is set, and the unwanted navigation markup is
        removed.  Other entries are returned as read.
        """
        data = self.read()
        if data is None:
            return b''
        if self._is_html():
            # lower-casing links if needed
            if self.parent.filename_case:
                data = self.lower_links(data)
            # Delete unwanted HTML elements.
            for pattern, replacement in self._UNWANTED:
                data = pattern.sub(replacement, data)
        return data

    def get(self):
        """Get CHM entry content.

        For HTML pages the links are lower-cased when the parent's
        ``filename_case`` is set and the restore-framing script is added when
        ``restore_framing`` is set.  Other entries are returned as read.
        """
        data = self.read()
        if data is None:
            return b''
        if self._is_html():
            # lower-casing links if needed
            if self.parent.filename_case:
                data = self.lower_links(data)
            # restore framing if that option is set in config file
            if self.parent.restore_framing:
                data = self.add_restoreframing_js(self.name[1:], data)
        return data
0 | # -*- coding: utf-8 -*- | |
1 | # | |
2 | # archmage -- CHM decompressor | |
3 | # Copyright (c) 2009 Basil Shubin <bashu@users.sourceforge.net> | |
4 | # Copyright (c) 2015,2019 Mikhail Gusarov <dottedmag@dottedmag.net> | |
5 | # | |
6 | # This program is free software; you can redistribute it and/or modify it under | |
7 | # the terms of the GNU General Public License as published by the Free Software | |
8 | # Foundation; either version 2 of the License, or (at your option) any later | |
9 | # version. | |
10 | # | |
11 | # This program is distributed in the hope that it will be useful, but WITHOUT | |
12 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
13 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more | |
14 | # details. | |
15 | # | |
16 | # You should have received a copy of the GNU General Public License along with | |
17 | # this program; if not, write to the Free Software Foundation, Inc., 51 Franklin | |
18 | # Street, Fifth Floor, Boston, MA 02110-1301, USA. | |
19 | # | |
20 | ||
21 | import re | |
22 | import mimetypes | |
23 | import sgmllib, urllib.request, urllib.error, urllib.parse | |
24 | ||
25 | from bs4 import BeautifulSoup, UnicodeDammit | |
26 | from html.parser import HTMLParser | |
27 | from urllib.parse import urlparse | |
28 | ||
29 | START_TAG = '[' | |
30 | END_TAG = ']' | |
31 | ||
32 | ||
class SitemapFile(object):
    """Sitemap file class: normalises sitemap markup and parses it."""

    def __init__(self, lines):
        """Store a prettified copy of *lines* with empty lists and items removed.

        ``self.lines`` is None when *lines* is empty or None.
        """
        self.lines = None
        if not lines:
            return
        # XXX: Cooking tasty beautiful soup ;-)
        markup = BeautifulSoup(lines, 'html.parser').prettify()
        # XXX: Removing empty tags
        for empty_tag in (r'<ul>\s*</ul>', r'<li>\s*</li>'):
            markup = re.sub(empty_tag, '', markup, flags=re.I | re.M)
        self.lines = markup

    def parse(self):
        """Run SitemapParser over the stored markup and return its text.

        The closing bracket is appended after a newline.
        """
        parser = SitemapParser()
        if self.lines:
            parser.feed(self.lines)
        # parsed text + last bracket
        return parser.parsed + '\n' + END_TAG
54 | ||
55 | ||
class TagStack(list):
    """Stack of open tag names.

    Adapted from David Mertz's book 'Text Processing in Python'.
    """

    # Tags of which at most one may be open at a time
    _PARAGRAPH_LEVEL = ('p', 'blockquote')

    def append(self, tag):
        """Push *tag*; a paragraph-level tag first evicts every other one."""
        # Remove every paragraph-level tag if this is one.  The list has to
        # be changed in place: rebinding ``self`` would not affect the stack.
        if tag.lower() in self._PARAGRAPH_LEVEL:
            self[:] = [t for t in self if t not in self._PARAGRAPH_LEVEL]
        super(TagStack, self).append(tag)

    def pop(self, tag):
        """Remove the nearest *tag* from the top, and everything above it.

        Raises ValueError, leaving the stack untouched, when *tag* is not on
        the stack.
        """
        # 'Pop' by tag from nearest position, not only last item
        for pos in range(len(self) - 1, -1, -1):
            if self[pos] == tag:
                del self[pos:]
                return
        raise ValueError('Tag not on stack')
74 | ||
75 | ||
class SitemapParser(sgmllib.SGMLParser):
    """Class for parsing files in SiteMap format, such as .hhc.

    The output accumulates in ``self.parsed`` as nested bracketed text; each
    ``text/sitemap`` object adds a quoted ``"name", "local", "imagenumber"``
    triple.
    """

    def __init__(self):
        self.tagstack = TagStack()
        self.in_obj = False
        self.name = ""
        self.local = ""
        self.param = ""
        self.imagenumber = 1
        self.parsed = ""
        sgmllib.SGMLParser.__init__(self)

    def unknown_starttag(self, tag, attrs):
        """Handle an opening tag; nothing is recorded before the first ``ul``."""
        # first ul, start processing from here
        if tag == 'ul' and not self.tagstack:
            self.tagstack.append(tag)
            # First bracket
            self.parsed += '\n' + START_TAG
            return

        # everything below applies only inside ul
        if not self.tagstack:
            return

        if tag == 'li':
            self._open_item()

        if tag == 'object':
            if any(key.lower() == 'type' and value.lower() == 'text/sitemap'
                   for key, value in attrs):
                self.in_obj = True

        if tag.lower() == 'param' and self.in_obj:
            self._read_param(attrs)

        self.tagstack.append(tag)

    def _open_item(self):
        """Close the previous item if one is still open, then open a new bracket."""
        # append closing bracket if needed
        if self.tagstack[-1] != 'ul':
            self.parsed += END_TAG
            self.tagstack.pop('li')
        indent = ' ' * len(self.tagstack)

        if self.parsed != '\n' + START_TAG:
            self.parsed += ', '

        self.parsed += '\n' + indent + START_TAG

    def _read_param(self, attrs):
        """Record the name/local/imagenumber carried by one ``param`` tag."""
        for key, value in attrs:
            key = key.lower()
            if key == 'name':
                self.param = value.lower()
            elif key == 'value':
                if self.param == 'name' and not self.name:
                    # XXX: Remove LF and/or CR signs from name
                    cleaned = value.replace('\n', '').replace('\r', '')
                    # XXX: Escaping double quotes
                    self.name = cleaned.replace('"', '\\"')
                elif self.param == 'local':
                    # XXX: Change incorrect slashes in url
                    # NOTE(review): the second replace can never match, as all
                    # backslashes are already gone -- confirm the intent.
                    self.local = value.lower().replace('\\', '/').replace('..\\', '')
                elif self.param == 'imagenumber':
                    self.imagenumber = value

    def unknown_endtag(self, tag):
        """Handle a closing tag; ignored until the first ``ul`` has been seen."""
        # if inside ul
        if not self.tagstack:
            return
        if tag == 'ul':
            self.parsed += END_TAG
        if tag == 'object' and self.in_obj:
            # "Link Name", "URL", "Icon"
            self.parsed += "\"%s\", \"%s\", \"%s\"" % (self.name, self.local, self.imagenumber)
            # Set to default values
            self.in_obj = False
            self.name = self.local = ""
            self.imagenumber = 1
        if tag != 'li':
            self.tagstack.pop(tag)
144 | ||
145 | ||
class PageLister(sgmllib.SGMLParser):
    """
    Parser of the topics tree that collects, in ``self.pages``, the URL of
    every HTML page referenced by a ``Local`` parameter, in document order
    and without duplicates.
    """

    def reset(self):
        sgmllib.SGMLParser.reset(self)
        self.pages = []

    def feed(self, data):
        """Feed *data* to the parser after converting it to text with UnicodeDammit."""
        markup = UnicodeDammit(data).unicode_markup
        sgmllib.SGMLParser.feed(self, markup)

    def start_param(self, attrs):
        """Record the ``value`` of a ``param`` tag whose ``name`` is ``local``.

        Only a ``value`` attribute that follows the ``name`` attribute is
        taken into account.
        """
        is_local = False
        for key, value in attrs:
            if key == 'name' and value.lower() == 'local':
                is_local = True
            if not (is_local and key == 'value'):
                continue
            # Sometime url has incorrect slashes
            normalised = urlparse(value.replace('\\', '/')).geturl()
            page = '/' + re.sub("#.*$", '', urllib.parse.unquote(normalised))
            # Avoid duplicates
            if page not in self.pages:
                self.pages.append(page)
171 | ||
172 | ||
class ImageCatcher(sgmllib.SGMLParser):
    """
    Collects, in ``self.imgurls``, the image URLs of the pages fed to it:
    every ``img`` source, plus every scheme-less link whose guessed MIME
    type is an image.
    """

    def reset(self):
        sgmllib.SGMLParser.reset(self)
        self.imgurls = []

    def start_img(self, attrs):
        """Record the ``src`` of an ``img`` tag, prefixed with '/'."""
        for key, value in attrs:
            if key.lower() != 'src':
                continue
            image_url = '/' + value
            # Avoid duplicates in the list of image URLs.
            if image_url not in self.imgurls:
                self.imgurls.append(image_url)

    def start_a(self, attrs):
        """Record the ``href`` of an ``a`` tag when it points to a local image."""
        for key, value in attrs:
            if key.lower() != 'href':
                continue
            url = urlparse(value)
            # Remove the fragment and add the leading slash
            target = '/' + re.sub("#.*$", '', urllib.parse.unquote(url.geturl()))
            # Check file's mimetype
            mime = mimetypes.guess_type(target)[0]
            if url.scheme or target in self.imgurls:
                # External link, or already known
                continue
            if mime and re.search('image/.*', mime):
                self.imgurls.append(target)
202 | ||
203 | ||
class TOCCounter(HTMLParser):
    """Count Table of Contents levels.

    ``count`` ends up as the largest number of ``param`` tags found on the
    tag stack when an ``object`` tag is closed.
    """

    count = 0

    def __init__(self):
        self.tagstack = TagStack()
        HTMLParser.__init__(self)

    def feed(self, data):
        """Feed *data* to the parser, accepting bytes and empty input.

        HTMLParser only accepts text, so bytes are converted with
        UnicodeDammit first; None or empty input is ignored.
        """
        if not data:
            return
        if isinstance(data, bytes):
            data = UnicodeDammit(data).unicode_markup
        HTMLParser.feed(self, data)

    def handle_starttag(self, tag, attrs):
        self.tagstack.append(tag)

    def handle_endtag(self, tag):
        if not self.tagstack:
            return
        if tag.lower() == 'object':
            params = self.tagstack.count('param')
            if self.count < params:
                self.count = params
        # 'li' is left on the stack; any other tag is popped
        if tag.lower() != 'li':
            self.tagstack.pop(tag)
0 | # -*- coding: utf-8 -*- | |
1 | # | |
2 | # archmage -- CHM decompressor | |
3 | # Copyright (c) 2003 Eugeny Korekin <aaaz@users.sourceforge.net> | |
4 | # Copyright (c) 2005-2009 Basil Shubin <bashu@users.sourceforge.net> | |
5 | # Copyright (c) 2015,2019 Mikhail Gusarov <dottedmag@dottedmag.net> | |
6 | # | |
7 | # This program is free software; you can redistribute it and/or modify it under | |
8 | # the terms of the GNU General Public License as published by the Free Software | |
9 | # Foundation; either version 2 of the License, or (at your option) any later | |
10 | # version. | |
11 | # | |
12 | # This program is distributed in the hope that it will be useful, but WITHOUT | |
13 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
14 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more | |
15 | # details. | |
16 | # | |
17 | # You should have received a copy of the GNU General Public License along with | |
18 | # this program; if not, write to the Free Software Foundation, Inc., 51 Franklin | |
19 | # Street, Fifth Floor, Boston, MA 02110-1301, USA. | |
20 | # | |
21 | __all__ = ['CHM'] | |
22 | __version__ = '0.4.0' | |
23 | ||
24 | import sys, os, pkg_resources | |
25 | ||
# Which config file to use: the per-user ~/.arch.conf when it exists,
# otherwise the arch.conf shipped inside the archmage package.
user_config = os.path.join(os.path.expanduser('~'), '.arch.conf')
config = (
    user_config
    if os.path.exists(user_config)
    else pkg_resources.resource_filename('archmage', 'arch.conf')
)
32 | ||
def file2dir(filename):
    """Convert file filename.chm to filename_html directory.

    Only the extension of the last path component is dropped, so a dot in a
    directory name (``./book``, ``docs.v2/book``) or a leading dot is kept.
    """
    root, _extension = os.path.splitext(filename)
    return root + '_' + 'html'
0 | from os.path import basename, join | |
1 | import pkg_resources | |
2 | ||
3 | # Directory for templates | |
4 | templates_dir = pkg_resources.resource_filename('archmage', 'templates/') | |
5 | ||
6 | # Directory with icons | |
7 | icons_dir = join(templates_dir, 'icons') | |
8 | ||
9 | # List of auxiliary files, stored inside CHM file. | |
10 | # Those files would not be extracted. | |
11 | auxes = ('/#IDXHDR', '/#ITBITS', '/#STRINGS', '/#SYSTEM', '/#TOPICS', | |
12 | '/#URLSTR', '/#URLTBL', '/#WINDOWS', '/$FIftiMain', '/$OBJINST', | |
13 | '/$WWAssociativeLinks', '/$WWKeywordLinks', ':') | |
14 | ||
15 | # Title. That is value, which you want to see in browser title. | |
16 | # 'sourcename' is the name of source file. | |
17 | title = basename(sourcename) | |
18 | ||
19 | # Background and foreground colors for header. | |
20 | bcolor = '#63baff' | |
21 | fcolor = 'white' | |
22 | ||
23 | # Filenames inside a CHM file are stored in UTF-8, but links can be in | |
24 | # some national codepage. If you set fs_encoding, such links will be | |
25 | # converted to it. | |
26 | # | |
27 | # Default: fs_encoding = 'utf-8' | |
28 | fs_encoding = 'utf-8' | |
29 | ||
30 | # If your filesystem is case-sensitive, links in the HTML can point to | |
31 | # files whose names differ only in case. Set filename_case to 1 to | |
32 | # lower-case the links in HTML pages. | |
33 | # | |
34 | # Default: filename_case=1 | |
35 | filename_case = 1 | |
36 | ||
37 | # If you want to add JavaScript code that restores framing to every | |
38 | # page, set restore_framing to 1. | |
39 | # | |
40 | # Default: restore_framing=1 | |
41 | restore_framing = 1 | |
42 | ||
43 | # Path to htmldoc executable | |
44 | # | |
45 | htmldoc_exec = '/usr/bin/htmldoc' | |
46 | ||
47 | # CHM2TEXT converting. Use following command to convert CHM content to plain | |
48 | # text file. Make sure that below apps are available on your system. | |
49 | #chmtotext = 'lynx -dump -stdin' | |
50 | chmtotext = '/usr/bin/elinks -dump' | |
51 | ||
52 | # CHM2HTML converting. Use following command to convert CHM content to a single | |
53 | # HTML file. Make sure that htmldoc is available on your system. | |
54 | chmtohtml = '-t html -f "%(output)s" --book %(toc)s --no-numbered --toctitle "Table of Contents" --title --linkstyle underline --fontsize 11.0 --fontspacing 1.2 --headingfont Helvetica --bodyfont Times --headfootsize 11.0 --headfootfont Helvetica --charset iso-8859-1 --browserwidth 680 --no-strict --no-overflow --quiet' | |
55 | ||
56 | # CHM2PDF converting. Use following command to convert CHM content to a single | |
57 | # PDF file. Make sure that htmldoc is available on your system. | |
58 | chmtopdf = '-t pdf14 -f "%(output)s" --book %(toc)s --no-numbered --toctitle "Table of Contents" --title --textcolor "#000000" --linkcolor "#0000ff" --linkstyle plain --size Universal --left 1.00in --right 0.50in --top 0.50in --bottom 0.50in --header .t. --header1 ... --footer h.1 --nup 1 --tocheader .t. --tocfooter ..i --portrait --color --no-pscommands --no-xrxcomments --compression=1 --jpeg=0 --fontsize 11.0 --fontspacing 1.2 --headingfont Helvetica --bodyfont Times --headfootsize 11.0 --headfootfont Helvetica --charset iso-8859-1 --links --embedfonts --pagemode outline --pagelayout single --firstpage c1 --pageeffect none --pageduration 10 --effectduration 1.0 --no-encryption --permissions all --owner-password "" --user-password "" --browserwidth 680 --no-strict --no-overflow --quiet' | |
59 | ||
60 | # Maximum Table of Content levels for htmldoc utility. | |
61 | # | |
62 | # Default: maxtoclvl = 4 | |
63 | maxtoclvl = 4 |
0 | # -*- coding: utf-8 -*- | |
1 | # | |
2 | # archmage -- CHM decompressor | |
3 | # Copyright (c) 2005-2009 Basil Shubin <bashu@users.sourceforge.net> | |
4 | # | |
5 | # This program is free software; you can redistribute it and/or modify it under | |
6 | # the terms of the GNU General Public License as published by the Free Software | |
7 | # Foundation; either version 2 of the License, or (at your option) any later | |
8 | # version. | |
9 | # | |
10 | # This program is distributed in the hope that it will be useful, but WITHOUT | |
11 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
12 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more | |
13 | # details. | |
14 | # | |
15 | # You should have received a copy of the GNU General Public License along with | |
16 | # this program; if not, write to the Free Software Foundation, Inc., 51 Franklin | |
17 | # Street, Fifth Floor, Boston, MA 02110-1301, USA. | |
18 | # | |
19 | ||
20 | """CHM to Text converter (using external tool: lynx or elinks)""" | |
21 | ||
22 | import sys | |
23 | import signal | |
24 | from subprocess import Popen, PIPE | |
25 | ||
26 | if sys.platform != "win32": | |
27 | signal.signal(signal.SIGPIPE, signal.SIG_DFL) | |
28 | ||
def chmtotext(input, cmd, output=sys.stdout):
    """CHM to Text converter

    input  - HTML data to convert (bytes; a str is encoded before piping)
    cmd    - shell command line of the external converter; it must read
             HTML from stdin and write plain text to stdout
    output - text file object the converted text is printed to
    """
    # Text written to `output` is encoded by the file object itself, so decode
    # the tool's output with the same encoding to round-trip its bytes.
    encoding = getattr(output, 'encoding', None) or 'utf-8'
    if isinstance(input, str):
        input = input.encode(encoding, 'replace')
    # shell=True is intentional: cmd is a complete command line from arch.conf.
    proc = Popen(cmd, stdin=PIPE, stdout=PIPE, shell=True)
    # communicate() feeds stdin and drains stdout at the same time and closes
    # stdin afterwards; writing all input first can deadlock once the tool's
    # output fills the pipe buffer.
    converted = proc.communicate(input)[0]
    # Decode before printing: print() on bytes would emit the b'...' repr.
    print(converted.decode(encoding, 'replace'), file=output)
0 | # -*- coding: utf-8 -*- | |
1 | # | |
2 | # archmage -- CHM decompressor | |
3 | # Copyright (c) 2003 Eugeny Korekin <aaaz@users.sourceforge.net> | |
4 | # Copyright (c) 2005-2009 Basil Shubin <bashu@users.sourceforge.net> | |
5 | # Copyright (c) 2015,2019 Mikhail Gusarov <dottedmag@dottedmag.net> | |
6 | # | |
7 | # This program is free software; you can redistribute it and/or modify it under | |
8 | # the terms of the GNU General Public License as published by the Free Software | |
9 | # Foundation; either version 2 of the License, or (at your option) any later | |
10 | # version. | |
11 | # | |
12 | # This program is distributed in the hope that it will be useful, but WITHOUT | |
13 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
14 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more | |
15 | # details. | |
16 | # | |
17 | # You should have received a copy of the GNU General Public License along with | |
18 | # this program; if not, write to the Free Software Foundation, Inc., 51 Franklin | |
19 | # Street, Fifth Floor, Boston, MA 02110-1301, USA. | |
20 | # | |
21 | ||
22 | """arCHMage -- extensible reader and decompiler for files in the CHM format. | |
23 | ||
24 | Usage: %(program)s [options] <chmfile> [destdir|destfile] | |
25 | Where: | |
26 | ||
27 | -x / --extract | |
28 | Extracts CHM file into specified directory. If destination | |
29 | directory is omitted, than the new one will be created based | |
30 | on name of CHM file. This options is by default. | |
31 | ||
32 | -c format | |
33 | --convert=format | |
34 | Convert CHM file into specified file format. If destination | |
35 | file is omitted, than the new one will be created based | |
36 | on name of CHM file. Available formats: | |
37 | ||
38 | html - Single HTML file | |
39 | text - Plain Text file | |
40 | pdf - Adobe PDF file format | |
41 | ||
42 | -d / --dump | |
43 | Dump HTML data from CHM file into standard output. | |
44 | ||
45 | -V / --version | |
46 | Print version number and exit. | |
47 | ||
48 | -h / --help | |
49 | Print this text and exit. | |
50 | """ | |
51 | ||
52 | import os, sys | |
53 | import getopt | |
54 | ||
55 | import archmage | |
56 | from archmage.CHM import CHMFile, Action | |
57 | ||
58 | # Return codes | |
59 | OK = 0 | |
60 | ERROR = 1 | |
61 | ||
62 | program = sys.argv[0] | |
63 | ||
64 | # Miscellaneous auxiliary functions | |
def message(code=OK, msg=''):
    """Print msg, if there is one, to stderr for ERROR and to stdout otherwise."""
    stream = sys.stderr if code == ERROR else sys.stdout
    if not msg:
        return
    print(msg, file=stream)
71 | ||
def usage(code=OK, msg=''):
    """Print the module help text followed by msg, then exit with code."""
    # The module docstring doubles as the help text; %(program)s and friends
    # are filled in from the module globals.
    help_text = __doc__ % globals()
    for text in (help_text, msg):
        message(code, text)
    sys.exit(code)
77 | ||
def output_format(mode):
    """Map a -c/--convert format name to the matching Action value.

    mode - format name from the command line: 'text', 'html' or 'pdf'

    Exits the program with an error message for any other name.
    """
    # The conversion constants are attributes of Action (the only such name
    # imported from archmage.CHM); bare CHM2TXT/CHM2HTML/CHM2PDF are not
    # defined in this module and raised NameError.
    if mode == 'text':
        return Action.CHM2TXT
    elif mode == 'html':
        return Action.CHM2HTML
    elif mode == 'pdf':
        return Action.CHM2PDF
    else:
        sys.exit('Invalid output file format: %s' % mode)
87 | ||
def output_file(filename, mode):
    """Convert filename.chm to filename.output

    filename - path of the CHM file
    mode     - Action value selecting the conversion

    Returns filename with its extension replaced by 'txt', 'html' or 'pdf'
    according to mode, or by 'output' for any other mode.
    """
    # Compare against the Action attributes: the bare CHM2* names are not
    # defined in this module.
    if mode == Action.CHM2TXT:
        file_ext = 'txt'
    elif mode == Action.CHM2HTML:
        file_ext = 'html'
    elif mode == Action.CHM2PDF:
        file_ext = 'pdf'
    else:
        file_ext = 'output'
    # splitext only looks at the last path component, so a dot in a directory
    # name ('docs.v2/manual') no longer truncates the path.
    output_filename = os.path.splitext(filename)[0] + '.' + file_ext
    return output_filename
100 | ||
def parseargs():
    """Parse sys.argv into an options object.

    The returned object has three attributes:

    mode    - Action value selected by -x, -c or -d (Action.EXTRACT when no
              mode option is given)
    chmfile - CHM file name taken from the first positional argument
    output  - output directory or file; left as None in dump mode

    Exits the program on unknown options, on conflicting mode options, when
    no CHM file is given and when unexpected extra arguments remain.
    -h and -V print their text and exit as well.
    """
    try:
        # -p/--port is no longer listed: the option was removed, and accepting
        # it here only led to the 'assert False' branch below. getopt now
        # reports it as an unknown option.
        opts, args = getopt.getopt(sys.argv[1:], 'xc:dVh',
                                   ['extract', 'convert=', 'dump', 'version', 'help'])
    except getopt.error as msg:
        usage(ERROR, msg)

    class Options:
        mode = None        # EXTRACT or other
        chmfile = None     # CHM File to view/extract
        output = None      # Output file or directory

    options = Options()

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
        elif opt in ('-V', '--version'):
            message(OK, archmage.__version__)
            sys.exit(OK)
        elif opt in ('-c', '--convert'):
            if options.mode is not None:
                sys.exit('-x and -c are mutually exclusive')
            options.mode = output_format(str(arg))
        elif opt in ('-x', '--extract'):
            if options.mode is not None:
                sys.exit('-x and -c are mutually exclusive')
            options.mode = Action.EXTRACT
        elif opt in ('-d', '--dump'):
            if options.mode is not None:
                sys.exit('-d should be used without any other options')
            options.mode = Action.DUMPHTML
        else:
            # Every option accepted by getopt above is handled explicitly.
            assert False, (opt, arg)

    # Sanity checks
    if options.mode is None:
        # Set default option
        options.mode = Action.EXTRACT

    if not args:
        sys.exit('No CHM file was specified!')
    else:
        # Get CHM file name from command line
        options.chmfile = args.pop(0)

    # if CHM content should be extracted
    if options.mode == Action.EXTRACT:
        if not args:
            options.output = archmage.file2dir(options.chmfile)
        else:
            # get output directory from command line
            options.output = args.pop(0)
    # or converted into another file format
    elif options.mode in (Action.CHM2TXT, Action.CHM2HTML, Action.CHM2PDF):
        if not args:
            options.output = output_file(options.chmfile, options.mode)
        else:
            # get output filename from command line
            options.output = args.pop(0)

    # Any other arguments are invalid
    if args:
        sys.exit('Invalid arguments: ' + ', '.join(args))

    return options
167 | ||
168 | ||
def main():
    """Command line entry point: run the action selected by the options.

    Exits with an error message when the CHM file does not exist or is a
    directory, and when the target of a text conversion already exists.
    """
    options = parseargs()
    if not os.path.exists(options.chmfile):
        sys.exit('No such file: %s' % options.chmfile)

    if os.path.isdir(options.chmfile):
        sys.exit('A regular files is expected, got directory: %s' % options.chmfile)

    source = CHMFile(options.chmfile)

    # try/finally: close the CHM file even when an action fails or exits.
    try:
        if options.mode == Action.DUMPHTML:
            source.dump_html()
        elif options.mode == Action.CHM2TXT:
            if os.path.exists(options.output):
                sys.exit('%s is already exists' % options.output)
            # Close (and thereby flush) the text output when done.
            with open(options.output, 'w') as output:
                source.chm2text(output)
        elif options.mode in (Action.CHM2HTML, Action.CHM2PDF):
            source.htmldoc(options.output, options.mode)
        elif options.mode == Action.EXTRACT:
            source.extract(options.output)
    finally:
        source.close()
0 | # -*- coding: utf-8 -*- | |
1 | # | |
2 | # archmage -- CHM decompressor | |
3 | # Copyright (c) 2009 Basil Shubin <bashu@users.sourceforge.net> | |
4 | # | |
5 | # This program is free software; you can redistribute it and/or modify it under | |
6 | # the terms of the GNU General Public License as published by the Free Software | |
7 | # Foundation; either version 2 of the License, or (at your option) any later | |
8 | # version. | |
9 | # | |
10 | # This program is distributed in the hope that it will be useful, but WITHOUT | |
11 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
12 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more | |
13 | # details. | |
14 | # | |
15 | # You should have received a copy of the GNU General Public License along with | |
16 | # this program; if not, write to the Free Software Foundation, Inc., 51 Franklin | |
17 | # Street, Fifth Floor, Boston, MA 02110-1301, USA. | |
18 | # | |
19 | ||
20 | """Generic converter function""" | |
21 | ||
22 | import os | |
23 | import string | |
24 | import tempfile | |
25 | import subprocess | |
26 | import archmage | |
27 | ||
28 | ||
def htmldoc(input, cmd, options, toclevels, output):
    """CHM to other format converter

    input - list of input html files
    cmd - full path to htmldoc command
    options - htmldoc options from arch.conf
    toclevels - number of ToC levels as htmldoc option
    output - output file (single html, ps, pdf and etc)

    Nothing is executed when input is empty.
    """
    if toclevels:
        toc = '--toclevels %s' % (toclevels)
    else:
        toc = '--no-toc'
    options = options % {'output': output, 'toc': toc}
    if not input:
        return
    # Create a htmldoc file for batch processing. Text mode is required:
    # NamedTemporaryFile is binary by default and rejects str in Python 3.
    f = tempfile.NamedTemporaryFile(mode='w', delete=False)
    try:
        with f:
            f.write('#HTMLDOC 1.8.27\n')
            f.write(options + '\n')
            # str.join replaces string.join(), which Python 3 does not have
            f.write('\n'.join(input))
        # Prepare command line to execute
        command = '%s --batch %s' % (cmd, f.name)
        subprocess.call(command, shell=True)
    finally:
        # Unlink temporary htmldoc file, also when writing or htmldoc failed
        os.unlink(f.name)
0 | <html> | |
1 | <head> | |
2 | <title>$title</title> | |
3 | <LINK rel="Stylesheet" type="text/css" href="arch_css.css"> | |
4 | </head> | |
5 | ||
6 | <body onload="setInterval('getLoc()', 500);"> | |
7 | <script> | |
8 | var lastDoc; | |
9 | var contents = $contents; | |
10 | ||
11 | var w=window,d=document | |
12 | var icons={'0' : 'icons/0.gif','1' : 'icons/90.gif', | |
13 | '2' : 'icons/91.gif', '3' : 'icons/92.gif', '4' : 'icons/99.gif', | |
14 | '18' : 'icons/93.gif', '19' : 'icons/94.gif', '20' : 'icons/97.gif', | |
15 | '26' : 'icons/95.gif', '27' : 'icons/96.gif', '28' : 'icons/98.gif'} | |
16 | ||
17 | var dhtml=true | |
18 | try{if(d.body.innerHTML.length<=0)dhtml=false} | |
19 | catch(e){dhtml=false;} | |
20 | var tree=[]; | |
21 | ||
22 | get_element=d.all ? | |
23 | function(id){return d.all[id]} | |
24 | : | |
25 | function(id){return d.getElementById(id)} | |
26 | ||
// Returns the tree-line icon for this node. The key into `icons` is built
// from flags: base 2, +16 when the node has children, +8 when it has
// children and is open, +1 when it is the last sibling, +2 when it is the
// first top-level node.
function get_img1(){
  var key=2
  if(this.childs.length){
    key+=16
    if(this.opened)key+=8
  }
  if(this.is_last())key+=1
  if(this.is_first())key+=2
  return icons[key]
}
// Returns the path of the content icon for this node, based on the icon
// number stored in cnt[2]. For numbers below 9 the icon depends on the
// open state: an opened node maps an odd number to the next even one, a
// closed node maps an even number to the previous odd one.
// NOTE: `n` is assigned without `var`, so it is an implicit global.
function get_img2(){
  n=this.cnt[2]
  if(n<9){
    // parseInt() keeps the +1/-1 numeric even when n is a string
    n=(this.opened ? ( n%2 ? parseInt(n)+1 : n ) : ( n%2 ? n : parseInt(n)-1 ))
  }
  return 'icons/'+n+'.gif'
}
// Constructor for one entry of the contents tree; recursively builds the
// entry's children as well.
//   tree - parent object: the root set up by draw_contents or another node
//   n    - position of this entry among the parent's children
function node(tree,n){
  // Nesting depth; the root has ind=-1, so top-level nodes get 0
  this.ind=tree.ind+1
  // Top-level entries are read from cnt[n]. Below that, the parent's cnt
  // holds the title (cnt[0]), link (cnt[1]) and icon number (cnt[2]) first,
  // so child data starts at offset 3.
  this.cnt=tree.cnt[n+(this.ind ? 3 : 0)]
  // No data in that slot: stop before the node is registered anywhere
  if(!this.cnt)return
  this.tree=tree.tree
  this.parent=tree
  // Without DHTML support nodes cannot be toggled, so they start opened
  this.opened=!dhtml
  // Register in the flat node list; nind is the index used in element ids
  this.nind=this.tree.nodes.length
  this.tree.nodes[this.nind]=this
  tree.childs[n]=this
  this.childs=[]
  // The bound overshoots the child slots by one; the extra call finds no
  // data and returns early through the check above.
  for(var i=0;i < this.cnt.length - 2;i++)
    new node(this,i)
  // Attach the shared functions of this script as methods
  this.get_img1=get_img1
  this.get_img2=get_img2
  this.open=open
  this.select=select
  this.init=init
  // True when this node is the last child of its parent
  this.is_last=function(){
    return n==this.parent.childs.length - 1
  }
  // True only for the first top-level node, and only if it is not also the last
  this.is_first=function(){
    return(this.ind==0)&&(n==0)&&(!this.is_last())
  }
}
62 | ||
// Node method: shows or hides the children of this node.
// Does nothing when the node's child container ('divCont' + nind) is missing.
function open(){
  var childs=[]
  var el=get_element('divCont'+this.nind)
  if(!el)return
  if(!dhtml){
    // No DHTML: write the whole subtree into the document right away.
    // childs is still empty here, so this write adds nothing.
    d.write(childs.join(''))
    for(var i=0;i < this.childs.length;i++){
      d.write(this.childs[i].init())
      this.childs[i].open()
    }
  }
  else{
    // Build the children's markup only once, the first time the container is used
    if(!el.innerHTML){
      for(var i=0;i < this.childs.length;i++)
        childs[i]=this.childs[i].init()
      el.innerHTML=childs.join('')
    }
    // Toggle visibility, then flip the state flag
    el.style.display=(this.opened ? 'none' : 'block')
    this.opened=!this.opened
    // Refresh both icons so they reflect the new state
    var img1=d.images['img1_'+this.nind],img2=d.images['img2_'+this.nind]
    if(img1)img1.src=this.get_img1()
    if(img2)img2.src=this.get_img2()
  }
}
87 | ||
88 | ||
// Node method: marks this node as selected, or as deselected when `nind`
// is truthy. Returns whether the node has a link target (cnt[1]); the
// generated onclick handler returns this value.
function select(nind){
  if(!nind){
    // Selecting: become the tree's current selection and deselect the
    // previous one by calling its select() with a truthy argument
    var sel=this.tree.sel
    this.tree.sel=this
    if(sel)sel.select(true)
  }
  var img2=d.images['img2_'+this.nind]
  if(img2)img2.src=this.get_img2()
  // Bold marks the selected entry, normal weight a deselected one
  get_element('el'+this.nind).style.fontWeight=nind ? 'normal' : 'bold'
  return Boolean(this.cnt[1])
}
100 | ||
// Node method: returns the HTML markup for this node's row, plus an empty,
// hidden child container when the node has children.
// NOTE: `r` is assigned without `var`, so it is an implicit global.
function init(){
  var temp=[],par=this.parent
  // One spacer icon per ancestor level, filled from the innermost level
  // outwards; the icon depends on whether that ancestor is a last child
  for(var i=this.ind;i>0;i--){
    temp[i]='<img src="'+icons[par.is_last()? 0 : 1]+'" border="0" align="absbottom">'
    par=par.parent
  }
  r='<table cellpadding="0" cellspacing="0" border="0">'
  r+='<tr><td nowrap>'
  r+=temp.join('')
  // Tree-line icon; wrapped in a toggle link when the node has children and DHTML is available
  r+=(this.childs.length ?(!dhtml ? '' : '<a href="javascript: tree.toggle('+this.nind+')" >')+'<img src="'+this.get_img1()+'" border="0" align="absbottom" name="img1_'+this.nind+'">'+(!dhtml ? '' : '</a>'): '<img src="'+this.get_img1()+'" border="0" align="absbottom">')
  // Entry link: cnt[1] is the target loaded into the 'content' frame, cnt[0] the title
  r+='<a href="'+this.cnt[1]+'" target="'+'content'+'"'+' title="'+this.cnt[0]+'" onclick="return tree.select('+this.nind+')" '+(!dhtml ? '' : ' ondblclick="tree.toggle('+this.nind+')"')+' class="small" id="el'+this.nind+'"><img src="'+this.get_img2()+'" border="0" align="absbottom" name="img2_'+this.nind+'"> '+this.cnt[0]+'</a>'
  r+='</td></tr></table>'
  // Container that open() fills with the children's markup
  r+=(this.childs.length ? '<div id="divCont'+this.nind+'" style="display:none"></div>' : '')
  return r
}
116 | ||
// Builds the contents tree from the nested array `cnt` and writes the
// top-level entries into the document. Invoked with `new` at the end of
// the script; the new object becomes the global `tree` root.
function draw_contents(cnt){
  tree=this;
  tree.cnt=cnt;
  tree.tree=this;
  tree.nodes=[]; // flat list of all nodes, indexed by their nind
  tree.sel=null; // currently selected node
  tree.ind=-1; // so that top-level nodes get ind 0

  // Entry points used by the event handlers generated in init()
  tree.select=function(i){
    return tree.nodes[i].select();
  };
  tree.toggle=function(i){
    tree.nodes[i].open()
  };
  tree.childs=[]
  for(var i=0;i<cnt.length;i++){
    new node(tree,i)
  }
  tree.nind=0;

  for(var i=0;i < tree.childs.length;i++){
    d.write(tree.childs[i].init());
    // Without DHTML nothing can be expanded later, so expand everything now
    if(!dhtml)tree.childs[i].open();
  }
}
142 | ||
143 | ||
// Polled every 500 ms (see the body onload handler): when the location of
// the second frame of the parent frameset changes, selects the first tree
// node whose link contains the new page name and opens its collapsed
// ancestors so that the node is visible.
function getLoc(){
  var doc = ""+parent.frames[1].location;
  if(doc == lastDoc)return;
  // Remember the location before searching: a page that has no entry in
  // the tree is then looked up once instead of on every poll.
  lastDoc = doc;

  // Page name = last path component without the #fragment
  var keyVals = doc.split("\/");
  var targetPage = ""+keyVals[(keyVals.length-1)];
  if(targetPage.indexOf("\#") > 0){
    targetPage = targetPage.substr(0,targetPage.indexOf("\#"));
  }

  // Bounded search: the unbounded loop ran past the end of tree.nodes and
  // threw whenever no node linked to the page.
  var nodeCount = 0;
  while(nodeCount < tree.nodes.length && (""+tree.nodes[nodeCount].cnt[1]).lastIndexOf(targetPage) < 0){
    nodeCount++;
  }
  if(nodeCount >= tree.nodes.length)return;
  var target = tree.nodes[nodeCount];

  // Open collapsed ancestors from the outermost inwards. A node's markup
  // only exists once its parent has been opened, so opening just the direct
  // parent is not enough for deeply nested entries.
  var chain = [];
  for(var p = target.parent; p && p != tree; p = p.parent){
    chain.unshift(p);
  }
  for(var i = 0; i < chain.length; i++){
    if(chain[i].opened == false)chain[i].open();
  }
  target.select();
}
167 | new draw_contents(contents); | |
168 | </script> | |
169 | </body> | |
170 | </html> |
0 | <html> | |
1 | <head> | |
2 | <title>$title</title> | |
3 | ||
4 | <script> | |
5 | var qs=location.search.substr(1) | |
6 | var A=qs.split("&") | |
7 | var B=null | |
8 | var F="$deftopic" | |
9 | for(var i=0;i<A.length;i++){B=A[i].split("=");A[i]=[B[0],B[1]]} | |
10 | for(var j=0;j<A.length;j++){if(A[j][0]=='page'){ F=A[j][1];break}} | |
11 | </script > | |
12 | </head> | |
13 | <script> | |
14 | document.write('<frameset cols="200,*" bordercolor="$bcolor" frameborder="yes" framespacing="2" >') | |
15 | document.write('<frame name="toc" src="arch_contents.html">') | |
16 | document.write('<frame name="content" src="'+F+'" >') | |
17 | document.write('</frameset>'); | |
18 | </script> | |
19 | <noscript> | |
20 | <frameset cols="200,*" bordercolor="$bcolor" frameborder="yes" framespacing="2" > | |
21 | <frame name="toc" src="arch_contents.html" > | |
22 | <frame name="content" src="$deftopic"> | |
23 | </frameset> | |
24 | </noscript> | |
25 | </html> |
0 | <html> | |
1 | <head> | |
2 | <title>$title</title> | |
3 | <LINK rel="Stylesheet" type="text/css" href="arch_css.css"> | |
4 | </head> | |
5 | <body bgcolor="$bcolor"> | |
6 | <table class='htable' cellpadding="0" cellspacing="0" width="100%"><td> | |
7 | <td align="center" width="100%"> | |
8 | <b><font size="large" color="$fcolor">$title</font></b> | |
9 | </table> | |
10 | </body> | |
11 | </html> |
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
0 | <html> | |
1 | <head> | |
2 | <script>var pageid="";</script> | |
3 | ||
4 | <title>$title</title> | |
5 | ||
6 | <script> | |
7 | var qs=location.search.substr(1); | |
8 | var A=qs.split("&") | |
9 | var B=null | |
10 | var F="$deftopic"; | |
11 | for(var i=0;i<A.length;i++){ | |
12 | B=A[i].split("=") | |
13 | A[i]=[B[0],B[1]] | |
14 | } | |
15 | for(var j=0;j<A.length;j++){ | |
16 | if(A[j][0]=='page'){ | |
17 | F=A[j][1] | |
18 | break | |
19 | } | |
20 | } | |
21 | </script> | |
22 | </head> | |
23 | ||
24 | <script> | |
25 | document.write('<frameset rows="30,*" frameborder="no" framespacing="0" border="0" >') | |
26 | document.write('<frame name="header" src="arch_header.html" frameborder="no" noresize="yes" scrolling="no" >') | |
27 | if(F!='')F='?page='+F | |
28 | document.write('<frame name="main" src="arch_frameset.html'+F+'">') | |
29 | document.write('</frameset>') | |
30 | </script> | |
31 | <noscript> | |
32 | <frameset rows="30,*" frameborder="no" framespacing="0" border="0" > | |
33 | <frame name="header" src="arch_header.html" frameborder="no" noresize="yes" scrolling="no"> | |
34 | <frame name="main" src="arch_frameset.html" > | |
35 | </frameset> | |
36 | </noscript> | |
37 | </html> |
20 | 20 | .B archmage |
21 | 21 | .I chmfile directory |
22 | 22 | .br |
23 | .B archmage | |
24 | \-p port | |
25 | .I chmfile | |
26 | 23 | .SH DESCRIPTION |
27 | 24 | This manual page documents briefly the |
28 | 25 | .B archmage |
38 | 35 | chmlib from GnoCHM project. |
39 | 36 | .SH USAGE |
40 | 37 | .PP |
41 | There is three ways to use arCHMage package now: | |
42 | .PP | |
43 | 1) Extract .chm to directory (directory will be created): | |
38 | Extract .chm to directory (directory will be created): | |
44 | 39 | |
45 | 40 | archmage <chmfile> <directory> |
46 | 41 | .PP |
47 | 2) Run as http-server, which will publish chm file contents on | |
48 | specified port: | |
49 | ||
50 | archmage \-p <port> <chmfile> | |
51 | .PP | |
52 | 3) Tune your apache to publish chm file contents if there is trailing | |
53 | slash in request to that file (you will need working mod_python for | |
54 | that): | |
55 | ||
56 | Add that lines to your httpd.conf: | |
57 | ||
58 | AddHandler python-program .chm | |
59 | .br | |
60 | PythonHandler archmod.mod_chm | |
61 | ||
62 | Restart apache. | |
63 | .PP | |
64 | Let's suppose, you have file sample.chm in DocumentRoot of your | |
65 | apache. After that tuning you can receive raw chm file, if you point | |
66 | your browser to | |
67 | ||
68 | http://yourserver/sample.chm | |
69 | .PP | |
70 | or you can view chm file on the fly if you point your browser to | |
71 | ||
72 | http://yourserver/sample.chm/ (note trailing slash) | |
73 | 42 | .SH SEE ALSO |
74 | 43 | .PP |
75 | 44 | arCHMage Home Page: http://archmage.sf.net |
0 | Metadata-Version: 1.1 | |
0 | Metadata-Version: 1.2 | |
1 | 1 | Name: archmage |
2 | Version: 0.3.1 | |
2 | Version: 0.4.0 | |
3 | 3 | Summary: CHM decompressor |
4 | 4 | Home-page: https://github.com/dottedmag/archmage |
5 | Author: Mikhail Gusarov | |
6 | Author-email: dottedmag@dottedmag.net | |
5 | Maintainer: Mikhail Gusarov | |
6 | Maintainer-email: dottedmag@dottedmag.net | |
7 | 7 | License: GPLv2+ |
8 | 8 | Description: arCHMage is a reader and decompressor for CHM format |
9 | 9 | Keywords: chm,HTML Help,Compiled HTML,Compressed HTML |
1 | 1 | COPYING |
2 | 2 | MANIFEST.in |
3 | 3 | NEWS |
4 | RELEASE-VERSION | |
4 | README.md | |
5 | 5 | archmage.1 |
6 | 6 | setup.py |
7 | version.py | |
7 | archmage/CHM.py | |
8 | archmage/CHMParser.py | |
9 | archmage/__init__.py | |
10 | archmage/arch.conf | |
11 | archmage/chmtotext.py | |
12 | archmage/cli.py | |
13 | archmage/htmldoc.py | |
8 | 14 | archmage.egg-info/PKG-INFO |
9 | 15 | archmage.egg-info/SOURCES.txt |
10 | 16 | archmage.egg-info/dependency_links.txt |
11 | 17 | archmage.egg-info/entry_points.txt |
12 | 18 | archmage.egg-info/requires.txt |
13 | 19 | archmage.egg-info/top_level.txt |
14 | archmod/CHM.py | |
15 | archmod/CHMParser.py | |
16 | archmod/CHMServer.py | |
17 | archmod/Cached.py | |
18 | archmod/__init__.py | |
19 | archmod/arch.conf | |
20 | archmod/chmtotext.py | |
21 | archmod/cli.py | |
22 | archmod/htmldoc.py | |
23 | archmod/mod_chm.py | |
24 | archmod/templates/arch_contents.html | |
25 | archmod/templates/arch_css.css | |
26 | archmod/templates/arch_frameset.html | |
27 | archmod/templates/arch_header.html | |
28 | archmod/templates/index.html | |
29 | archmod/templates/icons/0.gif | |
30 | archmod/templates/icons/1.gif | |
31 | archmod/templates/icons/10.gif | |
32 | archmod/templates/icons/11.gif | |
33 | archmod/templates/icons/12.gif | |
34 | archmod/templates/icons/13.gif | |
35 | archmod/templates/icons/14.gif | |
36 | archmod/templates/icons/15.gif | |
37 | archmod/templates/icons/16.gif | |
38 | archmod/templates/icons/17.gif | |
39 | archmod/templates/icons/18.gif | |
40 | archmod/templates/icons/19.gif | |
41 | archmod/templates/icons/2.gif | |
42 | archmod/templates/icons/20.gif | |
43 | archmod/templates/icons/21.gif | |
44 | archmod/templates/icons/22.gif | |
45 | archmod/templates/icons/23.gif | |
46 | archmod/templates/icons/24.gif | |
47 | archmod/templates/icons/25.gif | |
48 | archmod/templates/icons/26.gif | |
49 | archmod/templates/icons/27.gif | |
50 | archmod/templates/icons/3.gif | |
51 | archmod/templates/icons/35.gif | |
52 | archmod/templates/icons/37.gif | |
53 | archmod/templates/icons/39.gif | |
54 | archmod/templates/icons/4.gif | |
55 | archmod/templates/icons/5.gif | |
56 | archmod/templates/icons/6.gif | |
57 | archmod/templates/icons/7.gif | |
58 | archmod/templates/icons/8.gif | |
59 | archmod/templates/icons/9.gif | |
60 | archmod/templates/icons/90.gif | |
61 | archmod/templates/icons/91.gif | |
62 | archmod/templates/icons/92.gif | |
63 | archmod/templates/icons/93.gif | |
64 | archmod/templates/icons/94.gif | |
65 | archmod/templates/icons/95.gif | |
66 | archmod/templates/icons/96.gif | |
67 | archmod/templates/icons/97.gif | |
68 | archmod/templates/icons/98.gif | |
69 | archmod/templates/icons/99.gif | |
70 | archmod/templates/icons/next.gif | |
71 | archmod/templates/icons/prev.gif⏎ | |
20 | archmage/templates/arch_contents.html | |
21 | archmage/templates/arch_css.css | |
22 | archmage/templates/arch_frameset.html | |
23 | archmage/templates/arch_header.html | |
24 | archmage/templates/index.html | |
25 | archmage/templates/icons/0.gif | |
26 | archmage/templates/icons/1.gif | |
27 | archmage/templates/icons/10.gif | |
28 | archmage/templates/icons/11.gif | |
29 | archmage/templates/icons/12.gif | |
30 | archmage/templates/icons/13.gif | |
31 | archmage/templates/icons/14.gif | |
32 | archmage/templates/icons/15.gif | |
33 | archmage/templates/icons/16.gif | |
34 | archmage/templates/icons/17.gif | |
35 | archmage/templates/icons/18.gif | |
36 | archmage/templates/icons/19.gif | |
37 | archmage/templates/icons/2.gif | |
38 | archmage/templates/icons/20.gif | |
39 | archmage/templates/icons/21.gif | |
40 | archmage/templates/icons/22.gif | |
41 | archmage/templates/icons/23.gif | |
42 | archmage/templates/icons/24.gif | |
43 | archmage/templates/icons/25.gif | |
44 | archmage/templates/icons/26.gif | |
45 | archmage/templates/icons/27.gif | |
46 | archmage/templates/icons/3.gif | |
47 | archmage/templates/icons/35.gif | |
48 | archmage/templates/icons/37.gif | |
49 | archmage/templates/icons/39.gif | |
50 | archmage/templates/icons/4.gif | |
51 | archmage/templates/icons/5.gif | |
52 | archmage/templates/icons/6.gif | |
53 | archmage/templates/icons/7.gif | |
54 | archmage/templates/icons/8.gif | |
55 | archmage/templates/icons/9.gif | |
56 | archmage/templates/icons/90.gif | |
57 | archmage/templates/icons/91.gif | |
58 | archmage/templates/icons/92.gif | |
59 | archmage/templates/icons/93.gif | |
60 | archmage/templates/icons/94.gif | |
61 | archmage/templates/icons/95.gif | |
62 | archmage/templates/icons/96.gif | |
63 | archmage/templates/icons/97.gif | |
64 | archmage/templates/icons/98.gif | |
65 | archmage/templates/icons/99.gif | |
66 | archmage/templates/icons/next.gif | |
67 | archmage/templates/icons/prev.gif⏎ |
0 | # -*- coding: utf-8 -*- | |
1 | # | |
2 | # archmage -- CHM decompressor | |
3 | # Copyright (c) 2003 Eugeny Korekin <aaaz@users.sourceforge.net> | |
4 | # Copyright (c) 2005-2009 Basil Shubin <bashu@users.sourceforge.net> | |
5 | # | |
6 | # This program is free software; you can redistribute it and/or modify it under | |
7 | # the terms of the GNU General Public License as published by the Free Software | |
8 | # Foundation; either version 2 of the License, or (at your option) any later | |
9 | # version. | |
10 | # | |
11 | # This program is distributed in the hope that it will be useful, but WITHOUT | |
12 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
13 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more | |
14 | # details. | |
15 | # | |
16 | # You should have received a copy of the GNU General Public License along with | |
17 | # this program; if not, write to the Free Software Foundation, Inc., 51 Franklin | |
18 | # Street, Fifth Floor, Boston, MA 02110-1301, USA. | |
19 | # | |
20 | ||
21 | import os | |
22 | import sys | |
23 | import re | |
24 | import shutil | |
25 | import errno | |
26 | import string | |
27 | import tempfile | |
28 | ||
29 | import archmod | |
30 | ||
31 | from archmod.CHMParser import SitemapFile, PageLister, ImageCatcher, TOCCounter#, HeadersCounter | |
32 | from archmod.Cached import Cached | |
33 | ||
34 | # import PyCHM bindings | |
35 | try: | |
36 | from chm import chmlib | |
37 | except ImportError, msg: | |
38 | sys.exit('ImportError: %s\nPlease check README file for system requirements.' % msg) | |
39 | ||
40 | # External file converters | |
41 | from archmod.chmtotext import chmtotext | |
42 | from archmod.htmldoc import htmldoc | |
43 | ||
44 | PARENT_RE = re.compile(r'(^|/|\\)\.\.(/|\\|$)') | |
45 | ||
46 | class CHMDir(Cached): | |
47 | """Class that represent CHM content from directory""" | |
48 | ||
49 | def __init__(self, name): | |
50 | # Name of source directory with CHM content | |
51 | self.sourcename = name | |
52 | # Import variables from config file into namespace | |
53 | execfile(archmod.config, self.__dict__) | |
54 | ||
55 | # build regexp from the list of auxiliary files | |
56 | self.aux_re = '|'.join([ re.escape(s) for s in self.auxes ]) | |
57 | ||
58 | # Get and parse 'Table of Contents' | |
59 | try: | |
60 | self.topicstree = self.get_entry(self.topics) | |
61 | except AttributeError: | |
62 | self.topicstree = None | |
63 | self.contents = SitemapFile(self.topicstree).parse() | |
64 | ||
65 | def _getitem(self, name): | |
66 | # Get all entries | |
67 | if name == 'entries': | |
68 | entries = [] | |
69 | for fname in archmod.listdir(self.sourcename): | |
70 | name = '/' + fname | |
71 | if os.path.isdir(self.sourcename + name): | |
72 | name += '/' | |
73 | entries.append(name) | |
74 | return entries | |
75 | # retrieves the list of HTML files contained into the CHM file, **in order** (that's the important bit). | |
76 | # (actually performed by the PageLister class) | |
77 | if name == 'html_files': | |
78 | lister = PageLister() | |
79 | lister.feed(self.topicstree) | |
80 | return lister.pages | |
81 | # retrieves the list of images urls contained into the CHM file. | |
82 | # (actually performed by the ImageCatcher class) | |
83 | if name == 'image_urls': | |
84 | image_urls = [] | |
85 | image_catcher = ImageCatcher() | |
86 | for file in self.html_files: | |
87 | image_catcher.feed(CHMEntry(self, file).correct()) | |
88 | for image_url in image_catcher.imgurls: | |
89 | if not image_urls.count(image_url): | |
90 | image_urls.append(image_url) | |
91 | return image_urls | |
92 | # retrieves a dictionary of actual file entries and corresponding urls into the CHM file | |
93 | if name == 'image_files': | |
94 | image_files = {} | |
95 | for image_url in self.image_urls: | |
96 | for entry in self.entries: | |
97 | if re.search(image_url, entry.lower()) and not image_files.has_key(entry.lower()): | |
98 | image_files.update({entry : image_url}) | |
99 | return image_files | |
100 | # Get topics file | |
101 | if name == 'topics': | |
102 | for e in self.entries: | |
103 | if e.lower().endswith('.hhc'): | |
104 | return e | |
105 | if name == 'deftopic': | |
106 | # use first page as deftopic. Note: without heading slash | |
107 | if self.html_files[0].startswith('/'): | |
108 | return self.html_files[0].replace('/', '', 1).lower() | |
109 | return self.html_files[0].lower() | |
110 | # Get index file | |
111 | if name == 'index': | |
112 | for e in self.entries: | |
113 | if e.lower().endswith('.hhk'): | |
114 | return e | |
115 | # Get frontpage name | |
116 | if name == 'frontpage': | |
117 | frontpage = os.path.join('/', 'index.html') | |
118 | index = 2 # index2.html and etc. | |
119 | for filename in self.entries: | |
120 | if frontpage == filename: | |
121 | frontpage = os.path.join('/', ('index%s.html' % index)) | |
122 | index += 1 | |
123 | return frontpage | |
124 | # Get all templates files | |
125 | if name == 'templates': | |
126 | templates = [] | |
127 | for file in os.listdir(self.templates_dir): | |
128 | if os.path.isfile(os.path.join(self.templates_dir, file)): | |
129 | if os.path.join('/', file) not in self.entries: | |
130 | templates.append(os.path.join('/', file)) | |
131 | return templates | |
132 | # Get ToC levels | |
133 | if name == 'toclevels': | |
134 | counter = TOCCounter() | |
135 | counter.feed(self.topicstree) | |
136 | if counter.count > self.maxtoclvl: | |
137 | return self.maxtoclvl | |
138 | else: | |
139 | return counter.count | |
140 | raise AttributeError(name) | |
141 | ||
def get_entry(self, name):
    """Return the content to serve for the CHM entry called *name*.

    Lookup order: the front page and the template files (rendered by
    ``get_template``), then the icons stored in ``self.icons_dir`` and
    addressed as ``/icons/<file>``, then the entries of the CHM itself.
    Icon and entry names are compared case-insensitively.

    Returns the content, or ``None`` after reporting an error through
    ``archmod.message`` when nothing matches *name*.
    """
    # '/' is an alias for the front page
    if name == '/':
        name = self.frontpage
    if name in self.templates or name == self.frontpage:
        return self.get_template(name)
    wanted = name.lower()
    for icon in os.listdir(self.icons_dir):
        if os.path.join('/icons', icon.lower()) == wanted:
            # Open the file under its on-disk spelling (the comparison above
            # ignores case) and in binary mode, since icons are images.
            with open(os.path.join(self.icons_dir, icon), 'rb') as icon_file:
                return icon_file.read()
    for e in self.entries:
        if e.lower() == wanted:
            return CHMEntry(self, e, frontpage=self.frontpage).get()
    archmod.message(archmod.ERROR, 'NameError: There is no %s' % name)
156 | ||
def sub_mytag(self, re):
    """Resolve one ``<%...%>`` template tag to its replacement value.

    *re* is the match object passed in by ``re.sub`` (inside this method
    the parameter shadows the ``re`` module); group 1 is the text between
    ``<%`` and ``%>``.  The text is evaluated first as an attribute
    expression on ``self`` and, if that fails, as a standalone expression.

    Returns the evaluated value, or ``''`` when both evaluations fail.
    """
    # NOTE(review): eval() runs arbitrary code taken from the template text,
    # so only trusted template files may be rendered through this method.
    try:
        return eval('self.' + re.group(1))
    except Exception:
        # Not an attribute of self; fall back to a plain expression.  Only
        # Exception is caught so Ctrl-C and sys.exit() are not swallowed.
        pass
    try:
        return eval(re.group(1))
    except Exception:
        return ''
167 | ||
def get_template(self, name):
    """Return the template called *name* with its ``<%...%>`` tags expanded.

    The front page is always rendered from ``index.html``; any other name
    is reduced to its base name.  The file is read from
    ``self.templates_dir`` and every tag is replaced by the value
    ``self.sub_mytag`` returns for it.
    """
    if name == self.frontpage:
        filename = 'index.html'
    else:
        filename = os.path.basename(name)
    # Close the template file deterministically instead of leaking the handle.
    with open(os.path.join(self.templates_dir, filename)) as template_file:
        tpl = template_file.read()
    return re.sub(r'<%(.+?)%>', self.sub_mytag, tpl)
175 | ||
def process_templates(self, destdir="."):
    """Write the rendered templates, the front page and the icons to *destdir*.

    Every name in ``self.templates`` is rendered with ``get_template`` and
    written under its base name.  When the front page is not one of the
    templates it is rendered from ``index.html`` and written under the
    front page's base name.  The icons directory is copied to
    ``<destdir>/icons/`` unless that path already exists.
    """
    for template in self.templates:
        target = os.path.join(destdir, os.path.basename(template))
        # 'with' guarantees the data is flushed and the handle is closed
        with open(target, 'w') as out:
            out.write(self.get_template(template))
    if self.frontpage not in self.templates:
        target = os.path.join(destdir, os.path.basename(self.frontpage))
        with open(target, 'w') as out:
            out.write(self.get_template('index.html'))
    icons_dest = os.path.join(destdir, 'icons/')
    if not os.path.exists(icons_dest):
        shutil.copytree(os.path.join(self.icons_dir), icons_dest)
184 | ||
def extract_entry(self, entry, output_file, destdir=".", correct=False):
    """Write the CHM entry *entry* to a file below *destdir*.

    The file name is *output_file* lower-cased with its first '/' removed.
    Missing parent directories are created.  Nothing is written when the
    resulting path is a directory.  With ``correct=True`` the content comes
    from ``CHMEntry.correct()``, otherwise from ``CHMEntry.get()``.
    """
    # drop the first '/' so the name is relative to destdir
    fname = output_file.lower().replace('/', '', 1)
    target = os.path.join(destdir, fname)
    dname = os.path.dirname(target)
    if dname and not os.path.exists(dname):
        os.makedirs(dname)
    # a path that is a directory has no content of its own to write
    if os.path.isdir(target):
        return
    if self.fs_encoding:
        # filename encoding conversion
        # NOTE(review): relies on Python 2 byte strings (str.decode).
        fname = fname.decode('utf-8').encode(self.fs_encoding)
    chm_entry = CHMEntry(self, entry)
    if correct:
        content = chm_entry.correct()
    else:
        content = chm_entry.get()
    # Binary mode keeps images and other non-text entries intact on platforms
    # that translate line endings; one write() replaces the per-character
    # writelines() call, and 'with' closes the file.
    with open(os.path.join(destdir, fname), 'wb') as out:
        out.write(content)
203 | ||
def extract_entries(self, entries=(), destdir=".", correct=False):
    """Extract the raw CHM *entries* into files below *destdir*.

    Entries whose name matches ``self.aux_re`` (auxiliary files) are
    skipped.  *destdir* and *correct* are passed on to ``extract_entry``.

    Raises RuntimeError for an entry name matched by the module-level
    ``PARENT_RE`` pattern, which this method treats as malicious.
    """
    # The default is an immutable empty tuple rather than a shared list.
    for e in entries:
        # if entry is an auxiliary file, then skip it
        if re.match(self.aux_re, e):
            continue
        if PARENT_RE.search(e):
            raise RuntimeError('Giving up on malicious name: %s' % e)
        self.extract_entry(e, output_file=e, destdir=destdir, correct=correct)
213 | ||
def extract(self, destdir):
    """Extract the CHM content and the rendered templates into *destdir*.

    *destdir* is created first.  If it already exists the program exits
    through ``sys.exit`` with an explanatory message.  Any other OSError
    raised while creating it, and any error raised during extraction, now
    propagates to the caller instead of being silently discarded.
    """
    # Only the directory creation is guarded: an EEXIST raised later, during
    # extraction, must not be reported as "destination already exists".
    try:
        os.mkdir(destdir)
    except OSError as error:
        if error.errno == errno.EEXIST:
            sys.exit('%s is already exists' % destdir)
        raise
    # make raw content extraction
    self.extract_entries(entries=self.entries, destdir=destdir)
    # process templates
    self.process_templates(destdir=destdir)
226 | ||
def dump_html(self, output=sys.stdout):
    """Print the content of every HTML page of the CHM to *output*.

    Pages are taken from ``self.html_files`` in order; names matching
    ``self.aux_re`` (auxiliary files) are left out.
    """
    pages = (page for page in self.html_files
             if not re.match(self.aux_re, page))
    for page in pages:
        print >> output, CHMEntry(self, page).get()
234 | ||
def chm2text(self, output=sys.stdout):
    """Convert the HTML pages of the CHM to text, written to *output*.

    Every page of ``self.html_files`` that does not match ``self.aux_re``
    is handed to ``chmtotext`` together with the command configured in
    ``self.chmtotext`` (an external tool such as 'lynx' or 'elinks' has to
    be installed for this to work).
    """
    for page in self.html_files:
        if not re.match(self.aux_re, page):
            chmtotext(input=CHMEntry(self, page).get(), cmd=self.chmtotext, output=output)
243 | ||
def htmldoc(self, output, format=archmod.CHM2HTML):
    """Convert the CHM to another file format with the htmldoc tool.

    *output* is the destination file name; spaces in it are replaced by
    underscores.  *format* is ``archmod.CHM2HTML`` or ``archmod.CHM2PDF``.

    The corrected HTML pages are extracted into a temporary directory and
    passed to the module-level ``htmldoc`` function.  For HTML output with
    images, a directory named by ``archmod.file2dir(output)`` is created,
    the images are extracted into it and the output file is placed inside
    it; the program exits if that directory already exists.  For PDF
    output the images are extracted next to the temporary pages.

    The temporary directory is removed even when the conversion fails or
    the program exits.
    """
    output = output.replace(' ', '_')
    # Extract CHM content into temporary directory
    tempdir = tempfile.mkdtemp(prefix=output.rsplit('.', 1)[0])
    try:
        self.extract_entries(entries=self.html_files, destdir=tempdir, correct=True)
        # List of temporary files
        files = [os.path.abspath(tempdir + page.lower()) for page in self.html_files]
        # NOTE(review): any other format leaves 'options' unbound and fails
        # with a NameError at the htmldoc() call below.
        if format == archmod.CHM2HTML:
            options = self.chmtohtml
            # change output from single html file to a directory with html file and images
            if self.image_files:
                dirname = archmod.file2dir(output)
                if os.path.exists(dirname):
                    sys.exit('%s is already exists' % dirname)
                os.mkdir(dirname)
                # Extract all images
                for key, value in self.image_files.items():
                    self.extract_entry(entry=key, output_file=value, destdir=dirname)
                # Fix output file name
                output = os.path.join(dirname, output)
        elif format == archmod.CHM2PDF:
            options = self.chmtopdf
            if self.image_files:
                # Extract all images
                for key in self.image_files:
                    self.extract_entry(entry=key, output_file=key.lower(), destdir=tempdir)
        htmldoc(files, self.htmldoc_exec, options, self.toclevels, output)
    finally:
        # Remove temporary files on every exit path, including sys.exit()
        shutil.rmtree(path=tempdir)
275 | ||
276 | ||
class CHMFile(CHMDir):
    """CHMDir variant whose entries are enumerated through chmlib."""

    def _getitem(self, name):
        """Compute the attributes this class overrides; defer the rest to CHMDir."""
        if name == '_handler':
            return chmlib.chm_open(self.sourcename)
        if name == 'entries':
            # every name reported for the CHM file, except the root '/'
            return [path for path in self._get_names(self._handler) if path != '/']
        return super(CHMFile, self)._getitem(name)

    def __delattr__(self, name):
        """Delete attribute *name*; the CHM handler is closed before removal."""
        if name == '_handler':
            chmlib.chm_close(self._handler)
        return super(CHMFile, self).__delattr__(name)

    def _get_names(self, chmfile):
        """Return the paths of all objects enumerated inside *chmfile*.

        Exits the program when ``chmlib.chm_enumerate`` returns 0.
        """
        def collect(chmfile, ui, content):
            # enumeration callback: record the object's path and carry on
            content.append(ui.path)
            return chmlib.CHM_ENUMERATOR_CONTINUE

        paths = []
        status = chmlib.chm_enumerate(chmfile, chmlib.CHM_ENUMERATE_ALL, collect, paths)
        if status == 0:
            sys.exit('UnknownError: CHMLIB or PyCHM bug?')
        return paths
310 | ||
311 | ||
class CHMEntry(object):
    """A single object stored in a CHM file or in an extracted CHM directory."""

    # (pattern, replacement) pairs that correct() applies, in this order,
    # to delete unwanted HTML elements.
    _JUNK = (
        (r'<div .*teamlib\.gif.*\/div>', ''),
        (r'<a href.*>\[ Team LiB \]<\/a>', ''),
        (r'<table.*larrow\.gif.*rarrow\.gif.*<\/table>', ''),
        (r'<a href.*next\.gif[^>]*><\/a>', ''),
        (r'<a href.*previous\.gif[^>]*><\/a>', ''),
        (r'<a href.*prev\.gif[^>]*><\/a>', ''),
        (r'"[^"]*previous\.gif"', '""'),
        (r'"[^"]*prev\.gif"', '""'),
        (r'"[^"]*next\.gif"', '""'),
    )

    def __init__(self, parent, name, frontpage='index.html'):
        """Remember the owning CHM object, the entry name and the front page.

        Only the base name of *frontpage* is kept; it is used for the
        "show framing" link added by ``add_restoreframing_js``.
        """
        # parent CHM file
        self.parent = parent
        # object inside CHM file
        self.name = name
        # frontpage name to substitute
        self.frontpage = os.path.basename(frontpage)

    def read(self):
        """Return the raw content of the entry.

        For a ``CHMFile`` parent the object is resolved and retrieved
        through chmlib; ``None`` is returned when it cannot be resolved or
        has zero size.  For any other parent the content is read from the
        file ``parent.sourcename + name``.
        """
        if isinstance(self.parent, CHMFile):
            result, ui = chmlib.chm_resolve_object(self.parent._handler, self.name)
            if result != chmlib.CHM_RESOLVE_SUCCESS:
                return None
            size, content = chmlib.chm_retrieve_object(self.parent._handler, ui, 0, ui.length)
            if size == 0:
                return None
            return content
        # Binary mode so images come back unmodified; 'with' closes the file.
        with open(self.parent.sourcename + self.name, 'rb') as source:
            return source.read()

    def lower_links(self, text):
        """Return *text* with every ``href=...`` / ``src=...`` attribute lower-cased."""
        return re.sub(r'(?i)(href|src)\s*=\s*([^\s|>]+)', lambda m: m.group(0).lower(), text)

    def add_restoreframing_js(self, name, text):
        """Return *text* with a "show framing" script placed after ``<body>``.

        *name* is the page path; runs of '/' are collapsed and the number
        of remaining '/' decides how many '../' lead back to the front
        page.  Only a ``<body>`` tag without attributes is matched.
        """
        name = re.sub('/+', '/', name)
        depth = name.count('/')

        js = """<body><script language="javascript">
if ((window.name != "content") && (navigator.userAgent.indexOf("Opera") <= -1) )
document.write("<center><a href='%s%s?page=%s'>show framing</a></center>")
</script>""" % ('../' * depth, self.frontpage, name)

        # A callable replacement inserts the snippet verbatim; passed as a
        # plain string it would be parsed as a replacement template, so a
        # backslash in the page name would be misread or raise an error.
        return re.sub(r'(?i)<\s*body\s*>', lambda m: js, text)

    def _is_html(self):
        """Tell whether the entry name ends in .htm or .html (any case)."""
        return re.search(r'(?i)\.html?$', self.name)

    def correct(self):
        """Return the entry content cleaned up for conversion.

        HTML pages get their links lower-cased when ``parent.filename_case``
        is set, and the elements matched by ``_JUNK`` are removed.  Other
        entries are returned unchanged.  Unreadable entries yield ``''``.
        """
        data = self.read()
        if data is None:
            return ''
        if self._is_html():
            # lower-casing links if needed
            if self.parent.filename_case:
                data = self.lower_links(data)
            # Delete unwanted HTML elements.
            for pattern, replacement in self._JUNK:
                data = re.sub(pattern, replacement, data)
        return data

    def get(self):
        """Return the entry content as it should be served or extracted.

        HTML pages get their links lower-cased when ``parent.filename_case``
        is set and the restore-framing script added when
        ``parent.restore_framing`` is set.  Other entries are returned
        unchanged.  Unreadable entries yield ``''``.
        """
        data = self.read()
        if data is None:
            return ''
        if self._is_html():
            # lower-casing links if needed
            if self.parent.filename_case:
                data = self.lower_links(data)
            # restore framing if that option is set in config file
            if self.parent.restore_framing:
                data = self.add_restoreframing_js(self.name[1:], data)
        return data
0 | # -*- coding: utf-8 -*- | |
1 | # | |
2 | # archmage -- CHM decompressor | |
3 | # Copyright (c) 2009 Basil Shubin <bashu@users.sourceforge.net> | |
4 | # | |
5 | # This program is free software; you can redistribute it and/or modify it under | |
6 | # the terms of the GNU General Public License as published by the Free Software | |
7 | # Foundation; either version 2 of the License, or (at your option) any later | |
8 | # version. | |
9 | # | |
10 | # This program is distributed in the hope that it will be useful, but WITHOUT | |
11 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
12 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more | |
13 | # details. | |
14 | # | |
15 | # You should have received a copy of the GNU General Public License along with | |
16 | # this program; if not, write to the Free Software Foundation, Inc., 51 Franklin | |
17 | # Street, Fifth Floor, Boston, MA 02110-1301, USA. | |
18 | # | |
19 | ||
20 | import re | |
21 | import mimetypes | |
22 | import sgmllib, urllib2 | |
23 | ||
24 | from BeautifulSoup import BeautifulSoup | |
25 | from HTMLParser import HTMLParser, HTMLParseError | |
26 | from urlparse import urlparse | |
27 | ||
28 | from archmod import COMMASPACE, LF, CR | |
29 | ||
30 | START_TAG = '[' | |
31 | END_TAG = ']' | |
32 | ||
33 | ||
class SitemapFile(object):
    """Sitemap file: cleans up sitemap markup and converts it with SitemapParser."""

    def __init__(self, lines):
        """Store a prettified copy of *lines*, or ``None`` when *lines* is empty."""
        self.lines = None
        if not lines:
            return
        # XXX: Cooking tasty beautiful soup ;-)
        markup = BeautifulSoup(lines).prettify()
        # XXX: Removing empty tags
        for empty_tag in (r'<ul>\s*</ul>', r'<li>\s*</li>'):
            markup = re.sub(re.compile(empty_tag, re.I | re.M), '', markup)
        self.lines = markup

    def parse(self):
        """Feed the stored markup to a SitemapParser and return its output.

        The result is the parsed text followed by a line feed and the final
        closing bracket.
        """
        parser = SitemapParser()
        if self.lines:
            parser.feed(self.lines)
        return parser.parsed + LF + END_TAG
55 | ||
56 | ||
class TagStack(list):
    """Stack of open tag names.

    From the book of David Mertz 'Text Processing in Python'.
    """

    def append(self, tag):
        """Push *tag*; a paragraph-level tag first evicts the ones already open."""
        # Remove every paragraph-level tag if this is one.  The list is
        # filtered in place: iterating over the builtin `super` raised a
        # TypeError, and rebinding `self` could never affect the real stack.
        if tag.lower() in ('p', 'blockquote'):
            self[:] = [t for t in self if t.lower() not in ('p', 'blockquote')]
        super(TagStack, self).append(tag)

    def pop(self, tag):
        """Remove the nearest (most recently pushed) *tag* and all above it.

        Raises HTMLParseError when *tag* is not on the stack; the stack is
        then left exactly as it was.
        """
        # 'Pop' by tag from nearest position, not only last item.  Searching
        # from the end avoids reversing the list, which used to stay
        # reversed when the tag was missing.
        for pos in range(len(self) - 1, -1, -1):
            if self[pos] == tag:
                del self[pos:]
                return
        raise HTMLParseError('Tag not on stack')
75 | ||
76 | ||
class SitemapParser(sgmllib.SGMLParser):
    """Class for parsing files in SiteMap format, such as .hhc

    The parser accumulates its result in ``self.parsed``: a text made of
    nested START_TAG/END_TAG brackets in which every sitemap object is
    written as three quoted values (name, local URL, image number).
    """

    def __init__(self):
        # stack of the tags currently open inside the outermost <ul>
        self.tagstack = TagStack()
        # True while inside an <object type="text/sitemap"> element
        self.in_obj = False
        # name/local: values collected for the current object;
        # param: lower-cased 'name' attribute of the <param> being read
        self.name = self.local = self.param = ""
        self.imagenumber = 1
        # output text built so far
        self.parsed = ""
        sgmllib.SGMLParser.__init__(self)

    def unknown_starttag(self, tag, attrs):
        """Handle an opening tag; *attrs* is a sequence of (name, value) pairs."""
        # first ul, start processing from here
        if tag == 'ul' and not self.tagstack:
            self.tagstack.append(tag)
            # First bracket
            self.parsed += LF + START_TAG

        # if inside ul
        elif self.tagstack:
            if tag == 'li':
                # append closing bracket if needed: the previous <li> is
                # still open when the top of the stack is not a <ul>
                if self.tagstack[-1] != 'ul':
                    self.parsed += END_TAG
                    self.tagstack.pop('li')
                indent = ' ' * len(self.tagstack)

                # every item except the very first is preceded by a separator
                if self.parsed != LF + START_TAG:
                    self.parsed += COMMASPACE

                self.parsed += LF + indent + START_TAG

            if tag == 'object':
                for x, y in attrs:
                    if x.lower() == 'type' and y.lower() == 'text/sitemap':
                        self.in_obj = True

            if tag.lower() == 'param' and self.in_obj:
                for x, y in attrs:
                    if x.lower() == 'name':
                        self.param = y.lower()
                    elif x.lower() == 'value':
                        # only the first 'name' param of an object is kept
                        if self.param == 'name' and not len(self.name):
                            # XXX: Remove LF and/or CR signs from name
                            self.name = y.replace(LF, '').replace(CR, '')
                            # XXX: Escape double quotes, since the name is
                            # written between double quotes
                            self.name = self.name.replace('"', '\\"')
                        elif self.param == 'local':
                            # XXX: Change incorrect slashes in url
                            # NOTE(review): the second replace() can never
                            # match, because the first one has already
                            # turned every backslash into a slash.
                            self.local = y.lower().replace('\\', '/').replace('..\\', '')
                        elif self.param == 'imagenumber':
                            self.imagenumber = y
            self.tagstack.append(tag)

    def unknown_endtag(self, tag):
        """Handle a closing tag."""
        # if inside ul
        if self.tagstack:
            if tag == 'ul':
                self.parsed += END_TAG
            if tag == 'object' and self.in_obj:
                # "Link Name", "URL", "Icon"
                self.parsed += "\"%s\", \"%s\", \"%s\"" % (self.name, self.local, self.imagenumber)
                # Set to default values
                self.in_obj = False
                self.name = self.local = ""
                self.imagenumber = 1
            # a closing </li> is ignored here; an open <li> is popped by the
            # next opening <li> in unknown_starttag
            if tag != 'li':
                self.tagstack.pop(tag)
145 | ||
146 | ||
class PageLister(sgmllib.SGMLParser):
    """
    Collects, in document order and without duplicates, the page URLs named
    by the 'Local' <param> tags of the topics tree it is fed.
    """

    def reset(self):
        """Reset the parser and start with an empty page list."""
        sgmllib.SGMLParser.reset(self)
        self.pages = []

    def start_param(self, attrs):
        """Record the 'value' of a <param> tag whose name is 'local'."""
        is_local = False
        for key, value in attrs:
            if key == 'name' and value.lower() == 'local':
                is_local = True
            if not (is_local and key == 'value'):
                continue
            # Sometimes the url has incorrect slashes
            url = urlparse(value.replace('\\', '/')).geturl()
            # strip the fragment and anchor the path at the CHM root
            page = '/' + re.sub("#.*$", '', urllib2.unquote(url))
            # Avoid duplicates
            if page not in self.pages:
                self.pages.append(page)
169 | ||
170 | ||
class ImageCatcher(sgmllib.SGMLParser):
    """
    Gathers the URLs of the images used by the HTML it is fed: the sources
    of <img> tags and the targets of links whose guessed type is an image.
    """

    def reset(self):
        """Reset the parser and start with an empty URL list."""
        sgmllib.SGMLParser.reset(self)
        self.imgurls = []

    def start_img(self, attrs):
        """Record the 'src' of an <img> tag, prefixed with '/'."""
        for key, value in attrs:
            if key.lower() != 'src':
                continue
            src = '/' + value
            # Avoid duplicates in the list of image URLs.
            if src not in self.imgurls:
                self.imgurls.append(src)

    def start_a(self, attrs):
        """Record the 'href' of an <a> tag when it points to a local image."""
        for key, value in attrs:
            if key.lower() != 'href':
                continue
            url = urlparse(value)
            # drop the fragment and anchor the path at the CHM root
            target = '/' + re.sub("#.*$", '', urllib2.unquote(url.geturl()))
            # Check file's mimetype
            mime = mimetypes.guess_type(target)[0]
            # links carrying a scheme point outside the CHM
            if url.scheme:
                continue
            # Avoid duplicates in the list of image URLs.
            if target in self.imgurls:
                continue
            if mime and re.search('image/.*', mime):
                self.imgurls.append(target)
200 | ||
201 | ||
class TOCCounter(HTMLParser):
    """Count Table of Contents levels.

    ``count`` ends up as the largest number of 'param' tags that were on
    the tag stack when an <object> element was closed.
    """

    count = 0

    def __init__(self):
        """Create the parser with an empty tag stack."""
        self.tagstack = TagStack()
        HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Push every opening tag on the stack."""
        self.tagstack.append(tag)

    def handle_endtag(self, tag):
        """Update the counter on </object> and pop the closed tag."""
        if not self.tagstack:
            return
        closed = tag.lower()
        if closed == 'object':
            params = self.tagstack.count('param')
            if params > self.count:
                self.count = params
        # </li> is never popped here
        if closed != 'li':
            self.tagstack.pop(tag)
0 | # -*- coding: utf-8 -*- | |
1 | # | |
2 | # archmage -- CHM decompressor | |
3 | # Copyright (c) 2003 Eugeny Korekin <aaaz@users.sourceforge.net> | |
4 | # Copyright (c) 2005-2009 Basil Shubin <bashu@users.sourceforge.net> | |
5 | # | |
6 | # This program is free software; you can redistribute it and/or modify it under | |
7 | # the terms of the GNU General Public License as published by the Free Software | |
8 | # Foundation; either version 2 of the License, or (at your option) any later | |
9 | # version. | |
10 | # | |
11 | # This program is distributed in the hope that it will be useful, but WITHOUT | |
12 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
13 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more | |
14 | # details. | |
15 | # | |
16 | # You should have received a copy of the GNU General Public License along with | |
17 | # this program; if not, write to the Free Software Foundation, Inc., 51 Franklin | |
18 | # Street, Fifth Floor, Boston, MA 02110-1301, USA. | |
19 | # | |
20 | ||
21 | import urllib | |
22 | import mimetypes | |
23 | ||
24 | from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler | |
25 | ||
26 | import archmod | |
27 | ||
28 | ||
class CHMServer(HTTPServer):
    """HTTP server that serves the content of a CHM object."""

    def __init__(self, CHM, name='', port=8000):
        """Create the wrapped HTTPServer bound to (*name*, *port*).

        *CHM* is attached to the wrapped server as its ``CHM`` attribute,
        where the request handler looks it up.
        """
        address = (name, port)
        httpd = HTTPServer(address, CHMRequestHandler)
        httpd.CHM = CHM
        self.address = address
        self.httpd = httpd

    def run(self):
        """Serve requests until the process is interrupted."""
        self.httpd.serve_forever()
40 | ||
41 | ||
class CHMRequestHandler(BaseHTTPRequestHandler):
    """This class handles HTTP requests for CHMServer"""

    def do_GET(self):
        """Serve the CHM entry named by the request path.

        The query string is ignored and the path is URL-unquoted.  The
        entry is looked up before any header is sent, so a missing entry is
        answered with 404 instead of a 200 response followed by a failed
        write of ``None``.
        """
        pagename = urllib.unquote(self.path.split('?')[0])

        # get html data from CHM instance
        try:
            data = self.server.CHM.get_entry(pagename)
        except NameError as msg:
            archmod.message(archmod.ERROR, 'NameError: %s' % msg)
            data = None
        # get_entry() returns None when nothing matches the name
        if data is None:
            self.send_error(404, 'There is no %s' % pagename)
            return

        if pagename == '/':
            mimetype = 'text/html'
        else:
            # guess_type() yields None for unknown extensions; fall back to a
            # generic type rather than sending "Content-type: None"
            mimetype = mimetypes.guess_type(pagename)[0] or 'application/octet-stream'

        self.send_response(200)
        self.send_header('Content-type', mimetype)
        self.end_headers()
        self.wfile.write(data)
0 | # -*- coding: utf-8 -*- | |
1 | # | |
2 | # archmage -- CHM decompressor | |
3 | # Copyright (c) 2009 Basil Shubin <bashu@users.sourceforge.net> | |
4 | # | |
5 | # This program is free software; you can redistribute it and/or modify it under | |
6 | # the terms of the GNU General Public License as published by the Free Software | |
7 | # Foundation; either version 2 of the License, or (at your option) any later | |
8 | # version. | |
9 | # | |
10 | # This program is distributed in the hope that it will be useful, but WITHOUT | |
11 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
12 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more | |
13 | # details. | |
14 | # | |
15 | # You should have received a copy of the GNU General Public License along with | |
16 | # this program; if not, write to the Free Software Foundation, Inc., 51 Franklin | |
17 | # Street, Fifth Floor, Boston, MA 02110-1301, USA. | |
18 | # | |
19 | ||
class Cached(object):
    """Provides caching storage for data access decoration.
    Usage:
    class CachedClass(Cached):
        def _getitem(self, name):
            # implement data getting routine, such as db access

    CachedClass().attribute1 # returns value as if _getitem('attribute1') was called
    CachedClass().attribute2 # returns value as if _getitem('attribute2') was called
    CachedClass().__doc__ # returns real docstring

    Values returned by _getitem are stored in the instance's ``cache``
    dictionary, so _getitem runs at most once per attribute name.
    """

    def __new__(classtype, *args, **kwargs):
        # Constructor arguments belong to __init__; passing them on to
        # object.__new__ is deprecated in Python 2.6+ and an error later.
        instance = object.__new__(classtype)
        instance.cache = {}
        return instance

    # to be implemented by contract in the descendant classes
    def _getitem(self, name):
        """Compute the value of attribute *name*; subclasses must override."""
        raise NotImplementedError('_getitem must be implemented by subclasses')

    def __getattribute__(self, name):
        """Return the real attribute if there is one, else the cached value."""
        try:
            return object.__getattribute__(self, name)
        except AttributeError:
            # Only a missing attribute falls back to the cache; other errors
            # raised by the normal lookup are no longer masked.
            if name not in self.cache:
                self.cache[name] = self._getitem(name)
            return self.cache[name]
0 | # -*- coding: utf-8 -*- | |
1 | # | |
2 | # archmage -- CHM decompressor | |
3 | # Copyright (c) 2003 Eugeny Korekin <aaaz@users.sourceforge.net> | |
4 | # Copyright (c) 2005-2009 Basil Shubin <bashu@users.sourceforge.net> | |
5 | # | |
6 | # This program is free software; you can redistribute it and/or modify it under | |
7 | # the terms of the GNU General Public License as published by the Free Software | |
8 | # Foundation; either version 2 of the License, or (at your option) any later | |
9 | # version. | |
10 | # | |
11 | # This program is distributed in the hope that it will be useful, but WITHOUT | |
12 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
13 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more | |
14 | # details. | |
15 | # | |
16 | # You should have received a copy of the GNU General Public License along with | |
17 | # this program; if not, write to the Free Software Foundation, Inc., 51 Franklin | |
18 | # Street, Fifth Floor, Boston, MA 02110-1301, USA. | |
19 | # | |
# Names exported by the package
__all__ = ['CHM', 'CHMServer', 'mod_chm']
__version__ = '0.2.4'

import sys, os, pkg_resources

# Return codes (also select the output stream in message())
OK = 0
ERROR = 1

# Operation modes / output formats
EXTRACT = 1  # Extract CHM content
HTTPSERVER = 2  # Act as standalone HTTP server
DUMPHTML = 3  # Dump the HTML data of the CHM file
CHM2TXT = 4  # Convert CHM file into Single Text file
CHM2HTML = 5  # Convert CHM file into Single HTML file
CHM2PDF = 6  # Convert CHM file into PDF Document
# CHM2PS = 7  # Convert CHM file into PostScript file (disabled)

# Special characters
COMMASPACE = ', '
LF = '\n'
CR = '\r'

# Which config file to use: ~/.arch.conf if it exists, otherwise the
# arch.conf shipped inside the package.
user_config = os.path.join(os.path.expanduser('~'), '.arch.conf')
if os.path.exists(user_config):
    config = user_config
else:
    config = pkg_resources.resource_filename('archmod', 'arch.conf')
49 | ||
50 | # Miscellaneous auxiliary functions | |
def message(code=OK, msg=''):
    """Print *msg* to stderr when *code* is ERROR, to stdout otherwise.

    An empty *msg* prints nothing.
    """
    if not msg:
        return
    if code == ERROR:
        outfp = sys.stderr
    else:
        outfp = sys.stdout
    print >> outfp, msg
57 | ||
def file2dir(filename):
    """Convert file filename.chm to filename_html directory.

    Only the extension of the last path component is removed, so a dot in
    a directory name (``docs.v1/manual``) no longer truncates the path.
    """
    return os.path.splitext(filename)[0] + '_' + 'html'
62 | ||
def output_format(mode):
    """Translate the format name 'text', 'html' or 'pdf' into its constant.

    Exits the program for any other name ('ps' is currently disabled).
    """
    if mode == 'text':
        return CHM2TXT
    if mode == 'html':
        return CHM2HTML
    if mode == 'pdf':
        return CHM2PDF
    sys.exit('Invalid output file format: %s' % mode)
74 | ||
def output_file(filename, mode):
    """Convert filename.chm to filename.<ext> for the output format *mode*.

    The extension is 'txt', 'html' or 'pdf' for CHM2TXT, CHM2HTML and
    CHM2PDF, and 'output' for any other mode ('ps' is currently disabled).
    Only the extension of the last path component is replaced, so a dot in
    a directory name no longer truncates the path.
    """
    if mode == CHM2TXT:
        file_ext = 'txt'
    elif mode == CHM2HTML:
        file_ext = 'html'
    elif mode == CHM2PDF:
        file_ext = 'pdf'
    else:
        file_ext = 'output'
    return os.path.splitext(filename)[0] + '.' + file_ext
89 | ||
90 | # Our own listdir method :) | |
def listdir(dir):
    """Return the names of all files and directories below *dir*, recursively.

    Every name is prefixed with the path of its directory, from which the
    first '/'-separated component has been removed, so for a
    single-component *dir* the names are relative to *dir*.

    Uses os.walk, since os.path.walk is deprecated as of Python 2.6 and
    absent from Python 3.  The order of names within one directory is not
    significant.
    """
    res = []
    for dirpath, dirnames, filenames in os.walk(dir):
        # computed once per directory instead of once per file
        prefix = '/'.join(dirpath.split('/')[1:])
        if prefix:
            prefix += '/'
        res.extend(prefix + name for name in dirnames + filenames)
    return res
0 | # Directory for templates. All files in that directory will be parsed | |
1 | # and <%.+%> occurrences will be replaced with values from this | |
2 | # file. For example, <%title%> will be substituted by the value of the | |
3 | # title variable. | |
4 | # There are also some special variables, which have default values: | |
5 | # contents - a list which represents the chm file contents, and | |
6 | # deftopic - the name of the default page. | |
7 | from os.path import basename, join | |
8 | import pkg_resources | |
9 | ||
10 | templates_dir = pkg_resources.resource_filename('archmod', 'templates/') | |
11 | ||
12 | # Directory with icons | |
13 | icons_dir = join(templates_dir, 'icons') | |
14 | ||
15 | # List of auxiliary files, stored inside CHM file. | |
16 | # Those files would not be extracted. | |
17 | auxes = ('/#IDXHDR', '/#ITBITS', '/#STRINGS', '/#SYSTEM', '/#TOPICS', | |
18 | '/#URLSTR', '/#URLTBL', '/#WINDOWS', '/$FIftiMain', '/$OBJINST', | |
19 | '/$WWAssociativeLinks', '/$WWKeywordLinks', ':') | |
20 | ||
21 | # Title. That is value, which you want to see in browser title. | |
22 | # 'sourcename' is the name of source file. | |
23 | title = basename(sourcename) | |
24 | ||
25 | # Background and foreground colors for header. | |
26 | bcolor = '#63baff' | |
27 | fcolor = 'white' | |
28 | ||
29 | # Filenames inside chm stored in utf-8, but links can be in some | |
30 | # national codepage. If you set fs_encoding such links would be | |
31 | # converted to it. | |
32 | # | |
33 | # Default: fs_encoding = 'utf-8' | |
34 | fs_encoding = 'utf-8' | |
35 | ||
36 | # If your filesystem is case-sensitive, links in the HTML can point to | |
37 | # files whose names differ from the link only in case. Set | |
38 | # filename_case to 1 to have such links lower-cased :-) | |
39 | # | |
40 | # Default: filename_case=1 | |
41 | filename_case = 1 | |
42 | ||
43 | # If you want to add javascript code for restoring framing to every | |
44 | # page, set restore_framing. | |
45 | # | |
46 | # Default: restore_framing=1 | |
47 | restore_framing = 1 | |
48 | ||
49 | # Path to htmldoc executable | |
50 | # | |
51 | htmldoc_exec = '/usr/bin/htmldoc' | |
52 | ||
53 | # CHM2TEXT converting. Use following command to convert CHM content to plain | |
54 | # text file. Make sure that below apps are available on your system. | |
55 | #chmtotext = 'lynx -dump -stdin' | |
56 | chmtotext = '/usr/bin/elinks -dump' | |
57 | ||
58 | # CHM2HTML converting. Use following command to convert CHM content to a single | |
59 | # HTML file. Make sure that htmldoc is available on your system. | |
60 | chmtohtml = '-t html -f "%(output)s" --book %(toc)s --no-numbered --toctitle "Table of Contents" --title --linkstyle underline --fontsize 11.0 --fontspacing 1.2 --headingfont Helvetica --bodyfont Times --headfootsize 11.0 --headfootfont Helvetica --charset iso-8859-1 --browserwidth 680 --no-strict --no-overflow --quiet' | |
61 | ||
62 | # CHM2PDF converting. Use following command to convert CHM content to a single | |
63 | # PDF file. Make sure that htmldoc is available on your system. | |
64 | chmtopdf = '-t pdf14 -f "%(output)s" --book %(toc)s --no-numbered --toctitle "Table of Contents" --title --textcolor "#000000" --linkcolor "#0000ff" --linkstyle plain --size Universal --left 1.00in --right 0.50in --top 0.50in --bottom 0.50in --header .t. --header1 ... --footer h.1 --nup 1 --tocheader .t. --tocfooter ..i --portrait --color --no-pscommands --no-xrxcomments --compression=1 --jpeg=0 --fontsize 11.0 --fontspacing 1.2 --headingfont Helvetica --bodyfont Times --headfootsize 11.0 --headfootfont Helvetica --charset iso-8859-1 --links --embedfonts --pagemode outline --pagelayout single --firstpage c1 --pageeffect none --pageduration 10 --effectduration 1.0 --no-encryption --permissions all --owner-password "" --user-password "" --browserwidth 680 --no-strict --no-overflow --quiet' | |
65 | ||
66 | # CHM2PS converting. Use following command to convert CHM content to a single | |
67 | # PostScript file. Make sure that htmldoc is available on your system. | |
68 | #chmtops = '-t ps2 -f "%(output)s" --book %(toc)s --no-numbered --toctitle "Table of Contents" --title --textcolor "#000000" --linkcolor "#0000ff" --linkstyle underline --size A4 --left 1.00in --right 0.50in --top 0.50in --bottom 0.50in --header .t. --header1 ... --footer h.1 --nup 1 --tocheader .t. --tocfooter ..i --portrait --color --no-pscommands --no-xrxcomments --compression=1 --jpeg=0 --fontsize 11.0 --fontspacing 1.2 --headingfont Helvetica --bodyfont Times --headfootsize 11.0 --headfootfont Helvetica --charset iso-8859-1 --browserwidth 680 --no-strict --no-overflow --quiet' | |
69 | ||
70 | # Maximum Table of Content levels for htmldoc utility. | |
71 | # | |
72 | # Default: maxtoclvl = 4 | |
73 | maxtoclvl = 4 |
0 | # -*- coding: utf-8 -*- | |
1 | # | |
2 | # archmage -- CHM decompressor | |
3 | # Copyright (c) 2005-2009 Basil Shubin <bashu@users.sourceforge.net> | |
4 | # | |
5 | # This program is free software; you can redistribute it and/or modify it under | |
6 | # the terms of the GNU General Public License as published by the Free Software | |
7 | # Foundation; either version 2 of the License, or (at your option) any later | |
8 | # version. | |
9 | # | |
10 | # This program is distributed in the hope that it will be useful, but WITHOUT | |
11 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
12 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more | |
13 | # details. | |
14 | # | |
15 | # You should have received a copy of the GNU General Public License along with | |
16 | # this program; if not, write to the Free Software Foundation, Inc., 51 Franklin | |
17 | # Street, Fifth Floor, Boston, MA 02110-1301, USA. | |
18 | # | |
19 | ||
20 | """CHM to Text converter (using external tool: lynx or elinks)""" | |
21 | ||
22 | import sys | |
23 | import signal | |
24 | from subprocess import Popen, PIPE | |
25 | ||
26 | signal.signal(signal.SIGPIPE, signal.SIG_DFL) | |
27 | ||
28 | ||
def chmtotext(input, cmd, output=sys.stdout):
    """Convert HTML to plain text by piping it through an external command.

    input  - HTML data fed to the command's standard input; bytes are sent
             as-is, text is encoded as UTF-8 first
    cmd    - shell command line of the converter (lynx, elinks, ...)
    output - writable text stream that receives the converted text followed
             by a newline (default: sys.stdout)
    """
    import locale

    if not isinstance(input, bytes):
        input = input.encode('utf-8')
    proc = Popen(cmd, stdin=PIPE, stdout=PIPE, shell=True)
    # communicate() feeds stdin, closes it and drains stdout concurrently.
    # Writing to proc.stdin directly can deadlock once the pipe buffers fill
    # up, and leaves the converter waiting for an EOF that never arrives.
    data = proc.communicate(input)[0]
    if not isinstance(data, str):
        # On Python 3 the pipe yields bytes; decode them so the text stream
        # receives real text rather than a b'...' representation.
        data = data.decode(locale.getpreferredencoding(False), 'replace')
    output.write(data + '\n')
0 | # -*- coding: utf-8 -*- | |
1 | # | |
2 | # archmage -- CHM decompressor | |
3 | # Copyright (c) 2003 Eugeny Korekin <aaaz@users.sourceforge.net> | |
4 | # Copyright (c) 2005-2009 Basil Shubin <bashu@users.sourceforge.net> | |
5 | # | |
6 | # This program is free software; you can redistribute it and/or modify it under | |
7 | # the terms of the GNU General Public License as published by the Free Software | |
8 | # Foundation; either version 2 of the License, or (at your option) any later | |
9 | # version. | |
10 | # | |
11 | # This program is distributed in the hope that it will be useful, but WITHOUT | |
12 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
13 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more | |
14 | # details. | |
15 | # | |
16 | # You should have received a copy of the GNU General Public License along with | |
17 | # this program; if not, write to the Free Software Foundation, Inc., 51 Franklin | |
18 | # Street, Fifth Floor, Boston, MA 02110-1301, USA. | |
19 | # | |
20 | ||
21 | """arCHMage -- extensible reader and decompiler for files in the CHM format. | |
22 | ||
23 | Usage: %(program)s [options] <chmfile> [destdir|destfile] | |
24 | Where: | |
25 | ||
26 | -x / --extract | |
27 | Extracts CHM file into specified directory. If destination | |
28 | directory is omitted, than the new one will be created based | |
29 | on name of CHM file. This options is by default. | |
30 | ||
31 | -c format | |
32 | --convert=format | |
33 | Convert CHM file into specified file format. If destination | |
34 | file is omitted, than the new one will be created based | |
35 | on name of CHM file. Available formats: | |
36 | ||
37 | html - Single HTML file | |
38 | text - Plain Text file | |
39 | pdf - Adobe PDF file format | |
40 | ||
41 | -p number | |
42 | --port=number | |
43 | Acts as HTTP server on specified port number, so you can read | |
44 | CHM file with your favorite browser. You can specify a directory | |
45 | with decompressed content. | |
46 | ||
47 | -d / --dump | |
48 | Dump HTML data from CHM file into standard output. | |
49 | ||
50 | -V / --version | |
51 | Print version number and exit. | |
52 | ||
53 | -h / --help | |
54 | Print this text and exit. | |
55 | """ | |
56 | ||
57 | import os, sys | |
58 | import getopt | |
59 | ||
60 | import archmod | |
61 | from archmod.CHM import CHMFile, CHMDir | |
62 | from archmod.CHMServer import CHMServer | |
63 | ||
64 | ||
65 | program = sys.argv[0] | |
66 | ||
def usage(code=archmod.OK, msg=''):
    """Print the usage text and an optional message, then terminate.

    code - status passed to archmod.message() and used as the exit status
    msg  - extra text reported after the usage text

    The usage text is the module docstring with its %(program)s placeholder
    filled in from the module globals.
    """
    help_text = __doc__ % globals()
    for text in (help_text, msg):
        archmod.message(code, text)
    sys.exit(code)
72 | ||
73 | ||
def parseargs():
    """Parse the command line (sys.argv) into an options object.

    Returns an object with the attributes:
        mode    - one of the archmod mode constants (defaults to EXTRACT)
        port    - HTTP port number (only set together with -p/--port)
        chmfile - CHM file, or directory, given as first positional argument
        output  - output file or directory for the extract/convert modes

    Terminates the process through usage()/sys.exit() on malformed options,
    on mutually exclusive options, when no CHM file is given, or when
    unexpected positional arguments remain.
    """
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], 'xc:dp:Vh',
            ['extract', 'convert=', 'dump', 'port=', 'version', 'help'])
    except getopt.error as msg:
        # usage() exits, so 'opts'/'args' are always bound below.
        usage(archmod.ERROR, msg)

    class Options:
        mode = None        # EXTRACT or HTTPSERVER or other
        port = None        # HTTP port number
        chmfile = None     # CHM File to view/extract
        output = None      # Output file or directory

    options = Options()

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
        elif opt in ('-V', '--version'):
            archmod.message(archmod.OK, archmod.__version__)
            sys.exit(archmod.OK)
        elif opt in ('-p', '--port'):
            if options.mode is not None:
                sys.exit('-x and -p or -c are mutually exclusive')
            options.mode = archmod.HTTPSERVER
            try:
                options.port = int(arg)
            except ValueError as msg:
                sys.exit('Invalid port number: %s' % msg)
        elif opt in ('-c', '--convert'):
            if options.mode is not None:
                sys.exit('-x and -p or -c are mutually exclusive')
            options.mode = archmod.output_format(str(arg))
        elif opt in ('-x', '--extract'):
            if options.mode is not None:
                sys.exit('-x and -p or -c are mutually exclusive')
            options.mode = archmod.EXTRACT
        elif opt in ('-d', '--dump'):
            if options.mode is not None:
                sys.exit('-d should be used without any other options')
            options.mode = archmod.DUMPHTML
        else:
            assert False, (opt, arg)

    # Sanity checks
    if options.mode is None:
        # Set default option
        options.mode = archmod.EXTRACT

    if not args:
        sys.exit('No CHM file was specified!')
    else:
        # Get CHM file name from command line
        options.chmfile = args.pop(0)

    # if CHM content should be extracted
    if options.mode == archmod.EXTRACT:
        if not args:
            options.output = archmod.file2dir(options.chmfile)
        else:
            # get output directory from command line
            options.output = args.pop(0)
    # or converted into another file format
    elif options.mode in (archmod.CHM2TXT, archmod.CHM2HTML, archmod.CHM2PDF):
        if not args:
            options.output = archmod.output_file(options.chmfile, options.mode)
        else:
            # get output filename from command line
            options.output = args.pop(0)

    # Any other arguments are invalid
    if args:
        sys.exit('Invalid arguments: ' + archmod.COMMASPACE.join(args))

    return options
149 | ||
150 | ||
def main():
    """Command-line entry point: parse arguments and run the chosen mode.

    Exits through sys.exit() when the source path does not exist or when the
    text output file already exists.
    """
    options = parseargs()
    if not os.path.exists(options.chmfile):
        sys.exit('No such file: %s' % options.chmfile)

    # The argument is either a CHM file or a directory with decompressed
    # content; build the matching source object.  An explicit branch is used
    # instead of "a and b or c", which would silently fall through to CHMDir
    # if the CHMFile instance ever evaluated as false.
    if os.path.isfile(options.chmfile):
        source = CHMFile(options.chmfile)
    else:
        source = CHMDir(options.chmfile)

    if options.mode == archmod.HTTPSERVER:
        CHMServer(source, port=options.port).run()
    elif options.mode == archmod.DUMPHTML:
        source.dump_html()
    elif options.mode == archmod.CHM2TXT:
        if os.path.exists(options.output):
            sys.exit('%s is already exists' % options.output)
        # Close (and thereby flush) the output file deterministically.
        with open(options.output, 'w') as out:
            source.chm2text(out)
    elif options.mode in (archmod.CHM2HTML, archmod.CHM2PDF):
        source.htmldoc(options.output, options.mode)
    elif options.mode == archmod.EXTRACT:
        source.extract(options.output)
0 | # -*- coding: utf-8 -*- | |
1 | # | |
2 | # archmage -- CHM decompressor | |
3 | # Copyright (c) 2009 Basil Shubin <bashu@users.sourceforge.net> | |
4 | # | |
5 | # This program is free software; you can redistribute it and/or modify it under | |
6 | # the terms of the GNU General Public License as published by the Free Software | |
7 | # Foundation; either version 2 of the License, or (at your option) any later | |
8 | # version. | |
9 | # | |
10 | # This program is distributed in the hope that it will be useful, but WITHOUT | |
11 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
12 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more | |
13 | # details. | |
14 | # | |
15 | # You should have received a copy of the GNU General Public License along with | |
16 | # this program; if not, write to the Free Software Foundation, Inc., 51 Franklin | |
17 | # Street, Fifth Floor, Boston, MA 02110-1301, USA. | |
18 | # | |
19 | ||
20 | """Generic converter function""" | |
21 | ||
22 | import os | |
23 | import string | |
24 | import tempfile | |
25 | import subprocess | |
26 | import archmod | |
27 | ||
28 | ||
def htmldoc(input, cmd, options, toclevels, output):
    """CHM to other format converter

    input - list of input html files
    cmd - full path to htmldoc command
    options - htmldoc options from arch.conf
    toclevels - number of ToC levels as htmldoc option
    output - output file (single html, ps, pdf and etc)

    Nothing is executed when 'input' is empty.
    """
    if toclevels:
        toc = '--toclevels %s' % (toclevels)
    else:
        toc = '--no-toc'
    options = options % {'output': output, 'toc': toc}
    if not input:
        return

    # Create a htmldoc file for batch processing.  It is opened in text mode
    # because str is written to it (the default binary mode rejects str on
    # Python 3).
    batch = tempfile.NamedTemporaryFile(mode='w', delete=False)
    try:
        with batch:
            batch.write('#HTMLDOC 1.8.27' + archmod.LF)
            batch.write(options + archmod.LF)
            batch.write(archmod.LF.join(input))
        # Prepare command line to execute
        command = '%s --batch %s' % (cmd, batch.name)
        subprocess.call(command, shell=True)
    finally:
        # Unlink temporary htmldoc file, even if writing or htmldoc failed
        os.unlink(batch.name)
0 | # -*- coding: utf-8 -*- | |
1 | # | |
2 | # archmage -- CHM decompressor | |
3 | # Copyright (c) 2003 Eugeny Korekin <aaaz@users.sourceforge.net> | |
4 | # Copyright (c) 2005-2009 Basil Shubin <bashu@users.sourceforge.net> | |
5 | # | |
6 | # This program is free software; you can redistribute it and/or modify it under | |
7 | # the terms of the GNU General Public License as published by the Free Software | |
8 | # Foundation; either version 2 of the License, or (at your option) any later | |
9 | # version. | |
10 | # | |
11 | # This program is distributed in the hope that it will be useful, but WITHOUT | |
12 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
13 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more | |
14 | # details. | |
15 | # | |
16 | # You should have received a copy of the GNU General Public License along with | |
17 | # this program; if not, write to the Free Software Foundation, Inc., 51 Franklin | |
18 | # Street, Fifth Floor, Boston, MA 02110-1301, USA. | |
19 | # | |
20 | ||
21 | from mod_python import apache | |
22 | from mimetypes import guess_type | |
23 | from archmod.CHM import CHMFile | |
24 | ||
25 | chmfile = None | |
26 | chmname = None | |
27 | ||
28 | ||
def handler(req):
    """mod_python request handler serving a CHM file or one of its entries.

    req.filename names the CHM file and req.path_info the entry inside it.
    With a non-empty path_info the entry is looked up and written with a
    MIME type guessed from its name ('/' is served as text/html); a failed
    lookup yields apache.HTTP_NOT_FOUND.  With an empty path_info the raw
    CHM file is streamed as 'application/chm'.

    The most recently opened CHMFile is cached in the module globals
    'chmfile'/'chmname' and reused while the same file is requested.
    """
    source = req.filename
    pagename = req.path_info

    global chmfile, chmname

    if chmname != source:
        chmfile = CHMFile(source)
        chmname = source

    if pagename:
        try:
            page = chmfile.get_entry(pagename)
        except Exception:
            # Any lookup failure is reported as "not found"; unlike a bare
            # except this does not swallow SystemExit/KeyboardInterrupt.
            return apache.HTTP_NOT_FOUND

        if pagename == '/':
            mimetype = 'text/html'
        else:
            mimetype = guess_type(pagename)[0] or 'application/octet-stream'

        req.content_type = mimetype
        req.send_http_header()

        req.write(page)
    else:
        req.content_type = 'application/chm'
        req.send_http_header()
        # Stream the file in 4 KiB chunks and always release the handle.
        with open(source, 'rb') as raw:
            for chunk in iter(lambda: raw.read(4096), b''):
                req.write(chunk)
    return apache.OK
0 | <html> | |
1 | <head> | |
2 | <title><%title%></title> | |
3 | <LINK rel="Stylesheet" type="text/css" href="arch_css.css"> | |
4 | </head> | |
5 | ||
6 | <body onload="setInterval('getLoc()', 500);"> | |
7 | <script> | |
8 | var lastDoc; | |
9 | var contents = <%contents%>; | |
10 | ||
11 | var w=window,d=document | |
12 | var icons={'0' : 'icons/0.gif','1' : 'icons/90.gif', | |
13 | '2' : 'icons/91.gif', '3' : 'icons/92.gif', '4' : 'icons/99.gif', | |
14 | '18' : 'icons/93.gif', '19' : 'icons/94.gif', '20' : 'icons/97.gif', | |
15 | '26' : 'icons/95.gif', '27' : 'icons/96.gif', '28' : 'icons/98.gif'} | |
16 | ||
17 | var dhtml=true | |
18 | try{if(d.body.innerHTML.length<=0)dhtml=false} | |
19 | catch(e){dhtml=false;} | |
20 | var tree=[]; | |
21 | ||
22 | get_element=d.all ? | |
23 | function(id){return d.all[id]} | |
24 | : | |
25 | function(id){return d.getElementById(id)} | |
26 | ||
// Return the path of the tree-line icon for this node.  The key into the
// global `icons` table is built from flags: has children (16), has children
// and is opened (8), is last sibling (1), is first top-level node (2), plus
// a constant base of 2.
function get_img1(){
  var hasKids=this.childs.length
  var key=2
  if(hasKids)key+=16
  if(hasKids && this.opened)key+=8
  if(this.is_last())key+=1
  if(this.is_first())key+=2
  return icons[key]
}
// Return the path of the item icon for this node.  this.cnt[2] holds the
// icon number; for numbers below 9 the result depends on the open state:
// an opened node maps an odd number to the next one, a closed node maps an
// even number to the previous one.  Other numbers are used unchanged.
function get_img2(){
  // Declared locally: the original assigned to an undeclared `n`, leaking
  // (and overwriting) a global variable on every call.
  var n=this.cnt[2]
  if(n<9){
    n=(this.opened ? ( n%2 ? parseInt(n)+1 : n ) : ( n%2 ? n : parseInt(n)-1 ))
  }
  return 'icons/'+n+'.gif'
}
// Constructor for one entry of the contents tree.
//   tree - the parent (either the root object or another node)
//   n    - position of this entry among the parent's children
// The entry's data array is taken from the parent's `cnt`; below the top
// level the index is shifted by 3 because slots 0-2 of a `cnt` array are
// read elsewhere in this file as label, link target and icon number.
// If no data exists at that index the constructor returns early and the
// object is not registered anywhere.
function node(tree,n){
  this.ind=tree.ind+1                        // depth: 0 for top-level entries
  this.cnt=tree.cnt[n+(this.ind ? 3 : 0)]
  if(!this.cnt)return
  this.tree=tree.tree                        // root object shared by all nodes
  this.parent=tree
  this.opened=!dhtml                         // without DHTML everything is shown expanded
  this.nind=this.tree.nodes.length           // index in the flat node list
  this.tree.nodes[this.nind]=this
  tree.childs[n]=this
  this.childs=[]
  // Build the children recursively; an index that has no data is rejected
  // by the early return above.
  for(var i=0;i < this.cnt.length - 2;i++)
    new node(this,i)
  this.get_img1=get_img1
  this.get_img2=get_img2
  this.open=open
  this.select=select
  this.init=init
  // True when this node is the last registered child of its parent.
  this.is_last=function(){
    return n==this.parent.childs.length - 1
  }
  // True only for the first top-level node when it is not also the last.
  this.is_first=function(){
    return(this.ind==0)&&(n==0)&&(!this.is_last())
  }
}
62 | ||
// Node method: expand or collapse this node's child list.
// Does nothing when the container element 'divCont<nind>' does not exist.
// Without DHTML the children are written straight into the document and
// opened recursively.  With DHTML the children's markup is generated once
// (only while the container is still empty), then the container's display
// and the `opened` flag are flipped and both icons are refreshed.
function open(){
  var childs=[]
  var el=get_element('divCont'+this.nind)
  if(!el)return
  if(!dhtml){
    d.write(childs.join(''))                 // `childs` is still empty here
    for(var i=0;i < this.childs.length;i++){
      d.write(this.childs[i].init())
      this.childs[i].open()
    }
  }
  else{
    if(!el.innerHTML){
      for(var i=0;i < this.childs.length;i++)
        childs[i]=this.childs[i].init()
      el.innerHTML=childs.join('')
    }
    el.style.display=(this.opened ? 'none' : 'block')
    this.opened=!this.opened
    var img1=d.images['img1_'+this.nind],img2=d.images['img2_'+this.nind]
    if(img1)img1.src=this.get_img1()
    if(img2)img2.src=this.get_img2()
  }
}
87 | ||
88 | ||
// Node method: mark this node as selected, or as deselected when `nind` is
// truthy.  Selecting records the node in tree.sel and first deselects the
// previously selected node.  In both cases the item icon is refreshed and
// the label's font weight is updated.  Returns whether the node has a link
// target (cnt[1]).
function select(nind){
  var deselecting=Boolean(nind)
  if(!deselecting){
    var previous=this.tree.sel
    this.tree.sel=this
    if(previous)previous.select(true)
  }
  var icon=d.images['img2_'+this.nind]
  if(icon)icon.src=this.get_img2()
  var label=get_element('el'+this.nind)
  label.style.fontWeight=deselecting ? 'normal' : 'bold'
  return Boolean(this.cnt[1])
}
100 | ||
// Node method: build and return the HTML markup for this node's row: one
// connector image per ancestor level, the expand/collapse icon (a link
// when DHTML is available and the node has children), the item link with
// its icon, and an initially hidden container for the children.
function init(){
  var temp=[],par=this.parent
  // Walk up the ancestors; slot 0 stays unset and joins as an empty string.
  for(var i=this.ind;i>0;i--){
    temp[i]='<img src="'+icons[par.is_last()? 0 : 1]+'" border="0" align="absbottom">'
    par=par.parent
  }
  // Declared locally: the original assigned to an undeclared `r`, leaking
  // a global variable.
  var r='<table cellpadding="0" cellspacing="0" border="0">'
  r+='<tr><td nowrap>'
  r+=temp.join('')
  r+=(this.childs.length ?(!dhtml ? '' : '<a href="javascript: tree.toggle('+this.nind+')" >')+'<img src="'+this.get_img1()+'" border="0" align="absbottom" name="img1_'+this.nind+'">'+(!dhtml ? '' : '</a>'): '<img src="'+this.get_img1()+'" border="0" align="absbottom">')
  r+='<a href="'+this.cnt[1]+'" target="'+'content'+'"'+' title="'+this.cnt[0]+'" onclick="return tree.select('+this.nind+')" '+(!dhtml ? '' : ' ondblclick="tree.toggle('+this.nind+')"')+' class="small" id="el'+this.nind+'"><img src="'+this.get_img2()+'" border="0" align="absbottom" name="img2_'+this.nind+'"> '+this.cnt[0]+'</a>'
  r+='</td></tr></table>'
  r+=(this.childs.length ? '<div id="divCont'+this.nind+'" style="display:none"></div>' : '')
  return r
}
116 | ||
// Constructor for the root of the contents tree (invoked with `new`).
// Stores the new object in the global `tree`, builds a node for every
// element of `cnt`, and writes the markup of the top-level nodes into the
// document.  Without DHTML each top-level node is opened immediately.
function draw_contents(cnt){
  tree=this;                                 // global handle used by the generated links
  tree.cnt=cnt;
  tree.tree=this;
  tree.nodes=[];                             // flat list of all nodes, indexed by nind
  tree.sel=null;                             // currently selected node
  tree.ind=-1;                               // so that top-level nodes get depth 0

  // Entry points referenced from the markup produced by init().
  tree.select=function(i){
    return tree.nodes[i].select();
  };
  tree.toggle=function(i){
    tree.nodes[i].open()
  };
  tree.childs=[]
  for(var i=0;i<cnt.length;i++){
    new node(tree,i)
  }
  tree.nind=0;

  for(var i=0;i < tree.childs.length;i++){
    d.write(tree.childs[i].init());
    if(!dhtml)tree.childs[i].open();
  }
}
142 | ||
143 | ||
// Polled from the body's onload interval: when the location of the second
// frame changes, find the first tree node whose link target contains the
// displayed page name, open its parent if that is a closed node, and
// select the node.
function getLoc(){
  var doc = ""+parent.frames[1].location;
  if(doc == lastDoc) return;

  var parts = doc.split("/");
  var targetPage = ""+parts[parts.length-1];

  // Drop the fragment part, if any.
  var hashAt = targetPage.indexOf("#");
  if(hashAt > 0){
    targetPage = targetPage.substr(0,hashAt);
  }

  // Bounded search: the original loop ran past the end of tree.nodes when
  // no node matched and then threw on every timer tick.
  var found = null;
  for(var i=0;i<tree.nodes.length;i++){
    if((""+tree.nodes[i].cnt[1]).lastIndexOf(targetPage) >= 0){
      found = tree.nodes[i];
      break;
    }
  }
  if(!found){
    // Page is not part of the contents; remember it so it is not
    // searched for again on the next tick.
    lastDoc = doc;
    return;
  }

  var parentNode = found.parent;
  if(parentNode != tree && parentNode.opened == false){
    parentNode.open();
  }
  found.select();
  lastDoc = doc;
}
167 | new draw_contents(contents); | |
168 | </script> | |
169 | </body> | |
170 | </html> |
0 | <html> | |
1 | <head> | |
2 | <title><%title%></title> | |
3 | ||
4 | <script> | |
5 | var qs=location.search.substr(1) | |
6 | var A=qs.split("&") | |
7 | var B=null | |
8 | var F="<%deftopic%>" | |
9 | for(var i=0;i<A.length;i++){B=A[i].split("=");A[i]=[B[0],B[1]]} | |
10 | for(var j=0;j<A.length;j++){if(A[j][0]=='page'){ F=A[j][1];break}} | |
11 | </script > | |
12 | </head> | |
13 | <script> | |
14 | document.write('<frameset cols="200,*" bordercolor="<%bcolor%>" frameborder="yes" framespacing="2" >') | |
15 | document.write('<frame name="toc" src="arch_contents.html">') | |
16 | document.write('<frame name="content" src="'+F+'" >') | |
17 | document.write('</frameset>'); | |
18 | </script> | |
19 | <noscript> | |
20 | <frameset cols="200,*" bordercolor="<%bcolor%>" frameborder="yes" framespacing="2" > | |
21 | <frame name="toc" src="arch_contents.html" > | |
22 | <frame name="content" src="<%deftopic%>"> | |
23 | </frameset> | |
24 | </noscript> | |
25 | </html> |
0 | <html> | |
1 | <head> | |
2 | <title><%title%></title> | |
3 | <LINK rel="Stylesheet" type="text/css" href="arch_css.css"> | |
4 | </head> | |
5 | <body bgcolor="<%bcolor%>"> | |
6 | <table class='htable' cellpadding="0" cellspacing="0" width="100%"><td> | |
7 | <td align="center" width="100%"> | |
8 | <b><font size="large" color="<%fcolor%>"><%title%></font></b> | |
9 | </table> | |
10 | </body> | |
11 | </html> |
0 | <html> | |
1 | <head> | |
2 | <script>var pageid="";</script> | |
3 | ||
4 | <title><%title%></title> | |
5 | ||
6 | <script> | |
// Report whether the browser's user-agent string contains "Opera".
function IsOpera(){
  var agent=navigator.userAgent
  return agent.indexOf("Opera")!=-1
}
8 | var qs=location.search.substr(1); | |
9 | var A=qs.split("&") | |
10 | var B=null | |
11 | var F="<%deftopic%>"; | |
12 | for(var i=0;i<A.length;i++){ | |
13 | B=A[i].split("=") | |
14 | A[i]=[B[0],B[1]] | |
15 | } | |
16 | for(var j=0;j<A.length;j++){ | |
17 | if(A[j][0]=='page'){ | |
18 | F=A[j][1] | |
19 | break | |
20 | } | |
21 | } | |
22 | if (IsOpera()) F = '';</script> | |
23 | </head> | |
24 | ||
25 | <script> | |
26 | document.write('<frameset rows="30,*" frameborder="no" framespacing="0" border="0" >') | |
27 | document.write('<frame name="header" src="arch_header.html" frameborder="no" noresize="yes" scrolling="no" >') | |
28 | if(F!='')F='?page='+F | |
29 | document.write('<frame name="main" src="arch_frameset.html'+F+'">') | |
30 | document.write('</frameset>') | |
31 | </script> | |
32 | <noscript> | |
33 | <frameset rows="30,*" frameborder="no" framespacing="0" border="0" > | |
34 | <frame name="header" src="arch_header.html" frameborder="no" noresize="yes" scrolling="no"> | |
35 | <frame name="main" src="arch_frameset.html" > | |
36 | </frameset> | |
37 | </noscript> | |
38 | </html> |
0 | 0 | #!/usr/bin/env python |
1 | 1 | |
2 | 2 | from setuptools import setup, find_packages |
3 | import version | |
4 | 3 | |
5 | 4 | long_desc='''arCHMage is a reader and decompressor for CHM format''' |
6 | 5 | |
15 | 14 | |
16 | 15 | setup( |
17 | 16 | name='archmage', |
18 | version=version.getVersion(), | |
17 | version='0.4.0', | |
19 | 18 | description='CHM decompressor', |
20 | 19 | maintainer='Mikhail Gusarov', |
21 | 20 | maintainer_email='dottedmag@dottedmag.net', |
27 | 26 | packages=find_packages(), |
28 | 27 | install_requires=[ |
29 | 28 | 'pychm', |
30 | 'BeautifulSoup', | |
29 | 'beautifulsoup4', | |
30 | 'sgmllib3k', | |
31 | 31 | ], |
32 | 32 | entry_points={ |
33 | 'console_scripts': ['archmage = archmod.cli:main'], | |
33 | 'console_scripts': ['archmage = archmage.cli:main'], | |
34 | 34 | }, |
35 | 35 | package_data={ |
36 | 'archmod': ['*.conf', 'templates/*.html', 'templates/*.css', | |
36 | 'archmage': ['*.conf', 'templates/*.html', 'templates/*.css', | |
37 | 37 | 'templates/icons/*.gif'], |
38 | 38 | } |
39 | 39 | ) |
0 | # -*- coding: utf-8 -*- | |
1 | ||
2 | """Calculates the current version number. | |
3 | ||
4 | If possible, uses output of “git describe” modified to conform to the | |
5 | versioning scheme that setuptools uses (see PEP 386). Releases must be | |
6 | labelled with annotated tags (signed tags are annotated) of the following | |
7 | format: | |
8 | ||
9 | v<num>(.<num>)+ [ {a|b|c|rc} <num> (.<num>)* ] | |
10 | ||
11 | If “git describe” returns an error (likely because we're in an unpacked copy | |
12 | of a release tarball, rather than a git working copy), or returns a tag that | |
13 | does not match the above format, version is read from RELEASE-VERSION file. | |
14 | ||
15 | To use this script, simply import it in your setup.py file, and use the results | |
16 | of getVersion() as your package version: | |
17 | ||
18 | import version | |
19 | setup( | |
20 | version=version.getVersion(), | |
21 | . | |
22 | . | |
23 | . | |
24 | ) | |
25 | ||
26 | This will automatically update the RELEASE-VERSION file. The RELEASE-VERSION | |
27 | file should *not* be checked into git but it *should* be included in sdist | |
28 | tarballs (as should version.py file). To do this, run: | |
29 | ||
30 | echo include RELEASE-VERSION version.py >>MANIFEST.in | |
31 | echo RELEASE-VERSION >>.gitignore | |
32 | ||
33 | With that setup, a new release can be labelled by simply invoking: | |
34 | ||
35 | git tag -s v1.0 | |
36 | """ | |
37 | ||
38 | __author__ = ('Douglas Creager <dcreager@dcreager.net>', | |
39 | 'Michal Nazarewicz <mina86@mina86.com>') | |
40 | __license__ = 'This file is placed into the public domain.' | |
41 | __maintainer__ = 'Michal Nazarewicz' | |
42 | __email__ = 'mina86@mina86.com' | |
43 | ||
# Must be a sequence of names: without the trailing comma this is a plain
# string, and "from version import *" would try to import 'g', 'e', 't', ...
__all__ = ('getVersion',)
45 | ||
46 | ||
47 | import re | |
48 | import subprocess | |
49 | import sys | |
50 | ||
51 | ||
52 | RELEASE_VERSION_FILE = 'RELEASE-VERSION' | |
53 | ||
54 | # http://www.python.org/dev/peps/pep-0386/ | |
55 | _PEP386_SHORT_VERSION_RE = r'\d+(?:\.\d+)+(?:(?:[abc]|rc)\d+(?:\.\d+)*)?' | |
56 | _PEP386_VERSION_RE = r'^%s(?:\.post\d+)?(?:\.dev\d+)?$' % ( | |
57 | _PEP386_SHORT_VERSION_RE) | |
58 | _GIT_DESCRIPTION_RE = r'^(?P<ver>%s)-(?P<commits>\d+)-g(?P<sha>[\da-f]+)$' % ( | |
59 | _PEP386_SHORT_VERSION_RE) | |
60 | ||
61 | ||
def readGitVersion():
    """Return the version derived from "git describe", or None.

    None is returned when git cannot be run, exits with an error, prints
    nothing, or prints a description that does not match
    _GIT_DESCRIPTION_RE (a warning is written to stderr in that last case).

    When the description reports zero commits since the tag, the tag's
    version is returned as-is; otherwise '.post<commits>.dev<sha>' is
    appended, with the abbreviated commit hash converted to a decimal
    integer.
    """
    try:
        proc = subprocess.Popen(('git', 'describe', '--long',
                                 '--match', '[0-9]*.*'),
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        data, _ = proc.communicate()
    except OSError:
        # git is not installed or could not be executed
        return None
    if proc.returncode:
        return None

    # The pipe yields bytes on Python 3; decode them so they can be matched
    # against the str pattern below.
    if not isinstance(data, str):
        data = data.decode('utf-8', 'replace')
    lines = data.splitlines()
    ver = lines[0].strip() if lines else ''
    if not ver:
        return None

    m = re.search(_GIT_DESCRIPTION_RE, ver)
    if not m:
        sys.stderr.write('version: git description (%s) is invalid, '
                         'ignoring\n' % ver)
        return None

    commits = int(m.group('commits'))
    if not commits:
        return m.group('ver')
    return '%s.post%d.dev%d' % (
        m.group('ver'), commits, int(m.group('sha'), 16))
88 | ||
89 | ||
def readReleaseVersion():
    """Return the version stored in RELEASE_VERSION_FILE, or None.

    Only the first line of the file is used.  None is returned when the
    file cannot be opened or read.  A version that does not match
    _PEP386_VERSION_RE triggers a warning on stderr but is returned anyway.
    """
    try:
        with open(RELEASE_VERSION_FILE) as fd:
            ver = fd.readline().strip()
    except (IOError, OSError, UnicodeError):
        # Missing or unreadable file: there is no recorded release version.
        # Other exceptions indicate programming errors and are not hidden.
        return None
    if not re.search(_PEP386_VERSION_RE, ver):
        sys.stderr.write('version: release version (%s) is invalid, '
                         'will use it anyway\n' % ver)
    return ver
103 | ||
104 | ||
def writeReleaseVersion(version):
    """Overwrite RELEASE_VERSION_FILE with 'version' followed by a newline."""
    # The with-statement closes the file even when the write fails.
    with open(RELEASE_VERSION_FILE, 'w') as fd:
        fd.write('%s\n' % version)
109 | ||
110 | ||
def getVersion():
    """Return the current version, preferring git over the release file.

    The version from readGitVersion() is used when available, otherwise the
    one from readReleaseVersion().  When the chosen version differs from the
    recorded release version, the release file is rewritten.

    Raises ValueError when neither source yields a version.
    """
    stored = readReleaseVersion()
    current = readGitVersion()
    if not current:
        current = stored
    if not current:
        raise ValueError('Cannot find the version number')
    if current != stored:
        writeReleaseVersion(current)
    return current
119 | ||
120 | ||
121 | if __name__ == '__main__': | |
122 | print(getVersion()) |