1 | 1 |
# -*- coding: utf-8 -*-
|
2 | 2 |
#
|
3 | 3 |
# dtrx -- Intelligently extract various archive types.
|
4 | |
# Copyright ⓒ 2006, 2007, 2008 Brett Smith <brettcsmith@brettcsmith.org>
|
5 | |
# Copyright ⓒ 2008 Peter Kelemen <Peter.Kelemen@gmail.com>
|
|
4 |
# Copyright © 2006-2009 Brett Smith <brettcsmith@brettcsmith.org>
|
|
5 |
# Copyright © 2008 Peter Kelemen <Peter.Kelemen@gmail.com>
|
6 | 6 |
#
|
7 | 7 |
# This program is free software; you can redistribute it and/or modify it
|
8 | 8 |
# under the terms of the GNU General Public License as published by the
|
|
20 | 20 |
# Python 2.3 string methods: 'rfind', 'rindex', 'rjust', 'rstrip'
|
21 | 21 |
|
22 | 22 |
import errno
|
|
23 |
import fcntl
|
23 | 24 |
import logging
|
24 | 25 |
import mimetypes
|
25 | 26 |
import optparse
|
|
28 | 29 |
import shutil
|
29 | 30 |
import signal
|
30 | 31 |
import stat
|
|
32 |
import string
|
|
33 |
import struct
|
31 | 34 |
import subprocess
|
32 | 35 |
import sys
|
33 | 36 |
import tempfile
|
|
37 |
import termios
|
34 | 38 |
import textwrap
|
35 | 39 |
import traceback
|
36 | 40 |
|
|
39 | 43 |
except NameError:
|
40 | 44 |
from sets import Set as set
|
41 | 45 |
|
42 | |
VERSION = "6.4"
|
|
46 |
VERSION = "6.5"
|
43 | 47 |
VERSION_BANNER = """dtrx version %s
|
44 | |
Copyright ⓒ 2006, 2007, 2008 Brett Smith <brettcsmith@brettcsmith.org>
|
45 | |
Copyright ⓒ 2008 Peter Kelemen <Peter.Kelemen@gmail.com>
|
|
48 |
Copyright © 2006-2009 Brett Smith <brettcsmith@brettcsmith.org>
|
|
49 |
Copyright © 2008 Peter Kelemen <Peter.Kelemen@gmail.com>
|
46 | 50 |
|
47 | 51 |
This program is free software; you can redistribute it and/or modify it
|
48 | 52 |
under the terms of the GNU General Public License as published by the
|
|
167 | 171 |
return index
|
168 | 172 |
return None
|
169 | 173 |
|
|
174 |
def add_process(self, processes, command, stdin, stdout):
|
|
175 |
try:
|
|
176 |
processes.append(subprocess.Popen(command, stdin=stdin,
|
|
177 |
stdout=stdout,
|
|
178 |
stderr=self.stderr))
|
|
179 |
except OSError, error:
|
|
180 |
if error.errno == errno.ENOENT:
|
|
181 |
raise ExtractorUnusable("could not run %s" % (command[0],))
|
|
182 |
raise
|
|
183 |
|
170 | 184 |
def run_pipes(self, final_stdout=None):
|
171 | 185 |
if not self.pipes:
|
172 | 186 |
return
|
173 | 187 |
elif final_stdout is None:
|
174 | |
# FIXME: Buffering this might be dumb.
|
175 | |
final_stdout = tempfile.TemporaryFile()
|
|
188 |
final_stdout = open('/dev/null', 'w')
|
176 | 189 |
num_pipes = len(self.pipes)
|
177 | 190 |
last_pipe = num_pipes - 1
|
178 | 191 |
processes = []
|
|
185 | 198 |
stdout = final_stdout
|
186 | 199 |
else:
|
187 | 200 |
stdout = subprocess.PIPE
|
188 | |
try:
|
189 | |
processes.append(subprocess.Popen(command, stdin=stdin,
|
190 | |
stdout=stdout,
|
191 | |
stderr=self.stderr))
|
192 | |
except OSError, error:
|
193 | |
if error.errno == errno.ENOENT:
|
194 | |
raise ExtractorUnusable("could not run %s" % (command[0],))
|
195 | |
raise
|
|
201 |
self.add_process(processes, command, stdin, stdout)
|
196 | 202 |
self.exit_codes = [pipe.wait() for pipe in processes]
|
197 | 203 |
self.archive.close()
|
198 | 204 |
for index in range(last_pipe):
|
|
284 | 290 |
self.archive.close()
|
285 | 291 |
os.chdir(old_path)
|
286 | 292 |
|
287 | |
def get_filenames(self):
|
288 | |
self.pipe(self.list_pipe, "listing")
|
289 | |
self.run_pipes()
|
|
293 |
def get_filenames(self, internal=False):
    """Yield each line printed by the last command in self.pipes.

    Unless internal is true, self.list_pipe is queued first through
    self.pipe() under the description "listing".  Every command in
    self.pipes is then started, each one reading from the previous
    command's stdout (the first one reads from self.archive), and the
    final command's output is yielded line by line without its
    trailing newline.

    Once the output is exhausted the exit codes are stored in
    self.exit_codes, the archive and every child's stdout are closed,
    and self.check_success(False) is called.  None of that runs if the
    caller stops iterating early.
    """
    if not internal:
        self.pipe(self.list_pipe, "listing")
    processes = []
    source = self.archive
    for pipe in self.pipes:
        # pipe[0] is the command to run.
        self.add_process(processes, pipe[0], source, subprocess.PIPE)
        source = processes[-1].stdout
    listing = processes[-1].stdout
    # readline() returns an empty string only at end of output.
    for line in iter(listing.readline, ''):
        yield line.rstrip('\n')
    self.exit_codes = [process.wait() for process in processes]
    self.archive.close()
    for process in processes:
        process.stdout.close()
    self.check_success(False)
|
291 | |
self.archive.seek(0, 0)
|
292 | |
while True:
|
293 | |
line = self.archive.readline()
|
294 | |
if not line:
|
295 | |
self.archive.close()
|
296 | |
return
|
297 | |
yield line.rstrip('\n')
|
298 | 312 |
|
299 | 313 |
|
300 | 314 |
class CompressionExtractor(BaseExtractor):
|
|
376 | 390 |
|
377 | 391 |
class DebExtractor(TarExtractor):
|
378 | 392 |
file_type = 'Debian package'
|
|
393 |
data_re = re.compile(r'^data\.tar\.[a-z0-9]+$')
|
379 | 394 |
|
380 | 395 |
def prepare(self):
    """Queue the pipes that extract and decode the package's data.tar.

    The package members are listed with `ar t`, and the first member
    whose name matches data_re is taken as the data file.  The listing
    pipe is then removed again and replaced with an `ar p` extraction
    of that member followed by the decoder chosen from self.decoders
    according to the encoding that mimetypes guesses from its name.

    Raises ExtractorError when no member matches or when the encoding
    cannot be guessed.
    """
    self.pipe(['ar', 't', self.filename], "finding package data file")
    data_filename = None
    for member in self.get_filenames(internal=True):
        if self.data_re.match(member):
            data_filename = member
            break
    if data_filename is None:
        raise ExtractorError(".deb contains no data.tar file")
    # Rewind the archive and drop the listing pipe before queueing the
    # extraction pipes.
    self.archive.seek(0, 0)
    self.pipes.pop()
    encoding = mimetypes.guess_type(data_filename)[1]
    if not encoding:
        raise ExtractorError("data.tar file has unrecognized encoding")
    extract_command = ['ar', 'p', self.filename, data_filename]
    self.pipe(extract_command, "extracting data.tar from .deb")
    decode_command = [self.decoders[encoding]]
    self.pipe(decode_command, "decoding data.tar")
|
384 | 412 |
|
385 | 413 |
def basename(self):
|
386 | 414 |
pieces = os.path.basename(self.filename).split('_')
|
|
470 | 498 |
if fn_index is not None:
|
471 | 499 |
break
|
472 | 500 |
else:
|
473 | |
fn_index = line.rindex(' ') + 1
|
|
501 |
fn_index = string.rindex(line, ' ') + 1
|
474 | 502 |
elif fn_index is not None:
|
475 | 503 |
yield line[fn_index:]
|
476 | 504 |
self.archive.close()
|
|
660 | 688 |
|
661 | 689 |
class BasePolicy(object):
|
662 | 690 |
# Work out how wide wrapped text may be.  Ask the terminal attached to
# stdout for its window size; the second field of the reply is the
# column count.
try:
    size = fcntl.ioctl(sys.stdout.fileno(), termios.TIOCGWINSZ,
                       struct.pack("HHHH", 0, 0, 0, 0))
    width = struct.unpack("HHHH", size)[1]
except IOError:
    # stdout is not a terminal (or the ioctl is unsupported).
    width = 80
if width <= 1:
    # The kernel reports 0 columns when the window size is unknown.
    # Using that would give TextWrapper a non-positive width, which it
    # rejects, so fall back to the conventional default instead.
    width = 80
# Leave the last column free.
width = width - 1
# Wrapper used to render each choice as a bullet item.
choice_wrapper = textwrap.TextWrapper(width=width, initial_indent=' * ',
                                      subsequent_indent=' ',
                                      break_long_words=False)
|
|
700 |
|
668 | 701 |
def __init__(self, options):
|
669 | 702 |
self.current_policy = None
|
670 | 703 |
if options.batch:
|
|
672 | 705 |
else:
|
673 | 706 |
self.permanent_policy = None
|
674 | 707 |
|
675 | |
def wrap(self, question, filename):
|
676 | |
# Note: This function assumes the filename is the first thing in the
|
677 | |
# question text, and that's the only place it appears.
|
678 | |
if len(self.wrapper.wrap(filename + ' a')) > 1:
|
679 | |
return [filename] + self.wrapper.wrap(question[3:])
|
680 | |
return self.wrapper.wrap(question % (filename,))
|
681 | |
|
682 | 708 |
def ask_question(self, question):
|
683 | |
question = question + self.choices
|
|
709 |
question = question + ["You can:"]
|
|
710 |
for choice in self.choices:
|
|
711 |
question.extend(self.choice_wrapper.wrap(choice))
|
684 | 712 |
while True:
|
685 | 713 |
print "\n".join(question)
|
686 | 714 |
try:
|
|
692 | 720 |
except KeyError:
|
693 | 721 |
print
|
694 | 722 |
|
|
723 |
def wrap(self, question, *args):
    """Fill in question's placeholders and wrap it to self.width.

    question is split on whitespace, and each standalone '%s' word is
    replaced, left to right, by the corresponding value from args.  A
    substituted value is always kept as a single word, so it is never
    broken across lines even if it contains spaces.

    Returns a list of strings, one per output line.  Raises ValueError
    if there are more args than '%s' words.
    """
    words = question.split()
    position = 0
    for arg in args:
        # Search only past the previous substitution so that an
        # argument which is itself the text '%s' is not overwritten.
        position = words.index('%s', position)
        # Format the value so non-string arguments (such as counts)
        # can never reach the caller's "\n".join() unconverted.
        words[position] = '%s' % (arg,)
        position += 1
    result = [words.pop(0)]
    for word in words:
        extend = '%s %s' % (result[-1], word)
        if len(extend) > self.width:
            # The word does not fit on the current line; start a new one.
            result.append(word)
        else:
            result[-1] = extend
    return result
|
|
735 |
|
695 | 736 |
def __cmp__(self, other):
    """Compare this policy's current decision against other."""
    decision = self.current_policy
    return cmp(decision, other)
|
697 | 738 |
|
|
699 | 740 |
class OneEntryPolicy(BasePolicy):
|
700 | 741 |
answers = {'h': EXTRACT_HERE, 'i': EXTRACT_WRAP, 'r': EXTRACT_RENAME,
|
701 | 742 |
'': EXTRACT_WRAP}
|
702 | |
choices = ["You can:",
|
703 | |
" * extract it Inside another directory",
|
704 | |
" * extract it and Rename the directory",
|
705 | |
" * extract it Here"]
|
|
743 |
choice_template = ["extract the %s _I_nside a new directory named %s",
|
|
744 |
"extract the %s and _R_ename it %s",
|
|
745 |
"extract the %s _H_ere"]
|
706 | 746 |
prompt = "What do you want to do? (I/r/h) "
|
707 | 747 |
|
708 | 748 |
def __init__(self, options):
|
|
723 | 763 |
raise ValueError("bad value %s for default policy" % (default,))
|
724 | 764 |
|
725 | 765 |
def prep(self, archive_filename, extractor):
    """Decide how to handle an archive whose single entry is misnamed.

    Builds the question text and the list of choices from the
    extractor's content type, expected basename and actual content
    name, then stores the decision in self.current_policy.  The user is
    only asked when no permanent policy is set.
    """
    question = self.wrap(
        "%s contains one %s but its name doesn't match.",
        archive_filename, extractor.content_type)
    question.extend([" Expected: " + extractor.basename(),
                     " Actual: " + extractor.content_name])
    choice_vars = (extractor.content_type, extractor.basename())
    choices = []
    for template in self.choice_template:
        # Each template uses only as many values as it has placeholders.
        placeholders = template.count('%s')
        choices.append(template % choice_vars[:placeholders])
    self.choices = choices
    policy = self.permanent_policy
    if not policy:
        policy = self.ask_question(question)
    self.current_policy = policy
|
733 | 776 |
|
|
738 | 781 |
class RecursionPolicy(BasePolicy):
|
739 | 782 |
answers = {'o': RECURSE_ONCE, 'a': RECURSE_ALWAYS, 'n': RECURSE_NOT_NOW,
|
740 | 783 |
'v': RECURSE_NEVER, 'l': RECURSE_LIST, '': RECURSE_NOT_NOW}
|
741 | |
choices = ["You can:",
|
742 | |
" * Always extract included archives",
|
743 | |
" * extract included archives this Once",
|
744 | |
" * choose Not to extract included archives",
|
745 | |
" * neVer extract included archives",
|
746 | |
" * List included archives"]
|
|
784 |
choices = ["_A_lways extract included archives during this session",
|
|
785 |
"extract included archives this _O_nce",
|
|
786 |
"choose _N_ot to extract included archives this once",
|
|
787 |
"ne_V_er extract included archives during this session",
|
|
788 |
"_L_ist included archives"]
|
747 | 789 |
prompt = "What do you want to do? (a/o/N/v/l) "
|
748 | 790 |
|
749 | 791 |
def __init__(self, options):
|
|
758 | 800 |
if (self.permanent_policy is not None) or (archive_count == 0):
|
759 | 801 |
self.current_policy = self.permanent_policy or RECURSE_NOT_NOW
|
760 | 802 |
return
|
761 | |
question = self.wrap(("%%s contains %s other archive file(s), " +
|
762 | |
"out of %s file(s) total.") %
|
763 | |
(archive_count, extractor.file_count),
|
764 | |
current_filename)
|
|
803 |
question = self.wrap(
|
|
804 |
"%s contains %s other archive file(s), out of %s file(s) total.",
|
|
805 |
current_filename, archive_count, extractor.file_count)
|
765 | 806 |
if target == '.':
|
766 | 807 |
target = ''
|
767 | 808 |
included_root = extractor.included_root
|
|
839 | 880 |
for extension in ext_info.get('extensions', ()):
|
840 | 881 |
extension_map.setdefault(extension, []).append((ext_name, None))
|
841 | 882 |
|
842 | |
for mapping in (('tar', 'bzip2', 'tar.bz2'),
|
|
883 |
for mapping in (('tar', 'bzip2', 'tar.bz2', 'tbz2', 'tb2', 'tbz'),
|
843 | 884 |
('tar', 'gzip', 'tar.gz', 'tgz'),
|
|
885 |
('tar', 'lzma', 'tar.lzma', 'tlz'),
|
|
886 |
('tar', 'compress', 'tar.Z', 'taz'),
|
844 | 887 |
('compress', 'gzip', 'Z', 'gz'),
|
845 | 888 |
('compress', 'bzip2', 'bz2'),
|
846 | 889 |
('compress', 'lzma', 'lzma')):
|
|
935 | 978 |
self.options = options
|
936 | 979 |
self.filenames = filenames
|
937 | 980 |
self.target = None
|
|
981 |
self.do_print = False
|
938 | 982 |
|
939 | 983 |
def report(self, function, *args):
|
940 | 984 |
try:
|
|
944 | 988 |
logger.debug(''.join(traceback.format_exception(*sys.exc_info())))
|
945 | 989 |
return error
|
946 | 990 |
|
|
991 |
def show_filename(self, filename):
|
|
992 |
if len(self.filenames) < 2:
|
|
993 |
return
|
|
994 |
elif self.do_print:
|
|
995 |
print
|
|
996 |
else:
|
|
997 |
self.do_print = True
|
|
998 |
print "%s:" % (filename,)
|
|
999 |
|
947 | 1000 |
|
948 | 1001 |
class ExtractionAction(BaseAction):
|
949 | 1002 |
handlers = [FlatHandler, OverwriteHandler, MatchHandler, EmptyHandler,
|
950 | 1003 |
BombHandler]
|
951 | |
|
952 | |
def __init__(self, options, filenames):
|
953 | |
BaseAction.__init__(self, options, filenames)
|
954 | |
self.did_print = False
|
955 | 1004 |
|
956 | 1005 |
def get_handler(self, extractor):
|
957 | 1006 |
if extractor.content_type in ONE_ENTRY_UNKNOWN:
|
|
966 | 1015 |
def show_extraction(self, extractor):
|
967 | 1016 |
if self.options.log_level > logging.INFO:
|
968 | 1017 |
return
|
969 | |
elif self.did_print:
|
970 | |
print
|
971 | |
else:
|
972 | |
self.did_print = True
|
973 | |
print "%s:" % (self.current_filename,)
|
|
1018 |
self.show_filename(self.current_filename)
|
974 | 1019 |
if extractor.contents is None:
|
975 | 1020 |
print self.current_handler.target
|
976 | 1021 |
return
|
|
1006 | 1051 |
|
1007 | 1052 |
|
1008 | 1053 |
class ListAction(BaseAction):
|
1009 | |
def __init__(self, options, filenames):
|
1010 | |
BaseAction.__init__(self, options, filenames)
|
1011 | |
self.count = 0
|
1012 | |
|
1013 | |
def get_list(self, extractor):
|
1014 | |
# Note: The reason I'm getting all the filenames up front is
|
1015 | |
# because if we run into trouble partway through the archive, we'll
|
1016 | |
# try another extractor. So before we display anything we have to
|
1017 | |
# be sure this one is successful. We maybe don't have to be quite
|
1018 | |
# this conservative but this is the easy way out for now.
|
1019 | |
self.filelist = list(extractor.get_filenames())
|
1020 | |
|
1021 | |
def show_list(self, filename):
|
1022 | |
self.count += 1
|
1023 | |
if len(self.filenames) != 1:
|
1024 | |
if self.count > 1:
|
1025 | |
print
|
1026 | |
print "%s:" % (filename,)
|
1027 | |
print '\n'.join(self.filelist)
|
1028 | |
|
|
1054 |
def list_filenames(self, extractor, filename):
|
|
1055 |
# We get a line first to make sure there's not going to be some
|
|
1056 |
# basic error before we show what filename we're listing.
|
|
1057 |
filename_lister = extractor.get_filenames()
|
|
1058 |
try:
|
|
1059 |
first_line = filename_lister.next()
|
|
1060 |
except StopIteration:
|
|
1061 |
self.show_filename(filename)
|
|
1062 |
else:
|
|
1063 |
self.did_list = True
|
|
1064 |
self.show_filename(filename)
|
|
1065 |
print first_line
|
|
1066 |
for line in filename_lister:
|
|
1067 |
print line
|
|
1068 |
|
1029 | 1069 |
def run(self, filename, extractor):
|
1030 | |
return (self.report(self.get_list, extractor) or
|
1031 | |
self.report(self.show_list, filename))
|
|
1070 |
self.did_list = False
|
|
1071 |
error = self.report(self.list_filenames, extractor, filename)
|
|
1072 |
if error and self.did_list:
|
|
1073 |
logger.error("lister failed: ignore above listing for %s" %
|
|
1074 |
(filename,))
|
|
1075 |
return error
|
1032 | 1076 |
|
1033 | 1077 |
|
1034 | 1078 |
class ExtractorApplication(object):
|