# Copyright 2009 One Laptop Per Child
# Author: Sayamindu Dasgupta <sayamindu@laptop.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
import zipfile
import tempfile
import os
import xml.etree.ElementTree as etree
import shutil
import navmap
import epubinfo
class _Epub(object):
    '''
    Read-only view of an epub file.

    Constructing an instance opens the archive at ``filepath``, locates
    the OPF package file and the NCX table of contents, builds the
    navigation map and metadata objects from them, and unpacks the whole
    archive into a freshly created temporary directory.

    Call close() when done so that the zip file is closed and the
    temporary directory is removed.
    '''

    def __init__(self, filepath):
        '''
        Open and unpack the epub located at filepath.

        A failed verification only prints a warning; processing still
        continues. Any exception raised while opening or unpacking is
        propagated unchanged after the temporary directory and the zip
        handle have been released.
        '''
        self._filepath = filepath
        self._zobject = None
        self._opfpath = None
        self._ncxpath = None
        self._basepath = None
        self._tempdir = tempfile.mkdtemp()

        try:
            if not self._verify():
                # Single-argument call form prints identically on
                # Python 2 and Python 3.
                print('Warning: This does not seem to be a valid epub file')

            self._get_opf()
            self._get_ncx()

            # NOTE(review): these handles are deliberately left open;
            # whether NavMap/EpubInfo read them eagerly is not visible
            # from this file. They are tied to self._zobject, which is
            # closed in close().
            ncxfile = self._zobject.open(self._ncxpath)
            opffile = self._zobject.open(self._opfpath)
            self._navmap = navmap.NavMap(opffile, ncxfile, self._basepath)

            # A fresh handle is opened so EpubInfo starts at the
            # beginning of the OPF stream.
            opffile = self._zobject.open(self._opfpath)
            self._info = epubinfo.EpubInfo(opffile)

            self._unzip()
        except Exception:
            # Do not leak the temporary directory or the open archive
            # when construction fails half-way.
            self.close()
            raise

    def _unzip(self):
        '''
        Extract every archive member below the temporary directory.

        Members are extracted by their ZipInfo record, so entries whose
        stored name starts with a slash are still found in the archive.
        The process working directory is never changed. Entries that
        would resolve outside the temporary directory are skipped.
        '''
        # extractall() was unreliable up to Python 2.7, hence the
        # member-by-member extraction.
        root = os.path.abspath(self._tempdir)
        inside_prefix = root + os.path.sep

        for info in self._zobject.infolist():
            name = info.filename
            # Some weird zip file entries start with a slash, and we
            # don't want to write to the root directory.
            relative = name.lstrip('/' + os.path.sep)
            target = os.path.normpath(os.path.join(root, relative))
            if target != root and not target.startswith(inside_prefix):
                # Path traversal attempt (e.g. '../x'): ignore the entry.
                continue

            if name.endswith(('/', '\\', os.path.sep)):
                # Directory entry; it may already exist because a file
                # inside it was extracted earlier.
                if not os.path.isdir(target):
                    os.makedirs(target)
            else:
                self._zobject.extract(info, root)

    def _get_opf(self):
        '''
        Locate the OPF package file via META-INF/container.xml.

        Sets self._opfpath to the full-path of the rootfile element
        whose media-type is application/oebps-package+xml (the last
        such element wins) and self._basepath to its directory part
        including a trailing slash, or '' when the OPF is at top level.

        Raises AttributeError when no such rootfile is present, because
        self._opfpath is then still None.
        '''
        containerfile = self._zobject.open('META-INF/container.xml')
        try:
            root = etree.parse(containerfile).getroot()
        finally:
            containerfile.close()

        for element in root.iterfind(
                './/{urn:oasis:names:tc:opendocument:xmlns:container}rootfile'):
            if element.get('media-type') == 'application/oebps-package+xml':
                self._opfpath = element.get('full-path')

        parent = self._opfpath.rpartition('/')[0]
        if parent:
            self._basepath = parent + '/'
        else:
            self._basepath = ''

    def _get_ncx(self):
        '''
        Locate the NCX file referenced by the OPF spine.

        Reads the toc attribute of the spine element and sets
        self._ncxpath to self._basepath plus the href of the manifest
        item whose id equals that attribute. self._ncxpath is left
        untouched when no item matches.
        '''
        opffile = self._zobject.open(self._opfpath)
        try:
            root = etree.parse(opffile).getroot()
        finally:
            opffile.close()

        spine = root.find('.//{http://www.idpf.org/2007/opf}spine')
        tocid = spine.get('toc')

        for element in root.iterfind('.//{http://www.idpf.org/2007/opf}item'):
            if element.get('id') == tocid:
                self._ncxpath = self._basepath + element.get('href')

    def _verify(self):
        '''
        Crudely check whether the file looks like an epub.

        Returns False when the path does not exist, when the archive
        has no 'mimetype' member, or when the first line of that member
        does not start with application/epub+zip; True otherwise.

        As a side effect the archive is opened and stored in
        self._zobject (unless the path does not exist).
        '''
        if not os.path.exists(self._filepath):
            return False

        self._zobject = zipfile.ZipFile(self._filepath)

        if 'mimetype' not in self._zobject.namelist():
            return False

        mtypefile = self._zobject.open('mimetype')
        try:
            mimetype = mtypefile.readline()
        finally:
            mtypefile.close()

        # Some files seem to have trailing characters, so only the
        # prefix is compared. The bytes literal equals a plain str on
        # Python 2 and matches the bytes returned on Python 3.
        return mimetype.startswith(b'application/epub+zip')

    def get_toc_model(self):
        '''
        Returns a GtkTreeModel representation of the
        Epub table of contents
        '''
        return self._navmap.get_gtktreestore()

    def get_flattoc(self):
        '''
        Returns a flat (linear) list of files to be
        rendered.
        '''
        return self._navmap.get_flattoc()

    def get_basedir(self):
        '''
        Returns the base directory where the contents of the
        epub has been unzipped
        '''
        return self._tempdir

    def get_info(self):
        '''
        Returns the title attribute of the EpubInfo object
        that was built from the OPF file
        '''
        return self._info.title

    def close(self):
        '''
        Cleans up (closes the open zip file and deletes the
        uncompressed content of the Epub).
        Please call this when a file is being closed or during
        application exit. Calling it more than once is harmless.
        '''
        if self._zobject is not None:
            self._zobject.close()
            self._zobject = None

        if self._tempdir and os.path.isdir(self._tempdir):
            shutil.rmtree(self._tempdir)