# Origin: sugar-read-activity, tag debian/123-1, file epubview/epub.py
# (tree @ debian/123-1; epub.py @ debian/123-1)

# Copyright 2009 One Laptop Per Child
# Author: Sayamindu Dasgupta <sayamindu@laptop.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

import zipfile
import tempfile
import os
import xml.etree.ElementTree as etree
import shutil
import logging

from . import navmap
from . import epubinfo


class _Epub(object):

    def __init__(self, _file):
        """
        _file: can be either a path to a file (a string) or a file-like object.
        """
        self._file = _file
        self._zobject = None
        self._opfpath = None
        self._ncxpath = None
        self._basepath = None
        self._tempdir = tempfile.mkdtemp()

        if not self._verify():
            print('Warning: This does not seem to be a valid epub file')

        self._get_opf()
        self._get_ncx()

        ncxfile = self._zobject.open(self._ncxpath)
        opffile = self._zobject.open(self._opfpath)
        self._navmap = navmap.NavMap(opffile, ncxfile, self._basepath)

        opffile = self._zobject.open(self._opfpath)
        self._info = epubinfo.EpubInfo(opffile)

        self._unzip()

    def _unzip(self):
        # This is broken upto python 2.7
        # self._zobject.extractall(path = self._tempdir)
        orig_cwd = os.getcwd()
        os.chdir(self._tempdir)
        for name in self._zobject.namelist():
            # Some weird zip file entries start with a slash,
            # and we don't want to write to the root directory
            try:
                if name.startswith(os.path.sep):
                    name = name[1:]
                if name.endswith(os.path.sep) or name.endswith('\\'):
                    os.makedirs(name)
            except:
                logging.error('ERROR unziping %s', name)
            else:
                self._zobject.extract(name)
        os.chdir(orig_cwd)

    def _get_opf(self):
        containerfile = self._zobject.open('META-INF/container.xml')

        tree = etree.parse(containerfile)
        root = tree.getroot()

        r_id = './/{urn:oasis:names:tc:opendocument:xmlns:container}rootfile'
        for element in root.iterfind(r_id):
            if element.get('media-type') == 'application/oebps-package+xml':
                self._opfpath = element.get('full-path')

        if self._opfpath.rpartition('/')[0]:
            self._basepath = self._opfpath.rpartition('/')[0] + '/'
        else:
            self._basepath = ''

        containerfile.close()

    def _get_ncx(self):
        opffile = self._zobject.open(self._opfpath)

        tree = etree.parse(opffile)
        root = tree.getroot()

        spine = root.find('.//{http://www.idpf.org/2007/opf}spine')
        tocid = spine.get('toc')

        for element in root.iterfind('.//{http://www.idpf.org/2007/opf}item'):
            if element.get('id') == tocid:
                self._ncxpath = self._basepath + element.get('href')

        opffile.close()

    def _verify(self):
        '''
        Method to crudely check to verify that what we
        are dealing with is a epub file or not
        '''
        if isinstance(self._file, str):
            if not os.path.exists(self._file):
                return False

        self._zobject = zipfile.ZipFile(self._file)

        if 'mimetype' not in self._zobject.namelist():
            return False

        mtypefile = self._zobject.open('mimetype')
        mimetype = mtypefile.readline()

        # Some files seem to have trailing characters
        if not mimetype.startswith(b'application/epub+zip'):
            return False

        return True

    def get_toc_model(self):
        '''
        Returns a GtkTreeModel representation of the
        Epub table of contents
        '''
        return self._navmap.get_gtktreestore()

    def get_flattoc(self):
        '''
        Returns a flat (linear) list of files to be
        rendered.
        '''
        return self._navmap.get_flattoc()

    def get_basedir(self):
        '''
        Returns the base directory where the contents of the
        epub has been unzipped
        '''
        return self._tempdir

    def get_info(self):
        '''
        Returns a EpubInfo object title
        '''
        return self._info.title

    def write(self, file_path):
        '''Create the ZIP archive.
        The mimetype must be the first file in the archive
        and it must not be compressed.'''

        # The EPUB must contain the META-INF and mimetype files at the root, so
        # we'll create the archive in the working directory first
        # and move it later
        current_dir = os.getcwd()
        os.chdir(self._tempdir)

        # Open a new zipfile for writing
        epub = zipfile.ZipFile(file_path, 'w')

        # Add the mimetype file first and set it to be uncompressed
        epub.write('mimetype', compress_type=zipfile.ZIP_STORED)

        # For the remaining paths in the EPUB, add all of their files
        # using normal ZIP compression
        self._scan_dir('.', epub)

        epub.close()
        os.chdir(current_dir)

    def _scan_dir(self, path, epub_file):
        for p in os.listdir(path):
            logging.error('add file %s', p)
            if os.path.isdir(os.path.join(path, p)):
                self._scan_dir(os.path.join(path, p), epub_file)
            else:
                if p != 'mimetype':
                    epub_file.write(
                        os.path.join(path, p),
                        compress_type=zipfile.ZIP_DEFLATED)

    def close(self):
        '''
        Cleans up (closes open zip files and deletes
        uncompressed content of Epub.
        Please call this when a file is being closed or during
        application exit.
        '''
        self._zobject.close()
        shutil.rmtree(self._tempdir)