← Back to team overview

openlp-core team mailing list archive

[Merge] lp:~bastian-germann/openlp/pymupdf into lp:openlp

 

Bastian Germann has proposed merging lp:~bastian-germann/openlp/pymupdf into lp:openlp.

Commit message:
Add PyMuPDF as additional PDF controller and missing mupdf file formats

Requested reviews:
  Tomas Groth (tomasgroth)
  Phill (phill-ridout)

For more details, see:
https://code.launchpad.net/~bastian-germann/openlp/pymupdf/+merge/366835

PyMuPDF itself is licensed under GPLv3+. However, MuPDF is licensed under AGPLv3+. You can argue that calling the MuPDF executable via subprocess does not make OpenLP a derivative work of MuPDF; however, this argument does not hold when MuPDF is used as a library. So if the new code is used, people running OpenLP with PyMuPDF have to comply with AGPLv3+. That means the source code of the MuPDF version that is actually running has to be provided to remote users (if there are any). For the original MuPDF versions this is done by MuPDF's authors. If someone has made changes to MuPDF, these changes would have to be provided to remote users via a network service.

People who have private changes to MuPDF will probably know about their obligations.
As a reminder of this, I used agpl-pdf as the extras keyword for the optional PyMuPDF dependency.
-- 
Your team OpenLP Core is subscribed to branch lp:openlp.
=== modified file 'openlp/plugins/presentations/lib/messagelistener.py'
--- openlp/plugins/presentations/lib/messagelistener.py	2019-04-13 13:00:22 +0000
+++ openlp/plugins/presentations/lib/messagelistener.py	2019-05-02 13:53:57 +0000
@@ -333,7 +333,7 @@
         # the conversion has already been done at this point.
         file_type = file_path.suffix.lower()[1:]
         if file_type in PDF_CONTROLLER_FILETYPES:
-            log.debug('Converting from pdf/xps/oxps to images for serviceitem with file {name}'.format(name=file_path))
+            log.debug('Converting from pdf/xps/oxps/epub/cbz/fb2 to images for serviceitem with file {name}'.format(name=file_path))
             # Create a copy of the original item, and then clear the original item so it can be filled with images
             item_cpy = copy.copy(item)
             item.__init__(None)

=== modified file 'openlp/plugins/presentations/lib/pdfcontroller.py'
--- openlp/plugins/presentations/lib/pdfcontroller.py	2019-04-13 13:00:22 +0000
+++ openlp/plugins/presentations/lib/pdfcontroller.py	2019-05-02 13:53:57 +0000
@@ -34,9 +34,15 @@
 if is_win():
     from subprocess import STARTUPINFO, STARTF_USESHOWWINDOW
 
+try:
+    import fitz
+    PYMUPDF_AVAILABLE = True
+except ImportError:
+    PYMUPDF_AVAILABLE = False
+
 log = logging.getLogger(__name__)
 
-PDF_CONTROLLER_FILETYPES = ['pdf', 'xps', 'oxps']
+PDF_CONTROLLER_FILETYPES = ['pdf', 'xps', 'oxps', 'epub', 'cbz', 'fb2']
 
 
 class PdfController(PresentationController):
@@ -121,6 +127,9 @@
                 self.mudrawbin = program_path
             elif program_type == 'mutool':
                 self.mutoolbin = program_path
+        elif PYMUPDF_AVAILABLE:
+            self.also_supports = ['xps', 'oxps', 'epub', 'cbz', 'fb2']
+            return True
         else:
             # Fallback to autodetection
             application_path = AppLocation.get_directory(AppLocation.AppDir)
@@ -147,12 +156,11 @@
                     elif (application_path / 'mutool').is_file():
                         self.mutoolbin = application_path / 'mutool'
         if self.mudrawbin or self.mutoolbin:
-            self.also_supports = ['xps', 'oxps']
+            self.also_supports = ['xps', 'oxps', 'epub', 'cbz', 'fb2']
             return True
         elif self.gsbin:
             return True
-        else:
-            return False
+        return False
 
     def kill(self):
         """
@@ -276,6 +284,16 @@
                                        '-r{res}'.format(res=resolution), '-dTextAlphaBits=4', '-dGraphicsAlphaBits=4',
                                        '-sOutputFile={output}'.format(output=temp_dir_path / 'mainslide%03d.png'),
                                        str(self.file_path)], startupinfo=self.startupinfo)
+            elif PYMUPDF_AVAILABLE:
+                log.debug('loading presentation using PyMuPDF')
+                pdf = fitz.open(str(self.file_path))
+                for i, page in enumerate(pdf, start=1):
+                    src_size = page.bound().round()
+                    # keep aspect ratio
+                    scale = min(size.width() / src_size.width, size.height() / src_size.height)
+                    m = fitz.Matrix(scale, scale)
+                    page.getPixmap(m, alpha=False).writeImage(str(temp_dir_path / 'mainslide{:03d}.png'.format(i)))
+                pdf.close()
             created_files = sorted(temp_dir_path.glob('*'))
             for image_path in created_files:
                 if image_path.is_file():

=== modified file 'scripts/appveyor.yml'
--- scripts/appveyor.yml	2019-04-02 00:05:46 +0000
+++ scripts/appveyor.yml	2019-05-02 13:53:57 +0000
@@ -16,11 +16,7 @@
 
 install:
   # Install dependencies from pypi
-  - "%PYTHON%\\python.exe -m pip install sqlalchemy alembic appdirs chardet beautifulsoup4 lxml Mako mysql-connector-python pytest mock pyodbc psycopg2 pypiwin32 websockets asyncio waitress six webob requests QtAwesome PyQt5 PyQtWebEngine pymediainfo"
-  # Download and unpack mupdf
-  - appveyor DownloadFile https://mupdf.com/downloads/archive/mupdf-1.14.0-windows.zip
-  - 7z x mupdf-1.14.0-windows.zip
-  - cp mupdf-1.14.0-windows/mutool.exe openlp-branch/mutool.exe
+  - "%PYTHON%\\python.exe -m pip install sqlalchemy alembic appdirs chardet beautifulsoup4 lxml Mako mysql-connector-python pytest mock pyodbc psycopg2 pypiwin32 websockets asyncio waitress six webob requests QtAwesome PyQt5 PyQtWebEngine pymediainfo PyMuPDF"
 
 build: off
 

=== modified file 'setup.py'
--- setup.py	2019-04-13 13:00:22 +0000
+++ setup.py	2019-05-02 13:53:57 +0000
@@ -187,6 +187,7 @@
         'websockets'
     ],
     extras_require={
+        'agpl-pdf': ['PyMuPDF'],
         'darkstyle': ['QDarkStyle'],
         'mysql': ['mysql-connector-python'],
         'odbc': ['pyodbc'],
@@ -200,6 +201,7 @@
     tests_require=[
         'nose2',
         'pylint',
+        'PyMuPDF',
         'pyodbc',
         'pysword',
         'python-xlib; platform_system=="Linux"'

=== modified file 'tests/functional/openlp_plugins/presentations/test_mediaitem.py'
--- tests/functional/openlp_plugins/presentations/test_mediaitem.py	2019-04-13 13:00:22 +0000
+++ tests/functional/openlp_plugins/presentations/test_mediaitem.py	2019-05-02 13:53:57 +0000
@@ -65,7 +65,7 @@
         pdf_controller = MagicMock()
         pdf_controller.enabled.return_value = True
         pdf_controller.supports = ['pdf']
-        pdf_controller.also_supports = ['xps', 'oxps']
+        pdf_controller.also_supports = ['xps', 'oxps', 'epub', 'cbz', 'fb2']
         # Mock the controllers.
         self.media_item.controllers = {
             'Impress': impress_controller,
@@ -85,6 +85,9 @@
         assert '*.pdf' in self.media_item.on_new_file_masks, 'The file mask should contain the pdf extension'
         assert '*.xps' in self.media_item.on_new_file_masks, 'The file mask should contain the xps extension'
         assert '*.oxps' in self.media_item.on_new_file_masks, 'The file mask should contain the oxps extension'
+        assert '*.epub' in self.media_item.on_new_file_masks, 'The file mask should contain the epub extension'
+        assert '*.cbz' in self.media_item.on_new_file_masks, 'The file mask should contain the cbz extension'
+        assert '*.fb2' in self.media_item.on_new_file_masks, 'The file mask should contain the fb2 extension'
 
     def test_clean_up_thumbnails(self):
         """

=== modified file 'tests/functional/openlp_plugins/presentations/test_pdfcontroller.py'
--- tests/functional/openlp_plugins/presentations/test_pdfcontroller.py	2019-04-13 13:00:22 +0000
+++ tests/functional/openlp_plugins/presentations/test_pdfcontroller.py	2019-05-02 13:53:57 +0000
@@ -23,6 +23,7 @@
 This module contains tests for the PdfController
 """
 import os
+from shutil import which
 from tempfile import mkdtemp
 from unittest import SkipTest, TestCase
 from unittest.mock import MagicMock, patch
@@ -39,7 +40,8 @@
 
 
 __default_settings__ = {
-    'presentations/enable_pdf_program': False,
+    'presentations/enable_pdf_program': True,
+    'presentations/pdf_program': None,
     'presentations/thumbnail_scheme': ''
 }
 
@@ -113,17 +115,16 @@
         # THEN: The name of the presentation controller should be correct
         assert 'Pdf' == controller.name, 'The name of the presentation controller should be correct'
 
-    def test_load_pdf(self):
+    def load_pdf(self, exe_path):
         """
-        Test loading of a Pdf using the PdfController
+        Test loading a Pdf using the PdfController
         """
         # GIVEN: A Pdf-file
         test_file_path = RESOURCE_PATH / 'presentations' / 'pdf_test1.pdf'
 
         # WHEN: The Pdf is loaded
+        Settings().setValue('presentations/pdf_program', exe_path)
         controller = PdfController(plugin=self.mock_plugin)
-        if not controller.check_available():
-            raise SkipTest('Could not detect mudraw or ghostscript, so skipping PDF test')
         controller.temp_folder = self.temp_folder_path
         controller.thumbnail_folder = self.thumbnail_folder_path
         document = PdfDocument(controller, test_file_path)
@@ -133,23 +134,22 @@
         assert loaded is True, 'The loading of the PDF should succeed.'
         assert 3 == document.get_slide_count(), 'The pagecount of the PDF should be 3.'
 
-    def test_load_pdf_pictures(self):
+    def load_pdf_pictures(self, exe_path):
         """
-        Test loading of a Pdf and check size of generate pictures
+        Test loading a Pdf and check the generated pictures' size
         """
         # GIVEN: A Pdf-file
         test_file_path = RESOURCE_PATH / 'presentations' / 'pdf_test1.pdf'
 
         # WHEN: The Pdf is loaded
+        Settings().setValue('presentations/pdf_program', exe_path)
         controller = PdfController(plugin=self.mock_plugin)
-        if not controller.check_available():
-            raise SkipTest('Could not detect mudraw or ghostscript, so skipping PDF test')
         controller.temp_folder = self.temp_folder_path
         controller.thumbnail_folder = self.thumbnail_folder_path
         document = PdfDocument(controller, test_file_path)
         loaded = document.load_presentation()
 
-        # THEN: The load should succeed and pictures should be created and have been scales to fit the screen
+        # THEN: The load should succeed and pictures should be created and have been scaled to fit the screen
         assert loaded is True, 'The loading of the PDF should succeed.'
         image = QtGui.QImage(os.path.join(str(self.temp_folder_path), 'pdf_test1.pdf', 'mainslide001.png'))
         # Based on the converter used the resolution will differ a bit
@@ -163,6 +163,19 @@
             assert image.height() == height, 'The height should be {height}'.format(height=height)
             assert image.width() == width, 'The width should be {width}'.format(width=width)
 
+    def test_load_pdf(self):
+        """
+        Test loading a Pdf with each of the installed backends
+        """
+        for exe_name in ['gs', 'mutool', 'mudraw']:
+            exe_path = which(exe_name)
+            if exe_path:
+                self.load_pdf(exe_path)
+                self.load_pdf_pictures(exe_path)
+        # PyMuPDF
+        self.load_pdf(None)
+        self.load_pdf_pictures(None)
+
     @patch('openlp.plugins.presentations.lib.pdfcontroller.check_binary_exists')
     def test_process_check_binary_mudraw(self, mocked_check_binary_exists):
         """


Follow ups