← Back to team overview

zeitgeist team mailing list archive

[Merge] lp:~thekorn/zeitgeist/fix-586524-mimetypes-api into lp:zeitgeist

 

Markus Korn has proposed merging lp:~thekorn/zeitgeist/fix-586524-mimetypes-api into lp:zeitgeist.

Requested reviews:
  Zeitgeist Framework Team (zeitgeist)
Related bugs:
  #586524 Add mimetype and interpretation helper utils to public API
  https://bugs.launchpad.net/bugs/586524


This branch adds a zeitgeist.mimetypes module as a fix for bug 586524.
I tried to follow the logic of the existing code in libzeitgeist as closely as possible; the test cases also perform exactly the same checks.
See my proposed commit message for more information.

I added the RegExpr helper class to the module to make it easier for Michal to write a tool that generates the corresponding libzeitgeist code.
-- 
https://code.launchpad.net/~thekorn/zeitgeist/fix-586524-mimetypes-api/+merge/34397
Your team Zeitgeist Framework Team is requested to review the proposed merge of lp:~thekorn/zeitgeist/fix-586524-mimetypes-api into lp:zeitgeist.
=== added file 'test/mimetypes-test.py'
--- test/mimetypes-test.py	1970-01-01 00:00:00 +0000
+++ test/mimetypes-test.py	2010-09-02 10:41:42 +0000
@@ -0,0 +1,47 @@
+#!/usr/bin/python
+# -.- coding: utf-8 -.-
+
+# Update python path to use local zeitgeist module
+import sys
+import os
+import unittest
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+from zeitgeist.mimetypes import interpretation_for_mimetype, manifestation_for_uri
+from zeitgeist.datamodel import Interpretation, Manifestation
+
+
+class MimetypesTest(unittest.TestCase):
+    
+    def test_textplain(self):
+        self.assertEquals(
+            Interpretation.TEXT_DOCUMENT, interpretation_for_mimetype("text/plain")
+        )
+    
+    def test_mime_none(self):
+        self.assertEquals(None, interpretation_for_mimetype("boobarbaz"))
+    
+    def test_mime_regex(self):
+        self.assertEquals(
+            Interpretation.DOCUMENT,
+            interpretation_for_mimetype("application/x-applix-FOOOOBAR!")
+        )
+        self.assertEquals(
+            Interpretation.SPREADSHEET,
+            interpretation_for_mimetype("application/x-applix-spreadsheet")
+        )
+        
+class SchemeTest(unittest.TestCase):
+    
+    def test_scheme_file(self):
+        self.assertEquals(
+            Manifestation.FILE_DATA_OBJECT,
+            manifestation_for_uri("file:///tmp/foo.txt")
+        )
+        
+    def test_scheme_none(self):
+        self.assertEquals(None, manifestation_for_uri("boo:///tmp/foo.txt"))
+        
+	
+if __name__ == '__main__':
+	unittest.main()

=== modified file 'zeitgeist/Makefile.am'
--- zeitgeist/Makefile.am	2009-11-27 20:32:54 +0000
+++ zeitgeist/Makefile.am	2010-09-02 10:41:42 +0000
@@ -3,7 +3,8 @@
 app_PYTHON = \
 	__init__.py \
 	datamodel.py \
-	client.py
+	client.py \
+	mimetypes.py
 
 nodist_app_PYTHON = _config.py
 

=== added file 'zeitgeist/mimetypes.py'
--- zeitgeist/mimetypes.py	1970-01-01 00:00:00 +0000
+++ zeitgeist/mimetypes.py	2010-09-02 10:41:42 +0000
@@ -0,0 +1,200 @@
+# -.- coding: utf-8 -.-
+
+# Zeitgeist
+#
+# Copyright © 2010 Markus Korn <thekorn@xxxxxx>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+import re
+
+from datamodel import Interpretation, Manifestation
+
+__all__ = [
+    "interpretation_for_mimetype",
+    "manifestation_for_uri",
+]
+
+class RegExpr(object):
+    """ Helper class which holds a compiled regular expression
+    and its pattern."""
+    
+    def __init__(self, pattern):
+        self.pattern = pattern
+        self.regex = re.compile(self.pattern)
+        
+    def __str__(self):
+        return self.pattern
+        
+    def __getattr__(self, name):
+        return getattr(self.regex, name)
+        
+        
+def make_regex_tuple(*items):
+    return tuple((RegExpr(k), v) for k, v in items)
+
+def interpretation_for_mimetype(mimetype):
+    """ get interpretation for a given mimetype, returns :const:`None`
+    if none of the predefined interpretations matches
+    """
+    interpretation = MIMES.get(mimetype, None)
+    if interpretation is not None:
+        return interpretation
+    for pattern, interpretation in MIMES_REGEX:
+        if pattern.match(mimetype):
+            return interpretation
+    return None
+    
+def manifestation_for_uri(uri):
+    """ Lookup Manifestation for a given uri based on the scheme part,
+    returns :const:`None` if no suitable manifestation is found
+    """
+    for scheme, manifestation in SCHEMES:
+        if uri.startswith(scheme):
+            return manifestation
+    return None
+    
+    
+MIMES = {
+    # x-applix-*
+    "application/x-applix-word": Interpretation.PAGINATED_TEXT_DOCUMENT,
+    "application/x-applix-spreadsheet": Interpretation.SPREADSHEET,
+    "application/x-applix-presents": Interpretation.PRESENTATION,
+    # x-kword, x-kspread, x-kpresenter, x-killustrator
+    "application/x-kword": Interpretation.PAGINATED_TEXT_DOCUMENT,
+    "application/x-kspread": Interpretation.SPREADSHEET,
+    "application/x-kpresenter": Interpretation.PRESENTATION,
+    "application/x-killustrator": Interpretation.VECTOR_IMAGE,
+    # MS
+    "application/ms-powerpoint": Interpretation.PRESENTATION,
+    "application/vnd.ms-powerpoint": Interpretation.PRESENTATION,
+    "application/msword": Interpretation.PAGINATED_TEXT_DOCUMENT,
+    "application/msexcel": Interpretation.SPREADSHEET,
+    "application/ms-excel": Interpretation.SPREADSHEET,
+    "application/vnd.ms-excel": Interpretation.SPREADSHEET,
+    # pdf, postscript et al
+    "application/pdf": Interpretation.PAGINATED_TEXT_DOCUMENT,
+    "application/postscript": Interpretation.PAGINATED_TEXT_DOCUMENT,
+    "application/ps": Interpretation.PAGINATED_TEXT_DOCUMENT,
+    "application/rtf": Interpretation.PAGINATED_TEXT_DOCUMENT,
+
+    # Gnome office
+    "application/x-abiword": Interpretation.PAGINATED_TEXT_DOCUMENT,
+    "application/x-gnucash": Interpretation.SPREADSHEET,
+    "application/x-gnumeric": Interpretation.SPREADSHEET,
+
+    # TeX stuff
+    "text/x-tex": Interpretation.SOURCE_CODE,
+    "text/x-latex": Interpretation.SOURCE_CODE,
+
+    # Plain text
+    "text/plain": Interpretation.TEXT_DOCUMENT,
+  
+    # HTML files on disk are always HTML_DOCUMENTS while online we should
+    # assume them to be WEBSITEs. By default we anticipate local files...
+    "text/html": Interpretation.HTML_DOCUMENT,
+
+    # Image types
+    "application/vnd.corel-draw": Interpretation.VECTOR_IMAGE,
+    "image/jpeg": Interpretation.RASTER_IMAGE,
+    "image/png": Interpretation.RASTER_IMAGE,
+    "image/tiff": Interpretation.RASTER_IMAGE,
+    "image/gif": Interpretation.RASTER_IMAGE,
+    "image/x-xcf": Interpretation.RASTER_IMAGE,
+    "image/svg+xml": Interpretation.VECTOR_IMAGE,
+    
+    # Audio
+    "application/ogg": Interpretation.AUDIO,
+    "audio/x-scpls": Interpretation.MEDIA_LIST,
+
+    # Development files
+    "application/ecmascript": Interpretation.SOURCE_CODE,
+    "application/javascript": Interpretation.SOURCE_CODE,
+    "application/x-csh": Interpretation.SOURCE_CODE,
+    "application/x-designer": Interpretation.SOURCE_CODE,
+    "application/x-desktop": Interpretation.SOURCE_CODE,
+    "application/x-dia-diagram": Interpretation.SOURCE_CODE,
+    "application/x-fluid": Interpretation.SOURCE_CODE,
+    "application/x-glade": Interpretation.SOURCE_CODE,
+    "application/xhtml+xml": Interpretation.SOURCE_CODE,
+    "application/x-java-archive": Interpretation.SOURCE_CODE,
+    "application/x-m4": Interpretation.SOURCE_CODE,
+    "application/xml": Interpretation.SOURCE_CODE,
+    "application/x-object": Interpretation.SOURCE_CODE,
+    "application/x-perl": Interpretation.SOURCE_CODE,
+    "application/x-php": Interpretation.SOURCE_CODE,
+    "application/x-ruby": Interpretation.SOURCE_CODE,
+    "application/x-shellscript": Interpretation.SOURCE_CODE,
+    "application/x-sql": Interpretation.SOURCE_CODE,
+    "text/css": Interpretation.SOURCE_CODE,
+    "text/x-c": Interpretation.SOURCE_CODE,
+    "text/x-c++": Interpretation.SOURCE_CODE,
+    "text/x-chdr": Interpretation.SOURCE_CODE,
+    "text/x-copying": Interpretation.SOURCE_CODE,
+    "text/x-credits": Interpretation.SOURCE_CODE,
+    "text/x-csharp": Interpretation.SOURCE_CODE,
+    "text/x-c++src": Interpretation.SOURCE_CODE,
+    "text/x-csrc": Interpretation.SOURCE_CODE,
+    "text/x-dsrc": Interpretation.SOURCE_CODE,
+    "text/x-eiffel": Interpretation.SOURCE_CODE,
+    "text/x-gettext-translation": Interpretation.SOURCE_CODE,
+    "text/x-gettext-translation-template": Interpretation.SOURCE_CODE,
+    "text/x-haskell": Interpretation.SOURCE_CODE,
+    "text/x-idl": Interpretation.SOURCE_CODE,
+    "text/x-java": Interpretation.SOURCE_CODE,
+    "text/x-lisp": Interpretation.SOURCE_CODE,
+    "text/x-lua": Interpretation.SOURCE_CODE,
+    "text/x-makefile": Interpretation.SOURCE_CODE,
+    "text/x-objcsrc": Interpretation.SOURCE_CODE,
+    "text/x-ocaml": Interpretation.SOURCE_CODE,
+    "text/x-pascal": Interpretation.SOURCE_CODE,
+    "text/x-patch": Interpretation.SOURCE_CODE,
+    "text/x-python": Interpretation.SOURCE_CODE,
+    "text/x-sql": Interpretation.SOURCE_CODE,
+    "text/x-tcl": Interpretation.SOURCE_CODE,
+    "text/x-troff": Interpretation.SOURCE_CODE,
+    "text/x-vala": Interpretation.SOURCE_CODE,
+    "text/x-vhdl": Interpretation.SOURCE_CODE,
+    "text/x-m4": Interpretation.SOURCE_CODE,
+}
+
+MIMES_REGEX = make_regex_tuple(
+    # Star Office and OO.org
+    ("application/vnd.oasis.opendocument.text.*", Interpretation.PAGINATED_TEXT_DOCUMENT),
+    ("application/vnd.oasis.opendocument.presentation.*", Interpretation.PRESENTATION),
+    ("application/vnd.oasis.opendocument.spreadsheet.*", Interpretation.SPREADSHEET),
+    ("application/vnd.oasis.opendocument.graphics.*", Interpretation.VECTOR_IMAGE),
+    ("application/vnd\\..*", Interpretation.DOCUMENT),
+    # x-applix-*
+    ("application/x-applix-.*", Interpretation.DOCUMENT),
+    # MS
+    ("application/vnd.ms-excel.*", Interpretation.SPREADSHEET),
+    ("application/vnd.ms-powerpoint.*", Interpretation.PRESENTATION),
+    # TeX stuff
+    (".*/x-dvi", Interpretation. PAGINATED_TEXT_DOCUMENT),
+    # Image types
+    ("image/.*", Interpretation.IMAGE),
+    # Audio
+    ("audio/.*", Interpretation.AUDIO),
+    # Video
+    ("video/.*", Interpretation.VIDEO),
+)
+
+SCHEMES = tuple((
+    ("file://", Manifestation.FILE_DATA_OBJECT),
+    ("http://", Manifestation.FILE_DATA_OBJECT.REMOTE_DATA_OBJECT),
+    ("https://", Manifestation.FILE_DATA_OBJECT.REMOTE_DATA_OBJECT),
+    ("ssh://", Manifestation.FILE_DATA_OBJECT.REMOTE_DATA_OBJECT),
+    ("sftp://", Manifestation.FILE_DATA_OBJECT.REMOTE_DATA_OBJECT),
+))


Follow ups