← Back to team overview

harvest-dev team mailing list archive

[Merge] lp:~dholbach/harvest/581732 into lp:harvest

 

Daniel Holbach has proposed merging lp:~dholbach/harvest/581732 into lp:harvest.

Requested reviews:
  harvest-dev (harvest-dev)

-- 
https://code.launchpad.net/~dholbach/harvest/581732/+merge/40949
Your team harvest-dev is requested to review the proposed merge of lp:~dholbach/harvest/581732 into lp:harvest.
=== modified file 'harvest/common/opportunity_lists.py'
--- harvest/common/opportunity_lists.py	2010-10-15 11:54:34 +0000
+++ harvest/common/opportunity_lists.py	2010-11-16 12:13:02 +0000
@@ -5,6 +5,37 @@
 import csv
 import os
 
+def read_csv(url, sock):
+    if url.endswith(".csv.gz"):
+        import StringIO
+        import gzip
+        data = gzip.GzipFile(fileobj=StringIO.StringIO(sock.read())).readlines()
+    else:
+        data = sock.readlines()
+    lines = filter(lambda a: a.strip()!="", data)
+    return [map(unicode, [a for a in l]) for l in csv.reader(lines)]
+
+def read_json(url, sock):
+    import json
+    if url.endswith(".json.gz"):
+        import StringIO
+        import gzip
+        data = gzip.GzipFile(fileobj=StringIO.StringIO(sock.read())).read()
+    else:
+        data = sock.read()
+    entries = json.loads(data)
+    return entries
+
+def convert_from_csv(entries):
+    # Input row format (CSV), e.g.: gitweb,http://launchpad.net/bugs/675294,675294
+    # Output entry format (dict), e.g.: {"source_package": "xine-lib", "short_description": 123456, "link": "https://launchpad.net/bugs/123456"}
+    data = []
+    for entry in entries:
+        data += [{"source_package": entry[0],
+                  "link": entry[1],
+                  "short_description": entry[2]}]
+    return data
+
 def read_entries(url, last_updated):
     import time
     import datetime
@@ -18,6 +49,7 @@
         return [None, None]
 
     datetime_lm = None
+    data = None
     if sock.info().has_key("Last-Modified") and sock.info()["Last-Modified"]:
         lm_string = sock.info()["Last-Modified"]
         if lm_string:
@@ -28,15 +60,12 @@
         sock.close()
         return [None,None]
 
-    if url.endswith(".csv.gz"):
-        import StringIO
-        import gzip
-        data = gzip.GzipFile(fileobj=StringIO.StringIO(sock.read())).readlines()
-    else:
-        data = sock.readlines()
-    lines = filter(lambda a: a.strip()!="", data)
+    if url.endswith(".csv") or url.endswith(".csv.gz"):
+        data = convert_from_csv(read_csv(url, sock))
+    if url.endswith(".json") or url.endswith(".json.gz"):
+        data = read_json(url, sock)
     sock.close()
-    return ([map(unicode, [a for a in l]) for l in csv.reader(lines)], datetime_lm)
+    return (data, datetime_lm)
 
 def unify_list_entry(entry):
     for e in entry:

=== modified file 'harvest/opportunities/management/commands/updatelists.py'
--- harvest/opportunities/management/commands/updatelists.py	2010-11-02 15:51:05 +0000
+++ harvest/opportunities/management/commands/updatelists.py	2010-11-16 12:13:02 +0000
@@ -41,12 +41,9 @@
 
     def chop_name(self, list_url):
         filename = os.path.basename(list_url)
-        if filename.endswith(".csv"):
-            return filename.split(".csv")[0]
-        if filename.endswith(".csv.gz"):
-            return filename.split(".csv.gz")[0]
-        if filename.endswith(".cgi"):
-            return filename.split(".cgi")[0]
+        for extension in [ ".csv", ".csv.gz", ".cgi", ".json", ".json.gz" ]:
+            if filename.endswith(extension):
+                return filename.split(extension)[0]
         return filename
 
     def handle_noargs(self, **options):

=== modified file 'harvest/opportunities/management/commands/updateopportunities.py'
--- harvest/opportunities/management/commands/updateopportunities.py	2010-10-11 14:45:14 +0000
+++ harvest/opportunities/management/commands/updateopportunities.py	2010-11-16 12:13:02 +0000
@@ -22,15 +22,15 @@
             if entries:
                 logger.debug("Has entries and was updated at %s." % new_date)
                 op_list.last_updated = new_date
-                for (sourcepackage, url, description) in filter(lambda a: len(a)==3, entries):
-                    sp, created = models.SourcePackage.objects.get_or_create(name=sourcepackage)
+                for entry in entries:
+                    sp, created = models.SourcePackage.objects.get_or_create(name=entry["source_package"])
                     try:
-                        opportunity = models.Opportunity.objects.get(description=description,
-                                                          url=url, sourcepackage=sp,
+                        opportunity = models.Opportunity.objects.get(description=entry["short_description"],
+                                                          url=entry["link"], sourcepackage=sp,
                                                           opportunitylist=op_list)
                     except models.Opportunity.DoesNotExist:
-                        opportunity = models.Opportunity(description=description,
-                                                         url=url, sourcepackage=sp,
+                        opportunity = models.Opportunity(description=entry["short_description"],
+                                                         url=entry["link"], sourcepackage=sp,
                                                          since=op_list.last_updated,
                                                          opportunitylist=op_list,
                                                          experience=op_list.experience)