harvest-dev team mailing list archive
-
harvest-dev team
-
Mailing list archive
-
Message #00485
[Merge] lp:~dholbach/harvest/581732 into lp:harvest
Daniel Holbach has proposed merging lp:~dholbach/harvest/581732 into lp:harvest.
Requested reviews:
harvest-dev (harvest-dev)
--
https://code.launchpad.net/~dholbach/harvest/581732/+merge/40949
Your team harvest-dev is requested to review the proposed merge of lp:~dholbach/harvest/581732 into lp:harvest.
=== modified file 'harvest/common/opportunity_lists.py'
--- harvest/common/opportunity_lists.py 2010-10-15 11:54:34 +0000
+++ harvest/common/opportunity_lists.py 2010-11-16 12:13:02 +0000
@@ -5,6 +5,37 @@
import csv
import os
+def read_csv(url, sock):
+ if url.endswith(".csv.gz"):
+ import StringIO
+ import gzip
+ data = gzip.GzipFile(fileobj=StringIO.StringIO(sock.read())).readlines()
+ else:
+ data = sock.readlines()
+ lines = filter(lambda a: a.strip()!="", data)
+ return [map(unicode, [a for a in l]) for l in csv.reader(lines)]
+
+def read_json(url, sock):
+ import json
+ if url.endswith(".json.gz"):
+ import StringIO
+ import gzip
+ data = gzip.GzipFile(fileobj=StringIO.StringIO(sock.read())).read()
+ else:
+ data = sock.read()
+ entries = json.loads(data)
+ return entries
+
+def convert_from_csv(entries):
+ # gitweb,http://launchpad.net/bugs/675294,675294
+ # {"source_package": "xine-lib", "short_description": 123456, "link": "https://launchpad.net/bugs/123456"}
+ data = []
+ for entry in entries:
+ data += [{"source_package": entry[0],
+ "link": entry[1],
+ "short_description": entry[2]}]
+ return data
+
def read_entries(url, last_updated):
import time
import datetime
@@ -18,6 +49,7 @@
return [None, None]
datetime_lm = None
+ data = None
if sock.info().has_key("Last-Modified") and sock.info()["Last-Modified"]:
lm_string = sock.info()["Last-Modified"]
if lm_string:
@@ -28,15 +60,12 @@
sock.close()
return [None,None]
- if url.endswith(".csv.gz"):
- import StringIO
- import gzip
- data = gzip.GzipFile(fileobj=StringIO.StringIO(sock.read())).readlines()
- else:
- data = sock.readlines()
- lines = filter(lambda a: a.strip()!="", data)
+ if url.endswith(".csv") or url.endswith(".csv.gz"):
+ data = convert_from_csv(read_csv(url, sock))
+ if url.endswith(".json") or url.endswith(".json.gz"):
+ data = read_json(url, sock)
sock.close()
- return ([map(unicode, [a for a in l]) for l in csv.reader(lines)], datetime_lm)
+ return (data, datetime_lm)
def unify_list_entry(entry):
for e in entry:
=== modified file 'harvest/opportunities/management/commands/updatelists.py'
--- harvest/opportunities/management/commands/updatelists.py 2010-11-02 15:51:05 +0000
+++ harvest/opportunities/management/commands/updatelists.py 2010-11-16 12:13:02 +0000
@@ -41,12 +41,9 @@
def chop_name(self, list_url):
filename = os.path.basename(list_url)
- if filename.endswith(".csv"):
- return filename.split(".csv")[0]
- if filename.endswith(".csv.gz"):
- return filename.split(".csv.gz")[0]
- if filename.endswith(".cgi"):
- return filename.split(".cgi")[0]
+ for extension in [ ".csv", ".csv.gz", ".cgi", ".json", ".json.gz" ]:
+ if filename.endswith(extension):
+ return filename.split(extension)[0]
return filename
def handle_noargs(self, **options):
=== modified file 'harvest/opportunities/management/commands/updateopportunities.py'
--- harvest/opportunities/management/commands/updateopportunities.py 2010-10-11 14:45:14 +0000
+++ harvest/opportunities/management/commands/updateopportunities.py 2010-11-16 12:13:02 +0000
@@ -22,15 +22,15 @@
if entries:
logger.debug("Has entries and was updated at %s." % new_date)
op_list.last_updated = new_date
- for (sourcepackage, url, description) in filter(lambda a: len(a)==3, entries):
- sp, created = models.SourcePackage.objects.get_or_create(name=sourcepackage)
+ for entry in entries:
+ sp, created = models.SourcePackage.objects.get_or_create(name=entry["source_package"])
try:
- opportunity = models.Opportunity.objects.get(description=description,
- url=url, sourcepackage=sp,
+ opportunity = models.Opportunity.objects.get(description=entry["short_description"],
+ url=entry["link"], sourcepackage=sp,
opportunitylist=op_list)
except models.Opportunity.DoesNotExist:
- opportunity = models.Opportunity(description=description,
- url=url, sourcepackage=sp,
+ opportunity = models.Opportunity(description=entry["short_description"],
+ url=entry["link"], sourcepackage=sp,
since=op_list.last_updated,
opportunitylist=op_list,
experience=op_list.experience)