team4alfanous team mailing list archive

Thread
Date

[Branch ~team4alfanous/alfanous/alfanous-git] Rev 244: add annotations to output system:

To: Alfanous team <team4alfanous@xxxxxxxxxxxxxxxxxxx>
From: noreply@xxxxxxxxxxxxx
Date: Sat, 09 Jun 2012 16:35:17 -0000
Reply-to: noreply@xxxxxxxxxxxxx
Sender: bounces@xxxxxxxxxxxxx

------------------------------------------------------------
revno: 244
git commit: c19eade8c6d426e0854464b21b2008cf70816abc
committer: Assem Chelli <assem.ch@xxxxxxxxx>
timestamp: Sat 2012-06-09 02:00:33 +0100
message:
  add annotations to output system:
   * annotations of each term of the search query :: annotations_by_word
   * annotations of each word of each aya text in the results ::
     annotations_by_position
modified:
  src/alfanous/outputs.py


--
lp:~team4alfanous/alfanous/alfanous-git
https://code.launchpad.net/~team4alfanous/alfanous/alfanous-git

Your team Alfanous team is subscribed to branch lp:~team4alfanous/alfanous/alfanous-git.
To unsubscribe from this branch go to https://code.launchpad.net/~team4alfanous/alfanous/alfanous-git/+edit-subscription

=== modified file 'src/alfanous/outputs.py'
--- src/alfanous/outputs.py	2012-06-08 03:49:33 +0000
+++ src/alfanous/outputs.py	2012-06-09 01:00:33 +0000
@@ -49,8 +49,9 @@
 
 	DEFAULTS = {
 		    "maxrange":25,
-		    "results_limit":100,
-		    "flags":{ "action":"error",
+		    "results_limit":6236,
+		    "flags":{
+				  "action":"error",
 			      "ident":"undefined",
 			      "platform":"undefined",
 			      "domain":"undefined",
@@ -307,6 +308,7 @@
 		SE = self.FQSE if fuzzy else self.QSE
 		res, termz = SE.search_all( unicode( query.replace( "\\", "" ), 'utf8' ) , self._defaults["results_limit"], sortedby = sortedby )
 		terms = [term[1] for term in list( termz )] # TODO: I dont like this termz structure , must change it
+		terms_uthmani = map( STANDARD2UTHMANI, terms )
 		#pagination
 		offset = 1 if offset < 1 else offset;
 		range = self._defaults["maxrange"] if range > self._defaults["maxrange"] else range;
@@ -336,7 +338,6 @@
 		##########################################
 		extend_runtime = res.runtime
 		# Words & Annotations
-
 		words_output = {}
 		if word_info:
 			matches = 0
@@ -348,7 +349,7 @@
 					if term[2]:
 						matches += term[2]
 					docs += term[3]
-					annotation_word_query += u" OR normalised:%s " % STANDARD2UTHMANI( term[1] )
+					annotation_word_query += u" OR normalized:%s " % STANDARD2UTHMANI( term[1] )
 					words_output[ cpt ] = {"word":term[1], "nb_matches":term[2], "nb_ayas":term[3], "vocalizations": vocalization_dict[term[1]]}
 					cpt += 1
 			annotation_word_query += u" ) "
@@ -360,14 +361,15 @@
 			adja_query = trad_query = annotation_aya_query = u"( 0"
 
 			for r in reslist :
-				if prev_aya: adja_query += " OR gid:%s " % unicode( r["gid"] - 1 )
-				if next_aya: adja_query += " OR gid:%s " % unicode( r["gid"] + 1 )
-				if translation: trad_query += " OR gid:%s " % unicode( r["gid"] )
-				if annotation_aya: annotation_aya_query += " OR ( aya_id:%s  AND  sura_id:%d ) " % ( unicode( r["aya_id"] ), unicode( r["sura_id"] ) )
-
-			adja_query += " )"
-			trad_query += " )" + u" AND id:%s " % unicode( translation )
-			annotation_aya_query += " )"
+				if prev_aya: adja_query += u" OR gid:%s " % unicode( r["gid"] - 1 )
+				if next_aya: adja_query += u" OR gid:%s " % unicode( r["gid"] + 1 )
+				if translation: trad_query += u" OR gid:%s " % unicode( r["gid"] )
+				if annotation_aya: annotation_aya_query += u" OR  ( aya_id:%s AND  sura_id:%s ) " % ( unicode( r["aya_id"] ) , unicode( r["sura_id"] ) )
+
+			adja_query += u" )"
+			trad_query += u" )" + u" AND id:%s " % unicode( translation )
+			annotation_aya_query += u" )"
+
 
 		# Adjacents 
 		if prev_aya or next_aya:
@@ -387,14 +389,37 @@
 
 		#annotations for aya words
 		if annotation_aya or ( annotation_word and word_info ) :
-			annotation_word_query = annotation_word_query if annotation_word and word_info else "()"
-			annotation_aya_query = annotation_aya_query if annotation_aya else "()"
-			annotation_query = annotation_aya_query + " 0R " + annotation_word_query
-			#print annotation_query.encode("utf-8")
+			annotation_word_query = annotation_word_query if annotation_word and word_info else u"()"
+			annotation_aya_query = annotation_aya_query if annotation_aya else u"()"
+			annotation_query = annotation_aya_query + u" OR  " + annotation_word_query
+			#print annotation_query.encode( "utf-8" )
 			annot_res = self.WSE.find_extended( annotation_query, "gid" )
 			extend_runtime += annot_res.runtime
-			## TODO:prepare annotations for use 
+			## prepare annotations for use 
+			annotations_by_word = {}
+			annotations_by_position = {}
+			for annot in annot_res:
+				if ( annotation_word and word_info ) :
+					if annot["normalized"] in terms_uthmani:
+						if annotations_by_word.has_key( annot["normalized"] ):
+							annotations_by_word[annot["normalized"]][annot["word"]] = annot;
+						else:
+							annotations_by_word[annot["normalized"]] = { annot["word"]: annot}
+				if annotation_aya:
+					if annotations_by_position.has_key( ( annot["sura_id"], annot["aya_id"] ) ):
+						annotations_by_position[( annot["sura_id"], annot["aya_id"] )][annot["word_id"]] = annot
+					else:
+						annotations_by_position[( annot["sura_id"], annot["aya_id"] )] = { annot["word_id"]: annot }
 
+		## merge word annotations to word output
+		if ( annotation_word and word_info ):
+			for cpt in xrange( 1, len( termz ) + 1 ):
+				current_word = STANDARD2UTHMANI( output["words"][cpt]["word"] )
+				#print current_word.encode( "utf-8" ), "=>", annotations_by_word, "=>", list( annot_res )
+				if current_word in terms_uthmani:
+					current_word_annotations = annotations_by_word[ current_word ]
+					output["words"][cpt]["annotations"] = current_word_annotations
+					output["words"][cpt]["nb_annotations"] = len ( current_word_annotations )
 
 		output["runtime"] = extend_runtime
 		output["interval"] = {"start":start + 1, "end":end, "total":len( res )}
@@ -450,9 +475,6 @@
 
 		    		},
 
-
-
-
 		                "position": {} if not aya_position_info
 		                else {
 		                	"manzil":r["manzil"],
@@ -481,7 +503,10 @@
 		    				"exist":( r["sajda"] == u"نعم" ),
 		    				"type": r["sajda_type"]  if ( r["sajda"] == u"نعم" ) else None,
 		    				"id":N( r["sajda_id"] ) if ( r["sajda"] == u"نعم" ) else None,
-		    			}
+		    			},
+
+				"annotations": {} if not annotation_aya
+							else annotations_by_position[( r["sura_id"], r["aya_id"] )]
 		    		}
 
 		return {"search": output}
@@ -494,10 +519,6 @@
 		return json.dumps( self._do( flags ) )
 
 
-
-
-
-
 class Xml( Raw ):
 	""" XML output format