← Back to team overview

team4alfanous team mailing list archive

[Branch ~team4alfanous/alfanous/alfanous-git] Rev 260: Strip Tatweel (ـ) in text analyzing - normalization phase.

 

------------------------------------------------------------
revno: 260
git commit: 88e68f3b7c4531ecc1bcdb7bfb33bc152ae7ceb2
committer: Assem Chelli <assem.ch@xxxxxxxxx>
timestamp: Wed 2012-06-13 13:49:23 +0100
message:
  Strip Tatweel (ـ) during the normalization phase of text analysis.
  This fixes the bug that occurred when searching with the keyword "اللـــه".
modified:
  src/alfanous/TextProcessing.py


--
lp:alfanous
https://code.launchpad.net/~team4alfanous/alfanous/alfanous-git

Your team Alfanous team is subscribed to branch lp:alfanous.
To unsubscribe from this branch go to https://code.launchpad.net/~team4alfanous/alfanous/alfanous-git/+edit-subscription
=== modified file 'src/alfanous/TextProcessing.py'
--- src/alfanous/TextProcessing.py	2012-06-02 09:55:06 +0000
+++ src/alfanous/TextProcessing.py	2012-06-13 12:49:23 +0000
@@ -31,7 +31,7 @@
 from alfanous.Support.whoosh.analysis import StopFilter, RegexTokenizer #LowercaseFilter, StandardAnalyzer,
 #from pyarabic.araby  import araby
 
-from alfanous.Support.ar_ctype import strip_tashkeel, normalize_spellerrors, normalize_hamza, normalize_lamalef  #, HARAKAT_pat, 
+from alfanous.Support.ar_ctype import strip_tashkeel, strip_tatweel, normalize_spellerrors, normalize_hamza, normalize_lamalef  #, HARAKAT_pat, 
 from alfanous.Support.arabic_const import FATHATAN, DAMMATAN, KASRATAN, FATHA, DAMMA, KASRA, SUKUN, SHADDA # *
 from alfanous.Constantes import INVERTEDSHAPING
 
@@ -72,6 +72,7 @@
         if self.__shaping:
                 text = normalize_lamalef( text )
                 text = unicode_.normalize_shaping( text )
+                text = strip_tatweel( text )
 
         if self.__tashkil:
                 text = strip_tashkeel( text )