team4alfanous team mailing list archive
-
team4alfanous team
-
Mailing list archive
-
Message #00065
[Branch ~team4alfanous/alfanous/alfanous-git] Rev 260: Strip Tatweel (ـ) in text analyzing - normalization phase.
------------------------------------------------------------
revno: 260
git commit: 88e68f3b7c4531ecc1bcdb7bfb33bc152ae7ceb2
committer: Assem Chelli <assem.ch@xxxxxxxxx>
timestamp: Wed 2012-06-13 13:49:23 +0100
message:
Strip Tatweel (ـ) during the normalization phase of text analysis.
This fixes a bug when searching with keywords that contain Tatweel, such as "اللـــه".
modified:
src/alfanous/TextProcessing.py
--
lp:alfanous
https://code.launchpad.net/~team4alfanous/alfanous/alfanous-git
Your team, Alfanous team, is subscribed to branch lp:alfanous.
To unsubscribe from this branch go to https://code.launchpad.net/~team4alfanous/alfanous/alfanous-git/+edit-subscription
=== modified file 'src/alfanous/TextProcessing.py'
--- src/alfanous/TextProcessing.py 2012-06-02 09:55:06 +0000
+++ src/alfanous/TextProcessing.py 2012-06-13 12:49:23 +0000
@@ -31,7 +31,7 @@
from alfanous.Support.whoosh.analysis import StopFilter, RegexTokenizer #LowercaseFilter, StandardAnalyzer,
#from pyarabic.araby import araby
-from alfanous.Support.ar_ctype import strip_tashkeel, normalize_spellerrors, normalize_hamza, normalize_lamalef #, HARAKAT_pat,
+from alfanous.Support.ar_ctype import strip_tashkeel, strip_tatweel, normalize_spellerrors, normalize_hamza, normalize_lamalef #, HARAKAT_pat,
from alfanous.Support.arabic_const import FATHATAN, DAMMATAN, KASRATAN, FATHA, DAMMA, KASRA, SUKUN, SHADDA # *
from alfanous.Constantes import INVERTEDSHAPING
@@ -72,6 +72,7 @@
if self.__shaping:
text = normalize_lamalef( text )
text = unicode_.normalize_shaping( text )
+ text = strip_tatweel( text )
if self.__tashkil:
text = strip_tashkeel( text )