launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #09115
[Merge] lp:~adeuring/launchpad/revert-r-15464 into lp:launchpad
Abel Deuring has proposed merging lp:~adeuring/launchpad/revert-r-15464 into lp:launchpad.
Requested reviews:
Abel Deuring (adeuring)
For more details, see:
https://code.launchpad.net/~adeuring/launchpad/revert-r-15464/+merge/111636
revert r15464: should have landed in db-devel
--
https://code.launchpad.net/~adeuring/launchpad/revert-r-15464/+merge/111636
Your team Launchpad code reviewers is subscribed to branch lp:launchpad.
=== removed file 'database/schema/patch-2209-24-1.sql'
--- database/schema/patch-2209-24-1.sql 2012-06-20 11:53:24 +0000
+++ database/schema/patch-2209-24-1.sql 1970-01-01 00:00:00 +0000
@@ -1,125 +0,0 @@
--- Copyright 2012 Canonical Ltd. This software is licensed under the
--- GNU Affero General Public License version 3 (see the file LICENSE).
-
-SET client_min_messages=ERROR;
-
-CREATE OR REPLACE FUNCTION _ftq(text) RETURNS text
- LANGUAGE plpythonu IMMUTABLE STRICT
- AS $_$
- import re
-
- # I think this method would be more robust if we used a real
- # tokenizer and parser to generate the query string, but we need
- # something suitable for use as a stored procedure which currently
- # means no external dependancies.
-
- # Convert to Unicode
- query = args[0].decode('utf8')
- ## plpy.debug('1 query is %s' % repr(query))
-
- # Normalize whitespace
- query = re.sub("(?u)\s+"," ", query)
-
- # Convert AND, OR, NOT and - to tsearch2 punctuation
- query = re.sub(r"(?u)(?:^|\s)-([\w\(])", r" !\1", query)
- query = re.sub(r"(?u)\bAND\b", "&", query)
- query = re.sub(r"(?u)\bOR\b", "|", query)
- query = re.sub(r"(?u)\bNOT\b", " !", query)
- ## plpy.debug('2 query is %s' % repr(query))
-
- # Deal with unwanted punctuation.
- # ':' is used in queries to specify a weight of a word.
- # '\' is treated differently in to_tsvector() and to_tsquery().
- punctuation = r'[:\\]'
- query = re.sub(r"(?u)%s+" % (punctuation,), " ", query)
- ## plpy.debug('3 query is %s' % repr(query))
-
- # Strip ! characters inside and at the end of a word
- query = re.sub(r"(?u)(?<=\w)[\!]+", " ", query)
-
- # Now that we have handle case sensitive booleans, convert to lowercase
- query = query.lower()
-
- # Remove unpartnered bracket on the left and right
- query = re.sub(r"(?ux) ^ ( [^(]* ) \)", r"(\1)", query)
- query = re.sub(r"(?ux) \( ( [^)]* ) $", r"(\1)", query)
-
- # Remove spurious brackets
- query = re.sub(r"(?u)\(([^\&\|]*?)\)", r" \1 ", query)
- ## plpy.debug('5 query is %s' % repr(query))
-
- # Insert & between tokens without an existing boolean operator
- # ( not proceeded by (|&!
- query = re.sub(r"(?u)(?<![\(\|\&\!])\s*\(", "&(", query)
- ## plpy.debug('6 query is %s' % repr(query))
- # ) not followed by )|&
- query = re.sub(r"(?u)\)(?!\s*(\)|\||\&|\s*$))", ")&", query)
- ## plpy.debug('6.1 query is %s' % repr(query))
- # Whitespace not proceded by (|&! not followed by &|
- query = re.sub(r"(?u)(?<![\(\|\&\!\s])\s+(?![\&\|\s])", "&", query)
- ## plpy.debug('7 query is %s' % repr(query))
-
- # Detect and repair syntax errors - we are lenient because
- # this input is generally from users.
-
- # Fix unbalanced brackets
- openings = query.count("(")
- closings = query.count(")")
- if openings > closings:
- query = query + " ) "*(openings-closings)
- elif closings > openings:
- query = " ( "*(closings-openings) + query
- ## plpy.debug('8 query is %s' % repr(query))
-
- # Strip ' character that do not have letters on both sides
- query = re.sub(r"(?u)((?<!\w)'|'(?!\w))", "", query)
-
- # Brackets containing nothing but whitespace and booleans, recursive
- last = ""
- while last != query:
- last = query
- query = re.sub(r"(?u)\([\s\&\|\!]*\)", "", query)
- ## plpy.debug('9 query is %s' % repr(query))
-
- # An & or | following a (
- query = re.sub(r"(?u)(?<=\()[\&\|\s]+", "", query)
- ## plpy.debug('10 query is %s' % repr(query))
-
- # An &, | or ! immediatly before a )
- query = re.sub(r"(?u)[\&\|\!\s]*[\&\|\!]+\s*(?=\))", "", query)
- ## plpy.debug('11 query is %s' % repr(query))
-
- # An &,| or ! followed by another boolean.
- query = re.sub(r"(?ux) \s* ( [\&\|\!] ) [\s\&\|]+", r"\1", query)
- ## plpy.debug('12 query is %s' % repr(query))
-
- # Leading & or |
- query = re.sub(r"(?u)^[\s\&\|]+", "", query)
- ## plpy.debug('13 query is %s' % repr(query))
-
- # Trailing &, | or !
- query = re.sub(r"(?u)[\&\|\!\s]+$", "", query)
- ## plpy.debug('14 query is %s' % repr(query))
-
- # If we have nothing but whitespace and tsearch2 operators,
- # return NULL.
- if re.search(r"(?u)^[\&\|\!\s\(\)]*$", query) is not None:
- return None
-
- # Convert back to UTF-8
- query = query.encode('utf8')
- ## plpy.debug('15 query is %s' % repr(query))
-
- return query or None
- $_$;
-
-CREATE OR REPLACE FUNCTION ftq(text) RETURNS pg_catalog.tsquery
- LANGUAGE plpythonu IMMUTABLE STRICT
- AS $_$
- p = plpy.prepare(
- "SELECT to_tsquery('default', _ftq($1)) AS x", ["text"])
- query = plpy.execute(p, args, 1)[0]["x"]
- return query or None
- $_$;
-
-INSERT INTO LaunchpadDatabaseRevision VALUES (2209, 24, 1);
=== modified file 'lib/lp/answers/stories/question-browse-and-search.txt'
--- lib/lp/answers/stories/question-browse-and-search.txt 2012-06-21 08:33:10 +0000
+++ lib/lp/answers/stories/question-browse-and-search.txt 2012-06-22 16:21:22 +0000
@@ -311,7 +311,7 @@
dealing with plugins problems, he always start by a search for such
problems:
- >>> browser.getControl(name='field.search_text').value = 'plugin'
+ >>> browser.getControl(name='field.search_text').value = 'plug-in'
>>> browser.getControl('Search', index=0).click()
>>> questions = find_tag_by_id(browser.contents, 'question-listing')
>>> for question in questions.fetch('td', 'questionTITLE'):
=== modified file 'lib/lp/registry/doc/vocabularies.txt'
--- lib/lp/registry/doc/vocabularies.txt 2012-06-21 08:33:10 +0000
+++ lib/lp/registry/doc/vocabularies.txt 2012-06-22 16:21:22 +0000
@@ -731,7 +731,8 @@
>>> [(p.name, getattr(p.teamowner, 'name', None))
... for p in vocab.search('ubuntu-team')]
- [(u'ubuntu-team', u'mark')]
+ [(u'doc', None), (u'name18', u'mark'),
+ (u'ubuntu-security', u'kamion'), (u'ubuntu-team', u'mark')]
But it doesn't include merged accounts:
=== modified file 'lib/lp/services/database/doc/textsearching.txt'
--- lib/lp/services/database/doc/textsearching.txt 2012-06-20 11:41:04 +0000
+++ lib/lp/services/database/doc/textsearching.txt 2012-06-22 16:21:22 +0000
@@ -138,22 +138,7 @@
... compiled = compiled.decode('UTF-8')
... compiled = compiled.encode('US-ASCII', 'backslashreplace')
... print '%s <=> %s' % (uncompiled, compiled)
- >>>
- >>> def search(text_to_search, search_phrase):
- ... cur = cursor()
- ... cur.execute("SELECT to_tsvector(%s)", (text_to_search, ))
- ... ts_vector = cur.fetchall()[0][0]
- ... cur.execute("SELECT ftq(%s)", (search_phrase, ))
- ... ts_query = cur.fetchall()[0][0]
- ... cur.execute(
- ... "SELECT to_tsvector(%s) @@ ftq(%s)",
- ... (text_to_search, search_phrase))
- ... match = cur.fetchall()[0][0]
- ... return "FTI data: %s query: %s match: %s" % (
- ... ts_vector, ts_query, str(match))
- >>>
- >>> def search_same(text):
- ... return search(text, text)
+
Queries are lowercased
@@ -240,178 +225,127 @@
(hi&ho|hoe)&work&go <=> ( 'hi' & 'ho' | 'hoe' ) & 'work' & 'go'
-If a single '-' precedes a word, it is converted into the '!' operator.
-Note also that a trailing '-' is dropped by to_tsquery().
+Hyphenation is handled specially. Note that the & operator has precedence
+over the | operator and that tsearch2 removes the unnecessary brackets.
+
+ >>> ftq('foo-bar')
+ ((foo&bar)|foobar) <=> 'foo' & 'bar' | 'foobar'
+
+ >>> ftq('foo-bar-baz')
+ ((foo&bar&baz)|foobarbaz) <=> 'foo' & 'bar' & 'baz' | 'foobarbaz'
+
+ >>> ftq('foo & bar-baz')
+ foo&((bar&baz)|barbaz) <=> 'foo' & ( 'bar' & 'baz' | 'barbaz' )
>>> ftq('-foo bar-')
- !foo&bar- <=> !'foo' & 'bar'
-
-Repeated '-' are simply ignored by to_tsquery().
+ !foo&bar <=> !'foo' & 'bar'
>>> ftq('---foo--- ---bar---')
- ---foo---&---bar--- <=> 'foo' & 'bar'
-
-Hyphens surrounded by two words are retained. This reflects the way
-how to_tsquery() and to_tsvector() handle such strings.
-
- >>> print search_same('foo-bar')
- FTI data: 'bar':3 'foo':2 'foo-bar':1
- query: 'foo-bar' & 'foo' & 'bar'
- match: True
-
-
-Punctuation is handled consistently. If a string containing punctuation
-appears in an FTI, it can also be passed to ftq(),and a search for this
-string finds the indexed text.
-
- >>> punctuation = '\'"#$%*+,./:;<=>?@[\]^`{}~'
- >>> for symbol in punctuation:
- ... print repr(symbol), search_same('foo%sbar' % symbol)
- "'" FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
- '"' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
- '#' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
- '$' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
- '%' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
- '*' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
- '+' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
- ',' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
- '.' FTI data: 'foo.bar':1 query: 'foo.bar' match: True
- '/' FTI data: 'foo/bar':1 query: 'foo/bar' match: True
- ':' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
- ';' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
- '<' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
- '=' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
- '>' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
- '?' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
- '@' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
- '[' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
- '\\' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
- ']' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
- '^' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
- '`' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
- '{' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
- '}' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
- '~' FTI data: 'foo':1 '~bar':2 query: 'foo' & '~bar' match: True
-
- >>> for symbol in punctuation:
- ... print repr(symbol), search_same('aa %sbb%s cc' % (symbol, symbol))
- "'" FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
- '"' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
- '#' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
- '$' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
- '%' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
- '*' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
- '+' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
- ',' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
- '.' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
- '/' FTI data: '/bb':2 'aa':1 'cc':3 query: 'aa' & '/bb' & 'cc' match: True
- ':' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
- ';' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
- '<' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
- '=' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
- '>' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
- '?' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
- '@' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
- '[' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
- '\\' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
- ']' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
- '^' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
- '`' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
- '{' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
- '}' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
- '~' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & '~bb' & 'cc' match: False
-
-XXX Abel Deuring 2012-06-20 bug=1015511: Note that the last line above
-shows a bug: The FTI data for the string "aa ~bb~ cc" contains the words
-'aa', 'bb', 'cc', while the ts_query object for the same text contains
-'aa', '~bb', 'cc', hence the query does not match the string. More details_
-
-XXX Abel Deuring 2012-06-20 bug=1015519: XML tags cannot be searched.
-
- >>> print search_same('foo <bar> baz')
- FTI data: 'baz':2 'foo':1 query: 'foo' & 'baz' match: True
-
-More specifically, tags are simply dropped from the FTI data and from
-search queries.
-
- >>> print search('some text <div>whatever</div>', '<div>')
- FTI data: 'text':2 'whatev':3 query: None match: None
-
-Of course, omitting '<' and '>'from the query does not help.
-
- >>> print search('some text <div>whatever</div>', 'div')
- FTI data: 'text':2 'whatev':3 query: 'div' match: False
-
-Treatment of characters that are used as operators in to_tsquery():
+ foo&bar <=> 'foo' & 'bar'
+
+ >>> ftq('foo-bar test')
+ ((foo&bar)|foobar)&test <=> ( 'foo' & 'bar' | 'foobar' ) & 'test'
+
+ >>> ftq('foo-bar OR test')
+ ((foo&bar)|foobar)|test <=> ( 'foo' & 'bar' | 'foobar' ) | 'test'
+
+
+Most punctuation characters are converted to whitespace outside of
+words, or treated as a hyphen inside words. The exceptions are the
+operators ()!&|!.
+
+ >>> ftq(':100%')
+ 100 <=> '100'
+
+ >>> ftq(r'foo\bar')
+ ((foo&bar)|foobar) <=> 'foo' & 'bar' | 'foobar'
+
+ >>> ftq('/dev/pmu')
+ ((dev&pmu)|devpmu) <=> 'dev' & 'pmu' | 'devpmu'
>>> ftq('cool!')
cool <=> 'cool'
-Email addresses are retained as a whole, both by to_tsvector() and by
-ftq().
-
- >>> print search_same('foo@xxxxxxx')
- FTI data: 'foo@xxxxxxx':1 query: 'foo@xxxxxxx' match: True
-
-File names are retained as a whole.
-
- >>> print search_same('foo-bar.txt')
- FTI data: 'foo-bar.txt':1 query: 'foo-bar.txt' match: True
+ >>> ftq('foo@xxxxxxx')
+ ((foo&bar&com)|foobarcom) <=> 'foo' & 'bar' & 'com' | 'foobarcom'
+
Some punctuation we pass through to tsearch2 for it to handle.
-NB. This gets stemmed, see below.
-
- >>> print search_same("shouldn't")
- FTI data: 'shouldn':1 query: 'shouldn' match: True
-
-Bug #44913 - Unicode characters in the wrong place.
-
- >>> search_same(u'abc-a\N{LATIN SMALL LETTER C WITH CEDILLA}')
- "FTI data: 'abc':2 'abc-a\xc3\xa7':1 'a\xc3\xa7':3
- query: 'abc-a\xc3\xa7' & 'abc' & 'a\xc3\xa7'
- match: True"
-
-Cut & Paste of 'Smart' quotes. Note that the quotation mark is retained
-in the FTI.
-
- >>> print search_same(u'a-a\N{RIGHT DOUBLE QUOTATION MARK}')
- FTI data: 'a-a”':1 'a”':3 query: 'a-a”' & 'a”' match: True
-
- >>> print search_same(
- ... u'\N{LEFT SINGLE QUOTATION MARK}a.a'
- ... u'\N{RIGHT SINGLE QUOTATION MARK}')
- FTI data: 'a’':2 '‘a':1 query: '‘a' & 'a’' match: True
+
+ >>> ftq("shouldn't") # NB. This gets stemmed, see below
+ shouldn't <=> 'shouldn'
+
+It was noticed though in Bug #33920 that tsearch2 couldn't cope if the
+apostrophe was not inside a word. So we strip it in these cases.
+
+ >>> ftq("'cool")
+ cool <=> 'cool'
+ >>> ftq("'shouldn't")
+ shouldn't <=> 'shouldn'
+ >>> ftq("' cool")
+ cool <=> 'cool'
+ >>> ftq("cool '")
+ cool <=> 'cool'
+ >>> ftq("' cool '")
+ cool <=> 'cool'
+ >>> ftq("'cool'")
+ cool <=> 'cool'
+ >>> ftq("('cool' AND bananas)")
+ (cool&bananas) <=> 'cool' & 'banana'
+
+It was also noticed through Bug #39828 that tsearch2 will not cope if the
+! character is embedded inside or found at the end of a word.
+
+ >>> ftq('cool!')
+ cool <=> 'cool'
+ >>> ftq('hi!mom')
+ hi&mom <=> 'hi' & 'mom'
+ >>> ftq('hi!!!!!mom')
+ hi&mom <=> 'hi' & 'mom'
+ >>> ftq('hi !mom')
+ hi&!mom <=> 'hi' & !'mom'
+
+
+Bug #44913 - Unicode characters in the wrong place
+
+ >>> ftq(u'a-a\N{LATIN SMALL LETTER C WITH CEDILLA}')
+ ((a&a\xe7)|aa\xe7) <=> 'a\xe7' | 'aa\xe7'
+
+ Cut & Paste of 'Smart' quotes
+
+ >>> ftq(u'a-a\N{RIGHT DOUBLE QUOTATION MARK}')
+ ((a&a)|aa) <=> 'aa'
+
+ >>> ftq(u'\N{LEFT SINGLE QUOTATION MARK}a.a\N{RIGHT SINGLE QUOTATION MARK}')
+ ((a&a)|aa) <=> 'aa'
Bug #44913 - Nothing but stopwords in a query needing repair
- >>> print search_same('a)a')
- FTI data: query: None match: None
+ >>> ftq('a)a')
+ a&a <=> None
Stop words (words deemed too common in English to search on) are removed
from queries by tsearch2.
- >>> print search_same("Don't do it harder!")
- FTI data: 'harder':5 query: 'harder' match: True
+ >>> ftq("Don't do it harder!")
+ don't&do&it&harder <=> 'harder'
Note that some queries will return None after compilation, because they
contained nothing but stop words or punctuation.
- >>> print search_same("don't do it!")
- FTI data: query: None match: None
+ >>> ftq("don't do it!")
+ don't&do&it <=> None
- >>> print search_same(",,,")
- FTI data: query: None match: None
+ >>> ftq(",,,")
+ None <=> None
Queries containing nothing except whitespace, boolean operators and
punctuation will just return None.
-Note in the fourth example below that the '-' left in the query by _ftq()
-is ignored by to_tsquery().
-
>>> ftq(" ")
None <=> None
>>> ftq("AND")
@@ -419,7 +353,7 @@
>>> ftq(" AND (!)")
None <=> None
>>> ftq("-")
- - <=> None
+ None <=> None
Words are also stemmed by tsearch2 (using the English stemmer).
@@ -447,7 +381,7 @@
(hi|!hello)&mom <=> ( 'hi' | !'hello' ) & 'mom'
>>> ftq('(hi OR - AND hello) AND mom')
- (hi|-&hello)&mom <=> ( 'hi' | 'hello' ) & 'mom'
+ (hi|hello)&mom <=> ( 'hi' | 'hello' ) & 'mom'
>>> ftq('hi AND mom AND')
hi&mom <=> 'hi' & 'mom'
@@ -459,7 +393,7 @@
(hi|hello)&mom <=> ( 'hi' | 'hello' ) & 'mom'
>>> ftq('() hi mom ( ) ((! |((&)))) :-)')
- (hi&mom&-) <=> 'hi' & 'mom'
+ (hi&mom) <=> 'hi' & 'mom'
>>> ftq("(hi mom")
hi&mom <=> 'hi' & 'mom'
@@ -480,15 +414,15 @@
hi&mom <=> 'hi' & 'mom'
>>> ftq("(foo .") # Bug 43245
- foo&. <=> 'foo'
+ foo <=> 'foo'
>>> ftq("(foo.")
- foo. <=> 'foo'
+ foo <=> 'foo'
Bug #54972
>>> ftq("a[a\n[a")
- a[a&[a <=> None
+ ((a&a)|aa)&a <=> 'aa'
Bug #96698
@@ -503,10 +437,10 @@
Bug #160236
>>> ftq("foo&&bar-baz")
- foo&bar-baz <=> 'foo' & 'bar-baz' & 'bar' & 'baz'
+ foo&((bar&baz)|barbaz) <=> 'foo' & ( 'bar' & 'baz' | 'barbaz' )
>>> ftq("foo||bar.baz")
- foo|bar.baz <=> 'foo' | 'bar.baz'
+ foo|((bar&baz)|barbaz) <=> 'foo' | ( 'bar' & 'baz' | 'barbaz' )
Phrase Searching
@@ -548,8 +482,7 @@
>>> runsql(r"""
... SELECT title, max(ranking) FROM (
- ... SELECT Bug.title,rank(Bug.fti||Message.fti,ftq('firefox'))
- ... AS ranking
+ ... SELECT Bug.title,rank(Bug.fti||Message.fti,ftq('firefox')) AS ranking
... FROM Bug, BugMessage, Message
... WHERE Bug.id = BugMessage.bug AND Message.id = BugMessage.message
... AND (Bug.fti @@ ftq('firefox') OR Message.fti @@ ftq('firefox'))
@@ -566,8 +499,7 @@
... AND BugTask.product = Product.id
... AND Product.name LIKE lower('%firefox%')
... UNION
- ... SELECT Bug.title, rank(Product.fti, ftq('firefox')) - 0.3
- ... AS ranking
+ ... SELECT Bug.title, rank(Product.fti, ftq('firefox')) - 0.3 AS ranking
... FROM Bug, BugTask, Product
... WHERE Bug.id = BugTask.bug
... AND BugTask.product = Product.id
@@ -586,8 +518,7 @@
Printing doesn't work 0.70
-Natural Language Phrase Query
------------------------------
+== Natural Language Phrase Query ==
The standard boolean searches of tsearch2 are fine, but sometime you
want more fuzzy searches.
@@ -626,8 +557,7 @@
on Ubuntu) - so we are disabling this and reworking from the ground up.
-nl_term_candidates()
-~~~~~~~~~~~~~~~~~~~~
+=== nl_term_candidates() ===
To find the terms in a search phrase that are canditates for the search,
we can use the nl_term_candidates() function. This function uses ftq()
@@ -644,16 +574,19 @@
>>> nl_term_candidates('how do I do this?')
[]
+We also handle expansion of hyphenated words (like ftq does):
+
+ >>> nl_term_candidates('firefox foo-bar give me trouble')
+ [u'firefox', u'foo', u'bar', u'foobar', u'give', u'troubl']
+
Except for the hyphenation character, all non-word caracters are ignored:
>>> nl_term_candidates(
- ... "Will the \'\'|\'\' character (inside a ''quoted'' string) "
- ... "work???")
+ ... "Will the \'\'|\'\' character (inside a ''quoted'' string) work???")
[u'charact', u'insid', u'quot', u'string', u'work']
-nl_phrase_search()
-~~~~~~~~~~~~~~~~~~
+=== nl_phrase_search() ===
To get the actual tsearch2 query that should be run, you will use the
nl_phrase_search() function. This one takes two mandatory parameters and
@@ -704,8 +637,7 @@
u'slow|system'
-Using other constraints
-.......................
+==== Using other constraints ====
You can pass a third parameter to the function that will be use as
an additional constraint to determine the total number of rows that
@@ -727,8 +659,7 @@
>>> nl_phrase_search(
... 'firefox gets very slow on flickr', Question,
- ... "Question.product = %s AND Product.active = 't'"
- ... % firefox_product.id,
+ ... "Question.product = %s AND Product.active = 't'" % firefox_product.id,
... ['Product'], fast_enabled=False)
u'slow|flickr'
@@ -748,8 +679,7 @@
u'(firefox&flickr&slow)|(flickr&slow)|(firefox&slow)|(firefox&flickr)'
-No keywords filtering with few rows
-...................................
+==== No keywords filtering with few rows ====
The 50% rule is really useful only when there are many rows. When there
only very few rows, that keyword elimination becomes a problem since
Follow ups