← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~adeuring/launchpad/revert-r-15464 into lp:launchpad

 

Abel Deuring has proposed merging lp:~adeuring/launchpad/revert-r-15464 into lp:launchpad.

Requested reviews:
  Abel Deuring (adeuring)

For more details, see:
https://code.launchpad.net/~adeuring/launchpad/revert-r-15464/+merge/111636

revert r15464: should have landed in db-devel
-- 
https://code.launchpad.net/~adeuring/launchpad/revert-r-15464/+merge/111636
Your team Launchpad code reviewers is subscribed to branch lp:launchpad.
=== removed file 'database/schema/patch-2209-24-1.sql'
--- database/schema/patch-2209-24-1.sql	2012-06-20 11:53:24 +0000
+++ database/schema/patch-2209-24-1.sql	1970-01-01 00:00:00 +0000
@@ -1,125 +0,0 @@
--- Copyright 2012 Canonical Ltd.  This software is licensed under the
--- GNU Affero General Public License version 3 (see the file LICENSE).
-
-SET client_min_messages=ERROR;
-
-CREATE OR REPLACE FUNCTION _ftq(text) RETURNS text
-    LANGUAGE plpythonu IMMUTABLE STRICT
-    AS $_$
-        import re
-
-        # I think this method would be more robust if we used a real
-        # tokenizer and parser to generate the query string, but we need
-        # something suitable for use as a stored procedure which currently
-        # means no external dependancies.
-
-        # Convert to Unicode
-        query = args[0].decode('utf8')
-        ## plpy.debug('1 query is %s' % repr(query))
-
-        # Normalize whitespace
-        query = re.sub("(?u)\s+"," ", query)
-
-        # Convert AND, OR, NOT and - to tsearch2 punctuation
-        query = re.sub(r"(?u)(?:^|\s)-([\w\(])", r" !\1", query)
-        query = re.sub(r"(?u)\bAND\b", "&", query)
-        query = re.sub(r"(?u)\bOR\b", "|", query)
-        query = re.sub(r"(?u)\bNOT\b", " !", query)
-        ## plpy.debug('2 query is %s' % repr(query))
-
-        # Deal with unwanted punctuation.
-        # ':' is used in queries to specify a weight of a word.
-        # '\' is treated differently in to_tsvector() and to_tsquery().
-        punctuation = r'[:\\]'
-        query = re.sub(r"(?u)%s+" % (punctuation,), " ", query)
-        ## plpy.debug('3 query is %s' % repr(query))
-
-        # Strip ! characters inside and at the end of a word
-        query = re.sub(r"(?u)(?<=\w)[\!]+", " ", query)
-
-        # Now that we have handle case sensitive booleans, convert to lowercase
-        query = query.lower()
-
-        # Remove unpartnered bracket on the left and right
-        query = re.sub(r"(?ux) ^ ( [^(]* ) \)", r"(\1)", query)
-        query = re.sub(r"(?ux) \( ( [^)]* ) $", r"(\1)", query)
-
-        # Remove spurious brackets
-        query = re.sub(r"(?u)\(([^\&\|]*?)\)", r" \1 ", query)
-        ## plpy.debug('5 query is %s' % repr(query))
-
-        # Insert & between tokens without an existing boolean operator
-        # ( not proceeded by (|&!
-        query = re.sub(r"(?u)(?<![\(\|\&\!])\s*\(", "&(", query)
-        ## plpy.debug('6 query is %s' % repr(query))
-        # ) not followed by )|&
-        query = re.sub(r"(?u)\)(?!\s*(\)|\||\&|\s*$))", ")&", query)
-        ## plpy.debug('6.1 query is %s' % repr(query))
-        # Whitespace not proceded by (|&! not followed by &|
-        query = re.sub(r"(?u)(?<![\(\|\&\!\s])\s+(?![\&\|\s])", "&", query)
-        ## plpy.debug('7 query is %s' % repr(query))
-
-        # Detect and repair syntax errors - we are lenient because
-        # this input is generally from users.
-
-        # Fix unbalanced brackets
-        openings = query.count("(")
-        closings = query.count(")")
-        if openings > closings:
-            query = query + " ) "*(openings-closings)
-        elif closings > openings:
-            query = " ( "*(closings-openings) + query
-        ## plpy.debug('8 query is %s' % repr(query))
-
-        # Strip ' character that do not have letters on both sides
-        query = re.sub(r"(?u)((?<!\w)'|'(?!\w))", "", query)
-
-        # Brackets containing nothing but whitespace and booleans, recursive
-        last = ""
-        while last != query:
-            last = query
-            query = re.sub(r"(?u)\([\s\&\|\!]*\)", "", query)
-        ## plpy.debug('9 query is %s' % repr(query))
-
-        # An & or | following a (
-        query = re.sub(r"(?u)(?<=\()[\&\|\s]+", "", query)
-        ## plpy.debug('10 query is %s' % repr(query))
-
-        # An &, | or ! immediatly before a )
-        query = re.sub(r"(?u)[\&\|\!\s]*[\&\|\!]+\s*(?=\))", "", query)
-        ## plpy.debug('11 query is %s' % repr(query))
-
-        # An &,| or ! followed by another boolean.
-        query = re.sub(r"(?ux) \s* ( [\&\|\!] ) [\s\&\|]+", r"\1", query)
-        ## plpy.debug('12 query is %s' % repr(query))
-
-        # Leading & or |
-        query = re.sub(r"(?u)^[\s\&\|]+", "", query)
-        ## plpy.debug('13 query is %s' % repr(query))
-
-        # Trailing &, | or !
-        query = re.sub(r"(?u)[\&\|\!\s]+$", "", query)
-        ## plpy.debug('14 query is %s' % repr(query))
-
-        # If we have nothing but whitespace and tsearch2 operators,
-        # return NULL.
-        if re.search(r"(?u)^[\&\|\!\s\(\)]*$", query) is not None:
-            return None
-
-        # Convert back to UTF-8
-        query = query.encode('utf8')
-        ## plpy.debug('15 query is %s' % repr(query))
-
-        return query or None
-        $_$;
-
-CREATE OR REPLACE FUNCTION ftq(text) RETURNS pg_catalog.tsquery
-    LANGUAGE plpythonu IMMUTABLE STRICT
-    AS $_$
-        p = plpy.prepare(
-            "SELECT to_tsquery('default', _ftq($1)) AS x", ["text"])
-        query = plpy.execute(p, args, 1)[0]["x"]
-        return query or None
-        $_$;
-
-INSERT INTO LaunchpadDatabaseRevision VALUES (2209, 24, 1);

=== modified file 'lib/lp/answers/stories/question-browse-and-search.txt'
--- lib/lp/answers/stories/question-browse-and-search.txt	2012-06-21 08:33:10 +0000
+++ lib/lp/answers/stories/question-browse-and-search.txt	2012-06-22 16:21:22 +0000
@@ -311,7 +311,7 @@
 dealing with plugins problems, he always start by a search for such
 problems:
 
-    >>> browser.getControl(name='field.search_text').value = 'plugin'
+    >>> browser.getControl(name='field.search_text').value = 'plug-in'
     >>> browser.getControl('Search', index=0).click()
     >>> questions = find_tag_by_id(browser.contents, 'question-listing')
     >>> for question in questions.fetch('td', 'questionTITLE'):

=== modified file 'lib/lp/registry/doc/vocabularies.txt'
--- lib/lp/registry/doc/vocabularies.txt	2012-06-21 08:33:10 +0000
+++ lib/lp/registry/doc/vocabularies.txt	2012-06-22 16:21:22 +0000
@@ -731,7 +731,8 @@
 
     >>> [(p.name, getattr(p.teamowner, 'name', None))
     ...  for p in vocab.search('ubuntu-team')]
-    [(u'ubuntu-team', u'mark')]
+    [(u'doc', None), (u'name18', u'mark'),
+     (u'ubuntu-security', u'kamion'), (u'ubuntu-team', u'mark')]
 
 But it doesn't include merged accounts:
 

=== modified file 'lib/lp/services/database/doc/textsearching.txt'
--- lib/lp/services/database/doc/textsearching.txt	2012-06-20 11:41:04 +0000
+++ lib/lp/services/database/doc/textsearching.txt	2012-06-22 16:21:22 +0000
@@ -138,22 +138,7 @@
     ...         compiled = compiled.decode('UTF-8')
     ...         compiled = compiled.encode('US-ASCII', 'backslashreplace')
     ...     print '%s <=> %s' % (uncompiled, compiled)
-    >>>
-    >>> def search(text_to_search, search_phrase):
-    ...     cur = cursor()
-    ...     cur.execute("SELECT to_tsvector(%s)", (text_to_search, ))
-    ...     ts_vector = cur.fetchall()[0][0]
-    ...     cur.execute("SELECT ftq(%s)", (search_phrase, ))
-    ...     ts_query = cur.fetchall()[0][0]
-    ...     cur.execute(
-    ...         "SELECT to_tsvector(%s) @@ ftq(%s)",
-    ...         (text_to_search, search_phrase))
-    ...     match = cur.fetchall()[0][0]
-    ...     return "FTI data: %s query: %s match: %s" % (
-    ...         ts_vector, ts_query, str(match))
-    >>>
-    >>> def search_same(text):
-    ...     return search(text, text)
+
 
 Queries are lowercased
 
@@ -240,178 +225,127 @@
     (hi&ho|hoe)&work&go <=> ( 'hi' & 'ho' | 'hoe' ) & 'work' & 'go'
 
 
-If a single '-' precedes a word, it is converted into the '!' operator.
-Note also that a trailing '-' is dropped by to_tsquery().
+Hyphenation is handled specially. Note that the & operator has precedence
+over the | operator and that tsearch2 removes the unnecessary brackets.
+
+    >>> ftq('foo-bar')
+    ((foo&bar)|foobar) <=> 'foo' & 'bar' | 'foobar'
+
+    >>> ftq('foo-bar-baz')
+    ((foo&bar&baz)|foobarbaz) <=> 'foo' & 'bar' & 'baz' | 'foobarbaz'
+
+    >>> ftq('foo & bar-baz')
+    foo&((bar&baz)|barbaz) <=> 'foo' & ( 'bar' & 'baz' | 'barbaz' )
 
     >>> ftq('-foo bar-')
-    !foo&bar- <=> !'foo' & 'bar'
-
-Repeated '-' are simply ignored by to_tsquery().
+    !foo&bar <=> !'foo' & 'bar'
 
     >>> ftq('---foo--- ---bar---')
-    ---foo---&---bar--- <=> 'foo' & 'bar'
-
-Hyphens surrounded by two words are retained. This reflects the way
-how to_tsquery() and to_tsvector() handle such strings.
-
-    >>> print search_same('foo-bar')
-    FTI data: 'bar':3 'foo':2 'foo-bar':1
-    query: 'foo-bar' & 'foo' & 'bar'
-    match: True
-
-
-Punctuation is handled consistently. If a string containing punctuation
-appears in an FTI, it can also be passed to ftq(),and a search for this
-string finds the indexed text.
-
-    >>> punctuation = '\'"#$%*+,./:;<=>?@[\]^`{}~'
-    >>> for symbol in punctuation:
-    ...     print repr(symbol), search_same('foo%sbar' % symbol)
-    "'" FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
-    '"' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
-    '#' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
-    '$' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
-    '%' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
-    '*' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
-    '+' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
-    ',' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
-    '.' FTI data: 'foo.bar':1 query: 'foo.bar' match: True
-    '/' FTI data: 'foo/bar':1 query: 'foo/bar' match: True
-    ':' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
-    ';' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
-    '<' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
-    '=' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
-    '>' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
-    '?' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
-    '@' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
-    '[' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
-    '\\' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
-    ']' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
-    '^' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
-    '`' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
-    '{' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
-    '}' FTI data: 'bar':2 'foo':1 query: 'foo' & 'bar' match: True
-    '~' FTI data: 'foo':1 '~bar':2 query: 'foo' & '~bar' match: True
-
-    >>> for symbol in punctuation:
-    ...     print repr(symbol), search_same('aa %sbb%s cc' % (symbol, symbol))
-    "'" FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
-    '"' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
-    '#' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
-    '$' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
-    '%' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
-    '*' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
-    '+' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
-    ',' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
-    '.' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
-    '/' FTI data: '/bb':2 'aa':1 'cc':3 query: 'aa' & '/bb' & 'cc' match: True
-    ':' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
-    ';' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
-    '<' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
-    '=' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
-    '>' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
-    '?' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
-    '@' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
-    '[' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
-    '\\' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
-    ']' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
-    '^' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
-    '`' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
-    '{' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
-    '}' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & 'bb' & 'cc' match: True
-    '~' FTI data: 'aa':1 'bb':2 'cc':3 query: 'aa' & '~bb' & 'cc' match: False
-
-XXX Abel Deuring 2012-06-20 bug=1015511: Note that the last line above
-shows a bug: The FTI data for the string "aa ~bb~ cc" contains the words
-'aa', 'bb', 'cc', while the ts_query object for the same text contains
-'aa', '~bb', 'cc', hence the query does not match the string. More details_
-
-XXX Abel Deuring 2012-06-20 bug=1015519: XML tags cannot be searched.
-
-    >>> print search_same('foo <bar> baz')
-    FTI data: 'baz':2 'foo':1 query: 'foo' & 'baz' match: True
-
-More specifically, tags are simply dropped from the FTI data and from
-search queries.
-
-    >>> print search('some text <div>whatever</div>', '<div>')
-    FTI data: 'text':2 'whatev':3 query: None match: None
-
-Of course, omitting '<' and '>'from the query does not help.
-
-    >>> print search('some text <div>whatever</div>', 'div')
-    FTI data: 'text':2 'whatev':3 query: 'div' match: False
-
-Treatment of characters that are used as operators in to_tsquery():
+    foo&bar <=> 'foo' & 'bar'
+
+    >>> ftq('foo-bar test')
+    ((foo&bar)|foobar)&test <=> ( 'foo' & 'bar' | 'foobar' ) & 'test'
+
+    >>> ftq('foo-bar OR test')
+    ((foo&bar)|foobar)|test <=> ( 'foo' & 'bar' | 'foobar' ) | 'test'
+
+
+Most punctuation characters are converted to whitespace outside of
+words, or treated as a hyphen inside words. The exceptions are the
+operators ()!&|.
+
+    >>> ftq(':100%')
+    100 <=> '100'
+
+    >>> ftq(r'foo\bar')
+    ((foo&bar)|foobar) <=> 'foo' & 'bar' | 'foobar'
+
+    >>> ftq('/dev/pmu')
+    ((dev&pmu)|devpmu) <=> 'dev' & 'pmu' | 'devpmu'
 
     >>> ftq('cool!')
     cool <=> 'cool'
 
-Email addresses are retained as a whole, both by to_tsvector() and by
-ftq().
-
-    >>> print search_same('foo@xxxxxxx')
-    FTI data: 'foo@xxxxxxx':1 query: 'foo@xxxxxxx' match: True
-
-File names are retained as a whole.
-
-    >>> print search_same('foo-bar.txt')
-    FTI data: 'foo-bar.txt':1 query: 'foo-bar.txt' match: True
+    >>> ftq('foo@bar.com')
+    ((foo&bar&com)|foobarcom) <=> 'foo' & 'bar' & 'com' | 'foobarcom'
+
 
 Some punctuation we pass through to tsearch2 for it to handle.
-NB. This gets stemmed, see below.
-
-    >>> print search_same("shouldn't")
-    FTI data: 'shouldn':1 query: 'shouldn' match: True
-
-Bug #44913 - Unicode characters in the wrong place.
-
-    >>> search_same(u'abc-a\N{LATIN SMALL LETTER C WITH CEDILLA}')
-    "FTI data: 'abc':2 'abc-a\xc3\xa7':1 'a\xc3\xa7':3
-    query: 'abc-a\xc3\xa7' & 'abc' & 'a\xc3\xa7'
-    match: True"
-
-Cut & Paste of 'Smart' quotes. Note that the quotation mark is retained
-in the FTI.
-
-    >>> print search_same(u'a-a\N{RIGHT DOUBLE QUOTATION MARK}')
-    FTI data: 'a-a”':1 'a”':3 query: 'a-a”' & 'a”' match: True
-
-    >>> print search_same(
-    ...     u'\N{LEFT SINGLE QUOTATION MARK}a.a'
-    ...     u'\N{RIGHT SINGLE QUOTATION MARK}')
-    FTI data: 'a’':2 '‘a':1 query: '‘a' & 'a’' match: True
+
+    >>> ftq("shouldn't") # NB. This gets stemmed, see below
+    shouldn't <=> 'shouldn'
+
+It was noticed though in Bug #33920 that tsearch2 couldn't cope if the
+apostrophe was not inside a word. So we strip it in these cases.
+
+    >>> ftq("'cool")
+    cool <=> 'cool'
+    >>> ftq("'shouldn't")
+    shouldn't <=> 'shouldn'
+    >>> ftq("' cool")
+    cool <=> 'cool'
+    >>> ftq("cool '")
+    cool <=> 'cool'
+    >>> ftq("' cool '")
+    cool <=> 'cool'
+    >>> ftq("'cool'")
+    cool <=> 'cool'
+    >>> ftq("('cool' AND bananas)")
+    (cool&bananas) <=> 'cool' & 'banana'
+
+It was also noticed through Bug #39828 that tsearch2 will not cope if the
+! character is embedded inside or found at the end of a word.
+
+    >>> ftq('cool!')
+    cool <=> 'cool'
+    >>> ftq('hi!mom')
+    hi&mom <=> 'hi' & 'mom'
+    >>> ftq('hi!!!!!mom')
+    hi&mom <=> 'hi' & 'mom'
+    >>> ftq('hi !mom')
+    hi&!mom <=> 'hi' & !'mom'
+
+
+Bug #44913 - Unicode characters in the wrong place
+
+    >>> ftq(u'a-a\N{LATIN SMALL LETTER C WITH CEDILLA}')
+    ((a&a\xe7)|aa\xe7) <=> 'a\xe7' | 'aa\xe7'
+
+    Cut & Paste of 'Smart' quotes
+
+    >>> ftq(u'a-a\N{RIGHT DOUBLE QUOTATION MARK}')
+    ((a&a)|aa) <=> 'aa'
+
+    >>> ftq(u'\N{LEFT SINGLE QUOTATION MARK}a.a\N{RIGHT SINGLE QUOTATION MARK}')
+    ((a&a)|aa) <=> 'aa'
 
 
 Bug #44913 - Nothing but stopwords in a query needing repair
 
-    >>> print search_same('a)a')
-    FTI data:  query: None match: None
+    >>> ftq('a)a')
+    a&a <=> None
 
 
 Stop words (words deemed too common in English to search on) are removed
 from queries by tsearch2.
 
-    >>> print search_same("Don't do it harder!")
-    FTI data: 'harder':5 query: 'harder' match: True
+    >>> ftq("Don't do it harder!")
+    don't&do&it&harder <=> 'harder'
 
 
 Note that some queries will return None after compilation, because they
 contained nothing but stop words or punctuation.
 
-    >>> print search_same("don't do it!")
-    FTI data:  query: None match: None
+    >>> ftq("don't do it!")
+    don't&do&it <=> None
 
-    >>> print search_same(",,,")
-    FTI data:  query: None match: None
+    >>> ftq(",,,")
+    None <=> None
 
 
 Queries containing nothing except whitespace, boolean operators and
 punctuation will just return None.
 
-Note in the fourth example below that the '-' left in the query by _ftq()
-is ignored by to_tsquery().
-
     >>> ftq(" ")
     None <=> None
     >>> ftq("AND")
@@ -419,7 +353,7 @@
     >>> ftq(" AND (!)")
     None <=> None
     >>> ftq("-")
-    - <=> None
+    None <=> None
 
 
 Words are also stemmed by tsearch2 (using the English stemmer).
@@ -447,7 +381,7 @@
     (hi|!hello)&mom <=> ( 'hi' | !'hello' ) & 'mom'
 
     >>> ftq('(hi OR - AND hello) AND mom')
-    (hi|-&hello)&mom <=> ( 'hi' | 'hello' ) & 'mom'
+    (hi|hello)&mom <=> ( 'hi' | 'hello' ) & 'mom'
 
     >>> ftq('hi AND mom AND')
     hi&mom <=> 'hi' & 'mom'
@@ -459,7 +393,7 @@
     (hi|hello)&mom <=> ( 'hi' | 'hello' ) & 'mom'
 
     >>> ftq('() hi mom ( ) ((! |((&)))) :-)')
-    (hi&mom&-) <=> 'hi' & 'mom'
+    (hi&mom) <=> 'hi' & 'mom'
 
     >>> ftq("(hi mom")
     hi&mom <=> 'hi' & 'mom'
@@ -480,15 +414,15 @@
     hi&mom <=> 'hi' & 'mom'
 
     >>> ftq("(foo .") # Bug 43245
-    foo&. <=> 'foo'
+    foo <=> 'foo'
 
     >>> ftq("(foo.")
-    foo. <=> 'foo'
+    foo <=> 'foo'
 
     Bug #54972
 
     >>> ftq("a[a\n[a")
-    a[a&[a <=> None
+    ((a&a)|aa)&a <=> 'aa'
 
     Bug #96698
 
@@ -503,10 +437,10 @@
     Bug #160236
 
     >>> ftq("foo&&bar-baz")
-    foo&bar-baz <=> 'foo' & 'bar-baz' & 'bar' & 'baz'
+    foo&((bar&baz)|barbaz) <=> 'foo' & ( 'bar' & 'baz' | 'barbaz' )
 
     >>> ftq("foo||bar.baz")
-    foo|bar.baz <=> 'foo' | 'bar.baz'
+    foo|((bar&baz)|barbaz) <=> 'foo' | ( 'bar' & 'baz' | 'barbaz' )
 
 
 Phrase Searching
@@ -548,8 +482,7 @@
 
     >>> runsql(r"""
     ...   SELECT title, max(ranking) FROM (
-    ...    SELECT Bug.title,rank(Bug.fti||Message.fti,ftq('firefox'))
-    ...    AS ranking
+    ...    SELECT Bug.title,rank(Bug.fti||Message.fti,ftq('firefox')) AS ranking
     ...    FROM Bug, BugMessage, Message
     ...    WHERE Bug.id = BugMessage.bug AND Message.id = BugMessage.message
     ...       AND (Bug.fti @@ ftq('firefox') OR Message.fti @@ ftq('firefox'))
@@ -566,8 +499,7 @@
     ...       AND BugTask.product = Product.id
     ...       AND Product.name LIKE lower('%firefox%')
     ...    UNION
-    ...    SELECT Bug.title, rank(Product.fti, ftq('firefox')) - 0.3
-    ...    AS ranking
+    ...    SELECT Bug.title, rank(Product.fti, ftq('firefox')) - 0.3 AS ranking
     ...    FROM Bug, BugTask, Product
     ...    WHERE Bug.id = BugTask.bug
     ...       AND BugTask.product = Product.id
@@ -586,8 +518,7 @@
     Printing doesn't work     0.70
 
 
-Natural Language Phrase Query
------------------------------
+== Natural Language Phrase Query ==
 
 The standard boolean searches of tsearch2 are fine, but sometime you
 want more fuzzy searches.
@@ -626,8 +557,7 @@
 on Ubuntu) - so we are disabling this and reworking from the ground up.
 
 
-nl_term_candidates()
-~~~~~~~~~~~~~~~~~~~~
+=== nl_term_candidates() ===
 
 To find the terms in a search phrase that are canditates for the search,
 we can use the nl_term_candidates() function. This function uses ftq()
@@ -644,16 +574,19 @@
     >>> nl_term_candidates('how do I do this?')
     []
 
+We also handle expansion of hyphenated words (like ftq does):
+
+    >>> nl_term_candidates('firefox foo-bar give me trouble')
+    [u'firefox', u'foo', u'bar', u'foobar', u'give', u'troubl']
+
 Except for the hyphenation character, all non-word caracters are ignored:
 
     >>> nl_term_candidates(
-    ...     "Will the \'\'|\'\' character (inside a ''quoted'' string) "
-    ...     "work???")
+    ...     "Will the \'\'|\'\' character (inside a ''quoted'' string) work???")
     [u'charact', u'insid', u'quot', u'string', u'work']
 
 
-nl_phrase_search()
-~~~~~~~~~~~~~~~~~~
+=== nl_phrase_search() ===
 
 To get the actual tsearch2 query that should be run, you will use the
 nl_phrase_search() function. This one takes two mandatory parameters and
@@ -704,8 +637,7 @@
     u'slow|system'
 
 
-Using other constraints
-.......................
+==== Using other constraints ====
 
 You can pass a third parameter to the function that will be use as
 an additional constraint to determine the total number of rows that
@@ -727,8 +659,7 @@
 
     >>> nl_phrase_search(
     ...     'firefox gets very slow on flickr', Question,
-    ...     "Question.product = %s AND Product.active = 't'"
-    ...     % firefox_product.id,
+    ...     "Question.product = %s AND Product.active = 't'" % firefox_product.id,
     ...     ['Product'], fast_enabled=False)
     u'slow|flickr'
 
@@ -748,8 +679,7 @@
     u'(firefox&flickr&slow)|(flickr&slow)|(firefox&slow)|(firefox&flickr)'
 
 
-No keywords filtering with few rows
-...................................
+==== No keywords filtering with few rows ====
 
 The 50% rule is really useful only when there are many rows. When there
 only very few rows, that keyword elimination becomes a problem since


Follow ups