← Back to team overview

gtg-contributors team mailing list archive

[Merge] lp:~gtg-contributors/gtg/gtg_url_autolink into lp:gtg

 

Madhumitha Viswanathan has proposed merging lp:~gtg-contributors/gtg/gtg_url_autolink into lp:gtg.

Requested reviews:
  Gtg developers (gtg)
Related bugs:
  Bug #767578 in Getting Things GNOME!: "Special Characters at the end of URLs parsed incorrectly"
  https://bugs.launchpad.net/gtg/+bug/767578

For more details, see:
https://code.launchpad.net/~gtg-contributors/gtg/gtg_url_autolink/+merge/86138

Fixed URL Autolinking for http(s) schemes. Related bug: https://bugs.launchpad.net/gtg/+bug/767578
-- 
https://code.launchpad.net/~gtg-contributors/gtg/gtg_url_autolink/+merge/86138
Your team Gtg contributors is subscribed to branch lp:~gtg-contributors/gtg/gtg_url_autolink.
=== modified file 'CHANGELOG'
--- CHANGELOG	2011-11-19 21:41:08 +0000
+++ CHANGELOG	2011-12-17 11:16:24 +0000
@@ -1,4 +1,6 @@
+
 ????-??-?? Getting Things GNOME! ?.?.?
+    * Fixed url autolinking for http(s) schemes, by Madhumitha Viswanathan
     * Added Mantis Bug Tracker backend, by Alayn Gortazar
     * Added search feature, by João Ascenso
     * Fixed crash traceback when pressing 'delete' key, by Jeff Oliver

=== modified file 'GTG/gtk/editor/taskview.py'
--- GTG/gtk/editor/taskview.py	2011-11-28 23:06:56 +0000
+++ GTG/gtk/editor/taskview.py	2011-12-17 11:16:24 +0000
@@ -37,11 +37,9 @@
 
 from GTG.gtk.editor import taskviewserial
 from GTG.tools      import openurl
-
+from GTG.tools      import urlregex
 
 separators = [' ', '.', ',', '/', '\n', '\t', '!', '?', ';', '\0']
-url_separators = [' ', ',', '\n', '\t', '\0']
-
 
 bullet1_ltr = '→'
 bullet1_rtl = '←'
@@ -561,13 +559,21 @@
             prev = it.copy()
             prev.backward_word_start()
             text = buff.get_text(prev,it)
-            if text in ["http","https"]:
-                while it.get_char() not in url_separators and (it.get_char() != '\0') :
+            numchar = 0            
+            
+            if text in ["http","https","www"]:
+                while it.get_char() and it.get_char()!='\0':
                     it.forward_char()
-                url = buff.get_text(prev,it)
-                if url.startswith("http://") or url.startswith("https://") :
+                isurl = buff.get_text(prev,it)
+
+                m = urlregex.match(isurl)
+                if m is not None:
+                    url = isurl[:m.end()] 
                     texttag = self.create_anchor_tag(buff,url,text=None,typ="http")
+                    it = prev.copy()
+                    it.forward_chars(m.end())
                     buff.apply_tag(texttag, prev , it)
+
             elif text in ["bug","lp","bgo","fdo", "bko"] :
                 if it.get_char() == " " :
                     it.forward_char()

=== added file 'GTG/tools/urlregex.py'
--- GTG/tools/urlregex.py	1970-01-01 00:00:00 +0000
+++ GTG/tools/urlregex.py	2011-12-17 11:16:24 +0000
@@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+# -----------------------------------------------------------------------------
+# Getting Things GNOME! - a personal organizer for the GNOME desktop
+# Copyright (c) 2008-2009 - Lionel Dricot & Bertrand Rousseau
+#
+# This program is free software: you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation, either version 3 of the License, or (at your option) any later
+# version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program.  If not, see <http://www.gnu.org/licenses/>.
+# -----------------------------------------------------------------------------
+
+"""
+Detects urls using regex
+"""
+
+import re
+
+# Word characters: ASCII letters/digits, underscore and accented Latin-1 letters
+UTF_CHARS = ur'a-z0-9_\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u00ff'
+# URLs
+PRE_CHARS = ur'(?:[^/"\':!=]|^|\:)'
+DOMAIN_CHARS = ur'([\.-]|[^\s_\!\.\/])+\.[a-z]{2,}(?::[0-9]+)?'
+PATH_CHARS = ur'(?:[\.,]?[%s!\*\'\(\);:=\+\$/%s#\[\]\-_,~@])' % (UTF_CHARS, '%')
+QUERY_CHARS = ur'[a-z0-9!\*\'\(\);:&=\+\$/%#\[\]\-_\.,~]'
+
+# Valid end-of-path characters (so /foo. does not gobble the period).
+PATH_ENDING_CHARS = r'[%s\)=#/]' % UTF_CHARS
+QUERY_ENDING_CHARS = '[a-z0-9_&=#]'
+
+URL_REGEX = re.compile('((%s)((https?://|www\\.)(%s)(\/%s*%s?)?(\?%s*%s)?))'
+                       % (PRE_CHARS, DOMAIN_CHARS, PATH_CHARS,
+                         PATH_ENDING_CHARS, QUERY_CHARS, QUERY_ENDING_CHARS),
+                         re.IGNORECASE)
+
+
+def match(text):
+    return re.match(URL_REGEX, text)


Follow ups