← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~cjwatson/launchpad/geoip-ip-address-handling into lp:launchpad

 

Colin Watson has proposed merging lp:~cjwatson/launchpad/geoip-ip-address-handling into lp:launchpad.

Commit message:
Revamp GeoIP IP address handling.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/geoip-ip-address-handling/+merge/369729

This removes the weird special case mapping private addresses to South Africa, and switches to the ipaddress module rather than ad-hoc private IP address detection, theoretically allowing us to handle IPv6 addresses too (although real IPv6 support requires switching to GeoIP2 data).

The ipaddress module was already in our virtualenv as an indirect dependency.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/geoip-ip-address-handling into lp:launchpad.
=== modified file 'lib/lp/answers/browser/tests/views.txt'
--- lib/lp/answers/browser/tests/views.txt	2018-12-07 13:24:51 +0000
+++ lib/lp/answers/browser/tests/views.txt	2019-07-04 18:55:19 +0000
@@ -573,14 +573,14 @@
 will contain languages from the HTTP request, or the most likely
 interesting languages based on GeoIP information.
 
-For example, if the user doesn't log in and their browser is configured to
-accept Brazilian Portuguese, the vocabulary will contain the languages
-spoken in South Africa (because the 127.0.0.1 IP address is mapped to
-South Africa in the tests).
+For example, if the user doesn't log in, their browser is configured to
+accept Brazilian Portuguese, and their request appears to come from a South
+African IP address, the vocabulary will contain the languages spoken in
+South Africa.
 
     >>> login(ANONYMOUS)
     >>> request = LaunchpadTestRequest(
-    ...     HTTP_ACCEPT_LANGUAGE='pt_BR')
+    ...     HTTP_ACCEPT_LANGUAGE='pt_BR', REMOTE_ADDR='196.36.161.227')
     >>> from lp.answers.browser.question import (
     ...     QuestionLanguageVocabularyFactory)
     >>> view = getMultiAdapter((firefox, request), name='+addticket')
@@ -676,7 +676,7 @@
 database.
 
     >>> request = LaunchpadTestRequest(
-    ...     HTTP_ACCEPT_LANGUAGE='fr, en_CA')
+    ...     HTTP_ACCEPT_LANGUAGE='fr, en_CA', REMOTE_ADDR='196.36.161.227')
 
     >>> login(ANONYMOUS)
     >>> view = UserSupportLanguagesView(None, request)

=== modified file 'lib/lp/answers/stories/question-search-multiple-languages.txt'
--- lib/lp/answers/stories/question-search-multiple-languages.txt	2019-05-22 14:57:45 +0000
+++ lib/lp/answers/stories/question-search-multiple-languages.txt	2019-07-04 18:55:19 +0000
@@ -3,6 +3,12 @@
 By default, only questions written in English or one of the user
 preferred languages are listed and searched.
 
+Make the test browsers look like they're coming from an arbitrary South
+African IP address, since we'll use that later.
+
+    >>> anon_browser.addHeader('X_FORWARDED_FOR', '196.36.161.227')
+    >>> user_browser.addHeader('X_FORWARDED_FOR', '196.36.161.227')
+
 
 == Anonymous searching ==
 
@@ -29,10 +35,9 @@
     ...     print(question.find('a').renderContents())
     Installation failed
 
-The questions match the languages inferred from by GeoIP
-127.0.0.1 is mapped to South Africa in the test suite. The language
-control shows the intersection of the user's languages and the
-languages of the target's questions. In this example:
+The questions match the languages inferred from GeoIP (South Africa in
+this case). The language control shows the intersection of the user's
+languages and the languages of the target's questions. In this example:
 set(['af', 'en', 'st', 'xh', 'zu']) & set(['en', 'es']) == set(en).
 
     >>> sorted(anon_browser.getControl(name='field.language').options)
@@ -113,6 +118,10 @@
 anonymous user makes a request from a GeoIP that has no languages
 mapped, we assume they speak the default language of English.
 
+    >>> for pos, (key, _) in enumerate(anon_browser.mech_browser.addheaders):
+    ...     if key == 'X_FORWARDED_FOR':
+    ...         del anon_browser.mech_browser.addheaders[pos]
+    ...         break
     >>> anon_browser.addHeader('X_FORWARDED_FOR', '172.16.1.1')
     >>> anon_browser.open(
     ...     'http://launchpad.test/ubuntu/+source/mozilla-firefox/+questions')
@@ -166,6 +175,10 @@
 When the project languages are just English, and the user speaks
 that language, we do not show the language controls.
 
+    >>> for pos, (key, _) in enumerate(user_browser.mech_browser.addheaders):
+    ...     if key == 'X_FORWARDED_FOR':
+    ...         del user_browser.mech_browser.addheaders[pos]
+    ...         break
     >>> user_browser.addHeader('X_FORWARDED_FOR', '172.16.1.1')
     >>> user_browser.open(
     ...     'http://launchpad.test/ubuntu/+source/mozilla-firefox/+questions')

=== modified file 'lib/lp/services/geoip/configure.zcml'
--- lib/lp/services/geoip/configure.zcml	2010-10-25 05:33:20 +0000
+++ lib/lp/services/geoip/configure.zcml	2019-07-04 18:55:19 +0000
@@ -1,4 +1,4 @@
-<!-- Copyright 2009-2010 Canonical Ltd.  This software is licensed under the
+<!-- Copyright 2009-2019 Canonical Ltd.  This software is licensed under the
      GNU Affero General Public License version 3 (see the file LICENSE).
 -->
 
@@ -34,9 +34,4 @@
         factory="lp.services.geoip.helpers.request_country"
         provides="lp.services.worlddata.interfaces.country.ICountry" />
 
-    <adapter
-        for="zope.publisher.interfaces.browser.IBrowserRequest"
-        factory="lp.services.geoip.model.GeoIPRequest"
-        provides="lp.services.geoip.interfaces.IGeoIPRecord" />
-
 </configure>

=== modified file 'lib/lp/services/geoip/doc/geoip.txt'
--- lib/lp/services/geoip/doc/geoip.txt	2016-01-26 15:47:37 +0000
+++ lib/lp/services/geoip/doc/geoip.txt	2019-07-04 18:55:19 +0000
@@ -15,26 +15,30 @@
     u'Brazil'
 
 When running tests the IP address will start with '127.', and GeoIP
-would, obviously, fail to find the country for, so we use a South
-African IP address in that case.
+would, obviously, fail to find the country for that, so we return None.
 
-    >>> geoip.getCountryByAddr('127.0.0.88').name
-    u'South Africa'
+    >>> print(geoip.getCountryByAddr('127.0.0.88'))
+    None
 
 We do the same trick for any IP addresses on private networks.
 
-    >>> geoip.getCountryByAddr('10.0.0.88').name
-    u'South Africa'
-
-    >>> geoip.getCountryByAddr('192.168.0.7').name
-    u'South Africa'
-
-    >>> geoip.getCountryByAddr('172.16.0.1').name
-    u'South Africa'
-
-IGeoIP also provides a getRecordByAddress() method, which returns a
-GeoIPRecord object, containing a bunch more information about that IP's
-location.
+    >>> print(geoip.getCountryByAddr('10.0.0.88'))
+    None
+
+    >>> print(geoip.getCountryByAddr('192.168.0.7'))
+    None
+
+    >>> print(geoip.getCountryByAddr('172.16.0.1'))
+    None
+
+    >>> print(geoip.getCountryByAddr('::1'))
+    None
+
+    >>> print(geoip.getCountryByAddr('fc00::1'))
+    None
+
+IGeoIP also provides a getRecordByAddress() method, which returns a dict
+containing a bunch more information about that IP's location.
 
     >>> record = geoip.getRecordByAddress('201.13.165.145')
     >>> for key, value in sorted(record.items()):
@@ -52,61 +56,13 @@
     region_name: ...
     time_zone: ...
 
-And again we'll use a South African IP if the address starts with '127.'.
-
-    >>> record = geoip.getRecordByAddress('127.0.0.1')
-    >>> for key, value in sorted(record.items()):
-    ...     print "%s: %s" % (key, value)
-    area_code: ...
-    city: ...
-    country_code: ZA
-    country_code3: ZAF
-    country_name: South Africa
-    dma_code: ...
-    latitude: ...
-    longitude: ...
-    postal_code: ...
-    region: ...
-    region_name: ...
-    time_zone: ...
-
-If it can't find a GeoIPRecord for the given IP address, it will return
+And again we'll return None if the address is private.
+
+    >>> print(geoip.getRecordByAddress('127.0.0.1'))
+    None
+
+If it can't find a GeoIP record for the given IP address, it will return
 None.
 
     >>> print geoip.getRecordByAddress('255.255.255.255')
     None
-
-We also have GeoIPRequest, which adapts an IBrowserRequest into an
-IGeoIPRecord, providing the latitude, longitude and time zone of the
-request's originating IP address.
-
-    >>> from lp.services.webapp.servers import LaunchpadTestRequest
-    >>> from lp.services.geoip.interfaces import IGeoIPRecord
-    >>> request = LaunchpadTestRequest()
-
-Since our request won't have an originating IP address, we'll use that
-same South African IP address.
-
-    >>> from lp.services.geoip.helpers import ipaddress_from_request
-    >>> print ipaddress_from_request(request)
-    None
-    >>> geoip_request = IGeoIPRecord(request)
-    >>> geoip_request.latitude
-    -33...
-    >>> geoip_request.longitude
-    18...
-    >>> geoip_request.time_zone
-    'Africa/Johannesburg'
-
-If the request had an originating IP address, though, it'd be used when
-we adapted it into an IGeoIPRecord.
-
-    >>> request = LaunchpadTestRequest(
-    ...     environ={'REMOTE_ADDR': '201.13.165.145'})
-    >>> geoip_request = IGeoIPRecord(request)
-    >>> geoip_request.latitude
-    -23...
-    >>> geoip_request.longitude
-    -45...
-    >>> geoip_request.time_zone in ('Brazil/Acre', 'America/Sao_Paulo')
-    True

=== modified file 'lib/lp/services/geoip/helpers.py'
--- lib/lp/services/geoip/helpers.py	2012-01-06 11:08:30 +0000
+++ lib/lp/services/geoip/helpers.py	2019-07-04 18:55:19 +0000
@@ -1,10 +1,10 @@
-# Copyright 2009 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2019 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 __metaclass__ = type
 
-import re
-
+import ipaddress
+import six
 from zope.component import getUtility
 
 from lp.services.geoip.interfaces import IGeoIP
@@ -12,6 +12,7 @@
 
 __all__ = [
     'request_country',
+    'ipaddress_is_global',
     'ipaddress_from_request',
     ]
 
@@ -25,39 +26,48 @@
     This information is not reliable and trivially spoofable - use it only
     for selecting sane defaults.
     """
-    ipaddress = ipaddress_from_request(request)
-    if ipaddress is not None:
-        return getUtility(IGeoIP).getCountryByAddr(ipaddress)
+    ip_address = ipaddress_from_request(request)
+    if ip_address is not None:
+        return getUtility(IGeoIP).getCountryByAddr(ip_address)
     return None
 
 
-_ipaddr_re = re.compile('\d\d?\d?\.\d\d?\d?\.\d\d?\d?\.\d\d?\d?')
+def ipaddress_is_global(addr):
+    """Return True iff the IP address is on a global public network."""
+    try:
+        return ipaddress.ip_address(six.ensure_text(addr)).is_global
+    except ValueError:
+        return False
 
 
 def ipaddress_from_request(request):
     """Determine the IP address for this request.
 
-    Returns None if the IP address cannot be determined or is localhost.
+    Returns None if the IP address cannot be determined or is not on a
+    global public network.
 
     The remote IP address is determined by the X-Forwarded-For: header,
     or failing that, the REMOTE_ADDR CGI environment variable.
 
     Because this information is unreliable and trivially spoofable, we
-    don't bother to do much error checking to ensure the IP address is at all
-    valid.
+    don't bother to do much error checking to ensure the IP address is at
+    all valid, beyond what the ipaddress module gives us.
 
-    >>> google = '66.102.7.104'
     >>> ipaddress_from_request({'REMOTE_ADDR': '1.1.1.1'})
     '1.1.1.1'
     >>> ipaddress_from_request({
-    ...     'HTTP_X_FORWARDED_FOR': '666.666.666.666',
+    ...     'HTTP_X_FORWARDED_FOR': '66.66.66.66',
     ...     'REMOTE_ADDR': '1.1.1.1'
     ...     })
-    '666.666.666.666'
+    '66.66.66.66'
     >>> ipaddress_from_request({'HTTP_X_FORWARDED_FOR':
     ...     'localhost, 127.0.0.1, 255.255.255.255,1.1.1.1'
     ...     })
-    '255.255.255.255'
+    '1.1.1.1'
+    >>> ipaddress_from_request({
+    ...     'HTTP_X_FORWARDED_FOR': 'nonsense',
+    ...     'REMOTE_ADDR': '1.1.1.1'
+    ...     })
     """
     ipaddresses = request.get('HTTP_X_FORWARDED_FOR')
 
@@ -70,10 +80,7 @@
     # We actually get a comma separated list of addresses. We need to throw
     # away the obvious duds, such as loopback addresses
     ipaddresses = [addr.strip() for addr in ipaddresses.split(',')]
-    ipaddresses = [
-        addr for addr in ipaddresses
-            if not (addr.startswith('127.')
-                    or _ipaddr_re.search(addr) is None)]
+    ipaddresses = [addr for addr in ipaddresses if ipaddress_is_global(addr)]
 
     if ipaddresses:
         # If we have more than one, have a guess.

=== modified file 'lib/lp/services/geoip/interfaces.py'
--- lib/lp/services/geoip/interfaces.py	2013-01-07 02:40:55 +0000
+++ lib/lp/services/geoip/interfaces.py	2019-07-04 18:55:19 +0000
@@ -1,15 +1,11 @@
-# Copyright 2009 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2019 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
-from zope.interface import (
-    Attribute,
-    Interface,
-    )
+from zope.interface import Interface
 
 
 __all__ = [
     'IGeoIP',
-    'IGeoIPRecord',
     'IRequestLocalLanguages',
     'IRequestPreferredLanguages',
     ]
@@ -19,7 +15,7 @@
     """The GeoIP utility, which represents the GeoIP database."""
 
     def getRecordByAddress(ip_address):
-        """Return the IGeoIPRecord for the given IP address, or None."""
+        """Return the GeoIP record for the given IP address, or None."""
 
     def getCountryByAddr(ip_address):
         """Find and return an ICountry based on the given IP address.
@@ -29,18 +25,6 @@
         """
 
 
-class IGeoIPRecord(Interface):
-    """A single record in the GeoIP database.
-
-    A GeoIP record gathers together all of the relevant information for a
-    single IP address or machine name in the DNS.
-    """
-
-    latitude = Attribute("The geographic latitude, in real degrees.")
-    latitude = Attribute("The geographic longitude, in real degrees.")
-    time_zone = Attribute("The time zone.")
-
-
 class IRequestLocalLanguages(Interface):
 
     def getLocalLanguages():

=== modified file 'lib/lp/services/geoip/model.py'
--- lib/lp/services/geoip/model.py	2015-07-08 16:05:11 +0000
+++ lib/lp/services/geoip/model.py	2019-07-04 18:55:19 +0000
@@ -1,9 +1,8 @@
-# Copyright 2009 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2019 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 __all__ = [
     'GeoIP',
-    'GeoIPRequest',
     'RequestLocalLanguages',
     'RequestPreferredLanguages',
     ]
@@ -16,10 +15,12 @@
 from zope.interface import implementer
 
 from lp.services.config import config
-from lp.services.geoip.helpers import ipaddress_from_request
+from lp.services.geoip.helpers import (
+    ipaddress_from_request,
+    ipaddress_is_global,
+    )
 from lp.services.geoip.interfaces import (
     IGeoIP,
-    IGeoIPRecord,
     IRequestLocalLanguages,
     IRequestPreferredLanguages,
     )
@@ -42,7 +43,8 @@
 
     def getRecordByAddress(self, ip_address):
         """See `IGeoIP`."""
-        ip_address = ensure_address_is_not_private(ip_address)
+        if not ipaddress_is_global(ip_address):
+            return None
         try:
             return self._gi.record_by_addr(ip_address)
         except SystemError:
@@ -53,7 +55,8 @@
 
     def getCountryByAddr(self, ip_address):
         """See `IGeoIP`."""
-        ip_address = ensure_address_is_not_private(ip_address)
+        if not ipaddress_is_global(ip_address):
+            return None
         geoip_record = self.getRecordByAddress(ip_address)
         if geoip_record is None:
             return None
@@ -68,44 +71,6 @@
             return country
 
 
-@implementer(IGeoIPRecord)
-class GeoIPRequest:
-    """An adapter for a BrowserRequest into an IGeoIPRecord."""
-
-    def __init__(self, request):
-        self.request = request
-        ip_address = ipaddress_from_request(self.request)
-        if ip_address is None:
-            # This happens during page testing, when the REMOTE_ADDR is not
-            # set by Zope.
-            ip_address = '127.0.0.1'
-        ip_address = ensure_address_is_not_private(ip_address)
-        self.ip_address = ip_address
-        self.geoip_record = getUtility(IGeoIP).getRecordByAddress(
-            self.ip_address)
-
-    @property
-    def latitude(self):
-        """See `IGeoIPRecord`."""
-        if self.geoip_record is None:
-            return None
-        return self.geoip_record['latitude']
-
-    @property
-    def longitude(self):
-        """See `IGeoIPRecord`."""
-        if self.geoip_record is None:
-            return None
-        return self.geoip_record['longitude']
-
-    @property
-    def time_zone(self):
-        """See `IGeoIPRecord`."""
-        if self.geoip_record is None:
-            return None
-        return self.geoip_record['time_zone']
-
-
 @implementer(IRequestLocalLanguages)
 class RequestLocalLanguages(object):
 
@@ -168,27 +133,5 @@
         return sorted(languages, key=lambda x: x.englishname)
 
 
-def ensure_address_is_not_private(ip_address):
-    """Return the given IP address if it doesn't start with '127.'.
-
-    If it does start with '127.' then we return a South African IP address.
-    Notice that we have no specific reason for using a South African IP
-    address here -- we could have used any other non-private IP address.
-    """
-    private_prefixes = (
-        '127.',
-        '192.168.',
-        '172.16.',
-        '10.',
-        )
-
-    for prefix in private_prefixes:
-        if ip_address.startswith(prefix):
-            # This is an arbitrary South African IP which was handy at the
-            # time of writing; it's not special in any way.
-            return '196.36.161.227'
-    return ip_address
-
-
 class NoGeoIPDatabaseFound(Exception):
     """No GeoIP database was found."""

=== modified file 'setup.py'
--- setup.py	2019-06-18 16:52:56 +0000
+++ setup.py	2019-07-04 18:55:19 +0000
@@ -164,6 +164,7 @@
         'gunicorn[gthread]',
         'html5browser',
         'importlib-resources',
+        'ipaddress',
         'ipython',
         'jsautobuild',
         'launchpad-buildd',


Follow ups