launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #18927
[Merge] lp:~cjwatson/launchpad/destroy-ascii-smash into lp:launchpad
Colin Watson has proposed merging lp:~cjwatson/launchpad/destroy-ascii-smash into lp:launchpad.
Commit message:
Perform proper RFC2047-encoding of mail notification headers, and remove ascii_smash.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
Related bugs:
Bug #362957 in Launchpad itself: "Ridiculous unicode transliteration on -changes announce list From: header"
https://bugs.launchpad.net/launchpad/+bug/362957
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/destroy-ascii-smash/+merge/264039
ascii_smash made some sense for shipit, where there were third-party limitations on label printing; but now that shipit is gone, there is no valid justification for using it anywhere in Launchpad. Its rendering of Arabic text is particularly terrible. I've replaced all uses of ascii_smash with either straightforward Unicode or RFC2047-encoding.
The Soyuz case requires delicate treatment, as we need fix_maintainer to cope with the particular variant of RFC822 used in Maintainer and Changed-By fields, but it isn't necessarily possible to run the RFC822 output of fix_maintainer back through fix_maintainer, because it may output a parenthesised form rather than an angle-bracketed form. I avoided this problem by keeping track of the e-mail address on its own in a few more places, which is enough for a Person lookup.
--
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/destroy-ascii-smash into lp:launchpad.
=== modified file 'lib/lp/answers/doc/person.txt'
--- lib/lp/answers/doc/person.txt 2013-05-01 00:23:31 +0000
+++ lib/lp/answers/doc/person.txt 2015-07-07 13:35:34 +0000
@@ -170,13 +170,12 @@
But Carlos has one.
# Because not everyone uses a real editor <wink>
- >>> from lp.services.encoding import ascii_smash
>>> carlos_raw = personset.getByName('carlos')
>>> carlos = IQuestionsPerson(carlos_raw)
>>> for question in carlos.searchQuestions(
... language=(english, spanish)):
- ... print ascii_smash(question.title), question.language.code
- Problema al recompilar kernel con soporte smp (doble-nucleo) es
+ ... print question.title, question.language.code
+ Problema al recompilar kernel con soporte smp (doble-núcleo) es
Questions needing attention
=== modified file 'lib/lp/answers/doc/questionsets.txt'
--- lib/lp/answers/doc/questionsets.txt 2013-05-01 00:23:31 +0000
+++ lib/lp/answers/doc/questionsets.txt 2015-07-07 13:35:34 +0000
@@ -48,16 +48,15 @@
regular full text algorithm.
# Because not everyone uses a real editor <wink>
- >>> from lp.services.encoding import ascii_smash
>>> for question in question_set.searchQuestions(search_text=u'firefox'):
- ... print ascii_smash(question.title), question.target.displayname
- Problemas de Impressao no Firefox Mozilla Firefox
+ ... print question.title, question.target.displayname
+ Problemas de Impressão no Firefox Mozilla Firefox
Firefox loses focus and gets stuck Mozilla Firefox
Firefox cannot render Bank Site Mozilla Firefox
mailto: problem in webpage mozilla-firefox in Ubuntu
Newly installed plug-in doesn't seem to be used Mozilla Firefox
Problem showing the SVG demo on W3C site Mozilla Firefox
- AINKAFSEEN ALEFLAMTEHGHAINYEHYEHREHALEFTEH ... Ubuntu
+ عكس التغييرات غير المحفوظة للمستن؟ Ubuntu
Status
@@ -93,8 +92,8 @@
>>> from lp.services.worlddata.interfaces.language import ILanguageSet
>>> spanish = getUtility(ILanguageSet)['es']
>>> for t in question_set.searchQuestions(language=spanish):
- ... print ascii_smash(t.title)
- Problema al recompilar kernel con soporte smp (doble-nucleo)
+ ... print t.title
+ Problema al recompilar kernel con soporte smp (doble-núcleo)
Combinations
@@ -106,14 +105,14 @@
>>> for question in question_set.searchQuestions(
... search_text=u'firefox',
... status=(QuestionStatus.OPEN, QuestionStatus.INVALID)):
- ... print ascii_smash(question.title), question.status.title, (
+ ... print question.title, question.status.title, (
... question.target.displayname)
- Problemas de Impressao no Firefox Open Mozilla Firefox
+ Problemas de Impressão no Firefox Open Mozilla Firefox
Firefox is slow and consumes too much ... mozilla-firefox in Ubuntu
Firefox loses focus and gets stuck Open Mozilla Firefox
Firefox cannot render Bank Site Open Mozilla Firefox
Problem showing the SVG demo on W3C site Open Mozilla Firefox
- AINKAFSEEN ALEFLAMTEHGHAINYEHYEHREHALEFTEH ... Ubuntu
+ عكس التغييرات غير المحفوظة للمستن؟ Open Ubuntu
Sort order
@@ -126,24 +125,24 @@
>>> from lp.answers.enums import QuestionSort
>>> for question in question_set.searchQuestions(
... search_text=u'firefox', sort=QuestionSort.OLDEST_FIRST):
- ... print question.id, ascii_smash(question.title), (
+ ... print question.id, question.title, (
... question.target.displayname)
- 14 AINKAFSEEN ALEFLAMTEHGHAINYEHYEHREHALEFTEH ... Ubuntu
+ 14 عكس التغييرات غير المحفوظة للمستن؟ Ubuntu
1 Firefox cannot render Bank Site Mozilla Firefox
2 Problem showing the SVG demo on W3C site Mozilla Firefox
4 Firefox loses focus and gets stuck Mozilla Firefox
6 Newly installed plug-in doesn't seem to be used Mozilla Firefox
9 mailto: problem in webpage mozilla-firefox in Ubuntu
- 13 Problemas de Impressao no Firefox Mozilla Firefox
+ 13 Problemas de Impressão no Firefox Mozilla Firefox
When no text search is done, the default sort order is by newest first.
>>> for question in question_set.searchQuestions(
... status=QuestionStatus.OPEN)[:5]:
- ... print question.id, ascii_smash(question.title), (
+ ... print question.id, question.title, (
... question.target.displayname)
- 13 Problemas de Impressao no Firefox Mozilla Firefox
- 12 Problema al recompilar kernel con soporte smp (doble-nucleo) Ubuntu
+ 13 Problemas de Impressão no Firefox Mozilla Firefox
+ 12 Problema al recompilar kernel con soporte smp (doble-núcleo) Ubuntu
11 Continue playing after shutdown Ubuntu
5 Installation failed Ubuntu
4 Firefox loses focus and gets stuck Mozilla Firefox
=== modified file 'lib/lp/app/stories/launchpad-root/site-search.txt'
--- lib/lp/app/stories/launchpad-root/site-search.txt 2013-09-27 04:13:23 +0000
+++ lib/lp/app/stories/launchpad-root/site-search.txt 2015-07-07 13:35:34 +0000
@@ -5,8 +5,6 @@
specific search with Launchpad's prominent objects (projects, bugs,
teams, etc.).
- >>> from lp.services.encoding import ascii_smash
-
# Our very helpful function for printing all the page results.
>>> def print_search_results(contents=None):
@@ -14,7 +12,7 @@
... contents = anon_browser.contents
... tag = find_tag_by_id(contents, 'search-results')
... if tag:
- ... print ascii_smash(extract_text(tag))
+ ... print extract_text(tag)
# Another helper to make searching convenient.
=== modified file 'lib/lp/archiveuploader/tests/nascentupload-announcements.txt'
--- lib/lp/archiveuploader/tests/nascentupload-announcements.txt 2014-11-08 23:53:17 +0000
+++ lib/lp/archiveuploader/tests/nascentupload-announcements.txt 2015-07-07 13:35:34 +0000
@@ -512,7 +512,7 @@
DEBUG * Changer using non-preferred email
DEBUG
DEBUG Date: Tue, 25 Apr 2006 10:36:14 -0300
- DEBUG Changed-By: Celso R. Providelo <cprov@xxxxxxxxxx>
+ DEBUG Changed-By: cprov@xxxxxxxxxx (Celso R. Providelo)
DEBUG Maintainer: Launchpad team <launchpad@xxxxxxxxxxxxxxxxxxx>
DEBUG http://launchpad.dev/ubuntu/+source/bar/1.0-4
DEBUG
@@ -679,8 +679,7 @@
0
Uploads with UTF-8 characters in email addresses in the changes file are
-permitted, but converted to ASCII, which is a limitation of the mailer.
-However, UTF-8 in the mail content is preserved.
+permitted, but RFC2047-encoded. UTF-8 in the mail content is preserved.
>>> hoary.status = SeriesStatus.DEVELOPMENT
>>> anything_policy = getPolicy(
@@ -701,10 +700,8 @@
>>> len(msgs)
2
-"Cihar" should actually be "Čihař" but the mailer will convert to ASCII.
-
- >>> [message['From'] for message in msgs]
- ['Root <root@localhost>', 'Non-ascii changed-by Cihar
+ >>> [message['From'].replace('\n ', ' ') for message in msgs]
+ ['Root <root@localhost>', '=?utf-8?q?Non-ascii_changed-by_=C4=8Ciha=C5=99?=
<daniel.silverstone@xxxxxxxxxxxxx>']
UTF-8 text in the changes file that is sent on the email is preserved
=== modified file 'lib/lp/archiveuploader/tests/safe_fix_maintainer.txt'
--- lib/lp/archiveuploader/tests/safe_fix_maintainer.txt 2010-07-24 09:12:37 +0000
+++ lib/lp/archiveuploader/tests/safe_fix_maintainer.txt 2015-07-07 13:35:34 +0000
@@ -1,41 +1,43 @@
-Test some utils method inheritaded from DAK:
+Test some utils method inherited from DAK:
safe_fix_maintainer() is a function used to sanitise the
identification fields coming from the Debian control files (changes
and dsc). It allows safe unicode and non-unicode inputs.
- >>> from lp.archiveuploader.utils import (
- ... safe_fix_maintainer)
+ >>> from lp.archiveuploader.utils import safe_fix_maintainer
- >>> maintainer_field = 'maintainer'
- >>> changer_field = 'changed-by'
+ >>> maintainer_field = 'maintainer'
+ >>> changer_field = 'changed-by'
Pure ASCII content using the two available fieldname (pretty much the same)
- >>> content = 'Hello World <hello@xxxxxxxxx>'
- >>> safe_fix_maintainer(content, maintainer_field)
- ('Hello World <hello@xxxxxxxxx>', 'Hello World <hello@xxxxxxxxx>', 'Hello World', 'hello@xxxxxxxxx')
+ >>> content = 'Hello World <hello@xxxxxxxxx>'
+ >>> safe_fix_maintainer(content, maintainer_field)
+ ('Hello World <hello@xxxxxxxxx>', 'Hello World <hello@xxxxxxxxx>',
+ 'Hello World', 'hello@xxxxxxxxx')
- >>> content = 'Hello World <hello@xxxxxxxxx>'
- >>> safe_fix_maintainer(content, changer_field)
- ('Hello World <hello@xxxxxxxxx>', 'Hello World <hello@xxxxxxxxx>', 'Hello World', 'hello@xxxxxxxxx')
+ >>> content = 'Hello World <hello@xxxxxxxxx>'
+ >>> safe_fix_maintainer(content, changer_field)
+ ('Hello World <hello@xxxxxxxxx>', 'Hello World <hello@xxxxxxxxx>',
+ 'Hello World', 'hello@xxxxxxxxx')
Passing Unicode:
- # XXX cprov 2006-02-20 bug=32148: Not sure if it is working properly,
- # at least doesn't raise any exception like in bug #32148.
+ # XXX cprov 2006-02-20 bug=32148: Not sure if it is working properly,
+ # at least doesn't raise any exception like in bug #32148.
- >>> content = u'Rapha\xc3l Pinson <raphink@xxxxxxxxxx>'
- >>> safe_fix_maintainer(content, maintainer_field)
- ('RaphaAl Pinson <raphink@xxxxxxxxxx>', 'RaphaAl Pinson <raphink@xxxxxxxxxx>', 'RaphaAl Pinson', 'raphink@xxxxxxxxxx')
+ >>> content = u'Rapha\xc3l Pinson <raphink@xxxxxxxxxx>'
+ >>> safe_fix_maintainer(content, maintainer_field)
+ ('Rapha\xc3\x83l Pinson <raphink@xxxxxxxxxx>',
+ '=?utf-8?q?Rapha=C3=83l_Pinson?= <raphink@xxxxxxxxxx>',
+ 'Rapha\xc3\x83l Pinson', 'raphink@xxxxxxxxxx')
Passing latin encoded string:
- >>> content = 'Rapha\xebl Pinson <raphink@xxxxxxxxxx>'
- >>> safe_fix_maintainer(content, maintainer_field)
- ('Raphael Pinson <raphink@xxxxxxxxxx>', 'Raphael Pinson <raphink@xxxxxxxxxx>', 'Raphael Pinson', 'raphink@xxxxxxxxxx')
-
-
-
+ >>> content = 'Rapha\xebl Pinson <raphink@xxxxxxxxxx>'
+ >>> safe_fix_maintainer(content, maintainer_field)
+ ('Rapha\xc3\xabl Pinson <raphink@xxxxxxxxxx>',
+ '=?utf-8?q?Rapha=C3=ABl_Pinson?= <raphink@xxxxxxxxxx>',
+ 'Rapha\xc3\xabl Pinson', 'raphink@xxxxxxxxxx')
=== modified file 'lib/lp/archiveuploader/utils.py'
--- lib/lp/archiveuploader/utils.py 2015-03-13 19:05:50 +0000
+++ lib/lp/archiveuploader/utils.py 2015-07-07 13:35:34 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2012 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2015 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Archive uploader utilities."""
@@ -37,10 +37,7 @@
import signal
import subprocess
-from lp.services.encoding import (
- ascii_smash,
- guess as guess_encoding,
- )
+from lp.services.encoding import guess as guess_encoding
from lp.soyuz.enums import BinaryPackageFileType
@@ -269,15 +266,13 @@
def safe_fix_maintainer(content, fieldname):
"""Wrapper for fix_maintainer() to handle unicode and string argument.
- It verifies the content type and transform it in a unicode with guess()
- before call ascii_smash(). Then we can safely call fix_maintainer().
+ It verifies the content type and transforms it to a unicode with
+ guess(). Then we can safely call fix_maintainer().
"""
if type(content) != unicode:
content = guess_encoding(content)
- content = ascii_smash(content)
-
- return fix_maintainer(content, fieldname)
+ return fix_maintainer(content.encode("utf-8"), fieldname)
def extract_dpkg_source(dsc_filepath, target, vendor=None):
=== modified file 'lib/lp/services/encoding.py'
--- lib/lp/services/encoding.py 2011-12-19 23:38:16 +0000
+++ lib/lp/services/encoding.py 2015-07-07 13:35:34 +0000
@@ -1,20 +1,17 @@
-# Copyright 2009 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2015 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Character encoding utilities"""
__metaclass__ = type
__all__ = [
- 'ascii_smash',
'escape_nonascii_uniquely',
'guess',
'is_ascii_only',
]
import codecs
-from cStringIO import StringIO
import re
-import unicodedata
_boms = [
@@ -156,202 +153,6 @@
return unicode(s, 'ISO-8859-1', 'replace')
-def ascii_smash(unicode_string):
- """Attempt to convert the Unicode string, possibly containing accents,
- to an ASCII string.
-
- This is used for generating shipping labels because our shipping company
- can only deal with ASCII despite being European :-/
-
- ASCII goes through just fine
-
- >>> ascii_smash(u"Hello")
- 'Hello'
-
- Latin-1 accented characters have their accents stripped.
-
- >>> ascii_smash(u"Ol\N{LATIN SMALL LETTER E WITH ACUTE}")
- 'Ole'
- >>> ascii_smash(u"\N{LATIN CAPITAL LETTER A WITH RING ABOVE}iste")
- 'Aiste'
- >>> ascii_smash(
- ... u"\N{LATIN SMALL LETTER AE}"
- ... u"\N{LATIN SMALL LETTER I WITH GRAVE}"
- ... u"\N{LATIN SMALL LETTER O WITH STROKE}"
- ... u"\N{LATIN SMALL LETTER U WITH CIRCUMFLEX}"
- ... )
- 'aeiou'
- >>> ascii_smash(
- ... u"\N{LATIN CAPITAL LETTER AE}"
- ... u"\N{LATIN CAPITAL LETTER I WITH GRAVE}"
- ... u"\N{LATIN CAPITAL LETTER O WITH STROKE}"
- ... u"\N{LATIN CAPITAL LETTER U WITH TILDE}"
- ... )
- 'AEIOU'
- >>> ascii_smash(u"Stra\N{LATIN SMALL LETTER SHARP S}e")
- 'Strasse'
-
- Moving further into Eastern Europe we get more odd letters
-
- >>> ascii_smash(
- ... u"\N{LATIN CAPITAL LETTER Z WITH CARON}"
- ... u"ivkovi\N{LATIN SMALL LETTER C WITH CARON}"
- ... )
- 'Zivkovic'
-
- >>> ascii_smash(u"\N{LATIN CAPITAL LIGATURE OE}\N{LATIN SMALL LIGATURE OE}")
- 'OEoe'
-
- """
- out = StringIO()
- for char in unicode_string:
- out.write(ascii_char_smash(char))
- return out.getvalue()
-
-
-def ascii_char_smash(char):
- """Smash a single Unicode character into an ASCII representation.
-
- >>> ascii_char_smash(u"\N{KATAKANA LETTER SMALL A}")
- 'a'
- >>> ascii_char_smash(u"\N{KATAKANA LETTER A}")
- 'A'
- >>> ascii_char_smash(u"\N{KATAKANA LETTER KA}")
- 'KA'
- >>> ascii_char_smash(u"\N{HIRAGANA LETTER SMALL A}")
- 'a'
- >>> ascii_char_smash(u"\N{HIRAGANA LETTER A}")
- 'A'
- >>> ascii_char_smash(u"\N{BOPOMOFO LETTER ANG}")
- 'ANG'
- >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER H WITH STROKE}")
- 'H'
- >>> ascii_char_smash(u"\N{LATIN SMALL LETTER LONG S}")
- 's'
- >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER THORN}")
- 'TH'
- >>> ascii_char_smash(u"\N{LATIN SMALL LETTER THORN}")
- 'th'
- >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER I WITH OGONEK}")
- 'I'
- >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER AE}")
- 'AE'
- >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER A WITH DIAERESIS}")
- 'Ae'
- >>> ascii_char_smash(u"\N{LATIN SMALL LETTER A WITH DIAERESIS}")
- 'ae'
- >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER O WITH DIAERESIS}")
- 'Oe'
- >>> ascii_char_smash(u"\N{LATIN SMALL LETTER O WITH DIAERESIS}")
- 'oe'
- >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER U WITH DIAERESIS}")
- 'Ue'
- >>> ascii_char_smash(u"\N{LATIN SMALL LETTER U WITH DIAERESIS}")
- 'ue'
- >>> ascii_char_smash(u"\N{LATIN SMALL LETTER SHARP S}")
- 'ss'
-
- Latin-1 and other symbols are lost
-
- >>> ascii_char_smash(u"\N{POUND SIGN}")
- ''
-
- Unless they also happen to be letters of some kind, such as greek
-
- >>> ascii_char_smash(u"\N{MICRO SIGN}")
- 'mu'
-
- Fractions
-
- >>> ascii_char_smash(u"\N{VULGAR FRACTION ONE HALF}")
- '1/2'
-
- """
- mapping = {
- u"\N{LATIN CAPITAL LETTER AE}": "AE",
- u"\N{LATIN SMALL LETTER AE}": "ae",
-
- u"\N{LATIN CAPITAL LETTER A WITH DIAERESIS}": "Ae",
- u"\N{LATIN SMALL LETTER A WITH DIAERESIS}": "ae",
-
- u"\N{LATIN CAPITAL LETTER O WITH DIAERESIS}": "Oe",
- u"\N{LATIN SMALL LETTER O WITH DIAERESIS}": "oe",
-
- u"\N{LATIN CAPITAL LETTER U WITH DIAERESIS}": "Ue",
- u"\N{LATIN SMALL LETTER U WITH DIAERESIS}": "ue",
-
- u"\N{LATIN SMALL LETTER SHARP S}": "ss",
-
- u"\N{LATIN CAPITAL LETTER THORN}": "TH",
- u"\N{LATIN SMALL LETTER THORN}": "th",
-
- u"\N{FRACTION SLASH}": "/",
- u"\N{MULTIPLICATION SIGN}": "x",
-
- u"\N{KATAKANA-HIRAGANA DOUBLE HYPHEN}": "=",
- }
-
- # Pass through ASCII
- if ord(char) < 127:
- return char
-
- # Handle manual mappings
- if mapping.has_key(char):
- return mapping[char]
-
- # Regress to decomposed form and recurse if necessary.
- decomposed = unicodedata.normalize("NFKD", char)
- if decomposed != char:
- out = StringIO()
- for char in decomposed:
- out.write(ascii_char_smash(char))
- return out.getvalue()
-
- # Handle whitespace
- if char.isspace():
- return " "
-
- # Handle digits
- if char.isdigit():
- return unicodedata.digit(char)
-
- # Handle decimal (probably pointless given isdigit above)
- if char.isdecimal():
- return unicodedata.decimal(char)
-
- # Handle numerics, such as 1/2
- if char.isnumeric():
- formatted = "%f" % unicodedata.numeric(char)
- # Strip leading and trailing 0
- return formatted.strip("0")
-
- # Ignore unprintables, such as the accents we denormalized
- if not char.isalnum():
- return ""
-
- # Return modified latin characters as just the latin part.
- name = unicodedata.name(char)
-
- match = re.search("LATIN CAPITAL LIGATURE (\w+)", name)
- if match is not None:
- return match.group(1)
-
- match = re.search("LATIN SMALL LIGATURE (\w+)", name)
- if match is not None:
- return match.group(1).lower()
-
- match = re.search("(?:LETTER SMALL|SMALL LETTER) (\w+)", name)
- if match is not None:
- return match.group(1).lower()
-
- match = re.search("LETTER (\w+)", name)
- if match is not None:
- return match.group(1)
-
- # Something we can't represent. Return empty string.
- return ""
-
-
def escape_nonascii_uniquely(bogus_string):
"""Replace non-ascii characters with a hex representation.
=== modified file 'lib/lp/soyuz/adapters/notification.py'
--- lib/lp/soyuz/adapters/notification.py 2015-03-13 19:05:50 +0000
+++ lib/lp/soyuz/adapters/notification.py 2015-07-07 13:35:34 +0000
@@ -1,4 +1,4 @@
-# Copyright 2011-2014 Canonical Ltd. This software is licensed under the
+# Copyright 2011-2015 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Notification for uploads and copies."""
@@ -27,10 +27,7 @@
from lp.registry.interfaces.person import IPersonSet
from lp.registry.interfaces.pocket import PackagePublishingPocket
from lp.services.config import config
-from lp.services.encoding import (
- ascii_smash,
- guess as guess_encoding,
- )
+from lp.services.encoding import guess as guess_encoding
from lp.services.mail.helpers import get_email_template
from lp.services.mail.sendmail import (
format_address,
@@ -232,9 +229,11 @@
info = fetch_information(spr, bprs, changes)
from_addr = info['changedby']
+ from_email = info['changedby_email']
if announce_from_person is not None:
if announce_from_person.preferredemail is not None:
from_addr = format_address_for_person(announce_from_person)
+ from_email = announce_from_person.preferredemail.email
# If we're sending an acceptance notification for a non-PPA upload,
# announce if possible. Avoid announcing backports, binary-only
@@ -243,7 +242,7 @@
and not archive.is_ppa
and pocket != PackagePublishingPocket.BACKPORTS
and not (pocket == PackagePublishingPocket.SECURITY and spr is None)
- and not is_auto_sync_upload(spr, bprs, pocket, from_addr)):
+ and not is_auto_sync_upload(spr, bprs, pocket, from_email)):
name = None
bcc_addr = None
if spr:
@@ -301,17 +300,17 @@
# Some syncs (e.g. from Debian) will involve packages whose
# changed-by person was auto-created in LP and hence does not have a
# preferred email address set. We'll get a None here.
- changedby_person = email_to_person(info['changedby'])
+ changedby_person = email_to_person(info['changedby_email'])
if blamer is not None and blamer != changedby_person:
signer_signature = person_to_email(blamer)
if signer_signature != info['changedby']:
information['SIGNER'] = '\nSigned-By: %s' % signer_signature
# Add maintainer if present and different from changed-by.
- maintainer = info['maintainer']
- changedby = info['changedby']
- if maintainer and maintainer != changedby:
- information['MAINTAINER'] = '\nMaintainer: %s' % maintainer
+ maintainer_displayname = info['maintainer_displayname']
+ if (maintainer_displayname and
+ maintainer_displayname != changedby_displayname):
+ information['MAINTAINER'] = '\nMaintainer: %s' % maintainer_displayname
return get_template(archive, action) % information
@@ -360,24 +359,15 @@
config.uploader.default_sender_name,
config.uploader.default_sender_address)
- # `sendmail`, despite handling unicode message bodies, can't
- # cope with non-ascii sender/recipient addresses, so ascii_smash
- # is used on all addresses.
-
# All emails from here have a Bcc to the default recipient.
bcc_text = format_address(
config.uploader.default_recipient_name,
config.uploader.default_recipient_address)
if bcc:
bcc_text = "%s, %s" % (bcc_text, bcc)
- extra_headers['Bcc'] = ascii_smash(bcc_text)
+ extra_headers['Bcc'] = bcc_text
- recipients = ascii_smash(", ".join(to_addrs))
- if isinstance(from_addr, unicode):
- # ascii_smash only works on unicode strings.
- from_addr = ascii_smash(from_addr)
- else:
- from_addr.encode('ascii')
+ recipients = ", ".join(to_addrs)
if dry_run and logger is not None:
debug(logger, "Would have sent a mail:")
@@ -471,8 +461,8 @@
candidate_recipients = [blamer]
info = fetch_information(spr, bprs, changes)
- changer = email_to_person(info['changedby'])
- maintainer = email_to_person(info['maintainer'])
+ changer = email_to_person(info['changedby_email'])
+ maintainer = email_to_person(info['maintainer_email'])
if blamer is None and not archive.is_copy:
debug(logger, "Changes file is unsigned; adding changer as recipient.")
@@ -565,23 +555,16 @@
return summary
-def email_to_person(fullemail):
- """Return an `IPerson` given an RFC2047 email address.
+def email_to_person(email):
+ """Return an `IPerson` given an email address (without a name).
- :param fullemail: Potential email address.
+ :param email: Potential email address.
:return: `IPerson` with the given email address. None if there
- isn't one, or if `fullemail` isn't a proper email address.
+ isn't one, or if `email` is None.
"""
- if not fullemail:
- return None
-
- try:
- # The 2nd arg to s_f_m() doesn't matter as it won't fail since every-
- # thing will have already parsed at this point.
- rfc822, rfc2047, name, email = safe_fix_maintainer(fullemail, "email")
- return getUtility(IPersonSet).getByEmail(email)
- except ParseMaintError:
- return None
+ if not email:
+ return None
+ return getUtility(IPersonSet).getByEmail(email)
def person_to_email(person):
@@ -591,6 +574,25 @@
return format_address_for_person(person)
+def fix_email(fullemail, field_name):
+ """Turn an email address from .changes into various useful forms.
+
+ The input address may be None, or anything that `fix_maintainer`
+ understands.
+
+ :return: A tuple of (RFC2047-compatible address, Unicode
+ RFC822-compatible address, email).
+ """
+ if not fullemail:
+ return None, None, None
+
+ try:
+ rfc822, rfc2047, _, email = safe_fix_maintainer(fullemail, field_name)
+ return rfc2047, rfc822.decode('utf-8'), email
+ except ParseMaintError:
+ return None, None, None
+
+
def is_auto_sync_upload(spr, bprs, pocket, changed_by_email):
"""Return True if this is a (Debian) auto sync upload.
@@ -609,17 +611,19 @@
def fetch_information(spr, bprs, changes, previous_version=None):
changedby = None
changedby_displayname = None
+ changedby_email = None
maintainer = None
maintainer_displayname = None
+ maintainer_email = None
if changes:
changesfile = ChangesFile.formatChangesComment(
sanitize_string(changes.get('Changes')))
date = changes.get('Date')
- changedby = sanitize_string(changes.get('Changed-By'))
- maintainer = sanitize_string(changes.get('Maintainer'))
- changedby_displayname = changedby
- maintainer_displayname = maintainer
+ changedby, changedby_displayname, changedby_email = fix_email(
+ changes.get('Changed-By'), 'Changed-By')
+ maintainer, maintainer_displayname, maintainer_email = fix_email(
+ changes.get('Maintainer'), 'Maintainer')
elif spr or bprs:
if not spr and bprs:
spr = bprs[0].build.source_package_release
@@ -631,10 +635,12 @@
addr = formataddr((spr.creator.displayname,
spr.creator.preferredemail.email))
changedby_displayname = sanitize_string(addr)
+ changedby_email = spr.creator.preferredemail.email
if maintainer:
addr = formataddr((spr.maintainer.displayname,
spr.maintainer.preferredemail.email))
maintainer_displayname = sanitize_string(addr)
+ maintainer_email = spr.maintainer.preferredemail.email
else:
changesfile = date = None
@@ -643,8 +649,10 @@
'date': date,
'changedby': changedby,
'changedby_displayname': changedby_displayname,
+ 'changedby_email': changedby_email,
'maintainer': maintainer,
'maintainer_displayname': maintainer_displayname,
+ 'maintainer_email': maintainer_email,
}
Follow ups