← Back to team overview

testtools-dev team mailing list archive

[Merge] lp:~gz/testtools/unprintable-assertThat-80412 into lp:testtools

 

Martin [gz] has proposed merging lp:~gz/testtools/unprintable-assertThat-80412 into lp:testtools with lp:~jml/testtools/unprintable-assertThat-804127 as a prerequisite.

Requested reviews:
  testtools committers (testtools-committers)

For more details, see:
https://code.launchpad.net/~gz/testtools/unprintable-assertThat-80412/+merge/72641

Resolve issues related to stringifying in the matcher code by replacing several custom repr-like schemes using %s on strings (which may upcast the result to unicode or prevent later upcasting, or leak control codes, or...) with a big ugly method that tries to do what the callers wanted.

This deserves a longer description of the reasoning behind the changes, but there's some discussion of the reasoning in the prerequisite branch and I need some feedback before I go crazy\bier.

Some random notes so I don't forget them later:
* Switched from """...""" to '''...''' so if I see output pasted in bug reports I'll know if I broke it.
* Being clever with the escaping of single quotes in multiline strings is probably more trouble than it's worth; just putting a backslash in front of every occurrence would simplify the logic a lot.
* Astral characters break, as per usual. I'm amused to note that upstream just fixed one more issue at their end[1] (but still without exposing a usable iterator to Python-level code), and on Python 3 only, of course.
* If the decision is to dump all these attempts at fancy formatting and just live with the normal repr, I would not mind that at all.


[1]: http://bugs.python.org/issue9200
-- 
https://code.launchpad.net/~gz/testtools/unprintable-assertThat-80412/+merge/72641
Your team testtools developers is subscribed to branch lp:testtools.
=== modified file 'testtools/compat.py'
--- testtools/compat.py	2011-08-24 01:57:23 +0000
+++ testtools/compat.py	2011-08-24 01:57:23 +0000
@@ -25,6 +25,7 @@
 import re
 import sys
 import traceback
+import unicodedata
 
 from testtools.helpers import try_imports
 
@@ -52,6 +53,7 @@
 """
 
 if sys.version_info > (3, 0):
+    import builtins
     def _u(s):
         return s
     _r = ascii
@@ -59,12 +61,14 @@
         """A byte literal."""
         return s.encode("latin-1")
     advance_iterator = next
+    # GZ 2011-08-24: Seems istext() is easy to misuse and makes for bad code.
     def istext(x):
         return isinstance(x, str)
     def classtypes():
         return (type,)
     str_is_unicode = True
 else:
+    import __builtin__ as builtins
     def _u(s):
         # The double replace mangling going on prepares the string for
         # unicode-escape - \foo is preserved, \u and \U are decoded.
@@ -112,6 +116,87 @@
         return isinstance(exception, (KeyboardInterrupt, SystemExit))
 
 
+# GZ 2011-08-24: Using isinstance checks like this encourages bad interfaces,
+#                there should be better ways to write code needing this.
+if not issubclass(getattr(builtins, "bytes", str), str):
+    def _isbytes(x):
+        return isinstance(x, bytes)
+else:
+    # Never return True on Pythons that provide the name but not the real type
+    def _isbytes(x):
+        return False
+
+
+def _slow_escape(text):
+    """Escape unicode `text` leaving printable characters unmodified
+
+    The behaviour emulates the Python 3 implementation of repr, see
+    unicode_repr in unicodeobject.c and isprintable definition.
+
+    Because this iterates over the input a codepoint at a time, it's slow, and
+    does not handle astral characters correctly on Python builds with 16 bit
+    rather than 32 bit unicode type.
+    """
+    output = []
+    for c in text:
+        o = ord(c)
+        if o < 256:
+            if o < 32 or 126 < o < 161:
+                output.append(c.encode("unicode-escape"))
+            elif o == 92:
+                # Seperate due to bug in unicode-escape codec in Python 2.4
+                output.append("\\\\")
+            else:
+                output.append(c)
+        else:
+            # To get correct behaviour would need to pair up surrogates here
+            if unicodedata.category(c)[0] in "CZ":
+                output.append(c.encode("unicode-escape"))
+            else:
+                output.append(c)
+    return "".join(output)
+
+
+def text_repr(text, multiline=False):
+    """Rich repr for `text` returning unicode, triple quoted if `multiline`"""
+    is_py3k = sys.version_info > (3, 0)
+    if not multiline and (is_py3k or not str_is_unicode and type(text) is str):
+        # Use normal repr for single line of unicode on Python 3 or bytes
+        return repr(text)
+    prefix = repr(text[:0])[:-2]
+    if multiline:
+        if is_py3k:
+            nl = isinstance(text, bytes) and bytes([10]) or "\n"
+            offset = len(prefix) + 1
+            lines = []
+            for l in text.split(nl):
+                r = repr(l)
+                q = r[-1]
+                lines.append(r[offset:-1].replace("\\" + q, q))
+        elif not str_is_unicode and isinstance(text, str):
+            lines = [l.encode("string-escape").replace("\\'", "'")
+                for l in text.split("\n")]
+        else:
+            lines = [_slow_escape(l) for l in text.split("\n")]
+        _semi_done = "\n".join(lines) + "''"
+        p = 0
+        while True:
+            p = _semi_done.find("'''", p)
+            if p == -1:
+                break
+            _semi_done = "\\".join([_semi_done[:p], _semi_done[p:]])
+            p += 2
+        return "".join([prefix, "'''\\\n", _semi_done, "'"])
+    escaped_text = _slow_escape(text)
+    quote = "'"
+    if "'" in text:
+        if '"' in text:
+            escaped_text = escaped_text.replace("'", "\\'")
+        else:
+            quote = '"'
+    return "".join([prefix, quote, escaped_text, quote])
+
+
 def unicode_output_stream(stream):
     """Get wrapper for given stream that writes any unicode without exception
 
@@ -146,11 +231,6 @@
     return writer(stream, "replace")
 
 
-try:
-    to_text = unicode
-except NameError:
-    to_text = str
-
 # The default source encoding is actually "iso-8859-1" until Python 2.5 but
 # using non-ascii causes a deprecation warning in 2.4 and it's cleaner to
 # treat all versions the same way

=== modified file 'testtools/matchers.py'
--- testtools/matchers.py	2011-08-24 01:57:23 +0000
+++ testtools/matchers.py	2011-08-24 01:57:23 +0000
@@ -49,7 +49,10 @@
     classtypes,
     _error_repr,
     isbaseexception,
+    _isbytes,
     istext,
+    str_is_unicode,
+    text_repr
     )
 
 
@@ -151,12 +154,25 @@
     def __str__(self):
         difference = self.mismatch.describe()
         if self.verbose:
+            # GZ 2011-08-24: Smelly API? Better to take any object and special
+            #                case text inside?
+            if istext(self.matchee) or _isbytes(self.matchee):
+                matchee = text_repr(self.matchee)
+            else:
+                matchee = repr(self.matchee)
             return (
-                'Match failed. Matchee: "%s"\nMatcher: %s\nDifference: %s\n'
-                % (self.matchee, self.matcher, difference))
+                'Match failed. Matchee: %s\nMatcher: %s\nDifference: %s\n'
+                % (matchee, self.matcher, difference))
         else:
             return difference
 
+    if not str_is_unicode:
+
+        __unicode__ = __str__
+
+        def __str__(self):
+            return self.__unicode__().encode("ascii", "backslashreplace")
+
 
 class MismatchDecorator(object):
     """Decorate a ``Mismatch``.
@@ -268,7 +284,12 @@
         self.with_nl = with_nl
 
     def describe(self):
-        return self.matcher._describe_difference(self.with_nl)
+        s = self.matcher._describe_difference(self.with_nl)
+        if str_is_unicode or isinstance(s, unicode):
+            return s
+        # GZ 2011-08-24: This is actually pretty bogus, most C0 codes should
+        #                be escaped, in addition to non-ascii bytes.
+        return s.decode("latin1").encode("ascii", "backslashreplace")
 
 
 class DoesNotContain(Mismatch):
@@ -298,8 +319,8 @@
         self.expected = expected
 
     def describe(self):
-        return "'%s' does not start with '%s'." % (
-            self.matchee, self.expected)
+        return "%s does not start with %s." % (
+            text_repr(self.matchee), text_repr(self.expected))
 
 
 class DoesNotEndWith(Mismatch):
@@ -314,8 +335,8 @@
         self.expected = expected
 
     def describe(self):
-        return "'%s' does not end with '%s'." % (
-            self.matchee, self.expected)
+        return "%s does not end with %s." % (
+            text_repr(self.matchee), text_repr(self.expected))
 
 
 class _BinaryComparison(object):
@@ -347,8 +368,10 @@
     def _format(self, thing):
         # Blocks of text with newlines are formatted as triple-quote
         # strings. Everything else is pretty-printed.
-        if istext(thing) and '\n' in thing:
-            return '"""\\\n%s"""' % (thing,)
+        if istext(thing):
+            return text_repr(thing, multiline='\n' in thing)
+        elif _isbytes(thing):
+            return text_repr(thing, multiline=0xA in thing)
         return pformat(thing)
 
     def describe(self):
@@ -359,7 +382,7 @@
                 self._mismatch_string, self._format(self.expected),
                 self._format(self.other))
         else:
-            return "%s %s %s" % (left, self._mismatch_string,right)
+            return "%s %s %s" % (left, self._mismatch_string, right)
 
 
 class Equals(_BinaryComparison):
@@ -599,7 +622,7 @@
         self.expected = expected
 
     def __str__(self):
-        return "Starts with '%s'." % self.expected
+        return "StartsWith(%r)" % (self.expected,)
 
     def match(self, matchee):
         if not matchee.startswith(self.expected):
@@ -618,7 +641,7 @@
         self.expected = expected
 
     def __str__(self):
-        return "Ends with '%s'." % self.expected
+        return "EndsWith(%r)" % (self.expected,)
 
     def match(self, matchee):
         if not matchee.endswith(self.expected):
@@ -875,8 +898,12 @@
 
     def match(self, value):
         if not re.match(self.pattern, value, self.flags):
+            pattern = self.pattern
+            if not isinstance(pattern, str_is_unicode and str or unicode):
+                pattern = pattern.decode("latin1")
+            pattern = pattern.encode("unicode_escape").decode("ascii")
             return Mismatch("%r does not match /%s/" % (
-                    value, self.pattern))
+                    value, pattern.replace("\\\\", "\\")))
 
 
 class MatchesSetwise(object):

=== modified file 'testtools/tests/test_compat.py'
--- testtools/tests/test_compat.py	2011-07-04 18:03:28 +0000
+++ testtools/tests/test_compat.py	2011-08-24 01:57:23 +0000
@@ -16,6 +16,7 @@
     _get_source_encoding,
     _u,
     str_is_unicode,
+    text_repr,
     unicode_output_stream,
     )
 from testtools.matchers import (
@@ -262,6 +263,119 @@
         self.assertEqual("pa???n", sout.getvalue())
 
 
+class TestTextRepr(testtools.TestCase):
+    """Ensure in extending repr, basic behaviours are not being broken"""
+
+    ascii_examples = (
+        # Single character examples
+        #  C0 control codes should be escaped except multiline \n
+        ("\x00", "'\\x00'", "'''\\\n\\x00'''"),
+        ("\b", "'\\x08'", "'''\\\n\\x08'''"),
+        ("\t", "'\\t'", "'''\\\n\\t'''"),
+        ("\n", "'\\n'", "'''\\\n\n'''"),
+        ("\r", "'\\r'", "'''\\\n\\r'''"),
+        #  Quotes and backslash should match normal repr behaviour
+        ('"', "'\"'", "'''\\\n\"'''"),
+        ("'", "\"'\"", "'''\\\n\\''''"),
+        ("\\", "'\\\\'", "'''\\\n\\\\'''"),
+        #  DEL is also unprintable and should be escaped
+        ("\x7F", "'\\x7f'", "'''\\\n\\x7f'''"),
+
+        # Character combinations that need double checking
+        ("\r\n", "'\\r\\n'", "'''\\\n\\r\n'''"),
+        ("\"'", "'\"\\''", "'''\\\n\"\\''''"),
+        ("'\"", "'\\'\"'", "'''\\\n'\"'''"),
+        ("\\n", "'\\\\n'", "'''\\\n\\\\n'''"),
+        ("\\\n", "'\\\\\\n'", "'''\\\n\\\\\n'''"),
+        ("\\'''", "\"\\\\'''\"", "'''\\\n\\\\\\'\\'\\''''"),
+        )
+
+    # Bytes with the high bit set should always be escaped
+    bytes_examples = (
+        (_b("\x80"), "'\\x80'", "'''\\\n\\x80'''"),
+        (_b("\xA0"), "'\\xa0'", "'''\\\n\\xa0'''"),
+        (_b("\xC0"), "'\\xc0'", "'''\\\n\\xc0'''"),
+        (_b("\xFF"), "'\\xff'", "'''\\\n\\xff'''"),
+        (_b("\xC2\xA7"), "'\\xc2\\xa7'", "'''\\\n\\xc2\\xa7'''"),
+        )
+
+    # Unicode doesn't escape printable characters as per the Python 3 model
+    unicode_examples = (
+        # C1 codes are unprintable
+        (_u("\x80"), "'\\x80'", "'''\\\n\\x80'''"),
+        (_u("\x9F"), "'\\x9f'", "'''\\\n\\x9f'''"),
+        # No-break space is unprintable
+        (_u("\xA0"), "'\\xa0'", "'''\\\n\\xa0'''"),
+        # Letters latin alphabets are printable
+        (_u("\xA1"), _u("'\xa1'"), _u("'''\\\n\xa1'''")),
+        (_u("\xFF"), _u("'\xff'"), _u("'''\\\n\xff'''")),
+        (_u("\u0100"), _u("'\u0100'"), _u("'''\\\n\u0100'''")),
+        # Line and paragraph seperators are unprintable
+        (_u("\u2028"), "'\\u2028'", "'''\\\n\\u2028'''"),
+        (_u("\u2029"), "'\\u2029'", "'''\\\n\\u2029'''"),
+        # Unpaired surrogates are unprintable
+        (_u("\uD800"), "'\\ud800'", "'''\\\n\\ud800'''"),
+        (_u("\uDFFF"), "'\\udfff'", "'''\\\n\\udfff'''"),
+        # Unprintable general categories not fully tested: Cc, Cf, Co, Cn, Zs
+        )
+
+    b_prefix = repr(_b(""))[:-2]
+    u_prefix = repr(_u(""))[:-2]
+
+    def test_ascii_examples_bytes(self):
+        for s, expected, _ in self.ascii_examples:
+            b = _b(s)
+            actual = text_repr(b)
+            # Add self.assertIsInstance check?
+            self.assertEqual(actual, self.b_prefix + expected)
+            self.assertEqual(eval(actual), b)
+
+    def test_ascii_examples_unicode(self):
+        for s, expected, _ in self.ascii_examples:
+            u = _u(s)
+            actual = text_repr(u)
+            self.assertEqual(actual, self.u_prefix + expected)
+            self.assertEqual(eval(actual), u)
+
+    def test_ascii_examples_multiline_bytes(self):
+        for s, _, expected in self.ascii_examples:
+            b = _b(s)
+            actual = text_repr(b, multiline=True)
+            self.assertEqual(actual, self.b_prefix + expected)
+            self.assertEqual(eval(actual), b)
+
+    def test_ascii_examples_multiline_unicode(self):
+        for s, _, expected in self.ascii_examples:
+            u = _u(s)
+            actual = text_repr(u, multiline=True)
+            self.assertEqual(actual, self.u_prefix + expected)
+            self.assertEqual(eval(actual), u)
+
+    def test_bytes_examples(self):
+        for b, expected, _ in self.bytes_examples:
+            actual = text_repr(b)
+            self.assertEqual(actual, self.b_prefix + expected)
+            self.assertEqual(eval(actual), b)
+
+    def test_bytes_examples_multiline(self):
+        for b, _, expected in self.bytes_examples:
+            actual = text_repr(b, multiline=True)
+            self.assertEqual(actual, self.b_prefix + expected)
+            self.assertEqual(eval(actual), b)
+
+    def test_unicode_examples(self):
+        for u, expected, _ in self.unicode_examples:
+            actual = text_repr(u)
+            self.assertEqual(actual, self.u_prefix + expected)
+            self.assertEqual(eval(actual), u)
+
+    def test_unicode_examples_multiline(self):
+        for u, _, expected in self.unicode_examples:
+            actual = text_repr(u, multiline=True)
+            self.assertEqual(actual, self.u_prefix + expected)
+            self.assertEqual(eval(actual), u)
+
+
 def test_suite():
     from unittest import TestLoader
     return TestLoader().loadTestsFromName(__name__)

=== modified file 'testtools/tests/test_matchers.py'
--- testtools/tests/test_matchers.py	2011-08-24 01:57:23 +0000
+++ testtools/tests/test_matchers.py	2011-08-24 01:57:23 +0000
@@ -12,7 +12,9 @@
     )
 from testtools.compat import (
     StringIO,
-    to_text,
+    str_is_unicode,
+    text_repr,
+    _b,
     _u,
     )
 from testtools.matchers import (
@@ -20,7 +22,11 @@
     AllMatch,
     Annotate,
     AnnotatedMismatch,
+<<<<<<< TREE
     Contains,
+=======
+    _BinaryMismatch,
+>>>>>>> MERGE-SOURCE
     Equals,
     DocTestMatches,
     DoesNotEndWith,
@@ -96,7 +102,7 @@
         mismatch = matcher.match(2)
         e = MismatchError(matchee, matcher, mismatch, True)
         expected = (
-            'Match failed. Matchee: "%s"\n'
+            'Match failed. Matchee: %r\n'
             'Matcher: %s\n'
             'Difference: %s\n' % (
                 matchee,
@@ -112,17 +118,80 @@
         matcher = Equals(_u('a'))
         mismatch = matcher.match(matchee)
         expected = (
-            'Match failed. Matchee: "%s"\n'
+            'Match failed. Matchee: %s\n'
             'Matcher: %s\n'
             'Difference: %s\n' % (
-                matchee,
+                text_repr(matchee),
                 matcher,
                 mismatch.describe(),
                 ))
         e = MismatchError(matchee, matcher, mismatch, True)
-        # XXX: Using to_text rather than str because, on Python 2, str will
-        # raise UnicodeEncodeError.
-        self.assertEqual(expected, to_text(e))
+        if str_is_unicode:
+            actual = str(e)
+        else:
+            actual = unicode(e)
+            # Using str() should still work, and return ascii only
+            self.assertEqual(
+                expected.replace(matchee, matchee.encode("unicode-escape")),
+                str(e).decode("ascii"))
+        self.assertEqual(expected, actual)
+
+
+class Test_BinaryMismatch(TestCase):
+    """Mismatches from binary comparisons need useful describe output"""
+
+    _long_string = "This is a longish multiline non-ascii string\n\xa7"
+    _long_b = _b(_long_string)
+    _long_u = _u(_long_string)
+
+    def test_short_objects(self):
+        o1, o2 = object(), object()
+        mismatch = _BinaryMismatch(o1, "!~", o2)
+        self.assertEqual(mismatch.describe(), "%r !~ %r" % (o1, o2))
+
+    def test_short_mixed_strings(self):
+        b, u = _b("\xa7"), _u("\xa7")
+        mismatch = _BinaryMismatch(b, "!~", u)
+        self.assertEqual(mismatch.describe(), "%r !~ %r" % (b, u))
+
+    def test_long_bytes(self):
+        one_line_b = self._long_b.replace(_b("\n"), _b(" "))
+        mismatch = _BinaryMismatch(one_line_b, "!~", self._long_b)
+        self.assertEqual(mismatch.describe(),
+            "%s:\nreference = %s\nactual = %s\n" % ("!~",
+                text_repr(one_line_b),
+                text_repr(self._long_b, multiline=True)))
+
+    def test_long_unicode(self):
+        one_line_u = self._long_u.replace("\n", " ")
+        mismatch = _BinaryMismatch(one_line_u, "!~", self._long_u)
+        self.assertEqual(mismatch.describe(),
+            "%s:\nreference = %s\nactual = %s\n" % ("!~",
+                text_repr(one_line_u),
+                text_repr(self._long_u, multiline=True)))
+
+    def test_long_mixed_strings(self):
+        mismatch = _BinaryMismatch(self._long_b, "!~", self._long_u)
+        self.assertEqual(mismatch.describe(),
+            "%s:\nreference = %s\nactual = %s\n" % ("!~",
+                text_repr(self._long_b, multiline=True),
+                text_repr(self._long_u, multiline=True)))
+
+    def test_long_bytes_and_object(self):
+        obj = object()
+        mismatch = _BinaryMismatch(self._long_b, "!~", obj)
+        self.assertEqual(mismatch.describe(),
+            "%s:\nreference = %s\nactual = %s\n" % ("!~",
+                text_repr(self._long_b, multiline=True),
+                repr(obj)))
+
+    def test_long_unicode_and_object(self):
+        obj = object()
+        mismatch = _BinaryMismatch(self._long_u, "!~", obj)
+        self.assertEqual(mismatch.describe(),
+            "%s:\nreference = %s\nactual = %s\n" % ("!~",
+                text_repr(self._long_u, multiline=True),
+                repr(obj)))
 
 
 class TestMatchersInterface(object):
@@ -208,6 +277,23 @@
         self.assertEqual("bar\n", matcher.want)
         self.assertEqual(doctest.ELLIPSIS, matcher.flags)
 
+    def test_describe_non_ascii_bytes(self):
+        """Even with bytestrings, the mismatch should be coercible to unicode
+
+        DocTestMatches is intended for text, but the Python 2 str type also
+        permits arbitrary binary inputs. This is a slightly bogus thing to do,
+        and under Python 3 using bytes objects will reasonably raise an error.
+        """
+        header = _b("\x89PNG\r\n\x1a\n...")
+        if str_is_unicode:
+            self.assertRaises(TypeError,
+                DocTestMatches, header, doctest.ELLIPSIS)
+            return
+        matcher = DocTestMatches(header, doctest.ELLIPSIS)
+        mismatch = matcher.match(_b("GIF89a\1\0\1\0\0\0\0;"))
+        # Must be treatable as unicode text, the exact output matters less
+        self.assertTrue(unicode(mismatch.describe()))
+
 
 class TestEqualsInterface(TestCase, TestMatchersInterface):
 
@@ -610,6 +696,21 @@
         mismatch = DoesNotStartWith("fo", "bo")
         self.assertEqual("'fo' does not start with 'bo'.", mismatch.describe())
 
+    def test_describe_non_ascii_unicode(self):
+        string = _u("A\xA7")
+        suffix = _u("B\xA7")
+        mismatch = DoesNotStartWith(string, suffix)
+        self.assertEqual("%s does not start with %s." % (
+            text_repr(string), text_repr(suffix)),
+            mismatch.describe())
+
+    def test_describe_non_ascii_bytes(self):
+        string = _b("A\xA7")
+        suffix = _b("B\xA7")
+        mismatch = DoesNotStartWith(string, suffix)
+        self.assertEqual("%r does not start with %r." % (string, suffix),
+            mismatch.describe())
+
 
 class StartsWithTests(TestCase):
 
@@ -617,7 +718,17 @@
 
     def test_str(self):
         matcher = StartsWith("bar")
-        self.assertEqual("Starts with 'bar'.", str(matcher))
+        self.assertEqual("StartsWith('bar')", str(matcher))
+
+    def test_str_with_bytes(self):
+        b = _b("\xA7")
+        matcher = StartsWith(b)
+        self.assertEqual("StartsWith(%r)" % (b,), str(matcher))
+
+    def test_str_with_unicode(self):
+        u = _u("\xA7")
+        matcher = StartsWith(u)
+        self.assertEqual("StartsWith(%r)" % (u,), str(matcher))
 
     def test_match(self):
         matcher = StartsWith("bar")
@@ -646,6 +757,21 @@
         mismatch = DoesNotEndWith("fo", "bo")
         self.assertEqual("'fo' does not end with 'bo'.", mismatch.describe())
 
+    def test_describe_non_ascii_unicode(self):
+        string = _u("A\xA7")
+        suffix = _u("B\xA7")
+        mismatch = DoesNotEndWith(string, suffix)
+        self.assertEqual("%s does not end with %s." % (
+            text_repr(string), text_repr(suffix)),
+            mismatch.describe())
+
+    def test_describe_non_ascii_bytes(self):
+        string = _b("A\xA7")
+        suffix = _b("B\xA7")
+        mismatch = DoesNotEndWith(string, suffix)
+        self.assertEqual("%r does not end with %r." % (string, suffix),
+            mismatch.describe())
+
 
 class EndsWithTests(TestCase):
 
@@ -653,7 +779,17 @@
 
     def test_str(self):
         matcher = EndsWith("bar")
-        self.assertEqual("Ends with 'bar'.", str(matcher))
+        self.assertEqual("EndsWith('bar')", str(matcher))
+
+    def test_str_with_bytes(self):
+        b = _b("\xA7")
+        matcher = EndsWith(b)
+        self.assertEqual("EndsWith(%r)" % (b,), str(matcher))
+
+    def test_str_with_unicode(self):
+        u = _u("\xA7")
+        matcher = EndsWith(u)
+        self.assertEqual("EndsWith(%r)" % (u,), str(matcher))
 
     def test_match(self):
         matcher = EndsWith("arf")
@@ -770,11 +906,17 @@
         ("MatchesRegex('a|b')", MatchesRegex('a|b')),
         ("MatchesRegex('a|b', re.M)", MatchesRegex('a|b', re.M)),
         ("MatchesRegex('a|b', re.I|re.M)", MatchesRegex('a|b', re.I|re.M)),
+        ("MatchesRegex(%r)" % (_b("\xA7"),), MatchesRegex(_b("\xA7"))),
+        ("MatchesRegex(%r)" % (_u("\xA7"),), MatchesRegex(_u("\xA7"))),
         ]
 
     describe_examples = [
         ("'c' does not match /a|b/", 'c', MatchesRegex('a|b')),
         ("'c' does not match /a\d/", 'c', MatchesRegex(r'a\d')),
+        ("%r does not match /\\s+\\xa7/" % (_b('c'),),
+            _b('c'), MatchesRegex(_b("\\s+\xA7"))),
+        ("%r does not match /\\s+\\xa7/" % (_u('c'),),
+            _u('c'), MatchesRegex(_u("\\s+\xA7"))),
         ]
 
 

=== modified file 'testtools/tests/test_testcase.py'
--- testtools/tests/test_testcase.py	2011-08-24 01:57:23 +0000
+++ testtools/tests/test_testcase.py	2011-08-24 01:57:23 +0000
@@ -488,7 +488,7 @@
         matchee = 'foo'
         matcher = Equals('bar')
         expected = (
-            'Match failed. Matchee: "%s"\n'
+            'Match failed. Matchee: %r\n'
             'Matcher: %s\n'
             'Difference: %s\n' % (
                 matchee,
@@ -528,10 +528,10 @@
         matchee = _u('\xa7')
         matcher = Equals(_u('a'))
         expected = (
-            'Match failed. Matchee: "%s"\n'
+            'Match failed. Matchee: %s\n'
             'Matcher: %s\n'
             'Difference: %s\n\n' % (
-                matchee,
+                repr(matchee).replace("\\xa7", matchee),
                 matcher,
                 matcher.match(matchee).describe(),
                 ))
@@ -565,6 +565,21 @@
         self.assertFails(expected_error, self.assertEquals, a, b)
         self.assertFails(expected_error, self.failUnlessEqual, a, b)
 
+    def test_assertEqual_non_ascii_str_with_newlines(self):
+        message = _u("Be careful mixing unicode and bytes")
+        a = "a\n\xa7\n"
+        b = "Just a longish string so the more verbose output form is used."
+        expected_error = '\n'.join([
+            '!=:',
+            "reference = '''\\",
+            'a',
+            repr('\xa7')[1:-1],
+            "'''",
+            'actual = %r' % (b,),
+            ': ' + message,
+            ])
+        self.assertFails(expected_error, self.assertEqual, a, b, message)
+
     def test_assertIsNone(self):
         self.assertIsNone(None)
 


Follow ups