← Back to team overview

curtin-dev team mailing list archive

[Merge] ~ogayot/curtin:sfdisk-accept-utf8 into curtin:master

 

Olivier Gayot has proposed merging ~ogayot/curtin:sfdisk-accept-utf8 into curtin:master.

Commit message:
    block_meta_v2: make GPT partition name support more robust
    
    In some cases, the sfdisk script will include GPT partition names.  GPT
    partition names support characters outside the ASCII range (they are
    stored in UTF-16 in the GPT metadata). Therefore, it is currently a
    wrong assumption that the sfdisk input is always valid ASCII.
    
    Furthermore, we inject the name straight inside the sfdisk input script
    (surrounded with double quotes)..
    This can result in various problems if the name contains a '"' character
    (the sfdisk field delimiter) or other characters such as '\\'.
    
    Fixed by converting all the characters of the partition names into their
    utf-8 \x notation. sfdisk natively supports reading this format. This
    will have a slight implact on the readability of the sfdisk script, but
    should make it way more robust.
    
    Signed-off-by: Olivier Gayot <olivier.gayot@xxxxxxxxxxxxx>

Requested reviews:
  curtin developers (curtin-dev)

For more details, see:
https://code.launchpad.net/~ogayot/curtin/+git/curtin/+merge/442225

GPT partition names are basically free-text, but curtin expects the sfdisk input script to be ASCII.

We also inject the partition name in the sfdisk script without properly sanitizing the name. This results in problems if the name contains '"' or other characters that have a special meaning in sfdisk.

Fixed by converting the partition name to a utf-8 hex notation.

e.g.

name="name"

becomes:

name="\x6e\x61\x6d\x65"
-- 
Your team curtin developers is requested to review the proposed merge of ~ogayot/curtin:sfdisk-accept-utf8 into curtin:master.
diff --git a/curtin/commands/block_meta_v2.py b/curtin/commands/block_meta_v2.py
index ae0b058..7939ba1 100644
--- a/curtin/commands/block_meta_v2.py
+++ b/curtin/commands/block_meta_v2.py
@@ -26,6 +26,12 @@ from curtin.storage_config import (
 from curtin.udev import udevadm_settle
 
 
+def to_utf8_hex_notation(string: str) -> str:
+    ''' Convert a unicode string into a UTF-8 representation using the \\x
+    notation. '''
+    return ''.join([f'\\x{c:02x}' for c in bytearray(string, 'utf-8')])
+
+
 @attr.s(auto_attribs=True)
 class PartTableEntry:
     # The order listed here matches the order sfdisk represents these fields
@@ -49,7 +55,11 @@ class PartTableEntry:
             if v is not None:
                 r += ' {}={}'.format(a, v)
         if self.name is not None:
-            r += ' name="{}"'.format(self.name)
+            # Partition names are basically free-text fields. Injecting some
+            # characters such as '"', '\' and '\n' will result in lots of
+            # trouble.  Fortunately, sfdisk supports \x notation, so we can
+            # rely on it.
+            r += ' name="{}"'.format(to_utf8_hex_notation(self.name))
         if self.attrs:
             r += ' attrs="{}"'.format(' '.join(self.attrs))
         if self.bootable:
diff --git a/tests/unittests/test_commands_block_meta.py b/tests/unittests/test_commands_block_meta.py
index dc03359..80206a8 100644
--- a/tests/unittests/test_commands_block_meta.py
+++ b/tests/unittests/test_commands_block_meta.py
@@ -3050,14 +3050,20 @@ label: gpt
         table.add(dict(number=1, offset=1 << 20, size=9 << 20, flag='boot',
                        partition_name=name))
         type_id = "C12A7328-F81F-11D2-BA4B-00A0C93EC93B"
+        to_hex = block_meta_v2.to_utf8_hex_notation
         expected = f'''\
 label: gpt
 
-1:  start=2048 size=18432 type={type_id} name="{name}"'''
+1:  start=2048 size=18432 type={type_id} name="{to_hex(name)}"'''
         self.assertEqual(expected, table.render())
 
-    def test_gpt_name_spaces(self):
-        name = self.random_string() + " " + self.random_string()
+    def test_gpt_name_free_text(self):
+        name = 'my "분할" réservée'
+        expected_name = ''.join([
+            '\\x6d\\x79',
+            '\\x20\\x22\\xeb\\xb6\\x84\\xed\\x95\\xa0\\x22',
+            '\\x20\\x72\\xc3\\xa9\\x73\\x65\\x72\\x76\\xc3\\xa9\\x65',
+        ])
         table = block_meta_v2.GPTPartTable(512)
         table.add(dict(number=1, offset=1 << 20, size=9 << 20, flag='boot',
                        partition_name=name))
@@ -3065,7 +3071,7 @@ label: gpt
         expected = f'''\
 label: gpt
 
-1:  start=2048 size=18432 type={type_id} name="{name}"'''
+1:  start=2048 size=18432 type={type_id} name="{expected_name}"'''
         self.assertEqual(expected, table.render())
 
     def test_gpt_attrs_none(self):
@@ -3244,7 +3250,7 @@ label: dos
                 uuid=uuid, name='name',
                 attrs=['stuff', 'things'])
         expected = f'1:  start=2 size=3 type=04 uuid={uuid} ' + \
-            'name="name" attrs="stuff things" bootable'
+            'name="\\x6e\\x61\\x6d\\x65" attrs="stuff things" bootable'
         self.assertEqual(expected, pte.render())
 
     def test_gpt_entry_preserve(self):
@@ -3255,8 +3261,9 @@ label: dos
                 number=1, start=2, size=3, type='04', bootable=False,
                 uuid=None, name=None, attrs=None)
         pte.preserve({'uuid': uuid, 'name': name, 'attrs': attrs})
+        to_hex = block_meta_v2.to_utf8_hex_notation
         expected = f'1:  start=2 size=3 type=04 uuid={uuid} ' + \
-            f'name="{name}" attrs="{attrs}"'
+            f'name="{to_hex(name)}" attrs="{attrs}"'
         self.assertEqual(expected, pte.render())
 
     def test_v2_dos_is_logical(self):

Follow ups