stackhpc · github-actions · Sep 19, 2024 · Sep 18, 2024 · Jun 26, 2024 · Oct 1, 2024
@@ -213,6 +213,8 @@
       The regular tempest-integrated-storage job but with glance metadata injection
     post-run: playbooks/post-check-metadata-injection.yaml
     vars:
+      configure_swap_size: 8192
+      tempest_concurrency: 3
       zuul_copy_output:
         /etc/glance-remote: logs
       devstack_localrc:
@@ -236,6 +238,11 @@
                 "glance_devstack_test":"doyouseeme?"
             image_conversion:
               output_format: raw
+        test-config:
+          "$TEMPEST_CONFIG":
+            image:
+              image_caching_enabled: True
+              disk_formats: qcow2,ari,aki,vhd,vmdk,raw,ami,vdi,iso,vhdx
 
 - job:
     name: glance-multistore-cinder-import
@@ -306,7 +313,6 @@
       - release-notes-jobs-python3
     check:
       jobs:
-        - openstack-tox-functional-py36-fips
         - openstack-tox-functional-py39
         - glance-tox-functional-py39-rbac-defaults
         - glance-ceph-thin-provisioning:

@@ -181,6 +181,16 @@ def execute(self, image_id):
                                                'bfile': backing_file}
             raise RuntimeError(msg)
 
+        try:
+            data_file = metadata['format-specific']['data']['data-file']
+        except KeyError:
+            data_file = None
+        if data_file is not None:
+            msg = _("File %(path)s has invalid data-file "
+                    "%(dfile)s, aborting.") % {"path": path,
+                                               "dfile": data_file}
+            raise RuntimeError(msg)
+
         return path
 
     def revert(self, image_id, result, **kwargs):

@@ -122,6 +122,14 @@ def _execute(self, action, file_path, **kwargs):
             raise RuntimeError(
                 'QCOW images with backing files are not allowed')
 
+        try:
+            data_file = metadata['format-specific']['data']['data-file']
+        except KeyError:
+            data_file = None
+        if data_file is not None:
+            raise RuntimeError(
+                'QCOW images with data-file set are not allowed')
+
         if metadata.get('format') == 'vmdk':
             create_type = metadata.get(
                 'format-specific', {}).get(

@@ -28,6 +28,14 @@
 LOG = logging.getLogger(__name__)
 
 
+def chunked_reader(fileobj, chunk_size=512):
+    """Yield successive chunks read from a file-like object.
+
+    :param fileobj: Object providing a ``read(size)`` method
+    :param chunk_size: Amount requested from each ``read()`` call
+    :returns: A generator producing each non-empty chunk in order; it
+              stops at the first empty read
+    """
+    chunk = fileobj.read(chunk_size)
+    while chunk:
+        yield chunk
+        chunk = fileobj.read(chunk_size)
+
+
 class CaptureRegion(object):
     """Represents a region of a file we want to capture.
 
@@ -176,10 +184,16 @@ def virtual_size(self):
     @property
     def actual_size(self):
         """Returns the total size of the file, usually smaller than
-        virtual_size.
+        virtual_size. NOTE: this will only be accurate if the entire
+        file is read and processed.
         """
         return self._total_count
 
+    @property
+    def complete(self):
+        """True once every registered capture region reports complete."""
+        for region in self._capture_regions.values():
+            if not region.complete:
+                return False
+        return True
+
     def __str__(self):
         """The string name of this file format."""
         return 'raw'
@@ -194,6 +208,35 @@ def context_info(self):
         return {name: len(region.data) for name, region in
                 self._capture_regions.items()}
 
+    @classmethod
+    def from_file(cls, filename):
+        """Build an inspector by feeding it a file until it is satisfied.
+
+        NOTE: Reading stops as soon as the inspector reports complete, so
+        the actual_size property reflects the amount of data consumed and
+        not the size of the file.
+
+        :param filename: Path of the file to inspect
+        :returns: The populated inspector instance
+        :raises ImageFormatError: if the inspector never became complete
+                                  or the file does not match its format
+        """
+        inspector = cls()
+        with open(filename, 'rb') as image:
+            for data in chunked_reader(image):
+                inspector.eat_chunk(data)
+                if inspector.complete:
+                    # Everything we need has been captured; stop reading
+                    break
+        if inspector.complete and inspector.format_match:
+            return inspector
+        raise ImageFormatError('File is not in requested format')
+
+    def safety_check(self):
+        """Decide whether the inspected file is safe to use.
+
+        This base implementation performs no checks and always reports
+        the file as safe.
+
+        :returns: True if safe, False otherwise
+        :raises ImageFormatError: may be raised when safety cannot be
+                                  guaranteed because of parsing or other
+                                  errors
+        """
+        return True
+
 
 # The qcow2 format consists of a big-endian 72-byte header, of which
 # only a small portion has information we care about:
@@ -202,15 +245,26 @@ def context_info(self):
 #   0  0x00   Magic 4-bytes 'QFI\xfb'
 #   4  0x04   Version (uint32_t, should always be 2 for modern files)
 #  . . .
+#   8  0x08   Backing file offset (uint64_t)
 #  24  0x18   Size in bytes (unint64_t)
+#  . . .
+#  72  0x48   Incompatible features bitfield (8 bytes)
 #
-# https://people.gnome.org/~markmc/qcow-image-format.html
+# https://gitlab.com/qemu-project/qemu/-/blob/master/docs/interop/qcow2.txt
 class QcowInspector(FileInspector):
     """QEMU QCOW2 Format
 
     This should only require about 32 bytes of the beginning of the file
-    to determine the virtual size.
+    to determine the virtual size, and 104 bytes to perform the safety check.
     """
+
+    # Byte offset and length of the backing file offset field in the header
+    BF_OFFSET = 0x08
+    BF_OFFSET_LEN = 8
+    # Byte offset and length of the incompatible features bitfield
+    I_FEATURES = 0x48
+    I_FEATURES_LEN = 8
+    # Position of the data-file flag; has_data_file turns this into the
+    # mask 0x04, i.e. the value is counted starting from one
+    I_FEATURES_DATAFILE_BIT = 3
+    # has_unknown_features only accepts bits below this (zero-based) index;
+    # anything at or above it is reported as an unknown feature
+    I_FEATURES_MAX_BIT = 4
+
     def __init__(self, *a, **k):
         super(QcowInspector, self).__init__(*a, **k)
         self.new_region('header', CaptureRegion(0, 512))
@@ -220,6 +274,10 @@ def _qcow_header_data(self):
             struct.unpack('>4sIQIIQ', self.region('header').data[:32]))
         return magic, size
 
+    @property
+    def has_header(self):
+        """Whether the 'header' capture region has been completely read."""
+        header = self.region('header')
+        return header.complete
+
     @property
     def virtual_size(self):
         if not self.region('header').complete:
@@ -236,9 +294,77 @@ def format_match(self):
         magic, size = self._qcow_header_data()
         return magic == b'QFI\xFB'
 
+    @property
+    def has_backing_file(self):
+        """Report whether the header references a backing file.
+
+        Returns None while the header is still incomplete, False when the
+        file is not qcow2 or the backing file offset is zero, and True
+        when the offset is nonzero.
+        """
+        header = self.region('header')
+        if not header.complete:
+            return None
+        if not self.format_match:
+            return False
+        start = self.BF_OFFSET
+        stop = start + self.BF_OFFSET_LEN
+        # Big-endian uint64; any nonzero value means "has a backing file"
+        (offset,) = struct.unpack('>Q', header.data[start:stop])
+        return bool(offset)
+
+    @property
+    def has_unknown_features(self):
+        """Report whether unrecognized incompatible-feature bits are set.
+
+        Returns None while the header is still incomplete, False when the
+        file is not qcow2 or only known feature bits are set, and True as
+        soon as a bit outside the known range is found (a warning naming
+        the offending byte is logged).
+        """
+        if not self.region('header').complete:
+            return None
+        if not self.format_match:
+            return False
+        i_features = self.region('header').data[
+            self.I_FEATURES:self.I_FEATURES + self.I_FEATURES_LEN]
+
+        # This is the maximum byte number we should expect any bits to be set
+        max_byte = self.I_FEATURES_MAX_BIT // 8
+
+        # The flag bytes are in big-endian ordering, so if we process
+        # them in index-order, they're reversed: i is the position in the
+        # captured data, byte_num is the significance of that byte
+        for i, byte_num in enumerate(reversed(range(self.I_FEATURES_LEN))):
+            if byte_num == max_byte:
+                # If we're in the max-allowed byte, allow any bits less than
+                # the maximum-known feature flag bit to be set. The bit
+                # number is reduced modulo 8 so the mask stays within this
+                # byte even if the maximum lives above the first byte.
+                allow_mask = (1 << (self.I_FEATURES_MAX_BIT % 8)) - 1
+            elif byte_num > max_byte:
+                # If we're above the byte with the maximum known feature flag
+                # bit, then we expect all zeroes
+                allow_mask = 0x0
+            else:
+                # Any earlier-than-the-maximum byte can have any of the flag
+                # bits set
+                allow_mask = 0xFF
+
+            # Log exactly the bits that were tested (index i, not byte_num)
+            unknown_bits = i_features[i] & ~allow_mask
+            if unknown_bits:
+                LOG.warning('Found unknown feature bit in byte %i: %s/%s',
+                            byte_num, bin(unknown_bits), bin(allow_mask))
+                return True
+
+        return False
+
+    @property
+    def has_data_file(self):
+        """Report whether the external data-file feature flag is set.
+
+        Returns None while the header is still incomplete, False when the
+        file is not qcow2 or the flag is clear, and True when it is set.
+        """
+        if not self.region('header').complete:
+            return None
+        if not self.format_match:
+            return False
+        i_features = self.region('header').data[
+            self.I_FEATURES:self.I_FEATURES + self.I_FEATURES_LEN]
+
+        # I_FEATURES_DATAFILE_BIT counts from one (3 selects mask 0x04), so
+        # convert it to a zero-based index before splitting it into a byte
+        # and a bit. The subtraction must happen before the modulo.
+        bit_index = self.I_FEATURES_DATAFILE_BIT - 1
+        # The bitfield is big-endian, so the lowest bits live in the last
+        # byte, which is i_features[7] for the data-file flag
+        byte = self.I_FEATURES_LEN - 1 - bit_index // 8
+        # Third bit of bitfield, which is 0x04
+        bit = 1 << (bit_index % 8)
+        return bool(i_features[byte] & bit)
+
     def __str__(self):
         return 'qcow2'
 
+    def safety_check(self):
+        """Check the qcow2 header for features we refuse to handle.
+
+        :returns: True only when the header has been fully captured and
+                  the image has no backing file, no external data file
+                  and no unknown incompatible feature bits; False
+                  otherwise
+        """
+        if not self.has_header:
+            # Until the header is complete the has_* properties return
+            # None, which must not be mistaken for "nothing unsafe found"
+            return False
+        return not (self.has_backing_file or
+                    self.has_data_file or
+                    self.has_unknown_features)
+
 
 # The VHD (or VPC as QEMU calls it) format consists of a big-endian
 # 512-byte "footer" at the beginning of the file with various
@@ -345,6 +471,7 @@ class VHDXInspector(FileInspector):
     """
     METAREGION = '8B7CA206-4790-4B9A-B8FE-575F050F886E'
     VIRTUAL_DISK_SIZE = '2FA54224-CD1B-4876-B211-5DBED83BF4B8'
+    VHDX_METADATA_TABLE_MAX_SIZE = 32 * 2048  # From qemu
 
     def __init__(self, *a, **k):
         super(VHDXInspector, self).__init__(*a, **k)
@@ -459,6 +586,8 @@ def _find_meta_entry(self, desired_guid):
                 item_offset, item_length, _reserved = struct.unpack(
                     '<III',
                     meta_buffer[entry_offset + 16:entry_offset + 28])
+                item_length = min(item_length,
+                                  self.VHDX_METADATA_TABLE_MAX_SIZE)
                 self.region('metadata').length = len(meta_buffer)
                 self._log.debug('Found entry at offset %x', item_offset)
                 # Metadata item offset is from the beginning of the metadata
@@ -509,13 +638,20 @@ def __str__(self):
 #
 # https://www.vmware.com/app/vmdk/?src=vmdk
 class VMDKInspector(FileInspector):
-    """vmware VMDK format (monolithicSparse variant only)
+    """vmware VMDK format (monolithicSparse and streamOptimized variants only)
 
     This needs to store the 512 byte header and the descriptor region
     which should be just after that. The descriptor region is some
     variable number of 512 byte sectors, but is just text defining the
     layout of the disk.
     """
+
+    # The beginning and max size of the descriptor is also hardcoded in Qemu
+    # at 0x200 and 1MB - 1
+    DESC_OFFSET = 0x200
+    DESC_MAX_SIZE = (1 << 20) - 1
+    # gdOffset value that post_process() takes to mean "a footer is
+    # present"; such files are rejected because the footer would take
+    # precedence over the header and we only see the data as a stream
+    GD_AT_END = 0xffffffffffffffff
+
     def __init__(self, *a, **k):
         super(VMDKInspector, self).__init__(*a, **k)
         self.new_region('header', CaptureRegion(0, 512))
@@ -527,20 +663,33 @@ def post_process(self):
         if not self.region('header').complete:
             return
 
-        sig, ver, _flags, _sectors, _grain, desc_sec, desc_num = struct.unpack(
-            '<4sIIQQQQ', self.region('header').data[:44])
+        (sig, ver, _flags, _sectors, _grain, desc_sec, desc_num,
+         _numGTEsperGT, _rgdOffset, gdOffset) = struct.unpack(
+            '<4sIIQQQQIQQ', self.region('header').data[:64])
 
         if sig != b'KDMV':
             raise ImageFormatError('Signature KDMV not found: %r' % sig)
-            return
 
         if ver not in (1, 2, 3):
             raise ImageFormatError('Unsupported format version %i' % ver)
-            return
+
+        if gdOffset == self.GD_AT_END:
+            # This means we have a footer, which takes precedence over the
+            # header, which we cannot support since we stream.
+            raise ImageFormatError('Unsupported VMDK footer')
+
+        # Since we parse both desc_sec and desc_num (the location of the
+        # VMDK's descriptor, expressed in 512 bytes sectors) we enforce a
+        # check on the bounds to create a reasonable CaptureRegion. This
+        # is similar to how it's done in qemu.
+        desc_offset = desc_sec * 512
+        desc_size = min(desc_num * 512, self.DESC_MAX_SIZE)
+        if desc_offset != self.DESC_OFFSET:
+            raise ImageFormatError("Wrong descriptor location")
 
         if not self.has_region('descriptor'):
             self.new_region('descriptor', CaptureRegion(
-                desc_sec * 512, desc_num * 512))
+                desc_offset, desc_size))
 
     @property
     def format_match(self):
@@ -566,7 +715,7 @@ def virtual_size(self):
             vmdktype = descriptor[type_idx:type_end]
         else:
             vmdktype = b'formatnotfound'
-        if vmdktype != b'monolithicSparse':
+        if vmdktype not in (b'monolithicSparse', b'streamOptimized'):
             LOG.warning('Unsupported VMDK format %s', vmdktype)
             return 0
 
@@ -576,6 +725,59 @@ def virtual_size(self):
 
         return sectors * 512
 
+    def safety_check(self):
+        """Vet the VMDK descriptor text for content we do not trust.
+
+        :returns: False if the descriptor region is missing or incomplete,
+                  if any extent line contains a '/', or if no extent
+                  lines are present; True otherwise
+        :raises ImageFormatError: if the descriptor is not ASCII or holds
+                                  a line that is not a comment, a ddb
+                                  entry, a header field or an extent
+        """
+        if not self.has_region('descriptor'):
+            return False
+        descriptor = self.region('descriptor')
+        if not descriptor.complete:
+            return False
+
+        # Descriptor is padded to 512 bytes
+        padded = descriptor.data.rstrip(b'\x00')
+        try:
+            # Descriptor is actually case-insensitive ASCII text
+            text = padded.decode('ascii').lower()
+        except UnicodeDecodeError:
+            LOG.error('VMDK descriptor failed to decode as ASCII')
+            raise ImageFormatError('Invalid VMDK descriptor data')
+
+        access_modes = ('rw', 'rdonly', 'noaccess')
+        extent_lines = []
+
+        # NOTE(danms): Cautiously parse the VMDK descriptor. Each line must
+        # be something we understand, otherwise we refuse it.
+        for raw_line in text.split('\n'):
+            line = raw_line.strip()
+            if not line or line.startswith('#'):
+                # Blank or comment lines are ignored
+                continue
+            if line.startswith('ddb'):
+                # DDB lines are allowed (but not used by us)
+                continue
+            key, equals, _value = line.partition('=')
+            if equals and ' ' not in key:
+                # Header fields are a single word followed by an '=' and
+                # some value
+                continue
+            if line.split(' ')[0] in access_modes:
+                # Extent lines start with one of the three access modes
+                extent_lines.append(line)
+                continue
+            # Anything else results in a rejection
+            LOG.error('Unsupported line %r in VMDK descriptor', line)
+            raise ImageFormatError('Invalid VMDK descriptor data')
+
+        # Only once the whole descriptor parsed cleanly do we look at the
+        # extent lines for concerning content
+        for extent in extent_lines:
+            if '/' in extent:
+                LOG.error('Extent line %r contains unsafe characters',
+                          extent)
+                return False
+
+        if not extent_lines:
+            LOG.error('VMDK file specified no extents')
+            return False
+
+        return True
+
     def __str__(self):
         return 'vmdk'