From fd1a6ee4283ff85b1917cb34b180c50473d9db2c Mon Sep 17 00:00:00 2001 From: Yann Dirson Date: Mon, 15 Jul 2024 15:29:56 +0200 Subject: [PATCH] Enhance tracing to hunt for restore bug The particular issue turned out to be that the nested UEFI host being restored had several disks attached. Since this is done as NVMe namespaces of a single disk, all sharing the same ID, udev in dom0 does not have enough info and messes up /dev/disk/by-id/ on which restore relies because of /etc/xensource-inventory: lrwxrwxrwx 1 root root 13 Jul 15 14:28 nvme-QEMU_NVMe_Ctrl_nvme0 -> ../../nvme0n3 lrwxrwxrwx 1 root root 15 Jul 15 14:28 nvme-QEMU_NVMe_Ctrl_nvme0-part1 -> ../../nvme0n2p1 lrwxrwxrwx 1 root root 15 Jul 15 14:28 nvme-QEMU_NVMe_Ctrl_nvme0-part2 -> ../../nvme0n1p2 Only the addition of the `tool.dump()` call allowed me to connect the "Failed to create root filesystem" error with that longstanding (in)famous issue, but a few other additions were useful to get there. Signed-off-by: Yann Dirson (cherry picked from commit 02d72e663450aa478a1c7087191419e856988a01, with one additional safety guard that does not apply to master any more) --- disktools.py | 1 + diskutil.py | 6 ++++++ restore.py | 3 +++ 3 files changed, 10 insertions(+) diff --git a/disktools.py b/disktools.py index 4bc0fa7c..39f2c042 100644 --- a/disktools.py +++ b/disktools.py @@ -1117,6 +1117,7 @@ def probePartitioningScheme(device): if out == 'dos': partitionType = constants.PARTITION_DOS + logger.debug("probePartitioningScheme(%r) => %r", device, partitionType) return partitionType def PartitionTool(device, partitionType=None): diff --git a/diskutil.py b/diskutil.py index bbfa2d65..575bdc7b 100644 --- a/diskutil.py +++ b/diskutil.py @@ -493,6 +493,8 @@ def __init__(self, device): self.storage = (None, None) self.logs = (False, None) self.swap = (False, None) + def __str__(self): + return "Disk({})".format(self.__dict__) INSTALL_RETAIL = 1 STORAGE_LVM = 1 @@ -512,10 +514,12 @@ def probeDisk(device): swap is a tuple of True or False and the partition device """ + logger.debug("probeDisk(%r)", device) disk = Disk(device) possible_srs = [] tool = PartitionTool(device) + tool.dump() for num, part in tool.items(): label = None part_device = tool._partitionDevice(num) @@ -543,6 +547,8 @@ def probeDisk(device): disk.swap = (True, part_device) elif part['id'] == GPTPartitionTool.ID_EFI_BOOT or part['id'] == GPTPartitionTool.ID_BIOS_BOOT: disk.boot = (True, part_device) + else: + logger.info("part %s has unknown id: %s", num, part) lv_tool = len(possible_srs) and LVMTool() for num in possible_srs: diff --git a/restore.py b/restore.py index 3da45cc9..962a13df 100644 --- a/restore.py +++ b/restore.py @@ -40,6 +40,8 @@ def restoreFromBackup(backup, progress=lambda x: ()): assert disk_device.startswith('/dev/') restore_partition = disk.root[1] + if not isinstance(restore_partition, str): + raise RuntimeError("failed to identify root partition to restore to, disk=%s" % disk) logger.log("Restoring to partition %s." % restore_partition) boot_part = tool.getPartition(boot_partnum) @@ -138,6 +140,7 @@ def restore_partitions(): try: util.mkfs(constants.rootfs_type, restore_partition) except Exception as e: + logger.critical("Failed to create root filesystem", exc_info=1) raise RuntimeError("Failed to create root filesystem: %s" % e) if efi_boot: