From 1f4a2cf1e89068382809ce1d7412f3df2ae752b3 Mon Sep 17 00:00:00 2001 From: Allan Jude Date: Fri, 17 Nov 2023 05:11:15 +0000 Subject: [PATCH] Extend import_progress kstat with a notes field Detail the import progress of log spacemaps as they can take a very long time Also grab the spa_note() messages to, as they provide insight into what is happening Sponsored-By: OpenDrives Inc. Sponsored-By: Klara Inc. Co-authored-by: Don Brady Signed-off-by: Don Brady --- include/sys/spa.h | 2 + module/zfs/spa.c | 37 +++++ module/zfs/spa_log_spacemap.c | 12 ++ module/zfs/spa_misc.c | 48 ++++++- tests/runfiles/linux.run | 3 +- .../zpool_import/zpool_import_status.ksh | 133 ++++++++++++++++++ 6 files changed, 230 insertions(+), 5 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_status.ksh diff --git a/include/sys/spa.h b/include/sys/spa.h index b90855687411..3d56a93e232b 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -966,6 +966,8 @@ extern int spa_import_progress_set_max_txg(uint64_t pool_guid, uint64_t max_txg); extern int spa_import_progress_set_state(uint64_t pool_guid, spa_load_state_t spa_load_state); +extern void spa_import_progress_set_notes(uint64_t pool_guid, + const char *notes); /* Pool configuration locks */ extern int spa_config_tryenter(spa_t *spa, int locks, const void *tag, diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 1410651c63cc..cb692533cc84 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -3055,6 +3055,7 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type) spa->spa_load_state = state; (void) spa_import_progress_set_state(spa_guid(spa), spa_load_state(spa)); + spa_import_progress_set_notes(spa_guid(spa), "spa_load()"); gethrestime(&spa->spa_loaded_ts); error = spa_load_impl(spa, type, &ereport); @@ -4935,6 +4936,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) /* * Retrieve the checkpoint txg if the pool has a checkpoint. */ + spa_import_progress_set_notes(spa_guid(spa), "Loading checkpoint txg"); error = spa_ld_read_checkpoint_txg(spa); if (error != 0) return (error); @@ -4947,6 +4949,8 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * initiated. Otherwise we could be reading from indirect vdevs before * we have loaded their mappings. */ + spa_import_progress_set_notes(spa_guid(spa), + "Loading indirect vdev metadata"); error = spa_ld_open_indirect_vdev_metadata(spa); if (error != 0) return (error); @@ -4955,6 +4959,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * Retrieve the full list of active features from the MOS and check if * they are all supported. */ + spa_import_progress_set_notes(spa_guid(spa), "Checking feature flags"); error = spa_ld_check_features(spa, &missing_feat_write); if (error != 0) return (error); @@ -4963,6 +4968,8 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * Load several special directories from the MOS needed by the dsl_pool * layer. */ + spa_import_progress_set_notes(spa_guid(spa), + "Loading special MOS directories"); error = spa_ld_load_special_directories(spa); if (error != 0) return (error); @@ -4970,6 +4977,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) /* * Retrieve pool properties from the MOS. */ + spa_import_progress_set_notes(spa_guid(spa), "Loading properties"); error = spa_ld_get_props(spa); if (error != 0) return (error); @@ -4978,6 +4986,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * Retrieve the list of auxiliary devices - cache devices and spares - * and open them. */ + spa_import_progress_set_notes(spa_guid(spa), "Loading AUX vdevs"); error = spa_ld_open_aux_vdevs(spa, type); if (error != 0) return (error); @@ -4986,14 +4995,17 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * Load the metadata for all vdevs. Also check if unopenable devices * should be autoreplaced. */ + spa_import_progress_set_notes(spa_guid(spa), "Loading vdev metadata"); error = spa_ld_load_vdev_metadata(spa); if (error != 0) return (error); + spa_import_progress_set_notes(spa_guid(spa), "Loading dedup tables"); error = spa_ld_load_dedup_tables(spa); if (error != 0) return (error); + spa_import_progress_set_notes(spa_guid(spa), "Loading BRT"); error = spa_ld_load_brt(spa); if (error != 0) return (error); @@ -5002,6 +5014,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * Verify the logs now to make sure we don't have any unexpected errors * when we claim log blocks later. */ + spa_import_progress_set_notes(spa_guid(spa), "Verifying Log Devices"); error = spa_ld_verify_logs(spa, type, ereport); if (error != 0) return (error); @@ -5023,6 +5036,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * state. When performing an extreme rewind, we verify the whole pool, * which can take a very long time. */ + spa_import_progress_set_notes(spa_guid(spa), "Verifying pool data"); error = spa_ld_verify_pool_data(spa); if (error != 0) return (error); @@ -5032,6 +5046,8 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * we write anything to the pool because we'd need to update the space * accounting using the deflated sizes. */ + spa_import_progress_set_notes(spa_guid(spa), + "Calculating deflated space"); spa_update_dspace(spa); /* @@ -5039,6 +5055,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * pool. If we are importing the pool in read-write mode, a few * additional steps must be performed to finish the import. */ + spa_import_progress_set_notes(spa_guid(spa), "Starting import"); if (spa_writeable(spa) && (spa->spa_load_state == SPA_LOAD_RECOVER || spa->spa_load_max_txg == UINT64_MAX)) { uint64_t config_cache_txg = spa->spa_config_txg; @@ -5055,6 +5072,8 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) (u_longlong_t)spa->spa_uberblock.ub_checkpoint_txg); } + spa_import_progress_set_notes(spa_guid(spa), + "Claiming ZIL blocks"); /* * Traverse the ZIL and claim all blocks. */ @@ -5074,6 +5093,8 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * will have been set for us by ZIL traversal operations * performed above. */ + spa_import_progress_set_notes(spa_guid(spa), + "Syncing ZIL claims"); txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg); /* @@ -5081,6 +5102,8 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * next sync, we would update the config stored in vdev labels * and the cachefile (by default /etc/zfs/zpool.cache). */ + spa_import_progress_set_notes(spa_guid(spa), + "Updating configs"); spa_ld_check_for_config_update(spa, config_cache_txg, update_config_cache); @@ -5089,6 +5112,8 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * Then check all DTLs to see if anything needs resilvering. * The resilver will be deferred if a rebuild was started. */ + spa_import_progress_set_notes(spa_guid(spa), + "Starting resilvers"); if (vdev_rebuild_active(spa->spa_root_vdev)) { vdev_rebuild_restart(spa); } else if (!dsl_scan_resilvering(spa->spa_dsl_pool) && @@ -5102,6 +5127,8 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) */ spa_history_log_version(spa, "open", NULL); + spa_import_progress_set_notes(spa_guid(spa), + "Restarting device removals"); spa_restart_removal(spa); spa_spawn_aux_threads(spa); @@ -5114,19 +5141,29 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport) * auxiliary threads above (from which the livelist * deletion zthr is part of). */ + spa_import_progress_set_notes(spa_guid(spa), + "Cleaning up inconsistent objsets"); (void) dmu_objset_find(spa_name(spa), dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN); /* * Clean up any stale temporary dataset userrefs. */ + spa_import_progress_set_notes(spa_guid(spa), + "Cleaning up temporary uerrefs"); dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool); spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); + spa_import_progress_set_notes(spa_guid(spa), + "Restarting Initialize"); vdev_initialize_restart(spa->spa_root_vdev); + spa_import_progress_set_notes(spa_guid(spa), + "Restarting TRIM"); vdev_trim_restart(spa->spa_root_vdev); vdev_autotrim_restart(spa); spa_config_exit(spa, SCL_CONFIG, FTAG); + spa_import_progress_set_notes(spa_guid(spa), + "Finished importing"); } spa_import_progress_remove(spa_guid(spa)); diff --git a/module/zfs/spa_log_spacemap.c b/module/zfs/spa_log_spacemap.c index 2878e68c6e4b..c196d47d2dc4 100644 --- a/module/zfs/spa_log_spacemap.c +++ b/module/zfs/spa_log_spacemap.c @@ -1153,6 +1153,7 @@ spa_ld_log_sm_data(spa_t *spa) uint_t pn = 0; uint64_t ps = 0; + uint64_t nsm = 0; psls = sls = avl_first(&spa->spa_sm_logs_by_txg); while (sls != NULL) { /* Prefetch log spacemaps up to 16 TXGs or MBs ahead. */ @@ -1185,6 +1186,16 @@ spa_ld_log_sm_data(spa_t *spa) summary_add_data(spa, sls->sls_txg, sls->sls_mscount, 0, sls->sls_nblocks); + char buf[256]; + (void) snprintf(buf, sizeof (buf), + "read %llu of %llu log space maps " + "in %lld ms", (u_longlong_t)nsm, + (u_longlong_t)avl_numnodes(&spa->spa_sm_logs_by_txg), + (longlong_t)((gethrtime() - read_logs_starttime) + / 1000000)); + + spa_import_progress_set_notes(spa_guid(spa), buf); + struct spa_ld_log_sm_arg vla = { .slls_spa = spa, .slls_txg = sls->sls_txg @@ -1200,6 +1211,7 @@ spa_ld_log_sm_data(spa_t *spa) pn--; ps -= space_map_length(sls->sls_sm); + nsm++; space_map_close(sls->sls_sm); sls->sls_sm = NULL; sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls); diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 72b690162d64..1a7f5fe20f1c 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -417,6 +417,8 @@ spa_load_note(spa_t *spa, const char *fmt, ...) zfs_dbgmsg("spa_load(%s, config %s): %s", spa->spa_name, spa->spa_trust_config ? "trusted" : "untrusted", buf); + + spa_import_progress_set_notes(spa_guid(spa), buf); } /* @@ -2171,6 +2173,7 @@ typedef struct spa_import_progress { uint64_t pool_guid; /* unique id for updates */ char *pool_name; spa_load_state_t spa_load_state; + char *spa_load_notes; uint64_t mmp_sec_remaining; /* MMP activity check */ uint64_t spa_load_max_txg; /* rewind txg */ procfs_list_node_t smh_node; @@ -2181,9 +2184,9 @@ spa_history_list_t *spa_import_progress_list = NULL; static int spa_import_progress_show_header(struct seq_file *f) { - seq_printf(f, "%-20s %-14s %-14s %-12s %s\n", "pool_guid", + seq_printf(f, "%-20s %-14s %-14s %-12s %s %s\n", "pool_guid", "load_state", "multihost_secs", "max_txg", - "pool_name"); + "pool_name", "notes"); return (0); } @@ -2192,11 +2195,12 @@ spa_import_progress_show(struct seq_file *f, void *data) { spa_import_progress_t *sip = (spa_import_progress_t *)data; - seq_printf(f, "%-20llu %-14llu %-14llu %-12llu %s\n", + seq_printf(f, "%-20llu %-14llu %-14llu %-12llu %s %s\n", (u_longlong_t)sip->pool_guid, (u_longlong_t)sip->spa_load_state, (u_longlong_t)sip->mmp_sec_remaining, (u_longlong_t)sip->spa_load_max_txg, - (sip->pool_name ? sip->pool_name : "-")); + (sip->pool_name ? sip->pool_name : "-"), + (sip->spa_load_notes ? sip->spa_load_notes : "-")); return (0); } @@ -2210,6 +2214,8 @@ spa_import_progress_truncate(spa_history_list_t *shl, unsigned int size) sip = list_remove_head(&shl->procfs_list.pl_list); if (sip->pool_name) spa_strfree(sip->pool_name); + if (sip->spa_load_notes) + spa_strfree(sip->spa_load_notes); kmem_free(sip, sizeof (spa_import_progress_t)); shl->size--; } @@ -2265,6 +2271,10 @@ spa_import_progress_set_state(uint64_t pool_guid, sip = list_prev(&shl->procfs_list.pl_list, sip)) { if (sip->pool_guid == pool_guid) { sip->spa_load_state = load_state; + if (sip->spa_load_notes != NULL) { + spa_strfree(sip->spa_load_notes); + sip->spa_load_notes = NULL; + } error = 0; break; } @@ -2274,6 +2284,33 @@ spa_import_progress_set_state(uint64_t pool_guid, return (error); } +void +spa_import_progress_set_notes(uint64_t pool_guid, const char *notes) +{ + spa_history_list_t *shl = spa_import_progress_list; + spa_import_progress_t *sip; + + if (shl->size == 0) + return; + + mutex_enter(&shl->procfs_list.pl_lock); + for (sip = list_tail(&shl->procfs_list.pl_list); sip != NULL; + sip = list_prev(&shl->procfs_list.pl_list, sip)) { + if (sip->pool_guid == pool_guid) { + if (sip->spa_load_notes != NULL) { + spa_strfree(sip->spa_load_notes); + sip->spa_load_notes = NULL; + } + if (notes != NULL) { + sip->spa_load_notes = spa_strdup(notes); + zfs_dbgmsg("'%s' %s", sip->pool_name, notes); + } + break; + } + } + mutex_exit(&shl->procfs_list.pl_lock); +} + int spa_import_progress_set_max_txg(uint64_t pool_guid, uint64_t load_max_txg) { @@ -2342,6 +2379,7 @@ spa_import_progress_add(spa_t *spa) poolname = spa_name(spa); sip->pool_name = spa_strdup(poolname); sip->spa_load_state = spa_load_state(spa); + sip->spa_load_notes = NULL; mutex_enter(&shl->procfs_list.pl_lock); procfs_list_add(&shl->procfs_list, sip); @@ -2361,6 +2399,8 @@ spa_import_progress_remove(uint64_t pool_guid) if (sip->pool_guid == pool_guid) { if (sip->pool_name) spa_strfree(sip->pool_name); + if (sip->spa_load_notes) + spa_strfree(sip->spa_load_notes); list_remove(&shl->procfs_list.pl_list, sip); shl->size--; kmem_free(sip, sizeof (spa_import_progress_t)); diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 2252e46df3a8..3674815b159f 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -86,7 +86,8 @@ tags = ['functional', 'cli_root', 'zpool_expand'] tests = ['zpool_import_hostid_changed', 'zpool_import_hostid_changed_unclean_export', 'zpool_import_hostid_changed_cachefile', - 'zpool_import_hostid_changed_cachefile_unclean_export'] + 'zpool_import_hostid_changed_cachefile_unclean_export', + 'zpool_import_status'] tags = ['functional', 'cli_root', 'zpool_import'] [tests/functional/cli_root/zpool_reopen:Linux] diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_status.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_status.ksh new file mode 100755 index 000000000000..621f2e0effb5 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_status.ksh @@ -0,0 +1,133 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2023 Klara, Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.cfg + +# +# DESCRIPTION: +# During a pool import, the 'import_progress' kstat contains details +# on the import progress. +# +# STRATEGY: +# 1. Create test pool with several devices +# 2. Generate some ZIL records and spacemap logs +# 3. Export the pool +# 4. Import the pool in the background and monitor the kstat content +# 5. Check the zfs debug messages for import progress +# + +verify_runnable "global" + +function cleanup +{ + log_must set_tunable64 KEEP_LOG_SPACEMAPS_AT_EXPORT 0 + log_must set_tunable64 METASLAB_DEBUG_LOAD 0 + + destroy_pool $TESTPOOL1 +} + +log_assert "During a pool import, the 'import_progress' kstat contains " \ + "notes on the progress" + +log_onexit cleanup + +log_must zpool create $TESTPOOL1 $VDEV0 $VDEV1 $VDEV2 +typeset guid=$(zpool get -H -o value guid $TESTPOOL1) + +log_must zfs create -o recordsize=8k $TESTPOOL1/fs +# +# This dd command works around an issue where ZIL records aren't created +# after freezing the pool unless a ZIL header already exists. Create a file +# synchronously to force ZFS to write one out. +# +log_must dd if=/dev/zero of=/$TESTPOOL1/fs/sync \ + conv=fdatasync,fsync bs=1 count=1 + +# +# Overwrite some blocks to populate spacemap logs +# +log_must dd if=/dev/urandom of=/$TESTPOOL1/fs/00 bs=1M count=200 +sync_all_pools +log_must dd if=/dev/urandom of=/$TESTPOOL1/fs/00 bs=1M count=200 +sync_all_pools + +# +# Freeze the pool to retain intent log records +# +log_must zpool freeze $TESTPOOL1 + +# fill_fs [destdir] [dirnum] [filenum] [bytes] [num_writes] [data] +log_must fill_fs /$TESTPOOL1/fs 1 2000 100 1024 R + +log_must zpool list -v $TESTPOOL1 + +# +# Unmount filesystem and export the pool +# +# At this stage the zfs intent log contains +# a set of records to replay. +# +log_must zfs unmount /$TESTPOOL1/fs + +log_must set_tunable64 KEEP_LOG_SPACEMAPS_AT_EXPORT 1 +log_must zpool export $TESTPOOL1 + +log_must set_tunable64 METASLAB_DEBUG_LOAD 1 +log_note "Starting zpool import in background at" $(/usr/bin/date +'%H:%M:%S:%3N') +zpool import -d $DEVICE_DIR -f $guid & +pid=$! + +# +# capture progress until import is finished +# +log_note waiting for pid $pid to exit +cat /proc/spl/kstat/zfs/import_progress +while [[ -d /proc/"$pid" ]]; do + line=$(cat /proc/spl/kstat/zfs/import_progress | grep -v pool_guid) + if [[ -n $line ]]; then + echo $(/usr/bin/date +'%H:%M:%S:%3N') $line + fi + if [[ -f /$TESTPOOL1/fs/00 ]]; then + break; + fi + sleep 0.0001 +done +log_note "zpool import completed at" $(/usr/bin/date +'%H:%M:%S:%3N') + +entries=$(grep "spa_import_progress_set_notes(): 'testpool1'" /proc/spl/kstat/zfs/dbgmsg | wc -l) +log_note "found $entries progress notes in dbgmsg" +log_must test $entries -gt 20 + +log_must zpool status $TESTPOOL1 + +log_pass "During a pool import, the 'import_progress' kstat contains " \ + "notes on the progress"