diff --git a/man/man4/zfs.4 b/man/man4/zfs.4
index dd0b3d848fe9..094be6fb6503 100644
--- a/man/man4/zfs.4
+++ b/man/man4/zfs.4
@@ -1778,7 +1778,7 @@ Normally disabled because these datasets may be missing key data.
 .It Sy zfs_min_metaslabs_to_flush Ns = Ns Sy 1 Pq u64
 Minimum number of metaslabs to flush per dirty TXG.
 .
-.It Sy zfs_metaslab_fragmentation_threshold Ns = Ns Sy 70 Ns % Pq uint
+.It Sy zfs_metaslab_fragmentation_threshold Ns = Ns Sy 85 Ns % Pq uint
 Allow metaslabs to keep their active state as long as their fragmentation
 percentage is no more than this value.
 An active metaslab that exceeds this threshold
diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c
index 7affbfac9dc7..582e99782f53 100644
--- a/module/zfs/metaslab.c
+++ b/module/zfs/metaslab.c
@@ -146,7 +146,7 @@ static uint_t zfs_mg_fragmentation_threshold = 95;
  * active metaslab that exceeds this threshold will no longer keep its active
  * status allowing better metaslabs to be selected.
  */
-static uint_t zfs_metaslab_fragmentation_threshold = 70;
+static uint_t zfs_metaslab_fragmentation_threshold = 85;
 
 /*
  * When set will load all metaslabs when pool is first opened.
@@ -2889,8 +2889,6 @@ metaslab_fini(metaslab_t *msp)
 	kmem_free(msp, sizeof (metaslab_t));
 }
 
-#define	FRAGMENTATION_TABLE_SIZE	17
-
 /*
  * This table defines a segment size based fragmentation metric that will
  * allow each metaslab to derive its own fragmentation value. This is done
@@ -2901,33 +2899,41 @@ metaslab_fini(metaslab_t *msp)
  * us the fragmentation metric. This means that a high fragmentation metric
  * equates to most of the free space being comprised of small segments.
  * Conversely, if the metric is low, then most of the free space is in
- * large segments. A 10% change in fragmentation equates to approximately
- * double the number of segments.
+ * large segments.
  *
- * This table defines 0% fragmented space using 16MB segments. Testing has
- * shown that segments that are greater than or equal to 16MB do not suffer
- * from drastic performance problems. Using this value, we derive the rest
- * of the table. Since the fragmentation value is never stored on disk, it
- * is possible to change these calculations in the future.
+ * This table defines 0% fragmented space using 1G segments. Using this value,
+ * we derive the rest of the table. This table originally went up to 16MB, but
+ * with larger recordsizes, larger ashifts, and use of raidz3, it is possible
+ * to have significantly larger allocations than were previously possible.
+ * Since the fragmentation value is never stored on disk, it is possible to
+ * change these calculations in the future.
  */
-static const int zfs_frag_table[FRAGMENTATION_TABLE_SIZE] = {
+static const int zfs_frag_table[] = {
 	100,	/* 512B	*/
 	100,	/* 1K	*/
 	98,	/* 2K	*/
 	95,	/* 4K	*/
-	90,	/* 8K	*/
-	80,	/* 16K	*/
-	70,	/* 32K	*/
-	60,	/* 64K	*/
-	50,	/* 128K	*/
-	40,	/* 256K	*/
-	30,	/* 512K	*/
-	20,	/* 1M	*/
-	15,	/* 2M	*/
-	10,	/* 4M	*/
-	5,	/* 8M	*/
-	0	/* 16M	*/
+	92,	/* 8K	*/
+	90,	/* 16K	*/
+	85,	/* 32K	*/
+	80,	/* 64K	*/
+	75,	/* 128K	*/
+	70,	/* 256K	*/
+	60,	/* 512K	*/
+	50,	/* 1M	*/
+	40,	/* 2M	*/
+	35,	/* 4M	*/
+	30,	/* 8M	*/
+	25,	/* 16M	*/
+	20,	/* 32M	*/
+	15,	/* 64M	*/
+	10,	/* 128M	*/
+	5,	/* 256M	*/
+	2,	/* 512M	*/
+	0,	/* 1G	*/
 };
+#define	FRAGMENTATION_TABLE_SIZE \
+	(sizeof (zfs_frag_table)/(sizeof (zfs_frag_table[0])))
 
 /*
  * Calculate the metaslab's fragmentation metric and set ms_fragmentation.