diff --git a/man/man4/zfs.4 b/man/man4/zfs.4
index dd0b3d848fe9..9d83357fcc6d 100644
--- a/man/man4/zfs.4
+++ b/man/man4/zfs.4
@@ -1778,7 +1778,7 @@ Normally disabled because these datasets may be missing key data.
 .It Sy zfs_min_metaslabs_to_flush Ns = Ns Sy 1 Pq u64
 Minimum number of metaslabs to flush per dirty TXG.
 .
-.It Sy zfs_metaslab_fragmentation_threshold Ns = Ns Sy 70 Ns % Pq uint
+.It Sy zfs_metaslab_fragmentation_threshold Ns = Ns Sy 77 Ns % Pq uint
 Allow metaslabs to keep their active state as long as their fragmentation
 percentage is no more than this value.
 An active metaslab that exceeds this threshold
diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c
index 7affbfac9dc7..353a99605913 100644
--- a/module/zfs/metaslab.c
+++ b/module/zfs/metaslab.c
@@ -146,7 +146,7 @@ static uint_t zfs_mg_fragmentation_threshold = 95;
  * active metaslab that exceeds this threshold will no longer keep its active
  * status allowing better metaslabs to be selected.
  */
-static uint_t zfs_metaslab_fragmentation_threshold = 70;
+static uint_t zfs_metaslab_fragmentation_threshold = 77;
 
 /*
  * When set will load all metaslabs when pool is first opened.
@@ -2889,8 +2889,6 @@ metaslab_fini(metaslab_t *msp)
 	kmem_free(msp, sizeof (metaslab_t));
 }
 
-#define	FRAGMENTATION_TABLE_SIZE	17
-
 /*
  * This table defines a segment size based fragmentation metric that will
  * allow each metaslab to derive its own fragmentation value. This is done
@@ -2901,33 +2899,40 @@ metaslab_fini(metaslab_t *msp)
  * us the fragmentation metric. This means that a high fragmentation metric
  * equates to most of the free space being comprised of small segments.
  * Conversely, if the metric is low, then most of the free space is in
- * large segments. A 10% change in fragmentation equates to approximately
- * double the number of segments.
+ * large segments.
  *
- * This table defines 0% fragmented space using 16MB segments. Testing has
- * shown that segments that are greater than or equal to 16MB do not suffer
- * from drastic performance problems. Using this value, we derive the rest
- * of the table. Since the fragmentation value is never stored on disk, it
- * is possible to change these calculations in the future.
+ * This table defines 0% fragmented space using 512MB segments; the rest of
+ * the table is derived from that value. The table originally stopped at 16MB,
+ * but with larger recordsizes, larger ashifts, and use of raidz3, allocations
+ * can now be significantly larger than was previously possible. Since the
+ * fragmentation value is never stored on disk, it is possible to change
+ * these calculations in the future.
  */
-static const int zfs_frag_table[FRAGMENTATION_TABLE_SIZE] = {
+static const int zfs_frag_table[] = {
 	100,	/* 512B	*/
-	100,	/* 1K	*/
-	98,	/* 2K	*/
-	95,	/* 4K	*/
-	90,	/* 8K	*/
-	80,	/* 16K	*/
-	70,	/* 32K	*/
-	60,	/* 64K	*/
-	50,	/* 128K	*/
-	40,	/* 256K	*/
-	30,	/* 512K	*/
-	20,	/* 1M	*/
-	15,	/* 2M	*/
-	10,	/* 4M	*/
-	5,	/* 8M	*/
-	0	/* 16M	*/
+	99,	/* 1K	*/
+	97,	/* 2K	*/
+	93,	/* 4K	*/
+	88,	/* 8K	*/
+	83,	/* 16K	*/
+	77,	/* 32K	*/
+	71,	/* 64K	*/
+	64,	/* 128K	*/
+	57,	/* 256K	*/
+	50,	/* 512K	*/
+	43,	/* 1M	*/
+	36,	/* 2M	*/
+	29,	/* 4M	*/
+	23,	/* 8M	*/
+	17,	/* 16M	*/
+	12,	/* 32M	*/
+	7,	/* 64M	*/
+	3,	/* 128M	*/
+	1,	/* 256M	*/
+	0,	/* 512M	*/
 };
+#define	FRAGMENTATION_TABLE_SIZE \
+	(sizeof (zfs_frag_table)/(sizeof (zfs_frag_table[0])))
 
 /*
  * Calculate the metaslab's fragmentation metric and set ms_fragmentation.