Static sync thread and write issue taskq assignments
 - Given a reasonable number of syncthreads, assign each
   syncthread its own allocator.

 - Create a separate write issue taskq for a given number of
   CPUs and statically bind each taskq to a specified
   syncthread (see the sketch below).

Signed-off-by: Edmund Nadolski <[email protected]>
Edmund Nadolski committed Sep 28, 2023
1 parent f78febf commit 428cf89
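The mapping described in the commit message can be illustrated with a small stand-alone sketch. The numbers and the simple round-robin rule below are hypothetical stand-ins for spa_num_allocators, the CPU count, and zio_taskq_wr_iss_ncpus; the commit itself does not prescribe this exact formula.

/*
 * Hypothetical illustration only: give each sync thread its own
 * allocator and round-robin the write issue taskqs across the sync
 * threads.
 */
#include <stdio.h>

int
main(void)
{
	int nsyncthreads = 4;		/* one per allocator */
	int ncpus = 128;
	int wr_iss_ncpus = 32;		/* CPUs per write issue taskq */
	int ntaskqs = ncpus / wr_iss_ncpus;
	int i;

	for (i = 0; i < nsyncthreads; i++)
		printf("syncthread %d -> allocator %d, wr_iss taskq %d\n",
		    i, i, i % ntaskqs);
	return (0);
}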
Showing 10 changed files with 286 additions and 63 deletions.
5 changes: 5 additions & 0 deletions include/sys/spa.h
@@ -825,6 +825,11 @@ extern void spa_sync_allpools(void);

extern uint_t zfs_sync_pass_deferred_free;

/* spa sync taskqueues */
taskq_t *spa_sync_tq_create(spa_t *spa, const char *name);
void spa_sync_tq_destroy(spa_t *spa);
void spa_select_allocator(zio_t *zio);

/* spa namespace global mutex */
extern kmutex_t spa_namespace_lock;

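A minimal usage sketch of the three new declarations (ZFS kernel context assumed; the caller names here are hypothetical and error handling is omitted). The dsl_pool.c hunk further down shows the actual create/destroy call sites.

#include <sys/spa.h>
#include <sys/zio.h>

/* Create the pool's sync taskq when the pool is opened. */
static taskq_t *
example_open(spa_t *spa)
{
	return (spa_sync_tq_create(spa, "dp_sync_taskq"));
}

/* Tag an allocating write with the allocator bound to the caller. */
static void
example_tag_write(zio_t *zio)
{
	spa_select_allocator(zio);
}

/* Tear the sync taskq down when the pool is closed. */
static void
example_close(spa_t *spa)
{
	spa_sync_tq_destroy(spa);
}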
12 changes: 11 additions & 1 deletion include/sys/spa_impl.h
@@ -188,6 +188,12 @@ typedef struct spa_taskqs {
taskq_t **stqs_taskq;
} spa_taskqs_t;

/* one for each thread in the spa sync taskq */
typedef struct spa_syncthread_info {
kthread_t *sti_thread;
taskq_t *sti_wr_iss_tq; /* assigned wr_iss taskq */
} spa_syncthread_info_t;

typedef enum spa_all_vdev_zap_action {
AVZ_ACTION_NONE = 0,
AVZ_ACTION_DESTROY, /* Destroy all per-vdev ZAPs and the AVZ. */
@@ -265,6 +271,10 @@ struct spa {
int spa_alloc_count;
int spa_active_allocator; /* selectable allocator */

/* per-allocator sync thread taskqs */
taskq_t *spa_sync_tq;
spa_syncthread_info_t *spa_syncthreads;

spa_aux_vdev_t spa_spares; /* hot spares */
spa_aux_vdev_t spa_l2cache; /* L2ARC cache devices */
nvlist_t *spa_label_features; /* Features for reading MOS */
@@ -456,7 +466,7 @@ extern char *spa_config_path;
extern const char *zfs_deadman_failmode;
extern uint_t spa_slop_shift;
extern void spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent);
task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent, zio_t *zio);
extern void spa_taskq_dispatch_sync(spa_t *, zio_type_t t, zio_taskq_type_t q,
task_func_t *func, void *arg, uint_t flags);
extern void spa_load_spares(spa_t *spa);
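A hypothetical lookup sketch (not code from this commit) of how the per-sync-thread table could be consulted to find the write issue taskq bound to the current sync thread. It assumes spa_syncthreads holds spa_alloc_count entries and that curthread identifies the caller in kernel context.

#include <sys/spa_impl.h>

static taskq_t *
example_wr_iss_tq_for_curthread(spa_t *spa)
{
	int i;

	for (i = 0; i < spa->spa_alloc_count; i++) {
		spa_syncthread_info_t *ti = &spa->spa_syncthreads[i];

		if (ti->sti_thread == curthread)
			return (ti->sti_wr_iss_tq);
	}
	return (NULL);	/* caller is not one of the sync threads */
}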
6 changes: 6 additions & 0 deletions include/sys/zio.h
@@ -223,6 +223,9 @@ typedef uint64_t zio_flag_t;
#define ZIO_FLAG_REEXECUTED (1ULL << 29)
#define ZIO_FLAG_DELEGATED (1ULL << 30)

#define ZIO_ALLOCATOR_NONE (-1)
#define ZIO_HAS_ALLOCATOR(zio) ((zio)->io_allocator != ZIO_ALLOCATOR_NONE)

#define ZIO_FLAG_MUSTSUCCEED 0
#define ZIO_FLAG_RAW (ZIO_FLAG_RAW_COMPRESS | ZIO_FLAG_RAW_ENCRYPT)

@@ -526,6 +529,9 @@ struct zio {

/* Taskq dispatching state */
taskq_ent_t io_tqent;

/* write issue taskq selection, based upon sync thread */
taskq_t *io_wr_iss_tq;
};

enum blk_verify_flag {
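A sketch of the intended macro usage (assumed, not copied from the commit): a zio starts out with no allocator assigned, and code that touches per-allocator state checks ZIO_HAS_ALLOCATOR() before indexing by io_allocator.

#include <sys/zio.h>

/* Newly created zios carry no allocator assignment yet. */
static void
example_reset_allocator(zio_t *zio)
{
	zio->io_allocator = ZIO_ALLOCATOR_NONE;
}

/* Guard any use of per-allocator state on the assignment. */
static boolean_t
example_allocator_assigned(zio_t *zio)
{
	return (ZIO_HAS_ALLOCATOR(zio));
}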
23 changes: 16 additions & 7 deletions man/man4/zfs.4
@@ -490,6 +490,14 @@ If we have less than this amount of free space,
most ZPL operations (e.g. write, create) will return
.Sy ENOSPC .
.
.It Sy spa_num_allocators Ns = Ns Sy 4 Pq int
Determines the number of block allocators to use per spa instance.
Capped by the number of actual CPUs in the system.
.Pp
This should be a multiple of the number of CPUs.
Note that setting this value too high could result in performance
degradation and/or excess fragmentation.
.
.It Sy spa_upgrade_errlog_limit Ns = Ns Sy 0 Pq uint
Limits the number of on-disk error log entries that will be converted to the
new format when enabling the
@@ -1971,13 +1979,6 @@ and may need to load new metaslabs to satisfy these allocations.
.It Sy zfs_sync_pass_rewrite Ns = Ns Sy 2 Pq uint
Rewrite new block pointers starting in this pass.
.
.It Sy zfs_sync_taskq_batch_pct Ns = Ns Sy 75 Ns % Pq int
This controls the number of threads used by
.Sy dp_sync_taskq .
The default value of
.Sy 75%
will create a maximum of one thread per CPU.
.
.It Sy zfs_trim_extent_bytes_max Ns = Ns Sy 134217728 Ns B Po 128 MiB Pc Pq uint
Maximum size of TRIM command.
Larger ranges will be split into chunks no larger than this value before
@@ -2264,6 +2265,14 @@ If
.Sy 0 ,
generate a system-dependent value close to 6 threads per taskq.
.
.It Sy zio_taskq_wr_iss_ncpus Ns = Ns Sy 32 Pq uint
Determines the number of CPUs to run write issue taskqs.
.Pp
While an optimal value will be system dependent, a suggested value
is the number of actual CPUs in the system, divided by the
.Sy spa_num_allocators
value.
.
.It Sy zvol_inhibit_dev Ns = Ns Sy 0 Ns | Ns 1 Pq uint
Do not create zvol device nodes.
This may slightly improve startup time on
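A worked example of the suggested tuning above (illustrative numbers, not from the man page): on a system with 128 CPUs and the default spa_num_allocators of 4, the suggested zio_taskq_wr_iss_ncpus would be 128 / 4 = 32, which matches the default shown for the tunable.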
5 changes: 4 additions & 1 deletion module/zfs/dbuf.c
@@ -4587,7 +4587,10 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
}
}

/* May be called recursively from dbuf_sync_indirect(). */
/*
* Syncs out a range of dirty records for indirect or leaf dbufs. May be
* called recursively from dbuf_sync_indirect().
*/
void
dbuf_sync_list(list_t *list, int level, dmu_tx_t *tx)
{
6 changes: 4 additions & 2 deletions module/zfs/dmu_objset.c
@@ -1649,6 +1649,7 @@ typedef struct sync_objset_arg {
objset_t *soa_os;
dmu_tx_t *soa_tx;
zio_t *soa_zio;
taskq_ent_t soa_tq_ent;
} sync_objset_arg_t;

static void
@@ -1688,8 +1689,8 @@ dmu_objset_sync_sublists_done(zio_t *zio)
}

/* sync_dnodes_finish_task calls zil_sync on our behalf. */
(void) taskq_dispatch(dmu_objset_pool(os)->dp_sync_taskq,
sync_dnodes_finish_task, soa, TQ_FRONT);
taskq_dispatch_ent(dmu_objset_pool(os)->dp_sync_taskq,
sync_dnodes_finish_task, soa, TQ_FRONT, &soa->soa_tq_ent);
}

/* Nonblocking objset sync. Called from dsl. */
@@ -1795,6 +1796,7 @@ dmu_objset_sync(objset_t *os, zio_t *rio, dmu_tx_t *tx)
soa->soa_os = os;
soa->soa_tx = tx;
soa->soa_zio = zio;
taskq_init_ent(&soa->soa_tq_ent);

/* sio is a child of the arc_write zio and parent of the sda_cio(s). */
zio_t *sio = zio_null(zio, os->os_spa, NULL,
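The hunks above switch dmu_objset_sync_sublists_done() from taskq_dispatch() to taskq_dispatch_ent() with a taskq_ent_t embedded in the argument struct. Shown in isolation (kernel context assumed, hypothetical names), the pattern pre-allocates the dispatch entry alongside the argument, so the dispatch itself cannot fail for lack of memory.

#include <sys/taskq.h>
#include <sys/kmem.h>

typedef struct example_arg {
	int		ea_value;
	taskq_ent_t	ea_tq_ent;	/* pre-allocated dispatch entry */
} example_arg_t;

static void
example_task(void *arg)
{
	example_arg_t *ea = arg;

	/* ... do the work ... */
	kmem_free(ea, sizeof (*ea));
}

static void
example_dispatch(taskq_t *tq)
{
	example_arg_t *ea = kmem_alloc(sizeof (*ea), KM_SLEEP);

	ea->ea_value = 0;
	taskq_init_ent(&ea->ea_tq_ent);
	taskq_dispatch_ent(tq, example_task, ea, TQ_FRONT, &ea->ea_tq_ent);
}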
5 changes: 2 additions & 3 deletions module/zfs/dsl_pool.c
@@ -209,8 +209,7 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
txg_list_create(&dp->dp_early_sync_tasks, spa,
offsetof(dsl_sync_task_t, dst_node));

dp->dp_sync_taskq = taskq_create("dp_sync_taskq",
MIN(spa->spa_alloc_count, boot_ncpus), minclsyspri, 1, INT_MAX, 0);
dp->dp_sync_taskq = spa_sync_tq_create(spa, "dp_sync_taskq");

dp->dp_zil_clean_taskq = taskq_create("dp_zil_clean_taskq",
zfs_zil_clean_taskq_nthr_pct, minclsyspri,
@@ -403,7 +402,7 @@ dsl_pool_close(dsl_pool_t *dp)
txg_list_destroy(&dp->dp_dirty_dirs);

taskq_destroy(dp->dp_zil_clean_taskq);
taskq_destroy(dp->dp_sync_taskq);
spa_sync_tq_destroy(dp->dp_spa);

/*
* We can't set retry to TRUE since we're explicitly specifying
Expand Down