From 465e2f5a9a7762fa24b1133cbe10c9b1dee84420 Mon Sep 17 00:00:00 2001
From: zikaeroh <48577114+zikaeroh@users.noreply.github.com>
Date: Sat, 24 Dec 2022 15:06:19 -0800
Subject: [PATCH] Add support for devices in swarm
Signed-off-by: zikaeroh <48577114+zikaeroh@users.noreply.github.com>
---
cli/command/service/create.go | 2 +
cli/command/service/formatter.go | 16 +++
cli/command/service/opts.go | 3 +
cli/compose/convert/service.go | 33 +++++
cli/compose/types/types.go | 1 -
docs/reference/commandline/service_create.md | 142 +++++++++----------
6 files changed, 122 insertions(+), 75 deletions(-)
diff --git a/cli/command/service/create.go b/cli/command/service/create.go
index d0543e60e36e..1aaff130b8f8 100644
--- a/cli/command/service/create.go
+++ b/cli/command/service/create.go
@@ -68,6 +68,8 @@ func newCreateCommand(dockerCli command.Cli) *cobra.Command {
flags.SetAnnotation(flagSysCtl, "version", []string{"1.40"})
flags.Var(&opts.ulimits, flagUlimit, "Ulimit options")
flags.SetAnnotation(flagUlimit, "version", []string{"1.41"})
+ flags.Var(&opts.devices, flagDevice, "Devices to add")
+ flags.SetAnnotation(flagDevice, "version", []string{"1.42"})
flags.Var(cliopts.NewListOptsRef(&opts.resources.resGenericResources, ValidateSingleGenericResource), "generic-resource", "User defined resources")
flags.SetAnnotation(flagHostAdd, "version", []string{"1.32"})
diff --git a/cli/command/service/formatter.go b/cli/command/service/formatter.go
index a5e91d2c5913..329d746f503f 100644
--- a/cli/command/service/formatter.go
+++ b/cli/command/service/formatter.go
@@ -116,6 +116,13 @@ Ulimits:
{{- range $k, $v := .ContainerUlimits }}
{{ $k }}: {{ $v }}
{{- end }}{{ end }}
+{{- if .HasContainerDevices }}
+Devices:
+{{- range $device := .ContainerDevices }}
+ PathOnHost = {{ $device.PathOnHost }}
+ PathInContainer = {{ $device.PathInContainer }}
+ CgroupPermissions = {{ $device.CgroupPermissions }}
+{{- end }}{{ end }}
{{- if .ContainerMounts }}
Mounts:
{{- end }}
@@ -487,6 +494,15 @@ func (ctx *serviceInspectContext) HasContainerUlimits() bool {
return len(ctx.Service.Spec.TaskTemplate.ContainerSpec.Ulimits) > 0
}
+
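+// ContainerDevices returns the device mappings from the service's container
+// spec, for use by the inspect pretty-print template.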
+func (ctx *serviceInspectContext) ContainerDevices() []container.DeviceMapping {
+ return ctx.Service.Spec.TaskTemplate.ContainerSpec.Devices
+}
+
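+// HasContainerDevices reports whether the service's container spec defines
+// any device mappings.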
+func (ctx *serviceInspectContext) HasContainerDevices() bool {
+ return len(ctx.Service.Spec.TaskTemplate.ContainerSpec.Devices) > 0
+}
+
func (ctx *serviceInspectContext) HasResources() bool {
return ctx.Service.Spec.TaskTemplate.Resources != nil
}
diff --git a/cli/command/service/opts.go b/cli/command/service/opts.go
index 05b3b8055dcf..a231cee871eb 100644
--- a/cli/command/service/opts.go
+++ b/cli/command/service/opts.go
@@ -513,6 +513,7 @@ type serviceOptions struct {
capAdd opts.ListOpts
capDrop opts.ListOpts
ulimits opts.UlimitOpt
+ devices opts.ListOpts
resources resourceOptions
stopGrace opts.DurationOpt
@@ -559,6 +560,7 @@ func newServiceOptions() *serviceOptions {
capAdd: opts.NewListOpts(nil),
capDrop: opts.NewListOpts(nil),
ulimits: *opts.NewUlimitOpt(nil),
+ devices: opts.NewListOpts(nil),
}
}
@@ -1024,6 +1026,7 @@ const (
flagUlimit = "ulimit"
flagUlimitAdd = "ulimit-add"
flagUlimitRemove = "ulimit-rm"
+ flagDevice = "device"
)
func validateAPIVersion(c swarm.ServiceSpec, serverAPIVersion string) error {
diff --git a/cli/compose/convert/service.go b/cli/compose/convert/service.go
index e1ba6ffb9955..e4a47aee7d5f 100644
--- a/cli/compose/convert/service.go
+++ b/cli/compose/convert/service.go
@@ -118,6 +118,11 @@ func Service(
}
}
+ devices, err := convertDevices(service.Devices)
+ if err != nil {
+ return swarm.ServiceSpec{}, err
+ }
+
capAdd, capDrop := opts.EffectiveCapAddCapDrop(service.CapAdd, service.CapDrop)
serviceSpec := swarm.ServiceSpec{
@@ -153,6 +158,7 @@ func Service(
CapabilityAdd: capAdd,
CapabilityDrop: capDrop,
Ulimits: convertUlimits(service.Ulimits),
+ Devices: devices,
},
LogDriver: logDriver,
Resources: resources,
@@ -719,3 +725,30 @@ func convertUlimits(origUlimits map[string]*composetypes.UlimitsConfig) []*units
})
return ulimits
}
+
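+// convertDevices parses device specifications of the form
+// <path-on-host>[:<path-in-container>[:<cgroup-permissions>]], the same shape
+// accepted by `docker run --device` (for example "/dev/sda:/dev/xvda:rwm"),
+// into device mappings. The in-container path defaults to the path on the host.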
+func convertDevices(devices []string) ([]container.DeviceMapping, error) {
+ newDevices := make([]container.DeviceMapping, len(devices))
+ for i, device := range devices {
+ parts := strings.Split(device, ":")
+ if parts[0] == "" || len(parts) > 3 {
+ return nil, errors.New("invalid device specification: " + device)
+ }
+
+ mapping := container.DeviceMapping{
+ PathOnHost: parts[0],
+ PathInContainer: parts[0],
+ }
+
+ if len(parts) > 1 {
+ mapping.PathInContainer = parts[1]
+ }
+
+ if len(parts) == 3 {
+ mapping.CgroupPermissions = parts[2]
+ }
+
+ newDevices[i] = mapping
+ }
+
+ return newDevices, nil
+}
diff --git a/cli/compose/types/types.go b/cli/compose/types/types.go
index ed2e10e85b77..5d468d41ec11 100644
--- a/cli/compose/types/types.go
+++ b/cli/compose/types/types.go
@@ -11,7 +11,6 @@ var UnsupportedProperties = []string{
"build",
"cgroupns_mode",
"cgroup_parent",
- "devices",
"domainname",
"external_links",
"ipc",
diff --git a/docs/reference/commandline/service_create.md b/docs/reference/commandline/service_create.md
index 1332defb4529..84259a5224b0 100644
--- a/docs/reference/commandline/service_create.md
+++ b/docs/reference/commandline/service_create.md
@@ -19,6 +19,7 @@ Options:
--container-label list Container labels
--credential-spec credential-spec Credential spec for managed service account (Windows only)
-d, --detach Exit immediately instead of waiting for the service to converge (default true)
+ --device list Devices to add
--dns list Set custom DNS servers
--dns-option list Set DNS options
--dns-search list Set custom DNS search domains
@@ -148,12 +149,12 @@ $ docker service create --name redis --replicas=5 redis:3.0.6
4cdgfyky7ozwh3htjfw0d12qv
```
-The above command sets the *desired* number of tasks for the service. Even
+The above command sets the _desired_ number of tasks for the service. Even
though the command returns immediately, actual scaling of the service may take
-some time. The `REPLICAS` column shows both the *actual* and *desired* number
+some time. The `REPLICAS` column shows both the _actual_ and _desired_ number
of replica tasks for the service.
-In the following example the desired state is `5` replicas, but the current
+In the following example the desired state is `5` replicas, but the current
number of `RUNNING` tasks is `3`:
```console
@@ -410,7 +411,6 @@ volumes in a service:
The following options can only be used for bind mounts (`type=bind`):
-
Option |
@@ -465,25 +465,25 @@ The `bind-propagation` option defaults to `rprivate` for both bind mounts and
volume mounts, and is only configurable for bind mounts. In other words, named
volumes do not support bind propagation.
-- **`shared`**: Sub-mounts of the original mount are exposed to replica mounts,
- and sub-mounts of replica mounts are also propagated to the
- original mount.
-- **`slave`**: similar to a shared mount, but only in one direction. If the
- original mount exposes a sub-mount, the replica mount can see it.
- However, if the replica mount exposes a sub-mount, the original
- mount cannot see it.
-- **`private`**: The mount is private. Sub-mounts within it are not exposed to
- replica mounts, and sub-mounts of replica mounts are not
- exposed to the original mount.
-- **`rshared`**: The same as shared, but the propagation also extends to and from
- mount points nested within any of the original or replica mount
- points.
-- **`rslave`**: The same as `slave`, but the propagation also extends to and from
- mount points nested within any of the original or replica mount
- points.
-- **`rprivate`**: The default. The same as `private`, meaning that no mount points
- anywhere within the original or replica mount points propagate
- in either direction.
+- **`shared`**: Sub-mounts of the original mount are exposed to replica mounts,
+ and sub-mounts of replica mounts are also propagated to the
+ original mount.
+- **`slave`**: Similar to a shared mount, but only in one direction. If the
+ original mount exposes a sub-mount, the replica mount can see it.
+ However, if the replica mount exposes a sub-mount, the original
+ mount cannot see it.
+- **`private`**: The mount is private. Sub-mounts within it are not exposed to
+ replica mounts, and sub-mounts of replica mounts are not
+ exposed to the original mount.
+- **`rshared`**: The same as `shared`, but the propagation also extends to and from
+ mount points nested within any of the original or replica mount
+ points.
+- **`rslave`**: The same as `slave`, but the propagation also extends to and from
+ mount points nested within any of the original or replica mount
+ points.
+- **`rprivate`**: The default. The same as `private`, meaning that no mount points
+ anywhere within the original or replica mount points propagate
+ in either direction.
For more information about bind propagation, see the
[Linux kernel documentation for shared subtree](https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt).
@@ -492,7 +492,6 @@ For more information about bind propagation, see the
The following options can only be used for named volumes (`type=volume`):
-
Option |
@@ -546,12 +545,10 @@ The following options can only be used for named volumes (`type=volume`):
-
#### Options for tmpfs
The following options can only be used for tmpfs mounts (`type=tmpfs`);
-
Option |
@@ -567,26 +564,25 @@ The following options can only be used for tmpfs mounts (`type=tmpfs`);
-
#### Differences between "--mount" and "--volume"
The `--mount` flag supports most options that are supported by the `-v`
or `--volume` flag for `docker run`, with some important exceptions:
-- The `--mount` flag allows you to specify a volume driver and volume driver
- options *per volume*, without creating the volumes in advance. In contrast,
- `docker run` allows you to specify a single volume driver which is shared
- by all volumes, using the `--volume-driver` flag.
+- The `--mount` flag allows you to specify a volume driver and volume driver
+ options _per volume_, without creating the volumes in advance. In contrast,
+ `docker run` allows you to specify a single volume driver which is shared
+ by all volumes, using the `--volume-driver` flag.
-- The `--mount` flag allows you to specify custom metadata ("labels") for a volume,
- before the volume is created.
+- The `--mount` flag allows you to specify custom metadata ("labels") for a volume,
+ before the volume is created.
-- When you use `--mount` with `type=bind`, the host-path must refer to an *existing*
- path on the host. The path will not be created for you and the service will fail
- with an error if the path does not exist.
+- When you use `--mount` with `type=bind`, the host-path must refer to an _existing_
+ path on the host. The path will not be created for you and the service will fail
+ with an error if the path does not exist.
-- The `--mount` flag does not allow you to relabel a volume with `Z` or `z` flags,
- which are used for `selinux` labeling.
+- The `--mount` flag does not allow you to relabel a volume with `Z` or `z` flags,
+ which are used for `selinux` labeling.
#### Create a service using a named volume
@@ -610,7 +606,7 @@ the container.
Be aware that the default ("local") volume is a locally scoped volume driver.
This means that depending on where a task is deployed, either that task gets a
-*new* volume named "my-volume", or shares the same "my-volume" with other tasks
+_new_ volume named "my-volume", or shares the same "my-volume" with other tasks
of the same service. Multiple containers writing to a single shared volume can
cause data corruption if the software running inside the container is not
designed to handle concurrent processes writing to the same location. Also take
@@ -671,7 +667,7 @@ expression (AND match). Constraints can match node or Docker Engine labels as
follows:
| node attribute | matches | example |
-|----------------------|--------------------------------|-----------------------------------------------|
+| -------------------- | ------------------------------ | --------------------------------------------- |
| `node.id` | Node ID | `node.id==2ivku8v2gvtg4` |
| `node.hostname` | Node hostname | `node.hostname!=node-2` |
| `node.role` | Node role (`manager`/`worker`) | `node.role==manager` |
@@ -751,9 +747,9 @@ nodes in the swarm, one third of the tasks will be placed on the nodes
associated with each value. This is true even if there are more nodes with one
value than another. For example, consider the following set of nodes:
-- Three nodes with `node.labels.datacenter=east`
-- Two nodes with `node.labels.datacenter=south`
-- One node with `node.labels.datacenter=west`
+- Three nodes with `node.labels.datacenter=east`
+- Two nodes with `node.labels.datacenter=south`
+- One node with `node.labels.datacenter=west`
Since we are spreading over the values of the `datacenter` label and the
service has 9 replicas, 3 replicas will end up in each datacenter. There are
@@ -850,15 +846,15 @@ for those "peaks") results in 400MB of memory being wasted most of the time.
In short, you can take a more conservative or more flexible approach:
-- **Conservative**: reserve 500MB, and limit to 500MB. Basically you're now
- treating the service containers as VMs, and you may be losing a big advantage
- containers, which is greater density of services per host.
+- **Conservative**: reserve 500MB, and limit to 500MB. Basically you're now
+ treating the service containers as VMs, and you may be losing a big advantage
+ of containers, which is greater density of services per host.
-- **Flexible**: limit to 500MB in the assumption that if the service requires
- more than 500MB, it is malfunctioning. Reserve something between the 100MB
- "normal" requirement and the 500MB "peak" requirement". This assumes that when
- this service is at "peak", other services or non-container workloads probably
- won't be.
+- **Flexible**: limit to 500MB on the assumption that if the service requires
+ more than 500MB, it is malfunctioning. Reserve something between the 100MB
+ "normal" requirement and the 500MB "peak" requirement. This assumes that when
+ this service is at "peak", other services or non-container workloads probably
+ won't be.
The approach you take depends heavily on the memory-usage patterns of your
workloads. You should test under normal and peak conditions before settling
@@ -1013,7 +1009,6 @@ registry value must be located in:
HKLM\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Virtualization\Containers\CredentialSpecs
-
### Create services using templates
You can use templates for some flags of `service create`, using the syntax
@@ -1021,13 +1016,12 @@ provided by the Go's [text/template](https://golang.org/pkg/text/template/) pack
The supported flags are the following :
-- `--hostname`
-- `--mount`
-- `--env`
+- `--hostname`
+- `--mount`
+- `--env`
Valid placeholders for the Go template are listed below:
-
Placeholder |
@@ -1067,7 +1061,6 @@ Valid placeholders for the Go template are listed below:
-
#### Template example
In this example, we are going to set the template of the created containers based on the
@@ -1102,9 +1095,10 @@ $ docker service create --name myservice --isolation=process microsoft/nanoserve
```
Supported isolation modes on Windows are:
-- `default`: use default settings specified on the node running the task
-- `process`: use process isolation (Windows server only)
-- `hyperv`: use Hyper-V isolation
+
+- `default`: use default settings specified on the node running the task
+- `process`: use process isolation (Windows server only)
+- `hyperv`: use Hyper-V isolation
### Create services requesting Generic Resources (--generic-resources)
@@ -1140,12 +1134,12 @@ command `true`, which will return 0 and then exit.
Though Jobs are ultimately a different kind of service, they a couple of
caveats compared to other services:
-- None of the update or rollback configuration options are valid. Jobs can be
- updated, but cannot be rolled out or rolled back, making these configuration
- options moot.
-- Jobs are never restarted on reaching the `Complete` state. This means that
- for jobs, setting `--restart-condition` to `any` is the same as setting it to
- `on-failure`.
+- None of the update or rollback configuration options are valid. Jobs can be
+ updated, but cannot be rolled out or rolled back, making these configuration
+ options moot.
+- Jobs are never restarted on reaching the `Complete` state. This means that
+ for jobs, setting `--restart-condition` to `any` is the same as setting it to
+ `on-failure`.
Jobs are available in both replicated and global modes.
@@ -1182,13 +1176,13 @@ constraints has a Completed task.
## Related commands
-* [service inspect](service_inspect.md)
-* [service logs](service_logs.md)
-* [service ls](service_ls.md)
-* [service ps](service_ps.md)
-* [service rm](service_rm.md)
-* [service rollback](service_rollback.md)
-* [service scale](service_scale.md)
-* [service update](service_update.md)
+- [service inspect](service_inspect.md)
+- [service logs](service_logs.md)
+- [service ls](service_ls.md)
+- [service ps](service_ps.md)
+- [service rm](service_rm.md)
+- [service rollback](service_rollback.md)
+- [service scale](service_scale.md)
+- [service update](service_update.md)