diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c9d1068 --- /dev/null +++ b/.gitignore @@ -0,0 +1,170 @@ +*/__pycache__/ +.vscode/ +.prod.cert +venv +.venv/* +release.md + +### GH for Python + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ + diff --git a/README.md b/README.md index 5763e88..4a2ba3e 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,16 @@ - [Longhorny](#longhorny) + - [What can it do?](#what-can-it-do) - [What you **need to know**](#what-you-need-to-know) - [Production use](#production-use) - [Objectives](#objectives) - [Requirements](#requirements) - [How to run](#how-to-run) + - [Optional data](#optional-data) - [Cluster-scope actions](#cluster-scope-actions) - [Volume-scope actions](#volume-scope-actions) - [Site-scope actions](#site-scope-actions) - [Command examples](#command-examples) + - [Some --data examples](#some---data-examples) - [Cluster](#cluster) - [Volume](#volume) - [Miscellaneous stuff](#miscellaneous-stuff) @@ -26,11 +29,31 @@ It grew out of my need to list cluster and volume pairs - which is something I'v The code in this repository is permissively licensed, so feel free to use and modify within the terms of the permissive Apache License 2.0. With a bit of extra work you could use the elements of this script to modify it for multi-relationship clusters or other purposes (e.g. send these to Splunk, etc.). +## What can it do? + +Quite a few things and most of them sort of work. Examples: + +- Pair two clusters for replication. Also list, and unpair (if they have no paired volumes between them) +- Pair one or more volume pairs for replication. Also list and unpair (one pair at a time). +- Find mismatched volume pairs +- Take a snapshot of all volumes at the source +- Reverse replication direction for all paired volumes +- Change replication mode for all or selected volumes (at the source) +- Prime the remote site from a list of volumes from the source site + +Each of these actions takes 1-2 seconds, so if you have a tidy and organized environment that isn't yet automated, Longhorny can hopefully save you some time. + +With Longhorny you can create 100 volumes at the remote site and set up volume pairing relationships in 10 seconds, for example. If you have 2 SolidFire clusters you may have done that, but if you stand-up new Kubernetes or Hyper-V clusters often, maybe you'd like to use some help. + +The same goes for site failover and failback. 10 seconds to failover, 10 seconds to failback (sync-back time is change- and network-dependent, but if not much has changed at the remote site, you may be able to sync back in 5 minutes). + ## What you **need to know** -**The recommended action is `--list`**, while the rest may or may not work for you. While basic care has been taken to avoid creating problems, I am the only person who wrote and tested this script so far so I wouldn't run configuration-modifying actions against production clusters without prior testing. I myself only need the `list` action - okay, I need `mismatch` and `report` as well - while the others were added as convenience but may not be rock-solid. +**The recommended action is `--list`**, while the rest may or may not work for you. While basic care has been taken to avoid creating problems, I am the only person who wrote and tested this script so far, so I wouldn't run configuration-modifying actions against production clusters without prior testing. I myself only need the `list` action - okay, I need `mismatch` and `report` as well - while the others were added as convenience but may not be rock-solid. -Longhorny is is **limited to supporting a simple 1-to-1, exclusively paired clusters**. One SRC cluster, one DST cluster, one pairing. 
It is expected to reject actions when it spots either the source or the destination has another relationship, so in order to work with multiple clusters you'd have to modify the code. +Longhorny is is **limited to supporting a simple 1-to-1, exclusively paired clusters**. One SRC cluster, one DST cluster, one pairing. It is expected to reject actions when it spots either the source or the destination has another cluster relationship, so in order to work with multiple clusters you'd have to modify the code. + +Longhorny presently **requires that API access to both sites be available**. If one site is down or unreachable, you may use [PowerShell commands](#powershell-to-help) to quickly force changes of the surviving site. Of course, you may also modify the source code to not attempt connecting to the remote SolidFire cluster and use Python functions in the code on only one side. ## Production use @@ -49,7 +72,9 @@ Longhorny's objective is to provide visibility into replicated SolidFire cluster Everything beyond that is extra (and maybe nice to have, assuming it works), but that's also what makes it deserve a repository of its own as it has other uses. So far I've already done more than I expected and I decided to publish the script to see if anyone has uses for other actions and/or wants to contribute. -I am not committed to expanding or improving Longhorny but I may do it if I come up with new ideas for it. For example, recently I wrote a script for mapping Kubernetes/Trident volumes to SolidFire volume IDs (available in [Awesome SolidFire](https://github.com/scaleoutsean/awesome-solidfire)), so the output of that script (i.e. a list of a Kubernetes cluster's volume IDs) could be used as the input to Longhorny. Are you thinking what I'm thinking? You get the idea. +I am not committed to expanding or improving Longhorny but I may do it if I come up with new ideas for it. For example, recently I wrote a script for mapping Kubernetes/Trident volumes to SolidFire volume IDs (available in [Awesome SolidFire](https://github.com/scaleoutsean/awesome-solidfire)), so the output of that script (i.e. a list of a Kubernetes cluster's volume IDs) could be used as the input to Longhorny. Are you thinking what I'm thinking? + +That, by the way, is the main reason why Longhorny doesn't output pretty tables. It's not an end in itself. Even now, most of Longhorny's output is Python lists or dictionaries that can be assigned to variables in Python shell for additional follow-up processing, but its code can be easily reused and incorporated in other scripts. ## Requirements @@ -62,65 +87,94 @@ I am not committed to expanding or improving Longhorny but I may do it if I come First we need to select one of the scopes (cluster, volume, site) and then one of the actions available in the scope (further below). Among the positional arguments below, `src` and `dst` are usually required for most actions. ```sh -usage: longhorny.py [-h] [--dry DRY] [--tlsv TLSV] [--src SRC] [--dst DST] [--data DATA] {cluster,volume,site} ... +~$ longhorny -h +usage: longhorny.py [-h] [--dry DRY] [--tlsv TLSV] [--src SRC] [--dst DST] {cluster,volume,site} ... positional arguments: {cluster,volume,site} options: -h, --help show this help message and exit - --dry DRY Dry run mode. It is NOT available for all actions, so don not make the assumption that with --dry any action will be zero impact. Enable with --dry on. Default: off - --tlsv TLSV Accept only verifiable TLS certificate when working with SolidFire cluster(s) with --tlsv 1. 
Default: 0 - --src SRC Source cluster: MVIP, username, password as a dictionary in Bash string representation: --src "{ 'mvip': '10.1.1.1', 'username':'admin', 'password':'*'}" - --dst DST Destination cluster: MVIP, username, password as a dictionary in Bash string representation: --src "{ 'mvip': '10.2.2.2', 'username':'admin', 'password':'*'}" - --data DATA One or more semicolon-delimited volume ID pairs for cluster and volume actions between SRC and DST clusters. Optional in --list actions. Ex: "--data '158,260;159;261' + --dry DRY Dry run mode. It is NOT available for all actions, so don not make the assumption that with --dry any action will be zero impact. + Enable with --dry on. Default: off. + --tlsv TLSV Accept only verifiable TLS certificate when working with SolidFire cluster(s) with --tlsv 1. Default: 0. + --src SRC Source cluster: MVIP, username, password as a dictionary in Bash string representation: --src "{ 'mvip': '10.1.1.1', + 'username':'admin', 'password':'*'}". + --dst DST Destination cluster: MVIP, username, password as a dictionary in Bash string representation: --src "{ 'mvip': '10.2.2.2', + 'username':'admin', 'password':'*'}". ``` -Note that `--dry` **is ignored** by many operations/actions. For example, every `--list` action ignores it. For more on `--dry` see the examples in which you're interested. If I remember correctly there are three of the more dangerous actions that consider `--dry on`, but additional actions may be made aware of it. Don't assume it's been implemented for everything that may be dangerous is all I'm saying. You may check the code and add it by yourself. +Note that `--dry` **is ignored** by many operations/actions. For example, every `--list` action ignores it. For more on `--dry` see the examples in which you're interested. If I remember correctly initially there are three of the more dangerous actions that consider `--dry on`, but additional actions may be made aware of it. Don't assume it's been implemented for everything that may be dangerous is all I'm saying. You may check the code and add it by yourself. There are `cluster`-, `volume`-, and `site`-level actions. For most of them you **must** provide a source (`--src`) and a destination (`--dst`). In some cases it is important that `--src` is really the source (i.e. the cluster where replication "originates" and where the volumes are read-write), so I encourage you to always verify that assignment. If your "SolidFires" have snake-oil TLS certificates, just omit `tlsv` (TLS verification) to leave it as-is (to accept snake-oil TLS certificates). Obviously, that's not what you should do, but I know most will. If you want to upload a valid TLS certificate to SolidFire clusters, see [this](https://scaleoutsean.github.io/2020/11/24/scary-bs-postman-ssl-certs.html) or RTFM. It takes 5 minutes to do it and then you can use `--tlsv 1` and avoid MITM attacks. -Some actions require `DATA` to work with. When that's the case, use `--data DATA` to provide it. Data format for `DATA` varies, but Longhorny is mostly about volume-related actions so remember that volume pairs are provided as comma-delimited pairs (SRC, DST) and multiple pairs are delimited by semicolon. `"1,50;2,51"` is two SRC/DST pairs, (1,50) and (2,51), between SRC and DST (notice that if you were to swap SRC and DST with `--src` and `--dst`, then `"50,1;51,2"` would be equivalent to that in terms of the equivalence of outcome of a volume pairing action). 
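To make the site object format concrete, here is a minimal invocation sketch with the `--src` and `--dst` dictionaries spelled out. MVIPs and credentials are placeholders; as in the cluster pairing example further below, leaving out the password value should make Longhorny prompt for it:

```sh
longhorny --src "{ 'mvip': '10.1.1.1', 'username':'admin', 'password':'*'}" \
          --dst "{ 'mvip': '10.2.2.2', 'username':'admin', 'password':'*'}" \
          cluster --list
```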
+
+### Optional data
+
+Some volume and site actions require or may accept `--data DATA`. Example:
+
+```sh
+longhorny --src SRC --dst DST volume --list --data "135,230"
+```
+
+Without `--data`, all paired volumes get listed. If you have dozens of pairs and want to check just one, that's the way to do it.
+
+Data format for `DATA` varies depending on action, but scope help (`volume -h`, `site -h`) has examples whenever `--data` argument is available or required. See more in [Command examples](#command-examples).

### Cluster-scope actions

```sh
-usage: longhorny.py cluster [-h] (--list | --pair | --unpair)
+~$ longhorny cluster -h
+usage: longhorny.py cluster [-h] [--data DATA] (--list | --pair | --unpair)

options:
-  -h, --help   show this help message and exit
-  --list       List cluster pairing between SRC and DST clusters. Requires paired SRC and DST clusters. Does not require --data argument because cluster params are given in --src, --dst.
-  --pair       Pair SRC and DST clusters for replication. Requires SRC and DST without existing pairing relationships - multi-relationships are not supported.
-  --unpair     Unpair SRC and DST clusters. Requires SRC and DST in exclusive, mutual pairing relationship and no volume pairings.
+  -h, --help   show this help message and exit
+  --data DATA  Optional data input for selected cluster actions (where indicated in cluster action help). Not all cluster actions require or accept it.
+  --list       List cluster pairing between SRC and DST clusters. Requires paired SRC and DST clusters. Ignores --data because each cluster's params are
+               always available from --src, --dst.
+  --pair       Pair SRC and DST for replication. Requires SRC and DST without existing pairing relationships. Multi-relationships are not supported.
+               Ignores --data.
+  --unpair     Unpair SRC and DST clusters. Requires SRC and DST in exclusive, mutual pairing relationship and no volume pairings. Ignores --data.
```

`list` lists paired clusters. Harmless.

`pair` changes cluster configuration on both sides: SRC gets paired with DST. No data is destroyed, but if this action succeeds SRC and DST (clusters) will be in a paired relationship.

-`unpair` does the opposite from pair. It also changes cluster configuration on both sides, so be very careful if you have replication relationships set up. You shouldn't be able to `unpair` if there are valid replication pairs for volumes, but be careful with this action in any case.
+`unpair` does the opposite of `pair`. It also changes cluster configuration on both sides (by removing the sole cluster pairing relationship), so be very careful if you have replication relationships set up - you shouldn't be able to `unpair` if there is at least one valid volume replication pair, but be careful nevertheless.

### Volume-scope actions

```sh
-usage: longhorny.py volume [-h] (--list | --pair | --unpair | --prime-dst | --mismatched | --reverse | --snapshot | --set-mode | --set-status | --report)
+~$ longhorny volume -h
+usage: longhorny.py volume [-h] [--data DATA]
+                           (--list | --pair | --unpair | --prime-dst | --mismatched | --reverse | --snapshot | --set-mode | --set-status | --report)

options:
  -h, --help     show this help message and exit
-  --list         List volumes correctly paired for replication between SRC and DST cluster. Requires paired SRC and DST clusters. Optional --data argument lists specific volume pair(s).
-  --pair         Pair volumes for Async replication between SRC and DST clusters. Takes a semicolon-delimited list of volume IDs from SRC and DST in --data. Requires paired SRC and DST clusters.
- --unpair Unpair volumes paired for replication between SRC and DST clusters. Requires paired SRC and DST clusters and at least one volume pairing relationship. - --prime-dst Prepare DST cluster for replication by creating volumes from SRC. Takes one 2-element list of account IDs (SRC,DST) and another of volume IDs on SRC. Creates volumes with identical properties - (name, size, etc.) on DST. Ex: --data "1,2;444,555" - --mismatched Check for and report any volumes in asymmetric pair relationships (one-sided and volume size mismatch). Requires paired SRC and DST clusters. - --reverse Reverse direction of volume replication. You MUST stop workloads using current SRC as readWrite will be flipped to replicationTarget. - --snapshot Take a crash-consistent snapshot of all volumes paired for replication at SRC and with expiration t+168 hours. - --set-mode Change replication mode on all SRC volumes in active replication relationship to DST. Options: Sync, Async, SnapshotsOnly (ex: --data "SnapshotsOnly"). Requires existing cluster and volume pairing - relationships between SRC and DST. WARNING: SnapshotsOnly replicates nothing if no snapshots enabled for remote replication are taken at SRC (create_snapshot(enable_remote_replication=True)). - --set-status Set all SRC (WARNING: SRC, not DST!) relationships to resume or pause state in --data. Ex: --data "pause" sets all SRC volume relationships to pause. - --report Report volume pairing relationships between SRC and DST, including mismatched and bidirectional. Requires paired SRC and DST clusters. Optional --data arguments: all, SRC, DST (default: all). + --data DATA Optional data input for selected volume actions (where indicated in volume action help). Not all volume actions require or accept it. + --list List volumes correctly paired for replication between SRC and DST cluster. Requires paired SRC and DST clusters. Optional --data argument + lists specific volume pair(s). + --pair Pair volumes for Async replication between SRC and DST clusters. Takes a semicolon-delimited list of volume IDs from SRC and DST in --data + (e.g. --data "111,555;112,600"). Requires paired SRC and DST clusters. + --unpair Unpair volumes paired for replication between SRC and DST clusters. Requires paired SRC and DST clusters and at least one volume pairing + relationship. Takes --data argument with only one pair at a time. Ex: --data "111,555". + --prime-dst Prepare DST cluster for replication by creating volumes from SRC. Creates volumes with identical properties (name, size, etc.) on DST. . + Takes one 2-element list of account IDs (SRC account ID,DST account ID) and another of volume IDs on SRC. Ex: --data "1,22;444,555". + --mismatched Check for and report any volumes in asymmetric pair relationships (one-sided and volume size mismatch). Requires paired SRC and DST + clusters. Ignores --data. + --reverse Reverse direction of volume replication. You should stop workloads using current SRC (readWrite) volumes before using this action as SRC + side will be flipped to replicationTarget and SRC iSCSI clients disconnected. Ignores --data. + --snapshot Take crash-consistent snapshot of all volumes paired for replication at SRC. Use --data to specify non-default retention (1-720) in hours + and snapshot name (<16b string). Ex: --data "24;apple". Default: "168;long168h-snap". + --set-mode Change replication mode on specific SRC volumes ID(s) in active replication relationship to DST. Mode: Sync, Async, SnapshotsOnly. Example: + --data "SnapshotsOnly;101,102,103"). 
Requires existing cluster and volume pairing relationships between SRC and DST. WARNING: SnapshotsOnly + replicates nothing if no snapshots are enabled for remote replication (create_snapshot(enable_remote_replication=True)). + --set-status Set all SRC relationships to resume or pause state in --data. Ex: --data "pause" sets all SRC volume relationships to manual pause. --data + "resume" resumes paused replication at SRC. (WARNING: applies to SRC, not DST). + --report TODO: Report volume pairing relationships between SRC and DST, including mismatched and bidirectional. Requires paired SRC and DST + clusters. Optional --data arguments: all, SRC, DST (default: all). ``` Yeah, I know, I got carried away. @@ -138,45 +192,53 @@ Ok, so `list` does the same thing as it for clusters - it also lists, only volum Therefore, `--src SRC --dst DST --data "10,20;101,102,103,104" volume --prime-dst` would take volumes 101-104 belonging to the account ID 10 at the SRC site and create very similar (apart from volume IDs, for example) volumes for the account ID 20 at the DST site. Then we could prepare those new volumes for pairing so that the source site's volume IDs 101-104 can be paired with (say) the destination site's volume IDs 40-43 using `--data "101,40;102,41;103,42;104,43 volume --pair`. Diff-priming the destination would be useful when a bunch of volumes are added to the source side, but I don't know if anyone has that problem so I haven't attempted to solve it. -Note that `prime-dst` doesn't change the new volumes at the destination to `replicationTarget`. Why's that? Because I don't know if you want to do that (you may want to do something else before you do that, such as import them to Hyper-V and log out of those targets before you flip them to replicationTarget), and because it's a more conservative approach. `prime-dst` can be risky if you provide a list of 5000 volumes and the destination runs out of +Note that `prime-dst` changes the new volumes at the destination to `replicationTarget`, based on the logic that most users would want to immediately pair them. If you need to flip them to readWrite mode (and later back, for pairing), see the PowerShell commands below. `mismatched` aims to find mismatched volumes on both the source and destination cluster. Maybe one site's volume is bigger, SRC/101 is paired to DST/40, but DST/40 isn't paired to SRC/101, etc. The idea is if you have a bunch of volumes and things seem out of control, maybe `mismatched` can save you troubleshooting time. +Can Longhorny help you recover from a mismatch? It could, but I don't like the idea of "helping" you change the state of a bunch of mismatched volumes at once. See the example of a two-side mismatch further below, and imagine you have dozens. Before fixing that, one should ask how did they even get in that situation and what else might be wrong. To recover from a mismatch, the remaining one-sided relationship must be deleted, and only then a new one created, so as you can imagine it's a sensitive operation when guessing is involved. + +A simple way to recover is delete the one-sided relationships reported by `--mismatched` (they can't be "restored" anyway) and use the volume IDs to create new ones from the side specified in `--src`. (See the example in examples section.) + `reverse` changes the direction of replication, so obviously this one is **dangerous**. If SRC is replicationTarget and DST is readWrite (that is, replication flows DST=>SRC), `reverse` flips that so that replication starts flowing SRC=>DST. 
This **may cause** unplanned downtime if you're not careful because flipping a volume from readWrite to replicationTarget disconnects all iSCSI clients, so if you issue this against the wrong `--src` and `--dst`, you may start getting calls, alerts and emails soon after that. -I think `volume --reverse`, as currently implemented, will really work only for rehearsals, when you have to run it once to try DST cluster, and then reverse again to return to SRC as "production". Why is there no ability to failover selected volumes? +I think `volume --reverse`, as currently implemented, will really work only for DR/BC rehearsals when you have to run it once to try DST cluster and then reverse again to return to SRC as "production". Why is there no ability to failover selected volumes? - If you need to failover a handful of volumes no matter where the rest of them run, that's just 5 lines of PowerShell - It's easy to take a list of IDs and failover just those volumes, but the next problem becomes when you want to flip the rest of them (or flip those back)? Who's going to figure out what should be replicated where? I think this can get messy quickly and at least with my level of skills (i.e. not high) I don't think I'd want to take on that task - A simpler, safer and less ambitious idea is to use a smaller, dedicated script for dealing with groups of volumes. If you have 12 Kubernetes clusters and each Kubernetes admin runs their own script with their SolidFire storage account ID in `--data "${ACCOUNT_ID}"`, it's fine to offload all pairing and flipping to them. But if you do that then the SolidFire storage admin should in my opinion go into "100% read-only" mode and just use `list` and `report` send output to something like SolidFire Collector and observe in Grafana what the Kubernetes guys are up to -TODO: `snapshot` does what you think it does - it takes a snapshot to minimize potential damage before one makes a stupid move. Note that Longhorny never deletes volumes, so snapshots are safe from it. But if you mix in other code that deletes volumes or worse, snapshots may still be destroyed together with volumes by that other code (in which case we could take a site-replicating snapshot (we don't do that, as it may involve a surge of data replication), but then we may need to wait, etc. so ... no). Anyway, `DATA` settings are optional for `snapshot` action and by default it's taken with 168 hours (1 week) expiration time. You may override that with something like `--data "72;mysnap"` (expiration: `72` hours; snapshot name `mysnap`). +`snapshot` does what you think it does - it takes a snapshot to minimize potential damage before one makes a stupid or desperate move. Note that Longhorny never deletes volumes, so snapshots are safe from whatever you do in Longhorny. But if you mix in other code that deletes volumes or worse, snapshots may still be destroyed together with volumes by that other code (in which case we could take a site-replicating snapshot (Longhorny doesn't do that, as it could cause a surge of data replication activity, then we may need to wait until that's done (assuming DST is available at all), etc. so ... no). + +Anyway, `DATA` setting is optional for `snapshot` action and by default snapshot of all local volumes is taken so that it expires in 168h (1 week). You may override that with something like `--data "72;mysnap"` (expiration: `72` hours; snapshot name `mysnap`). 
And they're taken individually, so if you need to take some snapshots of Consistency Groups, do it separately if you can't stop those applications prior to running Longhorny's `snapshot` action. I've been thinking about adding additional options but --data "..." isn't very good and would need a rewrite to make those options action-specific which would take more work, so not for time being. `set-mode` helps you change the default (Async) to other (Sync, or SnapshotOnly) mode. SolidFire's volume-pairing API method has Async hard-coded in it, so once remote pairing has been done you may use `set-mode` to change to another and back. RTFM and the TR linked at the top for additional details. `set-status` pauses or resumes replication. If replication is going from DST=>SRC (i.e. DST side is read-write) and you need to pause replication at source if replication you would run `--src DST --dst SRC --data "pause" volume --set-status` (because DST is the source). That would put all volumes in manually paused state. Similarly, `--data "resume"` would resume. If you wanted to pause the destination (in this case, SRC) you'd try `--src SRC --data "pause" volume --set-status`. -TODO: `report` is like `list`, completely read-only, except that it its result is slightly different. "Slightly???" Why do we need yet another action for that? List *actually* lists volume pairing relationships, whereas `report` reports on volume pairings, and if I wanted to see what's misconfigured or broken, `report` may give me that whereas `list` may not. Given that both INs and OUTs are very different, I don't want to bloat `list` to 500 lines of code. +TODO: `report` is like `list`, a completely read-only action, except that it its result is slightly different. "Slightly???" Why do we need yet another action for that? List *actually* lists volume pairing relationships, whereas `report` reports on volume pairings, and if I wanted to see what's misconfigured or broken, `report` may give me that whereas `list` may not. Given that both INs and OUTs are very different, I don't want to bloat `list` to 500 lines of code. I'm still thinking what I'd like to see and how it should be shown. ### Site-scope actions **CAUTION:** these may be **dangerous**. I'm not sure there's a strong case for them, so they are work-in-progress and may not care about `--dry on`. I would advise against using them without prior testing of the exact scenarios you aim to deal with or visual code inspection. ```sh -usage: longhorny.py site [-h] (--detach-site | --set-access) +~$ longhorny site -h +usage: longhorny.py site [-h] [--data DATA] (--detach-site | --set-access) options: -h, --help show this help message and exit - --detach-site Remove replication relationships on SRC cluster for the purpose of taking over when DST is unreachable. Requires paired SRC and DST clusters. WARNING: there is no way to re-attach. Disconnected - clusters and volumes need repairing from scratch. - --set-access Change access property on all SRC volumes with replication relationship to DST. Options: readWrite, replicationTarget (ex: --data "readWrite"). Requires existing cluster and volume pairing - relationships between SRC and DST. WARNING: may stop/interrupt DST->SRC or SRC->DST replication. - + --data DATA Optional data input for selected site actions (where indicated in site action help). Not all site actions require or accept it. + --detach-site Remove replication relationships on SRC cluster for the purpose of taking over when DST is unreachable. 
Requires paired SRC and DST + clusters. WARNING: there is no way to re-attach. Disconnected cluster- and volume-relationships need to be removed and re-created. + --set-access Change access property on all SRC volumes with replication relationship to DST. Options: readWrite, replicationTarget (ex: --data + "readWrite"). Requires existing cluster and volume pairing relationships between SRC and DST. WARNING: may stop/interrupt DST->SRC or + SRC->DST replication. ``` TODO: `detach-site` attempts to remove replication configuration from --src (`--src SRC`). -`set-access` changes access mode on volumes paired for replication **at the source**. To change access mode for the other site, use `--src OTHER`. +`set-access` changes access mode on volumes paired for replication **at the source**. To change access mode for the other site, use `--src OTHER`. When I started working on site actions I thought they may be useful, but later I realized it can be a slippery slope. For example, the remote site DST may or may not be offline. If it's offline (or for whatever reason unreachable), site actions will not be able to work as they attempt to connect to `--dst` as well which means actions may not be useful for their main use case. @@ -188,9 +250,18 @@ If you're not sure how something works, I may post all examples and details in a But more importantly, I wouldn't suggest to anyone to use Longhorny on real clusters without having own VM-based sandbox where experimentation may be done freely. +### Some --data examples + + +```sh +~$ volume --src SRC --dst DST --list --data "111,222" # list only SRC/DST pair 111,222 +~$ volume --src SRC --dst DST --prime-dst --data "1,10,333,444" # prime account 10 on DST with SRC account ID 1's volume IDs 333 and 444 +~$ volume --src SRC --dst DST --snapshot --data "1;test" # take a snapshot all SRC volumes, retain for 1 hours, and name each "test" +``` + ### Cluster -Checks if SRC and DST are paired and if so, outputs pairing configuration. +Checks if SRC and DST are paired and if so, outputs their pairing configuration. **Pair clusters** without offering the passwords, so that you get prompted to enter them: @@ -284,7 +355,7 @@ Longhorny's `cluster --unpair` won't unpair SRC and/or DST cluster if: 'volumePairUUID': '3406d44a-081c-4841-8838-46f14feaac5e'}] ``` -Both volume relationships are Async, the pairs are `[(158,260),(164,391)]`. If we wanted to `pair` these we'd do `--data "158,260; 164,391"`. +Both volume relationships are Async, the pairs are `[(158,260),(164,391)]`. If we wanted to `pair` these we'd do `--data "158,260;164,391"`. **Pair volumes** for replication. To replicate volume IDs 1 and 2 from site SRC to site DST, you need DST volumes to exist prior to running this action and they must have some identical properties and one different property: @@ -295,22 +366,64 @@ Both volume relationships are Async, the pairs are `[(158,260),(164,391)]`. If w You may also want the same QoS settings or QoS Policy contents, but that's optional. ```sh -longhorny --src SRC --dst DST --data "1,44;2,55" volume --pair +longhorny --src SRC --dst DST volume --pair --data "1,44;2,45" ``` -Another reminder about the direction of replication: volume IDs 1 and 2 exist at SRC so assumed that `--src` is readWrite. To replicate in the opposite direction, use `--src REMOTE` and provide the remote volume IDs as the first element of each `DATA` pair. +Output of `volume --pair` is the same as `volume --list` - it returns all paired volumes if it succeeds. 
If it fails, it tells you what went wrong. + +Another reminder about the direction of replication: volume IDs 1 and 2 exist at SRC so assumed that `--src` is readWrite. The direction is decided by access mode (goes from readWrite to replicationTarget volume), but Longhorny considers IDs in the order of SRC, DST. That is, if both sites have volumes 1, 2, 44, and 45, then the direction would flow from the site specified with `--src`. No account ID, QoS settings or other non-essential details are asked for because in this action the destination volumes must already exist and this Longhorny `volume` level operation does not touch storage account assignment or modify any volume settings except the replication-related settings (pair, unpair, reverse, etc.). Only `--prime-dst` can create new volumes, but even that action does not delete volumes. If there's no cluster peering between SRC and DST cluster, volume pairing operation will fail immediately. SolidFire requires cluster peering to be in place before volume pairs can be configured. -**Reverse replication direction** with `--reverse` action. +**Unpair** with dry run ON: -**Important assumption**: in this scenario I assume the entire cluster of something (Hyper-V, etc) needs to be switched to the site of destination, and **all paired volumes need to be reversed** and activated there. There's no attempt to take a list of volume IDs, some account ID or whatever and failover just two volumes for one database or an individual account's volumes (see [PowerShell](#powershell-to-help) examples for these "small scope" actions). Don't use this action if you want to failover just some of the paired volumes. +```sh +~$ longhorny.py --dry on volume --unpair --data "163,390" -If you change access status to replicationTarget all existing iSCSI connections to the volume are instantly terminated. You should stop workloads on the site that needs to change to replicationTarget mode or they'll be disconnected anyway (which is disruptive to clients using the volume(s)). Also expect some routine OS-level errors on the host side if they remain logged into targets switching to replicationTarget access mode, but those can likely be ignored as long as volumes going to replicationTarget mode have been dismounted (although they may still be logged on by the host). +VOLUMES REPORT FOR SPECIFIED VOLUME PAIR(S): [(163, 390)] -I assume that in normal life if replication is flowing from `--src SRC` to `--dst DST`, no one will try reverse the direction *unless* they can't access SRC. So as far as the risk of reversing in the wrong direction is concerned, it's rather small: SRC goes down, you won't be able to "reverse" anyway because Longhorny won't be able to connect. You'll have to manually select all volumes set to replicate from SRC to DST, pause that replication, and switch the DST side to readWrite. +[{'clusterPairID': 55, + 'localVolumeID': 163, + 'localVolumeName': 'srcvol', + 'remoteVolumeName': 'dstvol', + 'remoteReplicationMode': 'SnapshotsOnly', + 'remoteReplicationPauseLimit': 3145728000, + 'remoteReplicationStateSnapshots': 'PausedDisconnected', + 'remoteReplicationState': 'PausedDisconnected', + 'remoteVolumeID': 390, + 'volumePairUUID': '9e626d68-1037-459c-b097-360433f6e65b'}] + +===> Dry run: replication relationship for volume IDs that would be removed (SRC, DST): [(163, 390)] +``` + +**Unpair without dry run** (default) is almost identical. One volume pair *at most* can be unpaired at a time, in order to prevent disasters due to typos. 
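For reference, the command that produces the output below is the same as the dry-run example, only without `--dry on` (volume IDs are illustrative, and `--src`/`--dst` are omitted here just as in the example above):

```sh
longhorny.py volume --unpair --data "163,390"
```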
+ +```raw +VOLUMES REPORT FOR SPECIFIED VOLUME PAIR(S): [(163, 390)] + +[{'clusterPairID': 55, + 'localVolumeID': 163, + 'localVolumeName': 'srcvol', + 'remoteVolumeName': 'dstvol', + 'remoteReplicationMode': 'SnapshotsOnly', + 'remoteReplicationPauseLimit': 3145728000, + 'remoteReplicationStateSnapshots': 'PausedDisconnected', + 'remoteReplicationState': 'PausedDisconnected', + 'remoteVolumeID': 390, + 'volumePairUUID': '9e626d68-1037-459c-b097-360433f6e65b'}] +WARNING:root:Dry run in unpair action is OFF. Value: off +WARNING:root:Volume IDs unpaired at SRC/DST: {'local': 163, 'remote': 390} +``` + +**Reverse replication direction** with `--reverse` action. There's a 15 second count-down before direction change. + +**Important assumption**: in this scenario I assume the entire cluster of something (Hyper-V, etc) needs to be made active at the destination and **all paired volumes need to be reversed in terms of replication direction** to be made available for read-write access. There's no attempt to take a list of volume IDs, some account ID or whatever and failover just two volumes for one database or an individual account's volumes (see [PowerShell](#powershell-to-help) examples for these "small scope" actions). Don't use this action if you want to failover just some of the paired volumes. + +If you change access status to replicationTarget all existing iSCSI connections to the volume are instantly terminated. You should stop workloads on the site that needs to change to replicationTarget mode or they'll be disconnected anyway (which is disruptive to clients using the volume(s) and may lead to unplanned downtime or even data loss). Also expect some routine OS-level errors on the host side if they remain logged into targets switching to replicationTarget access mode, but those can likely be ignored as long as volumes going to replicationTarget mode have been dismounted (although they may still be logged on by the host). + +I assume that in normal life if replication is flowing from `--src SRC` to `--dst DST`, no one will try reverse the direction *unless* they can't access the source site. So as far as the risk of reversing in the wrong direction is concerned, it's rather small: if the source site goes down, you won't be able to "reverse" anyway because Longhorny won't be able to connect to that site to coordinate. You'll have to do this manually, by selecting all volumes set to replicate from SRC to DST on the destination cluster, pause that replication, and switch the DST side to readWrite. In other words, unilaterally change the surviving site to read-write mode. **Prime destination volumes** when you're setting up a DR site and have many volumes to create at the destination at once. @@ -327,14 +440,38 @@ Priming requires two mandatory and one optional input: - (required) pair of accounts IDs; one from the source (to whom the volumes belong) and one from the destination, to whom the new volumes should be assigned - (required) list of volume IDs from the source - their properties will be used to create volumes at the remote site -Find **mismatched** volumes with `volume --mismatched`. I may still modify the format of this response, but basically it gives a view from the source vs. a view from the destination and - if the views aren't symmetric - a warning that makes it easier to figure it out may be logged at the end. 
+```sh +~$ longhorny --src SRC --dst DST volume --prime-dst --data "1,5;640,641,642" +``` + +The above uses volumes 640-642 from the source site's Account ID 1 as templates for three new volumes at the remote site. The destination account ID is 5. + +Find **mismatched** volumes with `volume --mismatched`. It gives a view from the source vs. a view from the destination and - if the views aren't symmetric - a warning that makes it easier to figure it out may be logged at the end. ```raw -[(158, 260), (163, 390), (164, 391)] -[(260, 158), (262, 162), (390, 163), (391, 164)] -WARNING:root:Mismatch found at DST: vol ID 262 in relationship a8051542-9a13-4d73-bb7f-7110483b70f4 found at DST, but paired volume ID at SRC not found: 162 +WARNING:root:Volume ID 169 is paired on SRC but not on DST cluster. +WARNING:root:Volume ID 391 is paired on DST but not on SRC cluster. +WARNING:root:Mismatch found at SRC: vol ID 169 in relationship 6164784e-3b80-41b2-9673-5d9f006cc49a found at SRC, but relationship from paired SRC volume ID is missing: 407. +WARNING:root:Mismatch found at DST: vol ID 391 in relationship f4b57253-d8b6-4c89-b4c2-f73f65784b69 found at SRC, but relationship from paired SRC volume ID is missing: 164. + +MISMATCHED PAIRED VOLUMES ONLY: + +[{'PROD': {'volumeID': 169, + 'volumePairUUID': '6164784e-3b80-41b2-9673-5d9f006cc49a', + 'mismatchSite': 'DR', + 'remoteVolumeID': 407}}, + {'DR': {'volumeID': 391, + 'volumePairUUID': 'f4b57253-d8b6-4c89-b4c2-f73f65784b69', + 'remoteSite': 'PROD', + 'remoteVolumeID': 164}}] ``` +`mismatched` output also contains a list of all paired volumes with volume-level (not volume pairing-level) details, which makes it convenient for copy-paste into other commands (or sending to an infrastructure monitoring system, which was the original idea - to use this in SFC). In the case above, as volume-pair relationships can't be "restored", a way to recover is: + +- Manually remove 407,169 at DR and 164,391 at PROD site +- Run `--src PROD --dst DR --data "169,407;164,391 volume --pair` to rebuild the relationships +- Potentially `--set-mode` to `SnapshotsOnly` or `Sync` if those weren't `Async` before getting damaged + **Unpair volumes** is a sensitive operation. Although it "only" unpairs and doesn't delete volumes, it's suggested to use it with `--dry on` (default is `off`!) before actually letting it remove a pairing. ```sh @@ -343,11 +480,19 @@ longhorny -dry on --data "167,393" volume --unpair With `--dry on`, we only get a "preview" similar to this: -```sh +```raw Remove data tuple: [(167, 393)] -===> Dry run: volume IDs that would be removed (SRC, DST): [(167, 393)] +===> Dry run: replication relationship for volume IDs that would be removed (SRC, DST): [(167, 393)] ``` +**Snapshot** currently takes a snapshot of *all* volumes at the source. In DATA, the first digit is "retention in hours" (1-720) and the second part is snapshot name. + +```sh +longhorny --src SRC --dst DST volume --snapshot --data "1;long1h" +``` + +The main idea is to be able to roll-back locally (these snapshots are not replicated) to something before making desperate moves. Since snapshots are taken individually, if you have applications that use multiple volumes, you should stop them before running this command. + ## Miscellaneous stuff ### PowerShell to help @@ -406,15 +551,15 @@ VolumeID Name 157 pvc-a5f21571-e002-493f-b2dc-df01f40c1fa1 ``` -We can pipe that to `Set-SFVolume` or store that result in a variable, e.g. 
`$kvols`, and then pipe that to some other command: +We can pipe that to `Set-SFVolume` or store result in a variable, e.g. `$kvols`, and then pipe that to some other command: ```powershell $kvols.VolumeID | Set-SFVolume -Access readWrite ``` -Again, 2 lines vs. 100 when you have to consider a variety of other possibilities. +Again, 2 lines vs. 100, when you don't have to consider a variety of other possibilities. -That's why there's no urgency to further develop `site`-level commands, although it'd be nice if I had some ideas about specific use cases which I'd want to address with Longhorny. +That's why there's no urgency to further develop `site`-level commands, although it'd be nice to have them if I had some ideas about specific use cases to address with Longhorny. ### Site object (SRC, DST) and Longhorny CLI arguments @@ -445,7 +590,7 @@ For me the best way to run Longhorny is: - Two shell terminals (one dragged to left (or to the top) and the other to the right (or to the bottom)) - The left has `--src SRC --dst DST`, the right has `--src DST --dst SRC` (with or without password value provided in the site object string) -Then you just go to the right window and as long as you don't copy over *entire* lines (with the wrong --src and --dst in them), you can be fine. +Then you just go to the correct "site terminal" and as long as you don't copy over *entire* lines (with the wrong --src and --dst in them), you can be fine. Then Longhorny uses SolidFire Python SDK to establish a connection to each site. @@ -466,7 +611,7 @@ For production use, I strongly recommend having two single-node test clusters. Here's what you need for testing and development: -- Two mid-sized VMs for SolidFire "sites" - 2 x 16 GB RAM (NOTE: SolidFire Demo VM is an OVA file, and if you don't have VMware you can use the free ESXi 7, deploy it to KVM or Hyper-V, and deploy OVA to ESXi VM - needs 16 GB per VM) +- Two mid-sized VMs for SolidFire "sites" - 2 x 16 GB RAM (NOTE: SolidFire Demo VM is an OVA file, and if you don't have VMware you can use the free ESXi 7, deploy it to KVM or Hyper-V, and deploy OVA to ESXi VM - which requires 16 GB per VM plus say 4 GB for ESXi) - Two mid-sized VMs for compute "site" resource, e.g. Hyper-V, KVM, Kubernetes, etc. - 2 x 16 GB RAM You can get the free SolidFire Demo VM from NetApp Support (login required). It's limited to 100 volumes - more than enough for testing. SolidFire Demo VM allows multiple storage accounts (each being a different iSCSI client), so just two SolidFire VMs can accommodate testing with several VMs simulating different Kubernetes or other clusters at each "site" (VM group). SolidFire Demo VM is well-behaved and an excellent tool for test/development as long as you don't need high performance (it's limited to 3000 IOPS). @@ -478,7 +623,7 @@ As of now it's a mess because SolidFire SDK 12.3.1 was released, but there are s - [Update version-related info and publish on PyPi](https://github.com/solidfire/solidfire-sdk-python/issues/60) - [Usage of dash-separated 'description-file' will not be supported starting Sep 26, 2024](https://github.com/solidfire/solidfire-sdk-python/issues/65) -Older versions are available on `pip`, but may have other bugs. +Older versions are available with `pip`, but may have bugs that have been solved since. See the Github [issues](https://github.com/solidfire/solidfire-sdk-python/issues) for more. 
diff --git a/longhorny.py b/longhorny.py new file mode 100755 index 0000000..72cbbf5 --- /dev/null +++ b/longhorny.py @@ -0,0 +1,1705 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# sfc.py + +############################################################################### +# Synopsis: # +# Longhorny manages SolidFire cluster and volume pairing/unpairing/reporting # +# # +# Author: @scaleoutSean # +# https://github.com/scaleoutsean/longhorny # +# License: the Apache License Version 2.0 # +############################################################################### + +# References: +# 1) SolidFire API documentation: +# https://docs.netapp.com/us-en/element-software/api/ +# 2) SolidFire Python SDK: +# https://solidfire-sdk-python.readthedocs.io/en/latest/index.html +# 3) NetApp Element Software Remote Replication - Feature Description and +# Deployment Guide (TR-4741): +# https://www.netapp.com/media/10607-tr4741.pdf + +import time +import argparse +import ast +import datetime +import logging +import os +import pprint +from getpass import getpass +from solidfire.factory import ElementFactory +from solidfire import common +from solidfire.common import LOG +from solidfire.common import ApiServerError +from solidfire.common import SdkOperationError +common.setLogLevel(logging.ERROR) + + +def cluster(args): + if args.list: + pairing = report_cluster_pairing(src, dst) + print("\nCLUSTER (MUTUAL) PAIRING REPORT:\n") + pprint.pp(pairing) + elif args.pair: + pair_cluster(src, dst) + elif args.unpair: + unpair_cluster(src, dst) + else: + logging.warning( + "Cluster action not recognized. If this is unhandled, there may be a parsing bug or misconfiguration.") + return + + +def report_cluster_pairing(src: dict, dst: dict) -> dict: + """ + Print to console cluster pairing information between SRC and DST. + """ + pairing = get_cluster_pairing(src, dst) + if len(pairing[src['clusterName']]) == 0 and len( + pairing[dst['clusterName']]) == 0: + print("Neither cluster has existing cluster pairing relationship(s).") + logging.warning("Neither " + + str(src['clusterName']) + + " nor " + + str(dst['clusterName']) + + " has any cluster pairing relationships.") + elif len(pairing[src['clusterName']]) == 0 or len(pairing[dst['clusterName']]) == 0: + logging.warning("One of the clusters is not paired. Number of relationships (SRC/DST): " + + str(len(pairing[src['clusterName']])) + "/" + str(len(pairing[dst['clusterName']])) + ".") + else: + for site in pairing: + for i in pairing[site]: + if i['clusterPairUUID'] in [j['clusterPairUUID'] + for j in pairing[site]]: + logging.info( + "Clusters are paired through clusterPairUUID " + str(i['clusterPairUUID']) + ".") + else: + logging.warning( + "Clusters have pairing relationships but are not mutually paired. Foreign relationship: " + + str( + i['clusterPairUUID']) + + ".") + return (pairing) + + +def get_cluster_pairing(src: dict, dst: dict) -> dict: + """ + Return dict with per-site cluster pairings on both clusters. + + May return several or no pairing relationships for each cluster, including with other clusters. 
+ """ + pairing = {} + for site in src, dst: + logging.info("Querying site: " + str(site['mvip']) + ".") + try: + r = site['sfe'].list_cluster_pairs().to_json()['clusterPairs'] + logging.info( + "Result for site: " + + site['mvip'] + + " : ", + str(r) + + ".") + pairing[site['clusterName']] = r + except common.ApiServerError as e: + logging.error( + "Error listing cluster pairs for cluster: " + str(site['clusterName']) + ".") + logging.error("Error: " + str(e)) + exit(100) + return pairing + + +def get_exclusive_cluster_pairing(src: dict, dst: dict) -> dict: + """ + Return dict with 1-to-1 pairing relationships between SRC and DST clusters if and only if that one cluster paring relationship exists. + """ + pairing = get_cluster_pairing(src, dst) + if not len(pairing[src['clusterName']]) == 1 and not len( + pairing[dst['clusterName']]) == 1: + logging.warning("Number of cluster pair relationships (SRC/DST): " + + str(len(pairing[src['clusterName']])) + "/" + str(len(pairing[dst['clusterName']])) + ".") + return {} + else: + for site in pairing: + for i in pairing[site]: + if i['clusterPairUUID'] in [j['clusterPairUUID'] + for j in pairing[site]]: + logging.info( + "Clusters are paired through clusterPairUUID " + str(i['clusterPairUUID']) + ".") + else: + logging.warning( + "Clusters have pairing relationships but are not mutually paired. Foreign relationship: " + + str( + i['clusterPairUUID']) + + ".") + return pairing + + +def pair_cluster(src: dict, dst: dict): + """ + Pair SRC and DST clusters. + + Works only if each cluster has no existing cluster pairing relationships. + """ + pairing = get_cluster_pairing(src, dst) + if pairing[src['clusterName']] == [] and pairing[dst['clusterName']] == []: + try: + pairing_key = src['sfe'].start_cluster_pairing().to_json()[ + 'clusterPairingKey'] + resp = dst['sfe'].complete_cluster_pairing(pairing_key).to_json() + if isinstance(resp['clusterPairID'], int): + logging.info("Pairing is now complete. Cluster " + + src['clusterName'] + + "returned cluster pair ID " + + str(resp['clusterPairID']) + + ".") + except common.ApiServerError as e: + logging.error("Error: Unable to pair clusters: " + str(e)) + exit(100) + exclusive_pairing = get_exclusive_cluster_pairing(src, dst) + print("\nCLUSTER PAIRING STATUS AFTER PAIRING:\n") + pprint.pp(exclusive_pairing) + return + else: + logging.warning( + "Clusters are already paired, paired with more than one cluster or in an incomplete pairing state. Use cluster --list to view current status. Exiting.") + exit(100) + + +def unpair_cluster(src: dict, dst: dict): + """ + Unpair clusters identified by SRC and DST. + + Requires that each cluster is not paired or has no more than one cluster pairing relationship. + """ + pairing = get_exclusive_cluster_pairing(src, dst) + if pairing == {}: + logging.error( + "Clusters are not paired, in an incomplete pairing state or there is some other problem. Use cluster --list to view current status.") + exit(100) + cluster_pair_ids = [] + for site in src, dst: + try: + resp = site['sfe'].list_cluster_pairs().to_json()['clusterPairs'] + cluster_pair_ids.append( + (site['clusterName'], resp[0]['clusterPairID'])) + except common.ApiServerError as e: + logging.error("Error: Unable to list cluster pairs: " + str(e)) + logging.error("Error: " + str(e)) + exit(100) + volume_relationships_check = list_volume(src, dst, []) + if len(volume_relationships_check) > 0 or volume_relationships_check != []: + logging.error( + "One or both clusters are already have paired volumes. 
Please unpair all paired volumes first.") + exit(100) + if len(resp) == 0 or len(resp) > 1: + logging.error( + "Zero or more than one cluster pairs found. Cluster unpairing action requires a 1-to-1 mutual and exclusive relationship. Exiting.") + exit(100) + else: + for site_id_tuple in cluster_pair_ids: + if site_id_tuple[0] == src['clusterName']: + site = src + try: + resp = site['sfe'].remove_cluster_pair( + site_id_tuple[1]).to_json() + except common.ApiServerError as e: + logging.error( + "Error: Unable to unpair clusters: " + str(e)) + exit(100) + else: + site = dst + try: + resp = site['sfe'].remove_cluster_pair( + site_id_tuple[1]).to_json() + except common.ApiServerError as e: + logging.error( + "Error: Unable to unpair clusters: " + str(e)) + exit(100) + exclusive_pairing = get_exclusive_cluster_pairing(src, dst) + print("\nCLUSTER PAIRING STATUS AFTER UNPAIRING:\n") + pprint.pp(exclusive_pairing) + return + + +def volume(args): + if args.list: + try: + try: + volume_pair = data_type(args.data) + logging.info( + "Data provided for listing volumes. Querying pair: " + + str(volume_pair) + + " for pairing status.") + except BaseException: + volume_pair = [] + logging.info( + "Trying to list volumes in list " + + str(volume_pair) + + " for pairing status.") + list_volume(src, dst, volume_pair) + except Exception as e: + logging.error("Error: " + str(e)) + exit(200) + elif args.report: + try: + if args.data is None or args.data == '': + logging.error( + "No data provided for volume report customization. Using default value: [].") + report_data = {} + else: + report_data = {} + pass + report_volume_replication_status(src, dst, report_data) + except Exception as e: + logging.error("Error: " + str(e)) + exit(200) + elif args.pair: + pair_data = data_type(args.data) + pair_volume(src, dst, pair_data) + elif args.unpair: + try: + data = data_type(args.data) + except BaseException: + logging.error( + "Error: Unpair data missing or not understood. Presently only one pair is supported per unpair action, ex: --data '1,2'. Exiting.") + exit(200) + if data is None or data == []: + logging.error( + "No data found for unpairing. By default, unpair action unpairs nothing rather than everything. Exiting.") + exit(200) + unpair_volume(src, dst, data) + elif args.prime_dst: + av_data = account_volume_data(args.data) + prime_destination_volumes(src, dst, av_data) + elif args.reverse: + reverse_replication(src, dst) + elif args.snapshot: + if args.data is None or args.data == '': + logging.info( + "No data input provided. Using default values: 168,long168h-snap.") + data = '168;long168h-snap' + else: + data = args.data + snap_data = snapshot_data(data) + snapshot_site(src, dst, snap_data) + elif args.mismatched: + list_mismatched_pairs(src, dst) + elif args.set_mode: + if args.data is None: + logging.error( + "Replication takes two inputs; the mode and one or more volume IDs from the replication source (e.g. --data 'Async;100,101)'. Exiting.") + exit(200) + else: + volume_mode = replication_data(args.data) + logging.info("Desired replication type: " + + str(volume_mode[0]) + + " for volume ID(s)" + + str(volume_mode[1]) + + ".") + set_volume_replication_mode(src, dst, volume_mode) + elif args.set_status: + if args.data is None: + logging.error( + "Replication state must be specified. Use --data 'pause' or --data 'resume'. 
Exiting.") + exit(200) + else: + state = replication_state(args.data) + if state == 'pause' or state == 'resume': + logging.info("Desired replication state: " + state) + set_volume_replication_state(src, dst, state) + else: + logging.warning("Volume action not recognized.") + return + + +def report_volume_replication_status( + src: dict, dst: dict, report_data: dict) -> dict: + """ + Print out volume replication report in dictionary format. + """ + print( + "TODO: report_volume_replication_status using report_data: " + + str(report_data)) + return + + +def snapshot_site(src: dict, dst: dict, snap_data: list) -> dict: + """ + Create local crash-consistent snapshot for all individual volumes using paired volumes on SRC. + + Longhorny takes individual snapshots rather than one monster-sized group snapshot. + That means that should a snapshot fail (e.g. volume has 32 snapshots), it will not affect the taking of other snapshots. + That also means that this function runs slower than a group snapshot, and does nothing for consistency groups. + Do not count on being to able to restore data that resides on different volumes without group snapshots. + """ + logging.warning("NOTE: this function creates individual snapshots and does nothing for groups of active inter-related volumes. If you have applications that span multiple volumes, you may need to use group snapshots or dismount volumes prior to taking snapshot with this function!") + logging.warning( + "Taking individual snapshots at SRC using params: " + + str(snap_data)) + paired_volumes = list_volume(src, dst, []) + if paired_volumes == []: + logging.error( + "No paired volumes found. Ensure volumes are paired before taking a snapshot.") + exit(200) + snapshot_retention = str(snap_data[0]) + ':00:00' # "HH:MM:SS" + snapshot_name = snap_data[1] + for v in paired_volumes: + try: + r = src['sfe'].create_snapshot( + v['localVolumeID'], retention=snapshot_retention, name=snapshot_name).to_json()['snapshot'] + snap_meta = "volume ID: " + str(r['volumeID']) + ", snapshot ID: " + str( + r['snapshotID']) + ", snapshot name: " + str(r['name']) + ", expiration time" + str(r['expirationTime']) + logging.info("Snapshot created for volume ID " + + str(v['localVolumeID']) + ": " + snap_meta) + except common.ApiServerError as e: + logging.error( + "Error creating snapshot for volume ID " + + str(v['localVolumeID']) + + ".") + logging.error("Error: " + str(e)) + exit(200) + return + + +def list_mismatched_pairs(src: dict, dst: dict) -> dict: + """ + List mismatched volume pairs. + + Mismatched pairs are those for which there's a unilateral pairing, volume sizes do not match or something else appears wrong. 
+ """ + params = {'isPaired': True} + existing_pairs = {} + src_pairs = src['sfe'].invoke_sfapi( + method='ListVolumes', + parameters=params)['volumes'] + dst_pairs = dst['sfe'].invoke_sfapi( + method='ListVolumes', + parameters=params)['volumes'] + src_ids = [(i['volumeID'], i['volumePairs'][0]['remoteVolumeID']) + for i in src_pairs] + dst_ids = [(i['volumeID'], i['volumePairs'][0]['remoteVolumeID']) + for i in dst_pairs] + if len(src_ids) != len(dst_ids): + logging.warning("SRC and DST have different number of volume pairings at SRC/DST: " + + str(len(src_ids)) + "/" + str(len(dst_ids)) + ".") + src_account_ids = [(i['volumeID'], i['accountID']) for i in src_pairs] + dst_account_ids = [(i['volumeID'], i['accountID']) for i in dst_pairs] + if len(set([i[1] for i in src_account_ids])) > 1: + logging.warning("Multiple account IDs found on paired volumes at SRC: " + + str(len(set([i[1] for i in src_account_ids]))) + ".") + if len(set([i[1] for i in dst_account_ids])) > 1: + logging.warning("Multiple account IDs found on paired volumes at DST: " + + str(len(set([i[1] for i in dst_account_ids]))) + ".") + mismatch = [] + for p in src_ids: + pr = (p[1], p[0]) + if pr not in dst_ids: + logging.warning("Volume ID " + + str(p[0]) + + " is paired on SRC but not on DST cluster.") + for p in dst_ids: + pr = (p[1], p[0]) + if pr not in src_ids: + logging.warning("Volume ID " + + str(p[0]) + + " is paired on DST but not on SRC cluster.") + if len(src_ids) == 0 and len(dst_ids) == 0: + logging.warning("No volumes found on one or both sides.") + return + elif len(src_ids) == 0 or len(dst_ids) == 0: + logging.warning( + "One or both sides have no paired volumes. Number of paired volumes at SRC/DST:" + + len(src_ids) + + "/" + + len(dst_ids) + + ".") + return + else: + site_pairs = {} + s_list = [] + for pair in src_pairs: + kvs = { + 'accountID': pair['accountID'], + 'volumeID': pair['volumeID'], + 'name': pair['name'], + 'deleteTime': pair['deleteTime'], + 'purgeTime': pair['purgeTime'], + 'totalSize': pair['totalSize'], + 'enable512e': pair['enable512e'], + 'volumePairUUID': pair['volumePairs'][0]['volumePairUUID'], + 'remoteVolumeID': pair['volumePairs'][0]['remoteVolumeID'], + 'remoteVolumeName': pair['volumePairs'][0]['remoteVolumeName'] + } + if 'qos' in pair.keys(): + qos = pair['qos'] + pair['qos'] = qos + else: + qos_policy_id = pair['qosPolicyID'] + pair['qosPolicyID'] = qos_policy_id + s_list.append(kvs) + site_pairs[src['clusterName']] = s_list + d_list = [] + for pair in dst_pairs: + kvs = { + 'accountID': pair['accountID'], + 'volumeID': pair['volumeID'], + 'name': pair['name'], + 'deleteTime': pair['deleteTime'], + 'purgeTime': pair['purgeTime'], + 'totalSize': pair['totalSize'], + 'enable512e': pair['enable512e'], + 'volumePairUUID': pair['volumePairs'][0]['volumePairUUID'], + 'remoteVolumeID': pair['volumePairs'][0]['remoteVolumeID'], + 'remoteVolumeName': pair['volumePairs'][0]['remoteVolumeName'] + } + if 'qos' in pair.keys(): + pair['qos'] = qos + else: + qos_policy_id = pair['qosPolicyID'] + pair['qosPolicyID'] = qos_policy_id + d_list.append(kvs) + site_pairs[dst['clusterName']] = d_list + unique = [] + for i in site_pairs[src['clusterName']]: + mismatch = {} + if i['volumePairUUID'] not in ( + j['volumePairUUID'] for j in site_pairs[dst['clusterName']]): + mismatch[src['clusterName']] = { + 'volumeID': i['volumeID'], + 'volumePairUUID': i['volumePairUUID'], + 'volumePairUUID': i['volumePairUUID'], + 'mismatchSite': dst['clusterName'], + 'remoteVolumeID': i['remoteVolumeID']} + 
logging.warning("Mismatch found at SRC: vol ID " + + str(i['volumeID']) + + " in relationship " + + str(i['volumePairUUID']) + + " found at SRC, but relationship from paired SRC volume ID is missing: " + + str(i['remoteVolumeID']) + + ".") + unique.append(mismatch) + else: + pass + for i in site_pairs[dst['clusterName']]: + mismatch = {} + if i['volumePairUUID'] not in ( + j['volumePairUUID'] for j in site_pairs[src['clusterName']]): + mismatch[dst['clusterName']] = { + 'volumeID': i['volumeID'], + 'volumePairUUID': i['volumePairUUID'], + 'remoteSite': src['clusterName'], + 'remoteVolumeID': i['remoteVolumeID']} + logging.warning("Mismatch found at DST: vol ID " + + str(i['volumeID']) + + " in relationship " + + str(i['volumePairUUID']) + + " found at SRC, but relationship from paired SRC volume ID is missing: " + + str(i['remoteVolumeID']) + + ".") + unique.append(mismatch) + else: + pass + print("\nMISMATCHED PAIRED VOLUMES ONLY:\n") + pprint.pp(unique) + return + + +def prime_destination_volumes(src, dst, data): + """ + Creates volumes on DST cluster for specified account, with volume properties based on list of Volume IDs from SRC but set to replicationTarget mode. + + Takes a two inputs, the first of which is a pair of SRC and DST account IDs and the second is a list of volume IDs (VOL1, VOL2) to use as templates at the remote site. + """ + try: + try: + src_account_vols = src['sfe'].list_volumes_for_account(data[0][0]).to_json()[ + 'volumes'] + except BaseException: + logging.error( + "Error getting account volumes for source site account ID: " + + str( + data[0][0]) + + ". Make sure the account ID exists. Exiting.") + exit(200) + # list of SRC Volume IDs to be used as templates + src_vid = [i for i in data[1]] + for v in src_account_vols: + if v['volumeID'] in src_vid: + logging.info("Volume ID " + str(v['volumeID']) + " found to belong to account ID " + str( + data[0][0]) + ". Checking the volume for existing replication relationships (must be none).") + if 'volumePairs' in v.keys() and v['volumePairs'] != []: + logging.error("Error: Volume ID " + + str(v['volumeID']) + + " has replication relationships. Volumes used for priming must not be already paired. Exiting.") + exit(200) + except BaseException: + logging.error( + "Error getting account volumes for account ID: " + + str( + data[0][0]) + + ". All of the SRC volume IDs must be owned by the specific SRC account ID. 
Exiting.") + exit(200) + src_volumes = [] + for v in src_account_vols: + if v['volumeID'] in src_vid: # skip volumes not in the DATA list of volume IDs + if 'qos' in v.keys(): + src_volume = { + 'volumeID': v['volumeID'], + 'enable512e': v['enable512e'], + 'fifoSize': v['fifoSize'], + 'minFifoSize': v['minFifoSize'], + 'name': v['name'], + 'qos': v['qos'], + 'totalSize': v['totalSize']} + else: + src_volume = { + 'volumeID': v['volumeID'], + 'enable512e': v['enable512e'], + 'fifoSize': v['fifoSize'], + 'minFifoSize': v['minFifoSize'], + 'name': v['name'], + 'qosPolicyID': v['qosPolicyID'], + 'totalSize': v['totalSize']} + src_volumes.append(src_volume) + logging.warning( + "SRC volumes to be used as template for volume creation on DST cluster:") + pprint.pp(src_volumes) + try: + dst_account_id = dst['sfe'].get_account_by_id(data[0][1]).to_json()[ + 'account']['accountID'] + if dst_account_id == data[0][1]: + print("DST account exists (DATA vs API response): " + + str(data[0][1]) + "," + str(dst_account_id) + ".") + logging.info("DST account exists (DATA vs API response): " + + str(data[0][1]) + "," + str(dst_account_id) + ".") + except BaseException: + logging.error("DST account ID " + + str(data[0][1]) + + " does not exist or cannot be queried. Exiting.") + exit(200) + dst_volumes = [] + for v in src_volumes: + if 'qos' in v.keys(): + params = { + 'accountID': dst_account_id, + 'name': v['name'], + 'totalSize': v['totalSize'], + 'enable512e': v['enable512e'], + 'qos': v['qos'], + 'fifoSize': v['fifoSize'], + 'minFifoSize': v['minFifoSize']} + else: + print( + "QoS not found in volume properties. Using qosPolicyID instead:", + v['qosPolicyID']) + params = { + 'accountID': dst_account_id, + 'name': v['name'], + 'totalSize': v['totalSize'], + 'enable512e': v['enable512e'], + 'qosPolicyID': v['qos'], + 'fifoSize': v['fifoSize'], + 'minFifoSize': v['minFifoSize']} + try: + logging.info( + "Creating volume on DST cluster using params: " + + str(params) + + ".") + dst_volume = dst['sfe'].invoke_sfapi( + method='CreateVolume', parameters=params) + dst_volumes.append( + (v['volumeID'], dst_volume['volume']['volumeID'])) + except ApiServerError as e: + logging.error("Error creating volume on DST cluster: " + str(e)) + exit(200) + try: + if len(dst_volumes) < 500: + logging.info( + "Less than 500 volumes to be modified. Using the bulk volume modification API.") + r = dst['sfe'].modify_volumes( + [i[1] for i in dst_volumes], access='replicationTarget') + else: + logging.info( + "More than 500 volumes to be modified. Using the individual volume modification API. Inspect DST for correctness before pairing.") + for i in dst_volumes: + try: + r = dst['sfe'].modify_volume( + i[1], access='replicationTarget') + logging.info("Modified volume ID " + + str(i[1]) + + " to access mode replicationTarget.") + except ApiServerError as e: + logging.error( + "Error modifying volume ID " + + str( + i[1]) + + " to access mode replicationTarget. Exiting loop to prevent massive mismatches in access mode of new volumes at DST.") + logging.error("Error: " + str(e)) + exit(200) + except ApiServerError as e: + print("API server response code: ", e) + logging.warning( + "setting DST volumes to access mode: replicationTarget. Please review and remediate. 
API server message: ", + str(e)) + if dst_volumes != []: + print("DST volumes created [(SRC,DST)..]: " + + str([i[1] for i in dst_volumes])) + dst_volumes_str = ";".join( + [str(i[0]) + "," + str(i[1]) for i in dst_volumes]) + print( + "New DST volumes ought to be in access mode replicationTarget. Inspect new DST volumes for correctness and then you may pair the volumes with data argument: --data " + + "\"" + + dst_volumes_str + + "\".") + pprint.pp(dst_volumes) + return + + +def list_volume(src: dict, dst: dict, volume_pair: list) -> dict: + """ + List mutually paired volumes on SRC and DST cluster. + + If volume pair list is not provided, list all mutually paired volumes. + If volume pair list is provided, list only those volume pairs if such pair(s) exist in a paired relationship. + Volumes paired asymmetrically (one-sided, or different volume sizes) are not listed as they're considered mismatched (see volume --mismatch). + """ + pairing = get_exclusive_cluster_pairing(src, dst) + if pairing == {}: + logging.error( + "Clusters are already paired with more than one cluster or in an incomplete pairing state. Use cluster --list to view current status.") + exit(200) + paired_volumes = [] + if volume_pair == []: + logging.info( + "No volume pair data provided. Listing all paired active volumes.") + params = { + 'isPaired': True, + 'volumeStatus': 'active', + 'includeVirtualVolumes': False} + volume = src['sfe'].invoke_sfapi( + method='ListVolumes', parameters=params)['volumes'] + elif isinstance(volume_pair, list): + volume_ids = [] + for i in volume_pair: + if not isinstance(i, tuple): + logging.error("Volume pair data not understood. Exiting.") + exit(200) + else: + volume_ids.append(int(i[0])) + params = { + 'volumeIDs': volume_ids, + 'isPaired': True, + 'volumeStatus': 'active', + 'includeVirtualVolumes': False} + volume = src['sfe'].invoke_sfapi( + method='ListVolumes', parameters=params)['volumes'] + else: + logging.error("Volume pair data not understood. Exiting.") + exit(200) + for v in volume: + if 'volumePairs' in v.keys() and v['volumePairs'] != []: + if v['volumePairs'] is not None and not len(v['volumePairs']) > 1: + for rr in v['volumePairs']: + if rr['clusterPairID'] != pairing[src['clusterName'] + ][0]['clusterPairID']: + print("Suspicious volume:", str(v['volumeID']) + + " Name: " + + v['name'], "with pairing *cluster* relationship ID", rr['clusterPairID'], "does not match the cluster pair ID " + + str(pairing[src['clusterName']][0]['clusterPairID']) + + ". Ensure the volume is not paired. Exiting.") + logging.error("Found volume paired with with a cluster other than DST. Exiting. Use cluster --list or volume --list to verify one-to-one cluster peering relationship. 
Unknown clusterPairID " + str(rr['clusterPairID']) +
+                                          " found for volume ID/name: " + str(v['volumeID']) + ", " + v['name'] + ".")
+                        exit(200)
+                    else:
+                        logging.info("Confirmed that volume " + str(v['volumeID']) +
+                                     " is paired with clusterPairID " +
+                                     str(pairing[src['clusterName']][0]['clusterPairID']) + ".")
+                        paired_info = {
+                            'clusterPairID': rr['clusterPairID'],
+                            'localVolumeID': v['volumeID'],
+                            'localVolumeName': v['name'],
+                            'remoteVolumeName': rr['remoteVolumeName'],
+                            'remoteReplicationMode': rr['remoteReplication']['mode'],
+                            'remoteReplicationPauseLimit': rr['remoteReplication']['pauseLimit'],
+                            'remoteReplicationStateSnapshots': rr['remoteReplication']['snapshotReplication']['state'],
+                            'remoteReplicationState': rr['remoteReplication']['state'],
+                            'remoteVolumeID': rr['remoteVolumeID'],
+                            'volumePairUUID': rr['volumePairUUID']
+                        }
+                        logging.info("Paired volume found for SRC volume ID " + str(v['volumeID']) +
+                                     ", name " + str(v['name']) +
+                                     " - remote volume " + str(rr['remoteVolumeID']) +
+                                     ", name " + str(rr['remoteVolumeName']) + ".")
+                        paired_volumes.append(paired_info)
+            else:
+                print("Suspicious volume:", str(v['volumeID']) + " Name: " + v['name'])
+                logging.warning(
+                    "Volume is paired with more than one volume. Use cluster --list to verify one-to-one cluster peering relationship. Volume ID and name: " +
+                    str(v['volumeID']) + ", " + v['name'])
+        else:
+            logging.info("No paired volumes found for volume " + v['name'])
+    if volume_pair == []:
+        print("\nPAIRED VOLUMES REPORT:\n")
+        pprint.pp(paired_volumes)
+    elif isinstance(volume_pair, list):
+        print("\nVOLUMES REPORT FOR SPECIFIED VOLUME PAIR(S): " + str(volume_pair) + "\n")
+        pprint.pp(paired_volumes)
+    else:
+        logging.error("Volume pair data not understood. Exiting.")
+        exit(200)
+    return paired_volumes
+
+
+def pair_volume(src: dict, dst: dict, data: tuple) -> dict:
+    """
+    Pair volume pairs on SRC and DST clusters.
+
+    A pairing action may result in an (eventual or immediate) warning, which may be benign or require action for replication to work.
+    pair_volume only pairs and does not remedy possible warnings or fix network and firewall issues preventing replication.
+    https://docs.netapp.com/us-en/element-software/storage/reference_replication_volume_pairing_warnings.html
+    """
+    pairing = get_cluster_pairing(src, dst)
+    if pairing[src['clusterName']][0]['clusterPairID'] != pairing[dst['clusterName']][0]['clusterPairID']:
+        logging.error("Cluster pair IDs do not match. SRC/DST: " +
+                      str(pairing[src['clusterName']][0]['clusterPairID']) + ", " +
+                      str(pairing[dst['clusterName']][0]['clusterPairID']) + ". Exiting.")
+        exit(200)
+    paired_volumes = list_volume(src, dst, [])
+    src_volume_ids = [item['localVolumeID'] for item in paired_volumes]
+    if len(src_volume_ids) != 0:
+        s_params = {
+            'volumeIDs': src_volume_ids,
+            'isPaired': True,
+            'volumeStatus': 'active',
+            'includeVirtualVolumes': False}
+        try:
+            src_vol = src['sfe'].invoke_sfapi(
+                method='ListVolumes', parameters=s_params)['volumes']
+        except BaseException:
+            logging.error(
+                "Error getting volume information. Use --list to make sure the volumes exist and SRC and DST are correct. Exiting.")
+            exit(200)
+        src_vol_mode = [v['access'] for v in src_vol]
+        if list(set(src_vol_mode)) != ['readWrite']:
+            logging.error("SRC volume access mode is not suitable for pairing. SRC site volumes are in mode: " +
+                          str(src_vol_mode[0]) +
+                          ". Direction of replication must be from readWrite to replicationTarget. Swap SRC/DST and change volume ID order (SRC first). Exiting.")
+            exit(200)
+
+    for v_pair in data:
+        s_params = {
+            'volumeIDs': [v_pair[0]],
+            'isPaired': False,
+            'volumeStatus': 'active',
+            'includeVirtualVolumes': False}
+        d_params = {
+            'volumeIDs': [v_pair[1]],
+            'isPaired': False,
+            'volumeStatus': 'active',
+            'includeVirtualVolumes': False}
+        try:
+            src_vol = src['sfe'].invoke_sfapi(
+                method='ListVolumes', parameters=s_params)
+            dst_vol = dst['sfe'].invoke_sfapi(
+                method='ListVolumes', parameters=d_params)
+            if src_vol['volumes'] == [] or dst_vol['volumes'] == []:
+                logging.error(
+                    "Error getting volume information. Use --list to make sure the volumes exist and SRC and DST are correct. Exiting.")
+                exit(200)
+        except common.ApiServerError as e:
+            logging.error(
+                "Error getting volume information. Use --list to make sure the volumes exist and SRC and DST are correct. Exiting.")
+            logging.error("Error: " + str(e))
+            exit(200)
+        prop_keys = ['blockSize', 'enable512e', 'status', 'totalSize']
+        if src_vol['volumes'][0]['access'] == 'readWrite' and dst_vol['volumes'][0]['access'] == 'replicationTarget':
+            logging.info("Volume access mode suitable for SRC and DST volumes: " +
+                         src_vol['volumes'][0]['access'] + ", " + dst_vol['volumes'][0]['access'] + ".")
+            for k in prop_keys:
+                if src_vol['volumes'][0][k] == dst_vol['volumes'][0][k]:
+                    logging.info("Volume property match for key: " + k +
+                                 " SRC: " + str(src_vol['volumes'][0][k]) +
+                                 " DST: " + str(dst_vol['volumes'][0][k]) + ".")
+                else:
+                    logging.error("Volume property mismatch for key: " + str(k) +
+                                  ". SRC: " + str(src_vol['volumes'][0][k]) +
+                                  " DST: " + str(dst_vol['volumes'][0][k]) +
+                                  ". Ensure consistency of settings before pairing. Exiting.")
+                    if k == 'totalSize':
+                        logging.error(
+                            "Volume size mismatch. Enlarge the smaller volume or create a new pair with identical sizes and try again.")
+                    if k == 'enable512e':
+                        logging.error(
+                            "One of the volumes has to be recreated so that both have the same enable512e setting.")
+                    exit(200)
+        else:
+            logging.error("Volume access mode not suitable (SRC/DST): " +
+                          src_vol['volumes'][0]['access'] + " and " + dst_vol['volumes'][0]['access'] +
+                          ". Verify direction of cluster replication and set the DST volume to replicationTarget. Exiting.")
+            exit(200)
+    for v_pair in data:
+        try:
+            src_key = src['sfe'].start_volume_pairing(v_pair[0])
+            params = {
+                'volumeID': v_pair[1],
+                'volumePairingKey': src_key.volume_pairing_key}
+            dst['sfe'].invoke_sfapi(
+                method='CompleteVolumePairing',
+                parameters=params)
+            logging.warning("Pairing has been successful. SRC volume ID " + str(v_pair[0]) +
+                            " has been paired with DST volume ID " + str(v_pair[1]) + ".")
+        except common.ApiServerError as e:
+            logging.error("Error pairing volumes. SolidFire API returned an error.")
+            logging.error("Error: " + str(e))
+            exit(200)
+    return
+
+
+def unpair_volume(src: dict, dst: dict, data: tuple) -> dict:
+    """
+    Unpair a pair of volumes on the SRC and DST clusters.
+
+    Pairing relationship must exist on both sides. If the pair is not symmetric, exit with error.
+ """ + paired_volumes = list_volume(src, dst, data) + pvt = [((item['localVolumeID'], item['remoteVolumeID'])) + for item in paired_volumes] + if len(data) == 1 and data[0] in pvt: + delete_pair = dict(zip(['local', 'remote'], data[0])) + if args.dry == True or args.dry == 'True' or args.dry == 'true' or args.dry == 'on' or args.dry == 'On' or args.dry == 'ON': + logging.info( + "Dry run in unpair action is ON. Value: " + str(args.dry)) + print( + "\n===> Dry run: replication relationship for volume IDs that would be removed (SRC, DST):", + data) + else: + logging.warning( + "Dry run in unpair action is OFF. Value: " + str(args.dry)) + try: + src['sfe'].remove_volume_pair(delete_pair['local']) + dst['sfe'].remove_volume_pair(delete_pair['remote']) + logging.warning( + "Volume IDs unpaired at SRC/DST: " + + str(delete_pair)) + except common.ApiServerError as e: + logging.error("Error unpairing volumes. Exiting.") + logging.error("Error: " + str(e)) + exit(200) + return + elif len(data) > 1 and data[0] in pvt: + logging.error( + "More than one volume pair found. That could be risky and is currently not supported. Multiple pairs should be deleted one by one. Exiting.") + exit(200) + elif len(data) == 1 or data[0] not in pvt: + logging.error( + "Volume pair not found in list of volume replication pairs. Use --list to verify, including SRC and DST settings. Exiting.") + exit(200) + else: + logging.error( + "Volume pair not found in list of volume replication pairs. Use --list to verify, including SRC and DST settings. Exiting.") + exit(200) + + +def set_volume_replication_mode(src: dict, dst: dict, replication_mode: list): + """ + Modify all paired volumes at SRC to use Sync, Async or SnapshotsOnly replication type. + + SRC must be in readWrite mode with relationships to DST, so --src value matters. + Sync replication type may noticeably degrade performance and will not work with inter-cluster latency over 8ms. Sub-5ms recommended. See latency value in cluster --list. + Async is the default SolidFire type of replication and the SolidFire API creates Async pairings by default. Sub-20ms recommended. Maximum latency for Async replication is 8ms (see latency value in cluster --list). + Snapshot-Only type of replication replicates only snapshots (enabled for remote replication) and nothing else. Recommended for low bandwidth, high latency (>5ms) connections. + """ + if replication_mode[1] == []: + logging.warning( + "No volume IDs provided. All volumes will be set to" + + replication_mode[0] + + " replication mode.") + logging.info("Modify paired volumes to use " + + replication_mode[0] + + " replication mode at SRC. SRC volume IDs: " + str(replication_mode[1]) + ". [] means ALL volumes.") + paired_volumes = list_volume(src, dst, []) + if replication_mode[1] == []: + src_volume_ids = [item['localVolumeID'] for item in paired_volumes] + else: + src_volume_ids = [] + for i in replication_mode[1]: + if i in [item['localVolumeID'] for item in paired_volumes]: + logging.info( + "Volume ID " + + str(i) + + " found in list of currently paired volumes at SRC.") + src_volume_ids.append(i) + else: + logging.error( + "Volume ID " + + str(i) + + " not found in list of currently paired volumes at SRC. Are you sure you got the right site or paired volume IDs? 
Exiting.") + exit(200) + s_params = { + 'volumeIDs': src_volume_ids, + 'isPaired': True, + 'volumeStatus': 'active', + 'includeVirtualVolumes': False} + try: + src_vol = src['sfe'].invoke_sfapi( + method='ListVolumes', parameters=s_params)['volumes'] + if src_vol == []: + logging.error( + "Error getting volume information. Use --list to make sure the volumes exist and SRC and DST are correct. Exiting.") + exit(200) + except common.ApiServerError as e: + logging.error( + "Error getting volume information. Use --list to make sure the volumes exist and SRC and DST are correct. Exiting.") + logging.error("Error: " + str(e)) + exit(200) + src_vol_mode = [v['access'] for v in src_vol] + if list(set(src_vol_mode)) != ['readWrite']: + logging.error("SRC volume access mode is not suitable for pairing. Specified SRC site volumes are in mode: " + + str(src_vol_mode[0]) + + ". Direction of replication must be from readWrite to replicationTarget. Changes must be made on the source where replication originates. Maybe you wanted to change mode at DST? Exiting.") + exit(200) + if args.dry == True or args.dry == 'True' or args.dry == 'true' or args.dry == 'on' or args.dry == 'On' or args.dry == 'ON': + logging.info( + "Dry run on replication mode change is ON. Value: " + str(args.dry)) + print("\n===> Dry run: Replication mode of SRC volume IDs that would be changed to " + + replication_mode[0] + " : " + str(src_volume_ids) + ".") + logging.info( + "DRY RUN on replication mode change for volumes at SRC. No changes will be made. Action would change SRC volume ID(s) " + + str(src_volume_ids) + + " to " + + str(replication_mode[1]) + + " while no changes would be done to DST volumes.") + else: + logging.warning( + "Dry run on replication mode change for volumes at SRC is OFF. Value: " + + str( + args.dry) + + ". Setting replication mode to " + + str(replication_mode[0]) + + ".") + for vid in src_volume_ids: + try: + src['sfe'].modify_volume_pair( + vid, mode=replication_mode[0]) + logging.info( + "Set replication mode on SRC volume " + + str(vid) + + " to " + + str(replication_mode[0]) + + ".") + except common.ApiServerError as e: + logging.error( + "Error setting replication status on SRC volume " + + str(vid) + + " to " + + str(replication_mode[0]) + + ". Exiting.") + logging.error("Error: " + str(e)) + exit(200) + + return + + +def set_volume_replication_state( + src: dict, dst: dict, replication_status: str): + """ + Modify all paired volumes to pause or resume their replication. + + Works on volume pairs that are already paired. If DST is disconnected, status may change only on SRC. + """ + if replication_status == 'pause': + pause_replication = True + elif replication_status == 'resume': + pause_replication = False + else: + logging.error("Invalid desired replication state proposed. Exiting.") + exit(200) + paired_volumes = list_volume(src, dst, []) + if len(paired_volumes) == 0 or paired_volumes is None or paired_volumes == []: + logging.error("No paired volumes found. Exiting.") + exit(200) + src_volume_ids = [item['localVolumeID'] for item in paired_volumes] + if args.dry == True or args.dry == 'True' or args.dry == 'true' or args.dry == 'on' or args.dry == 'On' or args.dry == 'ON': + logging.info( + "Dry run on replication status change is ON. Value: " + str(args.dry)) + print("\n===> Dry run: SRC volume IDs that would be changed to " + + replication_status + " : " + str(src_volume_ids) + ".") + logging.info( + "DRY RUN on access property change for volumes at SRC. No changes will be made. 
Action would change SRC volume ID(s) " + + str(src_volume_ids) + + " to " + + str(replication_status) + + " and ignore mode of DST volumes.") + else: + logging.warning( + "Dry run on replication state change for volumes at SRC is OFF. Value: " + + str( + args.dry) + + ". Setting paused_manual to " + + str(pause_replication) + + ".") + for vid in src_volume_ids: + try: + src['sfe'].modify_volume_pair( + vid, paused_manual=pause_replication) + logging.info( + "Set replication status on SRC volume " + + str(vid) + + " to " + + str(replication_status) + + ".") + except common.ApiServerError as e: + logging.error( + "Error setting replication status on SRC volume " + + str(vid) + + " to " + + str(replication_status) + + ". Exiting.") + logging.error("Error: " + str(e)) + exit(200) + return + + +def reverse_replication(src: dict, dst: dict) -> dict: + """ + Reverse direction of volume replication at SRC to inbound. + + Pausing / Resuming Volume replication manually causes the transmission of data to cease or resume. + Changing access mode of replication causes the mode to change direction. + """ + cluster_pair_name_id = src['sfe'].list_cluster_pairs().to_json()[ + 'clusterPairs'] + if len(cluster_pair_name_id) != 1: + for i in cluster_pair_name_id: + logging.warning("Reviewing cluster pair ID: " + + str(i['clusterPairID'])) + if i['clusterName'] != dst['clusterName']: + logging.error( + "Found Unconfigured cluster pairing or other pairing with another cluster on cluster " + + str( + src['clusterName']) + + ". Exiting.") + exit(200) + else: + continue + + else: + logging.info("Cluster pair ID should be DST cluster MVIP: " + + str(dst['mvip']) + + ". DST cluster MVIP of paired cluster is:" + + str(cluster_pair_name_id[0]['mvip']) + + " and pair ID against which we will verify is: " + + str(cluster_pair_name_id[0]['clusterPairID']) + + ".") + paired_volumes = list_volume(src, dst, []) + if len(paired_volumes) == 0 or paired_volumes is None or paired_volumes == []: + logging.error("No paired volumes found. Exiting.") + exit(200) + src_volume_ids = [item['localVolumeID'] for item in paired_volumes] + dst_volume_ids = [item['remoteVolumeID'] for item in paired_volumes] + s_params = { + 'volumeIDs': src_volume_ids, + 'isPaired': True, + 'volumeStatus': 'active', + 'includeVirtualVolumes': False} + d_params = { + 'volumeIDs': dst_volume_ids, + 'isPaired': True, + 'volumeStatus': 'active', + 'includeVirtualVolumes': False} + try: + src_vol = src['sfe'].invoke_sfapi( + method='ListVolumes', parameters=s_params)['volumes'] + dst_vol = dst['sfe'].invoke_sfapi( + method='ListVolumes', parameters=d_params)['volumes'] + if src_vol == [] or dst_vol == []: + logging.error( + "Error getting volume information. Use --list to make sure the volumes exist and SRC and DST are correct. Exiting.") + exit(200) + except common.ApiServerError as e: + logging.error( + "Error getting volume information. Use --list to make sure the volumes exist and SRC and DST are correct. Exiting.") + logging.error("Error: " + str(e)) + exit(200) + s = 15 # 15 seconds grace period before action + if list(set([v['access'] for v in src_vol])) == ['replicationTarget'] and list( + set([v['access'] for v in dst_vol])) == ['readWrite']: + logging.warning( + "SRC is currently replicationTarget, DST is currently readWrite. 
Will reverse direction to make SRC readWrite and DST replicationTarget in " + + str(s) + + " seconds.") + reverse_src_mode = 'readWrite' + reverse_dst_mode = 'replicationTarget' + logging.info( + "All SRC and DST volumes are in consistent access mode. SRC: " + + str(src_vol) + + " DST: " + + str(dst_vol) + + ". Proceeding with reversal in " + + str(s) + + " seconds. Press CTRL+C to abort.") + countdown(s) + elif list(set([v['access'] for v in src_vol])) == ['readWrite'] and list(set([v['access'] for v in dst_vol])) == ['replicationTarget']: + logging.warning( + "SRC is currently readWrite, DST is currently replicationTarget. Will reverse direction to make SRC replicationTarget and DST readWrite in " + + str(s) + + " seconds.") + reverse_src_mode = 'replicationTarget' + reverse_dst_mode = 'readWrite' + logging.info( + "All SRC and DST volumes are in consistent access mode. SRC: " + + str(src_vol) + + " DST: " + + str(dst_vol) + + ". Proceeding with reversal in " + + str(s) + + " seconds. Press CTRL+C to abort.") + countdown(s) + else: + logging.error("SRC and DST volumes are not in expected mode. SRC: " + + str(list(set([v['access'] for v in src_vol]))[0]) + + " DST: " + + str(list(set([v['access'] for v in dst_vol]))[0]) + + ". Exiting.") + exit(200) + if args.dry == True or args.dry == 'True' or args.dry == 'true' or args.dry == 'on' or args.dry == 'On' or args.dry == 'ON': + logging.info( + "Dry run on reversal of replication direction is ON. Value: " + str(args.dry) + ".") + print("\n===> Dry run: volume IDs that would be changed to " + + reverse_src_mode + " at SRC:", src_volume_ids) + print("\n===> Dry run: volume IDs that would be changed to " + + reverse_dst_mode + " at DST:", dst_volume_ids) + logging.info( + "DRY RUN on access mode reversal for volume pairs. No changes will be made. Action would change SRC volume ID(s) " + + str(src_volume_ids) + + " to " + + str(reverse_src_mode) + + " and DST volume ID(s) " + + str(dst_volume_ids) + + " to " + + str(reverse_dst_mode) + + ".") + else: + logging.warning( + "Dry run on reversal of replication direction is OFF. Value: " + str(args.dry) + ".") + if len(paired_volumes) < 500: + try: + src['sfe'].modify_volumes( + src_volume_ids, access=reverse_src_mode) + dst['sfe'].modify_volumes( + dst_volume_ids, access=reverse_dst_mode) + logging.info( + "Reversed access mode on SRC and DST. SRC Volume IDs: " + + str(src_volume_ids) + + ".") + except common.ApiServerError as e: + logging.error( + "Failed to reverse volume access mode on SRC and DST volumes. Please check and remedy. Exiting.") + logging.error("Error: " + str(e)) + exit(200) + else: + logging.warning( + "Many volumes found, pause, reversal and resume will be done one by one. 
Volume count: " + + str( + len(paired_volumes)) + + ".") + for item in paired_volumes: + try: + dst['sfe'].modify_volume_pair( + item['remoteVolumeID'], paused_manual=True) + src['sfe'].modify_volume_pair( + item['localVolumeID'], paused_manual=True) + logging.info("Paused replication on SRC volume ID: " + + str(item['localVolumeID']) + + " and DST volume ID: " + + str(item['remoteVolumeID']) + + ".") + dst['sfe'].modify_volume( + item['remoteVolumeID'], access=reverse_dst_mode) + src['sfe'].modify_volume( + item['localVolumeID'], access=reverse_src_mode) + logging.info("Reversed access mode on SRC volume ID: " + + str(item['localVolumeID']) + + " and DST volume ID: " + + str(item['remoteVolumeID']) + + ".") + dst['sfe'].modify_volume_pair( + item['remoteVolumeID'], paused_manual=False) + src['sfe'].modify_volume_pair( + item['localVolumeID'], paused_manual=False) + logging.info("Unpaused replication on SRC volume ID: " + + str(item['localVolumeID']) + + " and DST volume ID: " + + str(item['remoteVolumeID']) + + ".") + except common.ApiServerError as e: + logging.error( + "Failed to reverse volume access mode on SRC and DST volumes. Please check and remedy. Exiting.") + logging.error("Error: " + str(e)) + exit(200) + return + + +def site(args): + """ + Site-level actions for already paired SolidFire clusters. + """ + if args.detach_site: + detach_site(src, dst) + elif args.set_access: + if args.data is None: + logging.error( + "Access mode must be specified. Use --data 'readWrite' or --data 'replicationTarget'. Exiting.") + exit(300) + else: + volume_access_property = access_type(args.data) + if access_type == 'readWrite': + logging.info("Desired access mode: " + volume_access_property) + elif access_type == 'replicationTarget': + logging.info("Desired access mode: " + volume_access_property) + set_site_volume_access_property(src, dst, volume_access_property) + else: + logging.warning("Site action not recognized") + return + + +def detach_site(src, dst): + """ + Unilaterally remove cluster pairing between SRC and DST at SRC cluster. + + Leaves DST with broken cluster pairing relationship. + """ + print("TODO: Remove cluster pairing at SRC.") + return + + +def set_site_volume_access_property(src: dict, dst: dict, mode: str): + """ + Modify all paired volumes on SRC to readWrite mode. + + Makes no changes on DST cluster. Use volume --reverse to reverse direction of replication (i.e. change mode on both sites). + """ + paired_volumes = list_volume(src, dst, []) + if len(paired_volumes) == 0 or paired_volumes is None or paired_volumes == []: + logging.error("No paired volumes found. Exiting.") + exit(300) + src_volume_ids = [item['localVolumeID'] for item in paired_volumes] + if args.dry == True or args.dry == 'True' or args.dry == 'true' or args.dry == 'on' or args.dry == 'On' or args.dry == 'ON': + logging.info( + "Dry run on unilateral access property change is ON. Value: " + str(args.dry)) + print( + "\n===> Dry run: SRC volume IDs that would be changed to " + + mode + + " : " + + str(src_volume_ids) + + ".") + logging.info( + "DRY RUN on unilateral access property change for volumes at SRC. No changes will be made. Action would change SRC volume ID(s) " + + str(src_volume_ids) + + " to " + + str(mode) + + " and ignore mode of DST volumes.") + else: + logging.warning( + "Dry run on unilateral access property change for volumes at SRC is OFF. 
Value: " + + str( + args.dry) + + ".") + if len(src_volume_ids) < 500: + try: + src['sfe'].modify_volumes(src_volume_ids, access=mode) + logging.info( + "Set volume access mode on SRC volumes " + + str(src_volume_ids) + + " to " + + str(mode) + + ".") + except BaseException: + logging.error( + "Error modifying volume access mode on SRC volumes. This causes mismatch and may prevent storage access on one or more volumes. Exiting.") + exit(300) + else: + logging.warning( + "Over 500 volumes found. Pause, reversal and resume will be done one by one. Volume count: " + + str( + len(src_volume_ids)) + + ".") + for item in src_volume_ids: + try: + src['sfe'].modify_volume(item, access=mode) + logging.info( + "Set volume access on SRC volume ID: " + + str(item) + + " to " + + str(mode) + + ".") + except BaseException: + logging.error( + "Error modifying volume access mode on SRC volume ID: " + + str(item) + + ". This causes mismatch and may prevent storage access on one or more volumes. Exiting.") + exit(300) + return + + +def countdown(s: int): + """ + Countdown timer for s seconds. + """ + for i in range(s, 0, -1): + print(i) + time.sleep(1) + return + + +def data_type(s): + if s == '': + return [] + try: + return [tuple(map(int, item.split(','))) for item in s.split(';')] + except BaseException: + logging.error( + "Pairs must be a semi-colon-separated list of comma-separated items (e.g. '1,51' or '1,51;2,52'). Exiting.") + exit(4) + + +def account_data(s): + try: + return [tuple(map(int, item.split(','))) for item in s.split(';')] + except BaseException: + logging.error( + "Account data must be a semi-colon-separated list of comma-separated items (e.g. '1,8;333,444'). Exiting.") + exit(4) + + +def account_volume_data(s): + try: + try: + s = s.split(';') + return tuple(map(int, s[0].split(','))), [int(i) + for i in s[1].split(',')] + except BaseException: + logging.error( + "Account IDs from SRC and DST must be semi-colon-separated from list of one or more comma-separated volume IDs (e.g. '1,8;330,331,332'). Exiting.") + exit(4) + except BaseException: + logging.error( + "Account data must be a semi-colon-separated list of comma-separated items (e.g. '1,8;333,444'). Exiting.") + exit(4) + + +def access_type(s: str) -> str: + if s == 'readwrite' or s == 'readWrite': + return 'readWrite' + elif s == 'replicationTarget' or s == 'replicationtarget': + return 'replicationTarget' + else: + logging.error( + "Volume access property must be one of 'readWrite' or 'replicationTarget', not " + + s + + ". Exiting.") + exit(4) + + +def replication_data(s: str) -> list: + data = [None, None] + s = s.split(';') + if s[0].lower() not in ['sync', 'async', 'snapshotsonly']: + logging.error( + "Replication mode must be one of 'Sync', 'Async', or 'SnapshotsOnly'. Exiting.") + exit(4) + else: + data[0] = s[0] + if s[1] == '': + data[1] = [] + logging.warning( + "Will change all volumes to specified replication mode.") + try: + data[1] = [int(i) for i in s[1].split(',')] + except BaseException: + logging.error( + "Volume ID(s) must be a one or more integers following the first semicolon after the replication mode string, e.g. --data 'Async;55'. 
Exiting.") + exit(4) + if data[0] == 'Sync' or data[0] == 'sync': + logging.warning("Replication mode set to Sync.") + return ['Sync', data[1]] + elif data[0] == 'Async' or data[0] == 'async': + logging.info("Desired replication is Async.") + return ['Async', data[1]] + elif data[0] == 'SnapshotsOnly' or data[0] == 'snapshotsonly': + logging.info("Desired replication is SnapshotsOnly.") + return ['SnapshotsOnly', data[1]] + else: + logging.error( + "Replication mode must be one of 'Sync', 'Async', or 'SnapshotsOnly', followed by a comma and one or more SRC volume IDs, e.g. 'Async;55,56'. Exiting.") + exit(4) + + +def replication_state(s: str) -> str: + if s.lower() not in ['pause', 'paused', 'resume', + 'pausedmanual', 'resume', 'resumed']: + logging.error( + "Replication state 'pausedManual=True' is represented with 'pause' or 'resume'. Use --data 'pause'|'resume'. Exiting.") + exit(4) + if s == 'pause' or s == 'pausedManual' or s == 'Pause': + return 'pause' + elif s == 'resume' or s == 'Resume': + return 'resume' + else: + logging.error("Replication mode must be one of 'pause', 'resume'.") + + +def snapshot_data(s: str) -> list: + s = s.split(';') + try: + if int(s[0]) < 1 or int(s[0]) > 720: + logging.error( + "Snapshot expiration time must be between 1h and 720h. Exiting.") + exit(4) + else: + return [int(s[0]), s[1]] + except BaseException: + logging.error( + "Snapshot data must be a semi-colon-separated list of integer and string (e.g. --data '168;my_snapshot'). Exiting.") + exit(1) + + +def report_data(s): + # TODO: presently not in use + pass + + +global src, dst + +parser = argparse.ArgumentParser() + +parser.add_argument( + '--dry', + type=str, + default='off', + help='Dry run mode. It is NOT available for all actions, so don not make the assumption that with --dry any action will be zero impact. Enable with --dry on. Default: off.') +parser.add_argument( + '--tlsv', + type=int, + default=None, + help='Accept only verifiable TLS certificate when working with SolidFire cluster(s) with --tlsv 1. Default: 0.') +parser.add_argument( + '--src', + default=os.environ.get( + 'SRC', + ''), + help='Source cluster: MVIP, username, password as a dictionary in Bash string representation: --src "{ \'mvip\': \'10.1.1.1\', \'username\':\'admin\', \'password\':\'*\'}".') +parser.add_argument( + '--dst', + default=os.environ.get( + 'DST', + ''), + help='Destination cluster: MVIP, username, password as a dictionary in Bash string representation: --src "{ \'mvip\': \'10.2.2.2\', \'username\':\'admin\', \'password\':\'*\'}".') + +subparsers = parser.add_subparsers() + +cluster_parser = subparsers.add_parser('cluster') +cluster_parser.add_argument_group('cluster') +cluster_parser.add_argument( + '--data', + default='', + help='Optional data input for selected cluster actions (where indicated in site action help). Not all cluster actions require or accept it.') + +cluster_action = cluster_parser.add_mutually_exclusive_group(required=True) +cluster_action.add_argument( + '--list', + action='store_true', + help='List cluster pairing between SRC and DST clusters. Requires paired SRC and DST clusters. Ignores --data because each cluster params are always available from --src, --dst.') +cluster_action.add_argument( + '--pair', + action='store_true', + required=False, + help='Pair SRC and DST for replication. Requires SRC and DST without existing pairing relationships. Multi-relationships are not supported. 
Ignores --data.') +cluster_action.add_argument( + '--unpair', + action='store_true', + required=False, + help='Unpair SRC and DST clusters. Requires SRC and DST in exclusive, mutual pairing relationship and no volume pairings. Ignores --data.') + +cluster_parser.set_defaults(func=cluster) + +volume_parser = subparsers.add_parser('volume') +volume_parser.add_argument_group('volume') +volume_parser.add_argument( + '--data', + default='', + help='Optional data input for selected volume actions (where indicated in volume action help). Not all volume actions require or accept it.') + +volume_action = volume_parser.add_mutually_exclusive_group(required=True) +volume_action.add_argument( + '--list', + action='store_true', + help='List volumes correctly paired for replication between SRC and DST cluster. Requires paired SRC and DST clusters. Optional --data argument lists specific volume pair(s).') +volume_action.add_argument( + '--pair', + action='store_true', + required=False, + help='Pair volumes for Async replication between SRC and DST clusters. Takes a semicolon-delimited list of volume IDs from SRC and DST in --data (e.g. --data "111,555;112,600"). Requires paired SRC and DST clusters.') +volume_action.add_argument( + '--unpair', + action='store_true', + required=False, + help='Unpair volumes paired for replication between SRC and DST clusters. Requires paired SRC and DST clusters and at least one volume pairing relationship. Takes --data argument with only one pair at a time. Ex: --data "111,555".') +volume_action.add_argument( + '--prime-dst', + action='store_true', + required=False, + help='Prepare DST cluster for replication by creating volumes from SRC. Creates volumes with identical properties (name, size, etc.) on DST. . Takes one 2-element list of account IDs (SRC account ID,DST account ID) and another of volume IDs on SRC. Ex: --data "1,22;444,555".') +volume_action.add_argument( + '--mismatched', + action='store_true', + required=False, + help='Check for and report any volumes in asymmetric pair relationships (one-sided and volume size mismatch). Requires paired SRC and DST clusters. Ignores --data.') +volume_action.add_argument( + '--reverse', + action='store_true', + required=False, + help='Reverse direction of volume replication. You should stop workloads using current SRC (readWrite) volumes before using this action as SRC side will be flipped to replicationTarget and SRC iSCSI clients disconnected. Ignores --data.') +volume_action.add_argument( + '--snapshot', + action='store_true', + required=False, + help='Take crash-consistent snapshot of all volumes paired for replication at SRC. Use --data to specify non-default retention (1-720) in hours and snapshot name (<16b string). Ex: --data "24;apple". Default: "168;long168h-snap".') +volume_action.add_argument( + '--set-mode', + action='store_true', + required=False, + help='Change replication mode on specific SRC volumes ID(s) in active replication relationship to DST. Mode: Sync, Async, SnapshotsOnly. Example: --data "SnapshotsOnly;101,102,103"). Requires existing cluster and volume pairing relationships between SRC and DST. WARNING: SnapshotsOnly replicates nothing if no snapshots are enabled for remote replication (create_snapshot(enable_remote_replication=True)).') +volume_action.add_argument( + '--set-status', + action='store_true', + required=False, + help='Set all SRC relationships to resume or pause state in --data. Ex: --data "pause" sets all SRC volume relationships to manual pause. 
--data "resume" resumes paused replication at SRC. (WARNING: applies to SRC, not DST).') +volume_action.add_argument( + '--report', + action='store_true', + required=False, + help='TODO: Report volume pairing relationships between SRC and DST, including mismatched and bidirectional. Requires paired SRC and DST clusters. Optional --data arguments: all, SRC, DST (default: all).') + +volume_parser.set_defaults(func=volume) + +site_parser = subparsers.add_parser('site') +site_parser.add_argument_group('site') +site_parser.add_argument( + '--data', + default='', + help='Optional data input for selected site actions (where indicated in site action help). Not all site actions require or accept it.') +site_action = site_parser.add_mutually_exclusive_group(required=True) +site_action.add_argument( + '--detach-site', + action='store_true', + help='Remove replication relationships on SRC cluster for the purpose of taking over when DST is unreachable. Requires paired SRC and DST clusters. WARNING: there is no way to re-attach. Disconnected cluster- and volume-relationships need to be removed and re-created.') +site_action.add_argument( + '--set-access', + action='store_true', + required=False, + help='Change access property on all SRC volumes with replication relationship to DST. Options: readWrite, replicationTarget (ex: --data "readWrite"). Requires existing cluster and volume pairing relationships between SRC and DST. WARNING: may stop/interrupt DST->SRC or SRC->DST replication.') +site_parser.set_defaults(func=site) + +args = parser.parse_args() + +if args.src is not None or args.dst is not None: + try: + src = ast.literal_eval(args.src) + dst = ast.literal_eval(args.dst) + except BaseException: + logging.error( + "Unable to parse SRC or DST. Review help and try again. Exiting.") + exit(1) + +if src['password'] == '': + src['password'] = getpass("Enter password for SRC cluster (not logged): ") +if dst['password'] == '': + dst['password'] = getpass("Enter password for DST cluster (not logged): ") + +if args.tlsv == 1: + src['tlsv'] = True + dst['tlsv'] = True + logging.info("TLS verification is ON.") +else: + src['tlsv'] = False + dst['tlsv'] = False + logging.info("TLS verification is OFF.") +try: + src['sfe'] = ElementFactory.create( + src['mvip'], + src['username'], + src['password'], + verify_ssl=bool( + src['tlsv']), + print_ascii_art=False) + dst['sfe'] = ElementFactory.create( + dst['mvip'], + dst['username'], + dst['password'], + verify_ssl=bool( + src['tlsv']), + print_ascii_art=False) +except common.SdkOperationError as e: + logging.error(e) + exit(2) +except Exception as e: + logging.error("Error: " + str(e)) + exit(2) +try: + src['clusterName'] = src['sfe'].get_cluster_info().to_json()[ + 'clusterInfo']['name'] + dst['clusterName'] = dst['sfe'].get_cluster_info().to_json()[ + 'clusterInfo']['name'] + logging.info( + "SRC cluster name: " + + src['clusterName'] + + " and DST cluster name: " + + dst['clusterName'] + + " obtained.") +except common.ApiServerError as e: + logging.error("Error: " + str(e)) + exit(3) +except Exception as e: + logging.error( + "Error, possibly due to one or both clusters being unreachable: " + + str(e)) + exit(3) + +args.func(args)