Skip to content

Commit

Permalink
Merge pull request #28 from lilab-bcb/gcloud-storage
Browse files Browse the repository at this point in the history
Replace gsutil by gcloud storage
  • Loading branch information
yihming authored Feb 19, 2025
2 parents db262a1 + e8a0897 commit d03706a
Show file tree
Hide file tree
Showing 11 changed files with 70 additions and 68 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
name: test
on:
push:
branches: [ master ]
branches: [ main ]
pull_request:
branches: [ master ]
branches: [ main ]

jobs:
build:
runs-on: ubuntu-latest
strategy:
fail-fast: true
matrix:
python-version: [ 3.9]
python-version: [ '3.9', '3.10', '3.11', '3.12' ]
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
Expand Down
9 changes: 5 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@ repos:
# - id: double-quote-string-fixer # for single quotes: uncomment and add black config "skip-string-normalization"
- id: trailing-whitespace
- id: end-of-file-fixer
- repo: https://github.com/PyCQA/docformatter
rev: v1.7.5
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.9.6
hooks:
- id: docformatter
args: ["--wrap-summaries=100", "--wrap-descriptions=100"]
- id: ruff
args: [ --fix ]
- id: ruff-format
- repo: https://github.com/psf/black
rev: 24.4.2
hooks:
Expand Down
37 changes: 13 additions & 24 deletions strato/backends/_gcp.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,28 @@
import shutil
from subprocess import check_call
from subprocess import DEVNULL, check_call


class GCPBackend:
def __init__(self):
if shutil.which("gsutil") is None:
if shutil.which("gcloud") is None:
raise Exception("google-cloud-sdk is not installed!")
self._backend = "gcp"
self._call_prefix = [
"gsutil",
"-q",
"-o",
"GSUtil:parallel_composite_upload_threshold=150M",
"gcloud",
"storage",
"--no-user-output-enabled",
# "-o",
# "GSUtil:parallel_composite_upload_threshold=150M",
]

def copy(self, recursive, parallel, ionice, filenames, quiet, dryrun):
def copy(self, recursive, ionice, filenames, quiet, dryrun):
# FIXME make local target directories to mimic behavior of aws and local backends
call_args = (
["ionice", "-c", "2", "-n", "7"]
if ionice and (shutil.which("ionice") is not None)
else []
)
call_args += self._call_prefix
if parallel:
call_args.append("-m")
call_args.append("cp")
if recursive:
call_args.append("-r")
Expand All @@ -33,7 +32,7 @@ def copy(self, recursive, parallel, ionice, filenames, quiet, dryrun):
if not dryrun:
check_call(call_args)

def sync(self, parallel, ionice, source, target, quiet, dryrun):
def sync(self, ionice, source, target, quiet, dryrun):
# If target folder is local.
if len(target.split("://")) == 1:
import os
Expand All @@ -47,18 +46,14 @@ def sync(self, parallel, ionice, source, target, quiet, dryrun):
else []
)
call_args += self._call_prefix
if parallel:
call_args.append("-m")
call_args.extend(["rsync", "-d", "-r", source, target])
call_args.extend(["rsync", "--delete-unmatched-destination-objects", "-r", source, target])
if not quiet or dryrun:
print(" ".join(call_args))
if not dryrun:
check_call(call_args)

def delete(self, recursive, parallel, filenames, quiet, dryrun):
def delete(self, recursive, filenames, quiet, dryrun):
call_args = self._call_prefix.copy()
if parallel:
call_args.append("-m")
call_args.append("rm")
if recursive:
call_args.append("-r")
Expand All @@ -70,11 +65,5 @@ def delete(self, recursive, parallel, filenames, quiet, dryrun):

def stat(self, filename):
assert filename.startswith("gs://"), "Must be a GS URI!"
is_folder = True if filename[-1] == "/" else False

if is_folder:
call_args = ["gsutil", "-q", "stat", filename + "*"]
else:
call_args = ["gsutil", "-q", "stat", filename]

check_call(call_args)
call_args = ["gcloud", "storage", "ls", filename]
check_call(call_args, stdout=DEVNULL)
3 changes: 2 additions & 1 deletion strato/backends/_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ def sync(self, ionice, source, target, quiet, dryrun):
if shutil.which("rsync") is None:
raise Exception("rsync is not installed!")
# target = os.path.dirname(target)
os.makedirs(target, exist_ok=True)
if not dryrun:
os.makedirs(target, exist_ok=True)
call_args = (
["ionice", "-c", "2", "-n", "7"]
if ionice and (shutil.which("ionice")) is not None
Expand Down
16 changes: 9 additions & 7 deletions strato/commands/cp.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,17 @@
strato cp s3://my-bucket/source_folder/*.zip /target_folder/
# GCP upload
strato cp -m -r --ionice file1 folder2 gs://my-bucket/target_folder/
strato cp -r --ionice file1 folder2 gs://my-bucket/target_folder/
# GCP download
mkdir /target_folder
strato cp -m gs://my-bucket/source_folder/*.zip /target_folder/
strato cp gs://my-bucket/source_folder/*.zip /target_folder/
# On local machine
strato cp -r file1 folder2 /target_folder/
"""


def copy_files(recursive, parallel, ionice, filenames, profile, quiet, dryrun):
def copy_files(recursive, ionice, filenames, profile, quiet, dryrun):
backend = get_backend(filenames)

if backend == "aws":
Expand All @@ -32,7 +32,7 @@ def copy_files(recursive, parallel, ionice, filenames, profile, quiet, dryrun):
from strato.backends import GCPBackend

be = GCPBackend()
be.copy(recursive, parallel, ionice, filenames, quiet, dryrun)
be.copy(recursive, ionice, filenames, quiet, dryrun)
else:
from strato.backends import LocalBackend

Expand All @@ -57,7 +57,7 @@ def main(argsv):
"-m",
dest="parallel",
action="store_true",
help="Run operations in parallel. Only available for GCP backend.",
help=argparse.SUPPRESS,
)
parser.add_argument(
"--ionice",
Expand All @@ -73,7 +73,10 @@ def main(argsv):
help="AWS profile. Only works for aws backend, and use the default profile if not provided.",
)
parser.add_argument(
"--quiet", dest="quiet", action="store_true", help="Hide the underlying command."
"--quiet",
dest="quiet",
action="store_true",
help="Hide the underlying command.",
)
parser.add_argument(
"--dryrun",
Expand All @@ -88,7 +91,6 @@ def main(argsv):
args = parser.parse_args(argsv)
copy_files(
args.recursive,
args.parallel,
args.ionice,
args.files,
args.profile,
Expand Down
15 changes: 9 additions & 6 deletions strato/commands/rm.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@

example_text = """Examples:
strato rm s3://my-bucket/file1 s3://my-bucket/folder2/
strato rm -m gs://my-bucket/file1 gs://my-bucket/folder2 gs://my-bucket/folder3/*.zip
strato rm gs://my-bucket/file1 gs://my-bucket/folder2 gs://my-bucket/folder3/*.zip
strato rm file1 folder2
"""


def delete_files(recursive, parallel, filenames, profile, quiet, dryrun):
def delete_files(recursive, filenames, profile, quiet, dryrun):
backend = get_backend(filenames)

if backend == "aws":
Expand All @@ -22,7 +22,7 @@ def delete_files(recursive, parallel, filenames, profile, quiet, dryrun):
from strato.backends import GCPBackend

be = GCPBackend()
be.delete(recursive, parallel, filenames, quiet, dryrun)
be.delete(recursive, filenames, quiet, dryrun)
else:
from strato.backends import LocalBackend

Expand Down Expand Up @@ -54,10 +54,13 @@ def main(argsv):
"-m",
dest="parallel",
action="store_true",
help="Run operations in parallel. Only available for GCP backend.",
help=argparse.SUPPRESS,
)
parser.add_argument(
"--quiet", dest="quiet", action="store_true", help="Hide the underlying command."
"--quiet",
dest="quiet",
action="store_true",
help="Hide the underlying command.",
)
parser.add_argument(
"--dryrun",
Expand All @@ -69,4 +72,4 @@ def main(argsv):
)

args = parser.parse_args(argsv)
delete_files(args.recursive, args.parallel, args.files, args.profile, args.quiet, args.dryrun)
delete_files(args.recursive, args.files, args.profile, args.quiet, args.dryrun)
15 changes: 9 additions & 6 deletions strato/commands/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@

example_text = """Examples:
strato sync source_folder s3://my-bucket/target_folder
strato sync -m --ionice source_folder gs://my-bucket/target_folder
strato sync --ionice source_folder gs://my-bucket/target_folder
strato sync source_folder target_folder
"""


def synchronize_folders(parallel, ionice, source, target, profile, quiet, dryrun):
def synchronize_folders(ionice, source, target, profile, quiet, dryrun):
backend = get_backend([source, target])

if backend == "aws":
Expand All @@ -22,7 +22,7 @@ def synchronize_folders(parallel, ionice, source, target, profile, quiet, dryrun
from strato.backends import GCPBackend

be = GCPBackend()
be.sync(parallel, ionice, source, target, quiet, dryrun)
be.sync(ionice, source, target, quiet, dryrun)
else:
from strato.backends import LocalBackend

Expand All @@ -40,7 +40,7 @@ def main(argsv):
"-m",
dest="parallel",
action="store_true",
help="Run operations in parallel. Only available for GCP backend.",
help=argparse.SUPPRESS,
)
parser.add_argument(
"--ionice",
Expand All @@ -56,7 +56,10 @@ def main(argsv):
help="AWS profile. Only works for aws backend, and use the default profile if not provided.",
)
parser.add_argument(
"--quiet", dest="quiet", action="store_true", help="Hide the underlying command."
"--quiet",
dest="quiet",
action="store_true",
help="Hide the underlying command.",
)
parser.add_argument(
"--dryrun",
Expand All @@ -68,5 +71,5 @@ def main(argsv):

args = parser.parse_args(argsv)
synchronize_folders(
args.parallel, args.ionice, args.source, args.target, args.profile, args.quiet, args.dryrun
args.ionice, args.source, args.target, args.profile, args.quiet, args.dryrun
)
2 changes: 1 addition & 1 deletion strato/tests/helpers.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
gsutil = "gsutil -q -o GSUtil:parallel_composite_upload_threshold=150M"
gcloud = "gcloud storage --no-user-output-enabled"
8 changes: 4 additions & 4 deletions strato/tests/test_cp.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest

from strato.commands import cp
from strato.tests.helpers import gsutil
from strato.tests.helpers import gcloud


def test_cp_file_aws(capsys):
Expand All @@ -25,17 +25,17 @@ def test_cp_dir_aws(capsys, trailing_slash):

def test_cp_file_gcp(capsys):
cp.main(["file1", "gs://foo/bar/", "--dryrun"])
assert gsutil + " cp file1 gs://foo/bar/\n" == capsys.readouterr().out
assert gcloud + " cp file1 gs://foo/bar/\n" == capsys.readouterr().out


def test_cp_dir_gcp(capsys):
cp.main(["dir1", "gs://foo/bar", "-r", "--dryrun"])
assert gsutil + " cp -r dir1 gs://foo/bar\n" == capsys.readouterr().out
assert gcloud + " cp -r dir1 gs://foo/bar\n" == capsys.readouterr().out


def test_cp_file_local(capsys):
cp.main(["file1", "/bar/foo", "--dryrun"])
assert "cp file1 /bar/foo\n" == capsys.readouterr().out
assert "mkdir -p /bar\ncp file1 /bar/foo\n" == capsys.readouterr().out


def test_cp_dir_local(capsys):
Expand Down
6 changes: 3 additions & 3 deletions strato/tests/test_rm.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from strato.commands import rm
from strato.tests.helpers import gsutil
from strato.tests.helpers import gcloud


def test_rm_aws(capsys):
Expand All @@ -16,12 +16,12 @@ def test_rm_aws_recursive(capsys):

def test_rm_gcp(capsys):
rm.main(["gs://foo/bar/", "--dryrun"])
assert gsutil + " rm gs://foo/bar/\n" == capsys.readouterr().out
assert gcloud + " rm gs://foo/bar/\n" == capsys.readouterr().out


def test_rm_gcp_recursive(capsys):
rm.main(["gs://foo/bar/", "--dryrun", "--recursive"])
assert gsutil + " rm -r gs://foo/bar/\n" == capsys.readouterr().out
assert gcloud + " rm -r gs://foo/bar/\n" == capsys.readouterr().out


def test_rm_local(capsys):
Expand Down
17 changes: 10 additions & 7 deletions strato/tests/test_sync.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
from strato.commands import sync
from strato.tests.helpers import gsutil
from strato.tests.helpers import gcloud


def test_sync_aws(capsys):
sync.main(["file1", "s3://foo/bar/", "--dryrun"])
sync.main(["folder1", "s3://foo/bar/", "--dryrun"])
assert (
"aws s3 sync --delete --only-show-errors file1 s3://foo/bar/\n" == capsys.readouterr().out
"aws s3 sync --delete --only-show-errors folder1 s3://foo/bar/\n" == capsys.readouterr().out
)


def test_sync_gcp(capsys):
sync.main(["file1", "gs://foo/bar/", "--dryrun"])
assert gsutil + " rsync -d -r file1 gs://foo/bar/\n" == capsys.readouterr().out
sync.main(["folder1", "gs://foo/bar/", "--dryrun"])
assert (
gcloud + " rsync --delete-unmatched-destination-objects -r folder1 gs://foo/bar/\n"
== capsys.readouterr().out
)


def test_sync_local(capsys):
sync.main(["file1", "/bar/foo", "--dryrun"])
assert "rsync -r --delete file1 /bar\n" == capsys.readouterr().out
sync.main(["folder1", "/bar/foo", "--dryrun"])
assert "rsync -r --delete folder1/ /bar/foo/\n" == capsys.readouterr().out

0 comments on commit d03706a

Please sign in to comment.