Skip to content

Commit

Permalink
fix rclone bin
Browse files Browse the repository at this point in the history
  • Loading branch information
C-Loftus committed Jan 28, 2025
1 parent 1d047c7 commit 6fa37bf
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 17 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,6 @@ rclone.conf

tmp*/
storage/

rclone-current*.zip
rclone.zip
5 changes: 0 additions & 5 deletions Docker/dagster/Dockerfile_user_code
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,6 @@ FROM python:3.10-slim
COPY requirements.txt user_code_requirements.txt
RUN pip install -r user_code_requirements.txt

# install rclone
RUN apt-get -y update; apt-get -y install curl unzip
COPY Docker/dagster/rclone.sh rclone.sh
RUN bash rclone.sh

# configs and runtime code
WORKDIR /opt/dagster/app
COPY userCode/ /opt/dagster/app/userCode
Expand Down
9 changes: 0 additions & 9 deletions Docker/dagster/rclone.sh

This file was deleted.

96 changes: 93 additions & 3 deletions userCode/main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import asyncio
from datetime import datetime
import os
import platform
import shutil
import subprocess
from typing import Optional, Tuple
import zipfile
from aiohttp import ClientSession, ClientTimeout
from bs4 import BeautifulSoup
from dagster import (
Expand Down Expand Up @@ -73,7 +77,93 @@ def nabu_config():
f.write(templated_data)


def ensure_local_bin_in_path():
    """Ensure ~/.local/bin is in the PATH.

    Appends the expanded ``~/.local/bin`` directory to this process's
    ``PATH`` environment variable when it is not already one of its entries.
    Child processes started afterwards inherit the updated value.

    Returns:
        The expanded path of the ``~/.local/bin`` directory.
    """
    local_bin = os.path.expanduser("~/.local/bin")

    # PATH can be unset (e.g. in a minimal container environment); treat that
    # the same as empty instead of raising KeyError.
    current_path = os.environ.get("PATH", "")
    entries = current_path.split(os.pathsep) if current_path else []

    if local_bin not in entries:
        # Joining the entries (rather than "+=") avoids a leading empty entry
        # when PATH was empty; on POSIX an empty entry means the current
        # directory.
        os.environ["PATH"] = os.pathsep.join([*entries, local_bin])
    return local_bin


@asset
def rclone_binary():
    """Download the rclone binary to a user-writable location in the PATH.

    Installs rclone as ``~/.local/bin/rclone``. If a file already exists at
    that path the asset returns immediately without downloading anything.
    Otherwise the "current" release zip for the detected platform is
    downloaded and unpacked in a temporary directory, the binary is copied
    into place and made executable, and ``rclone version`` is run against the
    installed file to verify it.

    Raises:
        SystemError: The OS/architecture pair is not one of the supported
            combinations (linux/amd64, darwin/arm64).
        RuntimeError: The download did not return HTTP 200.
        FileNotFoundError: The archive did not contain the expected
            directory or ``rclone`` binary.
        subprocess.CalledProcessError: The installed binary failed
            ``rclone version``.
    """
    # Local import: the scratch directory below is the only user of tempfile.
    import tempfile

    local_bin = ensure_local_bin_in_path()
    os.makedirs(local_bin, exist_ok=True)

    # Check if rclone is already installed in ~/.local/bin
    rclone_path = os.path.join(local_bin, "rclone")
    if os.path.isfile(rclone_path):
        print(f"Rclone is already installed at {rclone_path}.")
        return

    # Determine the platform
    system = platform.system().lower()
    arch = platform.machine().lower()

    # Map system and architecture to the appropriate Rclone download URL
    if system == "linux" and arch in ("x86_64", "amd64"):
        download_url = "https://downloads.rclone.org/rclone-current-linux-amd64.zip"
    elif system == "darwin" and arch in ("arm64", "aarch64"):
        download_url = "https://downloads.rclone.org/rclone-current-osx-arm64.zip"
    else:
        raise SystemError(
            "Unsupported system or architecture: {} on {}".format(arch, system)
        )

    def download_file(url, dest):
        """Stream ``url`` to the file ``dest``; raise RuntimeError on non-200."""
        print(f"Downloading Rclone from {url}...")
        # (connect, read) timeouts keep a stalled server from hanging the
        # run; the context manager releases the connection even on error.
        with requests.get(url, stream=True, timeout=(10, 300)) as response:
            if response.status_code != 200:
                raise RuntimeError(
                    f"Failed to download file. HTTP Status Code: {response.status_code}"
                )
            # Have urllib3 undo any Content-Encoding so the bytes written to
            # disk are the zip itself.
            response.raw.decode_content = True
            with open(dest, "wb") as f:
                shutil.copyfileobj(response.raw, f)
        print("Download complete.")

    # Download and unpack in a scratch directory that is removed on every
    # exit path, so a failed run leaves nothing behind in the working dir.
    with tempfile.TemporaryDirectory(prefix="rclone-install-") as work_dir:
        zip_file = os.path.join(work_dir, "rclone.zip")
        extract_root = os.path.join(work_dir, "rclone_extracted")

        download_file(download_url, zip_file)

        # Extract the downloaded zip file
        with zipfile.ZipFile(zip_file, "r") as zip_ref:
            print("Extracting Rclone...")
            zip_ref.extractall(extract_root)

        # The binary is looked up inside the first sub-directory of the
        # extraction root.
        extracted_dir = next(
            (
                d
                for d in os.listdir(extract_root)
                if os.path.isdir(os.path.join(extract_root, d))
            ),
            None,
        )
        if not extracted_dir:
            raise FileNotFoundError("Extracted Rclone directory not found.")

        # Copy the Rclone binary to ~/.local/bin
        extracted_binary = os.path.join(extract_root, extracted_dir, "rclone")
        if not os.path.isfile(extracted_binary):
            raise FileNotFoundError("Rclone binary not found in extracted directory.")

        print(f"Installing Rclone to {local_bin}...")
        shutil.copy(extracted_binary, rclone_path)
        os.chmod(rclone_path, 0o755)  # Set executable permissions

    print("Verifying Rclone installation...")
    try:
        # Run the file that was just installed rather than whatever "rclone"
        # resolves to first on PATH.
        subprocess.run([rclone_path, "version"], check=True)
    except (subprocess.CalledProcessError, OSError):
        # Remove the unusable binary so the "already installed" early return
        # does not hide the failure on the next run.
        os.remove(rclone_path)
        raise

    print("Installation complete.")


@asset(deps=[rclone_binary])
def rclone_config() -> str:
"""Create the rclone config by templating the rclone.conf.j2 template"""
get_dagster_logger().info("Creating rclone config")
Expand Down Expand Up @@ -101,9 +191,9 @@ def gleaner_config(context: AssetExecutionContext):
sources = []
names: set[str] = set()

assert (
len(Lines) > 0
), f"No sitemaps found in sitemap index {REMOTE_GLEANER_SITEMAP}"
assert len(Lines) > 0, (
f"No sitemaps found in sitemap index {REMOTE_GLEANER_SITEMAP}"
)

for line in Lines:
basename = REMOTE_GLEANER_SITEMAP.removesuffix(".xml")
Expand Down

0 comments on commit 6fa37bf

Please sign in to comment.