Skip to content

Commit

Permalink
Fix generation of commit history after moving providers (#43412)
Browse files Browse the repository at this point in the history
Another teething problem after moving providers in #42505. After
moving providers, the history of the current folder in "providers"
only contains changes after the move - it does not include
changes from before the move - and since we always regenerate
the full list of commits - they were missing.

We cannot use `--follow` - because `git log --follow` only
works for single files, not directories, but since the move
was very predictable ("airflow/providers/nnn" ->
"airflow/providers/src/airflow/providers/nnn") we can add the old
path to `git log` command to get both - pre and post move commit
history.
  • Loading branch information
potiuk authored Oct 27, 2024
1 parent 2957ff5 commit 4e3c96f
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
refresh_provider_metadata_with_provider_id,
render_template,
)
from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT
from airflow_breeze.utils.run_utils import run_command
from airflow_breeze.utils.shared_options import get_verbose
from airflow_breeze.utils.versions import get_version_tag
Expand Down Expand Up @@ -186,11 +187,14 @@ class PrepareReleaseDocsUserQuitException(Exception):
}


def _get_git_log_command(from_commit: str | None = None, to_commit: str | None = None) -> list[str]:
def _get_git_log_command(
folder_paths: list[Path] | None = None, from_commit: str | None = None, to_commit: str | None = None
) -> list[str]:
"""Get git command to run for the current repo from the current folder.
The current directory should always be the package folder.
:param folder_paths: list of folder paths to check for changes
:param from_commit: if present - base commit from which to start the log from
:param to_commit: if present - final commit which should be the start of the log
:return: git command to run
Expand All @@ -207,7 +211,8 @@ def _get_git_log_command(from_commit: str | None = None, to_commit: str | None =
git_cmd.append(from_commit)
elif to_commit:
raise ValueError("It makes no sense to specify to_commit without from_commit.")
git_cmd.extend(["--", "."])
folders = [folder_path.as_posix() for folder_path in folder_paths] if folder_paths else ["."]
git_cmd.extend(["--", *folders])
return git_cmd


Expand Down Expand Up @@ -307,18 +312,24 @@ def _get_all_changes_for_package(
get_console().print(f"[info]Checking if tag '{current_tag_no_suffix}' exist.")
result = run_command(
["git", "rev-parse", current_tag_no_suffix],
cwd=provider_details.source_provider_package_path,
cwd=AIRFLOW_SOURCES_ROOT,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
check=False,
)
providers_folder_paths = [
provider_details.source_provider_package_path,
provider_details.old_source_provider_package_path,
]
if not reapply_templates_only and result.returncode == 0:
if get_verbose():
get_console().print(f"[info]The tag {current_tag_no_suffix} exists.")
# The tag already exists
result = run_command(
_get_git_log_command(f"{HTTPS_REMOTE}/{base_branch}", current_tag_no_suffix),
cwd=provider_details.source_provider_package_path,
_get_git_log_command(
providers_folder_paths, f"{HTTPS_REMOTE}/{base_branch}", current_tag_no_suffix
),
cwd=AIRFLOW_SOURCES_ROOT,
capture_output=True,
text=True,
check=True,
Expand All @@ -333,8 +344,10 @@ def _get_all_changes_for_package(
last_doc_only_hash = doc_only_change_file.read_text().strip()
try:
result = run_command(
_get_git_log_command(f"{HTTPS_REMOTE}/{base_branch}", last_doc_only_hash),
cwd=provider_details.source_provider_package_path,
_get_git_log_command(
providers_folder_paths, f"{HTTPS_REMOTE}/{base_branch}", last_doc_only_hash
),
cwd=AIRFLOW_SOURCES_ROOT,
capture_output=True,
text=True,
check=True,
Expand Down Expand Up @@ -387,8 +400,8 @@ def _get_all_changes_for_package(
for version in provider_details.versions[1:]:
version_tag = get_version_tag(version, provider_package_id)
result = run_command(
_get_git_log_command(next_version_tag, version_tag),
cwd=provider_details.source_provider_package_path,
_get_git_log_command(providers_folder_paths, next_version_tag, version_tag),
cwd=AIRFLOW_SOURCES_ROOT,
capture_output=True,
text=True,
check=True,
Expand All @@ -402,7 +415,7 @@ def _get_all_changes_for_package(
next_version_tag = version_tag
current_version = version
result = run_command(
_get_git_log_command(next_version_tag),
_get_git_log_command(providers_folder_paths, next_version_tag),
cwd=provider_details.source_provider_package_path,
capture_output=True,
text=True,
Expand Down
7 changes: 7 additions & 0 deletions dev/breeze/src/airflow_breeze/utils/packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
)
from airflow_breeze.utils.console import get_console
from airflow_breeze.utils.path_utils import (
AIRFLOW_OLD_PROVIDERS_DIR,
AIRFLOW_PROVIDERS_NS_PACKAGE,
BREEZE_SOURCES_ROOT,
DOCS_ROOT,
Expand Down Expand Up @@ -75,6 +76,7 @@ class ProviderPackageDetails(NamedTuple):
full_package_name: str
pypi_package_name: str
source_provider_package_path: Path
old_source_provider_package_path: Path
documentation_provider_package_path: Path
changelog_path: Path
provider_description: str
Expand Down Expand Up @@ -385,6 +387,10 @@ def get_source_package_path(provider_id: str) -> Path:
return AIRFLOW_PROVIDERS_NS_PACKAGE.joinpath(*provider_id.split("."))


def get_old_source_package_path(provider_id: str) -> Path:
return AIRFLOW_OLD_PROVIDERS_DIR.joinpath(*provider_id.split("."))


def get_documentation_package_path(provider_id: str) -> Path:
return DOCS_ROOT / f"apache-airflow-providers-{provider_id.replace('.', '-')}"

Expand Down Expand Up @@ -515,6 +521,7 @@ def get_provider_details(provider_id: str) -> ProviderPackageDetails:
full_package_name=f"airflow.providers.{provider_id}",
pypi_package_name=f"apache-airflow-providers-{provider_id.replace('.', '-')}",
source_provider_package_path=get_source_package_path(provider_id),
old_source_provider_package_path=get_old_source_package_path(provider_id),
documentation_provider_package_path=get_documentation_package_path(provider_id),
changelog_path=get_source_package_path(provider_id) / "CHANGELOG.rst",
provider_description=provider_info["description"],
Expand Down
1 change: 1 addition & 0 deletions dev/breeze/src/airflow_breeze/utils/path_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@ def find_airflow_sources_root_to_operate_on() -> Path:
AIRFLOW_SOURCES_ROOT = find_airflow_sources_root_to_operate_on().resolve()
AIRFLOW_WWW_DIR = AIRFLOW_SOURCES_ROOT / "airflow" / "www"
AIRFLOW_UI_DIR = AIRFLOW_SOURCES_ROOT / "airflow" / "ui"
AIRFLOW_OLD_PROVIDERS_DIR = AIRFLOW_SOURCES_ROOT / "airflow" / "providers"
AIRFLOW_PROVIDERS_PROJECT = AIRFLOW_SOURCES_ROOT / "providers"
AIRFLOW_PROVIDERS_SRC = AIRFLOW_PROVIDERS_PROJECT / "src"
AIRFLOW_PROVIDERS_NS_PACKAGE = AIRFLOW_PROVIDERS_SRC / "airflow" / "providers"
Expand Down
30 changes: 25 additions & 5 deletions dev/breeze/tests/test_provider_documentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import random
import string
from pathlib import Path

import pytest

Expand Down Expand Up @@ -97,28 +98,47 @@ def test_get_version_tag(version: str, provider_id: str, suffix: str, tag: str):


@pytest.mark.parametrize(
"from_commit, to_commit, git_command",
"folder_paths, from_commit, to_commit, git_command",
[
(None, None, ["git", "log", "--pretty=format:%H %h %cd %s", "--date=short", "--", "."]),
(None, None, None, ["git", "log", "--pretty=format:%H %h %cd %s", "--date=short", "--", "."]),
(
None,
"from_tag",
None,
["git", "log", "--pretty=format:%H %h %cd %s", "--date=short", "from_tag", "--", "."],
),
(
None,
"from_tag",
"to_tag",
["git", "log", "--pretty=format:%H %h %cd %s", "--date=short", "from_tag...to_tag", "--", "."],
),
(
[Path("a"), Path("b")],
"from_tag",
"to_tag",
[
"git",
"log",
"--pretty=format:%H %h %cd %s",
"--date=short",
"from_tag...to_tag",
"--",
"a",
"b",
],
),
],
)
def test_get_git_log_command(from_commit: str | None, to_commit: str | None, git_command: list[str]):
assert _get_git_log_command(from_commit, to_commit) == git_command
def test_get_git_log_command(
folder_paths: list[str] | None, from_commit: str | None, to_commit: str | None, git_command: list[str]
):
assert _get_git_log_command(folder_paths, from_commit, to_commit) == git_command


def test_get_git_log_command_wrong():
with pytest.raises(ValueError, match=r"to_commit without from_commit"):
_get_git_log_command(None, "to_commit")
_get_git_log_command(None, None, "to_commit")


@pytest.mark.parametrize(
Expand Down

0 comments on commit 4e3c96f

Please sign in to comment.