From e206a0cbfa54e13fa9008d7217c1843ab80cb788 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 9 Aug 2024 08:22:02 -0600 Subject: [PATCH] chore: Prepare for 40.0.0 release (#801) * use new changelog script from DataFusion * Update changelog --- CHANGELOG.md | 35 ++++++++++ dev/release/README.md | 2 +- dev/release/generate-changelog.py | 111 +++++++++++++++++++++--------- requirements.in | 1 + 4 files changed, 115 insertions(+), 34 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 32a74d82..305af572 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,41 @@ # DataFusion Python Changelog +## [40.0.0](https://github.com/apache/datafusion-python/tree/40.0.0) (2024-08-09) + +This release consists of 18 commits from 4 contributors. See credits at the end of this changelog for more information. + +- Update changelog for 39.0.0 [#742](https://github.com/apache/datafusion-python/pull/742) (andygrove) +- build(deps): bump uuid from 1.8.0 to 1.9.1 [#744](https://github.com/apache/datafusion-python/pull/744) (dependabot[bot]) +- build(deps): bump mimalloc from 0.1.42 to 0.1.43 [#745](https://github.com/apache/datafusion-python/pull/745) (dependabot[bot]) +- build(deps): bump syn from 2.0.67 to 2.0.68 [#746](https://github.com/apache/datafusion-python/pull/746) (dependabot[bot]) +- Tsaucer/find window fn [#747](https://github.com/apache/datafusion-python/pull/747) (timsaucer) +- Python wrapper classes for all user interfaces [#750](https://github.com/apache/datafusion-python/pull/750) (timsaucer) +- Expose array sort [#764](https://github.com/apache/datafusion-python/pull/764) (timsaucer) +- Upgrade protobuf and remove GH Action googletest-installer [#773](https://github.com/apache/datafusion-python/pull/773) (Michael-J-Ward) +- Upgrade Datafusion 40 [#771](https://github.com/apache/datafusion-python/pull/771) (Michael-J-Ward) +- Bugfix: Calling count with None arguments [#768](https://github.com/apache/datafusion-python/pull/768) (timsaucer) +- Add in user 
example that compares two different approaches to UDFs [#770](https://github.com/apache/datafusion-python/pull/770) (timsaucer) +- Add missing exports for wrapper modules [#782](https://github.com/apache/datafusion-python/pull/782) (timsaucer) +- Add PyExpr to_variant conversions [#793](https://github.com/apache/datafusion-python/pull/793) (Michael-J-Ward) +- Add missing expressions to wrapper export [#795](https://github.com/apache/datafusion-python/pull/795) (timsaucer) +- Doc/cross reference [#791](https://github.com/apache/datafusion-python/pull/791) (timsaucer) +- Re-Enable `num_centroids` to `approx_percentile_cont` [#798](https://github.com/apache/datafusion-python/pull/798) (Michael-J-Ward) +- UDAF process all state variables [#799](https://github.com/apache/datafusion-python/pull/799) (timsaucer) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. + +``` + 9 Tim Saucer + 4 Michael J Ward + 3 dependabot[bot] + 2 Andy Grove +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. 
+ ## [39.0.0](https://github.com/apache/datafusion-python/tree/39.0.0) (2024-06-25) **Merged pull requests:** diff --git a/dev/release/README.md b/dev/release/README.md index c4372c83..93c2f97b 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -63,7 +63,7 @@ We maintain a `CHANGELOG.md` so our users know what has been changed between rel The changelog is generated using a Python script: ```bash -$ GITHUB_TOKEN=<TOKEN> ./dev/release/generate-changelog.py apache/datafusion-python 24.0.0 HEAD > dev/changelog/25.0.0.md +$ GITHUB_TOKEN=<TOKEN> ./dev/release/generate-changelog.py 24.0.0 HEAD 25.0.0 > dev/changelog/25.0.0.md ``` This script creates a changelog from GitHub PRs based on the labels associated with them as well as looking for diff --git a/dev/release/generate-changelog.py b/dev/release/generate-changelog.py index af097ce9..5645d2f7 100755 --- a/dev/release/generate-changelog.py +++ b/dev/release/generate-changelog.py @@ -20,23 +20,20 @@ from github import Github import os import re - +import subprocess def print_pulls(repo_name, title, pulls): - if len(pulls) > 0: + if len(pulls) > 0: print("**{}:**".format(title)) print() - for pull, commit in pulls: + for (pull, commit) in pulls: url = "https://github.com/{}/pull/{}".format(repo_name, pull.number) - print( - "- {} [#{}]({}) ({})".format( - pull.title, pull.number, url, commit.author.login - ) - ) + print("- {} [#{}]({}) ({})".format(pull.title, pull.number, url, commit.author.login)) print() -def generate_changelog(repo, repo_name, tag1, tag2): +def generate_changelog(repo, repo_name, tag1, tag2, version): + # get a list of commits between two tags print(f"Fetching list of commits between {tag1} and {tag2}", file=sys.stderr) comparison = repo.compare(tag1, tag2) @@ -55,45 +52,95 @@ def generate_changelog(repo, repo_name, tag1, tag2): all_pulls.append((pull, commit)) # we split the pulls into categories - # TODO: make categories configurable breaking = [] bugs = [] docs = [] enhancements = [] + performance 
= [] + other = [] # categorize the pull requests based on GitHub labels print("Categorizing pull requests", file=sys.stderr) - for pull, commit in all_pulls: + for (pull, commit) in all_pulls: + # see if PR title uses Conventional Commits - cc_type = "" - # cc_scope = '' - cc_breaking = "" - parts = re.findall(r"^([a-z]+)(\([a-z]+\))?(!)?:", pull.title) + cc_type = '' + cc_scope = '' + cc_breaking = '' + parts = re.findall(r'^([a-z]+)(\([a-z]+\))?(!)?:', pull.title) if len(parts) == 1: parts_tuple = parts[0] - cc_type = parts_tuple[0] # fix, feat, docs, chore - # cc_scope = parts_tuple[1] # component within project - cc_breaking = parts_tuple[2] == "!" + cc_type = parts_tuple[0] # fix, feat, docs, chore + cc_scope = parts_tuple[1] # component within project + cc_breaking = parts_tuple[2] == '!' labels = [label.name for label in pull.labels] - # print(pull.number, labels, parts, file=sys.stderr) - if "api change" in labels or cc_breaking: + if 'api change' in labels or cc_breaking: breaking.append((pull, commit)) - elif "bug" in labels or cc_type == "fix": + elif 'bug' in labels or cc_type == 'fix': bugs.append((pull, commit)) - elif "enhancement" in labels or cc_type == "feat": + elif 'performance' in labels or cc_type == 'perf': + performance.append((pull, commit)) + elif 'enhancement' in labels or cc_type == 'feat': enhancements.append((pull, commit)) - elif "documentation" in labels or cc_type == "docs": + elif 'documentation' in labels or cc_type == 'docs' or cc_type == 'doc': docs.append((pull, commit)) + else: + other.append((pull, commit)) # produce the changelog content print("Generating changelog content", file=sys.stderr) + + # ASF header + print("""\n""") + + print(f"# Apache DataFusion Python {version} Changelog\n") + + # get the number of commits + commit_count = subprocess.check_output(f"git log --pretty=oneline {tag1}..{tag2} | wc -l", shell=True, text=True).strip() + + # get number of contributors + contributor_count = subprocess.check_output(f"git 
shortlog -sn {tag1}..{tag2} | wc -l", shell=True, text=True).strip() + + print(f"This release consists of {commit_count} commits from {contributor_count} contributors. " + f"See credits at the end of this changelog for more information.\n") + print_pulls(repo_name, "Breaking changes", breaking) + print_pulls(repo_name, "Performance related", performance) print_pulls(repo_name, "Implemented enhancements", enhancements) print_pulls(repo_name, "Fixed bugs", bugs) print_pulls(repo_name, "Documentation updates", docs) - print_pulls(repo_name, "Merged pull requests", all_pulls) + print_pulls(repo_name, "Other", other) + # show code contributions + credits = subprocess.check_output(f"git shortlog -sn {tag1}..{tag2}", shell=True, text=True).rstrip() + + print("## Credits\n") + print("Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) " + "per contributor.\n") + print("```") + print(credits) + print("```\n") + + print("Thank you also to everyone who contributed in other ways such as filing issues, reviewing " + "PRs, and providing feedback on this release.\n") def cli(args=None): """Process command line arguments.""" @@ -101,19 +148,17 @@ def cli(args=None): args = sys.argv[1:] parser = argparse.ArgumentParser() - parser.add_argument( - "project", help="The project name e.g. apache/datafusion-python" - ) - parser.add_argument("tag1", help="The previous release tag") - parser.add_argument("tag2", help="The current release tag") + parser.add_argument("tag1", help="The previous commit or tag (e.g. 0.1.0)") + parser.add_argument("tag2", help="The current commit or tag (e.g. 
HEAD)") + parser.add_argument("version", help="The version number to include in the changelog") args = parser.parse_args() token = os.getenv("GITHUB_TOKEN") + project = "apache/datafusion-python" g = Github(token) - repo = g.get_repo(args.project) - generate_changelog(repo, args.project, args.tag1, args.tag2) - + repo = g.get_repo(project) + generate_changelog(repo, project, args.tag1, args.tag2, args.version) if __name__ == "__main__": - cli() + cli() \ No newline at end of file diff --git a/requirements.in b/requirements.in index b2a1a48d..1b7f6205 100644 --- a/requirements.in +++ b/requirements.in @@ -23,3 +23,4 @@ pytest ruff toml importlib_metadata; python_version < "3.8" +PyGitHub