Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementing incremental by files, safer version of incremental backup. #383

Merged
merged 3 commits into from
Feb 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ CLI Help output::
log level to use (default: info, possible levels:
debug, info, warning, error, critical)
-i, --incremental incremental backup
--incremental-by-files incremental backup based on modification date of files
--starred include JSON output of starred repositories in backup
--all-starred include starred repositories in backup [*]
--watched include JSON output of watched repositories in backup
Expand Down Expand Up @@ -239,6 +240,12 @@ Using (``-i, --incremental``) will only request new data from the API **since th

This means any blocking errors on previous runs can cause a large amount of missing data in backups.

Using (``--incremental-by-files``) will request new data from the API **based on when the file was modified on the filesystem**. For example, if you modify a backed-up file yourself, its newer modification time may cause later updates to be skipped.

Still safer than the previous version.

Specifically, issues and pull requests are handled like this.

Known blocking errors
---------------------

Expand Down
29 changes: 25 additions & 4 deletions github_backup/github_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,12 @@ def parse_args(args=None):
dest="incremental",
help="incremental backup",
)
parser.add_argument(
"--incremental-by-files",
action="store_true",
dest="incremental_by_files",
help="incremental backup based on modification date of files",
)
parser.add_argument(
"--starred",
action="store_true",
Expand Down Expand Up @@ -1114,16 +1120,24 @@ def backup_issues(args, repo_cwd, repository, repos_template):
comments_template = _issue_template + "/{0}/comments"
events_template = _issue_template + "/{0}/events"
for number, issue in list(issues.items()):
issue_file = "{0}/{1}.json".format(issue_cwd, number)
if args.incremental_by_files and os.path.isfile(issue_file):
modified = os.path.getmtime(issue_file)
modified = datetime.fromtimestamp(modified).strftime("%Y-%m-%dT%H:%M:%SZ")
if modified > issue["updated_at"]:
logger.info("Skipping issue {0} because it wasn't modified since last backup".format(number))
continue

if args.include_issue_comments or args.include_everything:
template = comments_template.format(number)
issues[number]["comment_data"] = retrieve_data(args, template)
if args.include_issue_events or args.include_everything:
template = events_template.format(number)
issues[number]["event_data"] = retrieve_data(args, template)

issue_file = "{0}/{1}.json".format(issue_cwd, number)
with codecs.open(issue_file, "w", encoding="utf-8") as f:
with codecs.open(issue_file + ".temp", "w", encoding="utf-8") as f:
json_dump(issue, f)
os.rename(issue_file + ".temp", issue_file) # Unlike json_dump, this is atomic


def backup_pulls(args, repo_cwd, repository, repos_template):
Expand Down Expand Up @@ -1176,6 +1190,13 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
comments_template = _pulls_template + "/{0}/comments"
commits_template = _pulls_template + "/{0}/commits"
for number, pull in list(pulls.items()):
pull_file = "{0}/{1}.json".format(pulls_cwd, number)
if args.incremental_by_files and os.path.isfile(pull_file):
modified = os.path.getmtime(pull_file)
modified = datetime.fromtimestamp(modified).strftime("%Y-%m-%dT%H:%M:%SZ")
if modified > pull["updated_at"]:
logger.info("Skipping pull request {0} because it wasn't modified since last backup".format(number))
continue
if args.include_pull_comments or args.include_everything:
template = comments_regular_template.format(number)
pulls[number]["comment_regular_data"] = retrieve_data(args, template)
Expand All @@ -1185,9 +1206,9 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
template = commits_template.format(number)
pulls[number]["commit_data"] = retrieve_data(args, template)

pull_file = "{0}/{1}.json".format(pulls_cwd, number)
with codecs.open(pull_file, "w", encoding="utf-8") as f:
with codecs.open(pull_file + ".temp", "w", encoding="utf-8") as f:
json_dump(pull, f)
os.rename(pull_file + ".temp", pull_file) # Unlike json_dump, this is atomic


def backup_milestones(args, repo_cwd, repository, repos_template):
Expand Down