diff --git a/README.rst b/README.rst
index 9e01f26..5dcef95 100644
--- a/README.rst
+++ b/README.rst
@@ -80,6 +80,7 @@ CLI Help output::
                         log level to use (default: info, possible levels:
                         debug, info, warning, error, critical)
   -i, --incremental     incremental backup
+  --incremental-by-files incremental backup using modified time of files
   --starred             include JSON output of starred repositories in backup
   --all-starred         include starred repositories in backup [*]
   --watched             include JSON output of watched repositories in backup
@@ -239,6 +240,12 @@ Using (``-i, --incremental``) will only request new data from the API **since th
 
 This means any blocking errors on previous runs can cause a large amount of missing data in backups.
 
+Using (``--incremental-by-files``) will only request new data from the API **based on the modification time of the backup files on the filesystem**. Note that if you modify a backup file yourself, the corresponding item may be skipped on the next run.
+
+This is still safer than (``-i, --incremental``): a file that was never written (for example because a previous run failed) does not exist yet, so its data is fetched again on the next run.
+
+Currently only issues and pull requests are handled this way.
+
 Known blocking errors
 ---------------------
 
diff --git a/github_backup/github_backup.py b/github_backup/github_backup.py
index ebd4f01..d845d5b 100644
--- a/github_backup/github_backup.py
+++ b/github_backup/github_backup.py
@@ -181,6 +181,12 @@ def parse_args(args=None):
         dest="incremental",
         help="incremental backup",
     )
+    parser.add_argument(
+        "--incremental-by-files",
+        action="store_true",
+        dest="incremental_by_files",
+        help="incremental backup based on modification date of files",
+    )
     parser.add_argument(
         "--starred",
         action="store_true",
@@ -1114,6 +1120,14 @@ def backup_issues(args, repo_cwd, repository, repos_template):
     comments_template = _issue_template + "/{0}/comments"
     events_template = _issue_template + "/{0}/events"
     for number, issue in list(issues.items()):
+        issue_file = "{0}/{1}.json".format(issue_cwd, number)
+        if args.incremental_by_files and os.path.isfile(issue_file):
+            modified = os.path.getmtime(issue_file)
+            modified = datetime.utcfromtimestamp(modified).strftime("%Y-%m-%dT%H:%M:%SZ")  # UTC, to match GitHub's updated_at
+            if modified > issue["updated_at"]:
+                logger.info("Skipping issue {0} because it wasn't modified since last backup".format(number))
+                continue
+
         if args.include_issue_comments or args.include_everything:
             template = comments_template.format(number)
             issues[number]["comment_data"] = retrieve_data(args, template)
@@ -1121,9 +1135,9 @@ def backup_issues(args, repo_cwd, repository, repos_template):
             template = events_template.format(number)
             issues[number]["event_data"] = retrieve_data(args, template)
 
-        issue_file = "{0}/{1}.json".format(issue_cwd, number)
-        with codecs.open(issue_file, "w", encoding="utf-8") as f:
+        with codecs.open(issue_file + ".temp", "w", encoding="utf-8") as f:
             json_dump(issue, f)
+        os.rename(issue_file + ".temp", issue_file)  # Unlike json_dump, this is atomic
 
 
 def backup_pulls(args, repo_cwd, repository, repos_template):
@@ -1176,6 +1190,13 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
     comments_template = _pulls_template + "/{0}/comments"
     commits_template = _pulls_template + "/{0}/commits"
     for number, pull in list(pulls.items()):
+        pull_file = "{0}/{1}.json".format(pulls_cwd, number)
+        if args.incremental_by_files and os.path.isfile(pull_file):
+            modified = os.path.getmtime(pull_file)
+            modified = datetime.utcfromtimestamp(modified).strftime("%Y-%m-%dT%H:%M:%SZ")  # UTC, to match GitHub's updated_at
+            if modified > pull["updated_at"]:
+                logger.info("Skipping pull request {0} because it wasn't modified since last backup".format(number))
+                continue
         if args.include_pull_comments or args.include_everything:
             template = comments_regular_template.format(number)
             pulls[number]["comment_regular_data"] = retrieve_data(args, template)
@@ -1185,9 +1206,9 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
             template = commits_template.format(number)
             pulls[number]["commit_data"] = retrieve_data(args, template)
 
-        pull_file = "{0}/{1}.json".format(pulls_cwd, number)
-        with codecs.open(pull_file, "w", encoding="utf-8") as f:
+        with codecs.open(pull_file + ".temp", "w", encoding="utf-8") as f:
             json_dump(pull, f)
+        os.rename(pull_file + ".temp", pull_file)  # Unlike json_dump, this is atomic
 
 
 def backup_milestones(args, repo_cwd, repository, repos_template):
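
For reference, the per-file check that ``--incremental-by-files`` performs for each issue and pull request boils down to roughly the sketch below. The helper name ``should_skip`` is hypothetical and not part of ``github_backup.py``; the real code performs the check inline in ``backup_issues`` and ``backup_pulls``::

    # Simplified, standalone sketch of the --incremental-by-files check
    # (hypothetical helper, not part of github_backup.py).
    import os
    from datetime import datetime, timezone

    def should_skip(json_path, updated_at):
        """Return True if the backup file at json_path is newer than the item's
        GitHub updated_at timestamp (e.g. "2024-01-02T03:04:05Z")."""
        if not os.path.isfile(json_path):
            return False  # never backed up yet, so fetch it
        mtime = os.path.getmtime(json_path)
        # Format the mtime in UTC; ISO-8601 strings then sort chronologically,
        # so a plain string comparison against GitHub's timestamp works.
        modified = datetime.fromtimestamp(mtime, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        return modified > updated_at

Because an item that failed to download on a previous run has no file on disk, it is fetched again on the next run, which is why this mode is described above as safer than ``-i, --incremental``.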