-
Notifications
You must be signed in to change notification settings - Fork 0
/
legacy-import.py
executable file
·114 lines (98 loc) · 4.57 KB
/
legacy-import.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env python3
# This script reads a legacy database for https://git.cs.uni-paderborn.de/fsmi/fsmi-klausurarchiv
# and sends its metadata to an instance of the new system.
# It does not send the actual documents, since we were not absolutely sure that the system was safe
# at the time of this writing.
# NOTE(review): the code below does upload each PDF via /v1/upload, so the statement above
# looks outdated -- confirm and update.
from itertools import chain
from pathlib import Path
import argparse
import requests
from tqdm import tqdm
# Command-line interface.
# Every option is mandatory: the values are indexed with [0] right after
# parsing, so an omitted option used to crash with
# "TypeError: 'NoneType' object is not subscriptable". Marking them required
# lets argparse print a usage message instead.
# NOTE(review): a password given on the command line is visible in the process
# list and shell history; consider prompting for it instead.
parser = argparse.ArgumentParser(
    description="Transport item metadata from an old archive to a new one")
parser.add_argument("-s", "--server", type=str, nargs=1, required=True,
                    help="The URL via which the archive is available")
parser.add_argument("-u", "--user", type=str, nargs=1, required=True,
                    help="The username to use for authentication")
parser.add_argument("-p", "--password", type=str, nargs=1, required=True,
                    help="The password fo use for authentication")
parser.add_argument("-a", "--archive", type=Path, nargs=1, required=True,
                    help="The path to the old archive, ending in '50-fertig'")
args = parser.parse_args()

# nargs=1 makes argparse wrap each value in a one-element list; unwrap them.
SERVER = args.server[0]
USER = args.user[0]
PASSWORD = args.password[0]
OLD_ARCHIVE_FOLDER = args.archive[0]
# Log in once; the cookies of the login response are sent with every later
# request to authenticate it.
r = requests.post(f"{SERVER}/v1/login",
                  json={"username": USER, "password": PASSWORD})
assert r.status_code == 200, f"Status was {r.status_code}"
cookies = r.cookies


def _fetch_name_index(endpoint, name_field):
    """Return a ``{name: id}`` mapping for the entries listed at *endpoint*.

    The response body is read as a JSON object whose keys are used as ids and
    whose values are objects containing *name_field*.

    Args:
        endpoint: Collection name below ``/v1/``, e.g. ``"courses"``.
        name_field: Key inside each entry that holds its display name.
    """
    listing = requests.get(f"{SERVER}/v1/{endpoint}", cookies=cookies).json()
    return {entry[name_field]: entry_id for entry_id, entry in listing.items()}


# Name -> id lookup tables for what already exists on the server, so the
# import only creates the entries that are missing.
courses = _fetch_name_index("courses", "long_name")
authors = _fetch_name_index("authors", "name")
# Fixed: this table was previously filled from /v1/authors, so existing folders
# were never recognised (and were re-created on every run).
# NOTE(review): assumes GET /v1/folders mirrors the courses/authors listing
# format -- confirm against the server API.
folders = _fetch_name_index("folders", "name")
def _get_or_create(index, endpoint, name, payload):
    """Return the server id for *name*, creating the entry if it is unknown.

    Args:
        index: ``{name: id}`` cache for this collection; updated in place when
            a new entry is created.
        endpoint: Collection name below ``/v1/`` that new entries are POSTed to.
        name: Display name used as the cache key.
        payload: JSON body sent to create the entry.

    Raises:
        AssertionError: If the server does not answer the creation with 201.
    """
    if name not in index:
        response = requests.post(f"{SERVER}/v1/{endpoint}",
                                 json=payload, cookies=cookies)
        assert response.status_code == 201, f"Status was {response.status_code}"
        index[name] = response.json()["id"]
    return index[name]


# Walk every directory below "alle-ordner"; each one becomes a folder on the
# server and each PDF inside it becomes an item with one attached document.
for folder_path in tqdm(list((OLD_ARCHIVE_FOLDER / "alle-ordner").iterdir())):
    if not folder_path.is_dir():
        continue

    # Only the first two " - "-separated parts of the directory name make up
    # the folder name; anything after them is dropped.
    name_parts = folder_path.name.split(" - ")
    folder_name = name_parts[0] + " - " + name_parts[1]
    folder_id = _get_or_create(
        folders, "folders", folder_name, {"name": folder_name})

    for item_path in folder_path.glob("*.pdf"):
        # The file stem must consist of exactly five " - "-separated fields:
        # courses, date, two fields that are not used, authors.
        # Any other shape raises ValueError here.
        course_names, date, _, _, author_names = item_path.stem.split(" - ")
        name = course_names

        # dict.fromkeys removes duplicates while keeping the order in which
        # the names appear, so the id lists are deterministic.
        course_ids = [
            _get_or_create(courses, "courses", course_name,
                           {"long_name": course_name, "short_name": ""})
            for course_name in dict.fromkeys(course_names.split("; "))
        ]
        author_ids = [
            _get_or_create(authors, "authors", author_name,
                           {"name": author_name})
            for author_name in dict.fromkeys(author_names.split("; "))
        ]

        if item_path.exists():
            # Register the document first, then upload the PDF bytes to it.
            r = requests.post(
                f"{SERVER}/v1/documents",
                json={"filename": "Klausur.pdf", "downloadable": False,
                      "content_type": "application/pdf"},
                cookies=cookies)
            assert r.status_code == 201, f"Status was {r.status_code}"
            document_id = r.json()["id"]

            # The context manager closes the file once the upload is done;
            # previously one handle per PDF was left open.
            with open(item_path, mode="rb") as pdf_file:
                r = requests.post(
                    f"{SERVER}/v1/upload?id={document_id}",
                    data=pdf_file,
                    headers={
                        "Content-type": "application/pdf"
                    },
                    cookies=cookies
                )
            assert r.status_code == 200, f"Status was {r.status_code}"
        else:
            # The file vanished between globbing and upload; create the item
            # without a document.
            document_id = None

        r = requests.post(
            f"{SERVER}/v1/items",
            json={
                "name": name,
                "date": date,
                "documents": [document_id] if document_id is not None else [],
                "authors": author_ids,
                "courses": course_ids,
                "folders": [folder_id],
                "visible": True
            },
            cookies=cookies
        )
        if r.status_code != 201:
            # Show the server's error body before the assertion aborts the run.
            print(r.json())
        assert r.status_code == 201, f"Status was {r.status_code}"