forked from indictechcom/wikifile-transfer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
114 lines (91 loc) · 3.5 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import datetime
import requests
import mwparserfromhell
from templatelist import TEMPLATES
def download_image(src_project, src_lang, src_filename):
    """Download a file from a source wiki into the ``temp_images/`` directory.

    The file's URL is looked up through the source wiki's ``imageinfo`` API
    (restricted to the local repository via ``iilocalonly``), the file is
    downloaded, and it is stored under a timestamp-based name.

    Args:
        src_project: Project part of the host name; the endpoint is built as
            ``https://<src_lang>.<src_project>.org/w/api.php``.
        src_lang: Language subdomain of the source wiki.
        src_filename: Page title of the file, passed verbatim as ``titles``.

    Returns:
        The generated file name (relative to ``temp_images/``), or ``None``
        when the API response carries no image URL, the download fails, or
        the download response has no ``Content-Type`` header.
    """
    src_endpoint = "https://" + src_lang + "." + src_project + ".org/w/api.php"
    param = {
        "action": "query",
        "format": "json",
        "prop": "imageinfo",
        "titles": src_filename,
        "iiprop": "url",
        "iilocalonly": 1,
    }
    # Identify the tool on every request; generic client User-Agents may be
    # rejected by Wikimedia servers.
    headers = getHeader()

    api_response = requests.get(url=src_endpoint, params=param, headers=headers).json()
    try:
        page = api_response['query']['pages']
        image_url = list(page.values())[0]["imageinfo"][0]["url"]
    except (KeyError, IndexError):
        # Missing page, file not hosted locally, or an API error payload.
        return None

    # Create a unique file name based on time
    current_time = str(datetime.datetime.now())
    get_filename = current_time.replace(':', '_').replace(' ', '_')

    # Download the Image File
    r = requests.get(image_url, allow_redirects=True, headers=headers)
    if not r.ok:
        # Do not store an error page as if it were the image.
        return None

    content_type = r.headers.get('content-type')
    if not content_type:
        return None
    # Drop any media-type parameters ("; charset=...") before deriving the
    # extension from the subtype.
    extension = content_type.split(';')[0].strip().replace('image/', '')
    filename = get_filename + "." + extension

    # The context manager guarantees the handle is flushed and closed.
    with open("temp_images/" + filename, 'wb') as image_file:
        image_file.write(r.content)

    return filename
def process_upload(file_path, tr_filename, src_fileext, tr_endpoint, ses):
    """Upload a local file to the target wiki through the MediaWiki API.

    A CSRF token is fetched first, then the file is posted with
    ``action=upload`` and ``ignorewarnings`` set.

    Args:
        file_path: Path of the local file to upload.
        tr_filename: Target file name without extension.
        src_fileext: Extension appended to ``tr_filename`` after a ``"."``.
        tr_endpoint: URL of the target wiki's ``api.php``.
        ses: Object passed as ``auth=`` to ``requests`` (presumably an OAuth
            auth handler; confirm against the caller).

    Returns:
        A dict with ``"wikipage_url"`` (description page URL) and
        ``"file_link"`` (file URL), or ``None`` when the upload response does
        not contain ``upload.imageinfo``.
    """
    # Identify the tool on every request; generic client User-Agents may be
    # rejected by Wikimedia servers.
    headers = getHeader()

    # API Parameter to get CSRF Token
    csrf_param = {
        "action": "query",
        "meta": "tokens",
        "format": "json",
    }
    response = requests.get(url=tr_endpoint, params=csrf_param, auth=ses, headers=headers)
    csrf_token = response.json()["query"]["tokens"]["csrftoken"]

    # API Parameter to upload the file
    upload_param = {
        "action": "upload",
        "filename": tr_filename + "." + src_fileext,
        "format": "json",
        "token": csrf_token,
        "ignorewarnings": 1,
    }

    # Read the file for POST request; the context manager closes the handle
    # even when the request raises.
    with open(file_path, 'rb') as upload_file:
        response = requests.post(
            url=tr_endpoint,
            files={'file': upload_file},
            data=upload_param,
            auth=ses,
            headers=headers,
        ).json()

    # Try block to get Link and URL
    try:
        imageinfo = response["upload"]["imageinfo"]
        wikifile_url = imageinfo["descriptionurl"]
        file_link = imageinfo["url"]
    except KeyError:
        return None

    return {
        "wikipage_url": wikifile_url,
        "file_link": file_link,
    }
def get_localized_wikitext(wikitext, src_endpoint, target_lang):
    """Rewrite ``Article`` template parameters to their target-language titles.

    For every template whose name is listed in ``TEMPLATES`` and that has an
    ``Article`` parameter, the source wiki is asked for the language link of
    that article in ``target_lang``; when one exists the parameter value is
    replaced with the linked title.

    Args:
        wikitext: Wikitext to process.
        src_endpoint: URL of the source wiki's ``api.php``.
        target_lang: Language code to look up among the language links.

    Returns:
        The (possibly modified) wikitext as a string. This is best effort:
        if a request fails, the text localized so far is returned.
    """
    wikicode = mwparserfromhell.parse(wikitext)

    for template in wikicode.filter_templates():
        if template.name.strip() not in TEMPLATES:
            continue
        if not template.has("Article"):
            continue

        article_value = template.get("Article")
        if not article_value:
            continue
        article_title = article_value.value.strip()
        if not article_title:
            # Nothing to look up for an empty parameter.
            continue

        lang_param = {
            "action": "query",
            "format": "json",
            "prop": "langlinks",
            "titles": article_title,
            # Ask only for the target language; otherwise the API returns a
            # limited batch of links that may not include it.
            "lllang": target_lang,
            "formatversion": "2",
        }

        try:
            response = requests.get(url=src_endpoint, params=lang_param, headers=getHeader())
            response.raise_for_status()
            page = response.json()["query"]["pages"][0]
        except (requests.RequestException, ValueError):
            # Network/HTTP/JSON failure: stop issuing requests and return
            # whatever has been localized so far.
            return str(wikicode)
        except (KeyError, IndexError, TypeError):
            # Unexpected payload for this title; leave the template as is
            # and keep processing the remaining ones.
            continue

        # Pages without any language link simply have no "langlinks" key.
        for langlink in page.get("langlinks", []):
            if langlink.get("lang") == target_lang and "title" in langlink:
                template.add("Article", langlink["title"])
                break

    return str(wikicode)
def getHeader():
    """Return the HTTP headers that identify this tool to remote servers.

    Returns:
        A new dict containing a single ``User-Agent`` entry.
    """
    # NOTE(review): the contact address inside the string looks like an
    # obfuscation placeholder - confirm it against the deployed value.
    headers = {}
    headers['User-Agent'] = (
        'Wikifile-transfer/1.0 (https://wikifile-transfer.toolforge.org; [email protected])'
    )
    return headers