-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinternet_archive_client.py
180 lines (142 loc) · 5.6 KB
/
internet_archive_client.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
import os
from io import BytesIO
from typing import Optional
from urllib.parse import quote

import requests
from dotenv import load_dotenv
from internetarchive import upload, get_item, get_session, search_items, get_user_info
# Load settings from a .env file (if any) before the environment is read below.
load_dotenv()

# Internet Archive account credentials. They are sent in the 'LOW' authorization
# header by create_item, and iaEmail is also used for the contributor field and
# the uploader search query. Each value is None when its variable is unset.
iaEmail = os.getenv('IA_EMAIL')
iaPassword = os.getenv('IA_PASSWORD')

# Key pair passed to the upload call and to the S3-style delete request.
# Each value is None when its variable is unset.
s3AccessKey = os.getenv('S3_ACCESS_KEY')
s3Secret = os.getenv('S3_SECRET')
def create_item(collection: str, title: str, description: str) -> Optional[str]:
    """
    Creates a new item in the Internet Archive with the specified metadata.

    Failures are reported on stdout and signalled with a ``None`` return
    value; HTTP, network and response-parsing errors are not propagated to
    the caller.

    Args:
        collection (str): The name of the collection to add the item to.
        title (str): The title of the item.
        description (str): A brief description of the item.

    Returns:
        Optional[str]: The ``uniq`` field of the JSON response (used as the
        item identifier), or None if the request failed, the server answered
        with an error status, or the response did not contain that field.

    Example:
        >>> create_item('test_collection', 'My Item', 'This is a test item.')
        'my-item'
    """
    metadata = {
        'collection': collection,
        'title': title,
        'description': description,
        'contributor': iaEmail,
    }
    headers = {'authorization': 'LOW {}:{}'.format(iaEmail, iaPassword)}

    # NOTE(review): the endpoint and the 'uniq' response field are kept exactly
    # as the original code had them; confirm against the archive.org API docs.
    try:
        response = requests.post(
            'https://archive.org/metadata/items',
            headers=headers,
            json=metadata,
            # Without a timeout requests waits forever on a stalled connection.
            timeout=30,
        )
        response.raise_for_status()
        identifier = response.json()['uniq']
    except requests.exceptions.HTTPError as error:
        print('HTTP error occurred: {}'.format(error))
        return None
    except requests.exceptions.RequestException as error:
        # Connection failures, timeouts and similar transport-level problems.
        print('Request failed: {}'.format(error))
        return None
    except KeyError as error:
        print('Identifier not found in response: {}'.format(error))
        return None
    except ValueError as error:
        # Raised by response.json() when the body is not valid JSON.
        print('Response was not valid JSON: {}'.format(error))
        return None

    print('Item created with identifier: {}'.format(identifier))
    return identifier
def upload_file(identifier: str, file_bytes: BytesIO) -> Optional[str]:
    """
    Uploads a file to an existing item on the Internet Archive.

    Failures are reported on stdout and signalled with a ``None`` return
    value; no exception is propagated from the upload itself.

    Args:
        identifier (str): The identifier of the item to upload the file to.
        file_bytes (BytesIO): The file content to upload. The object must
            carry a non-empty ``name`` attribute (a plain ``BytesIO`` has
            none until the caller assigns one); it is used to build the
            returned download URL.

    Returns:
        Optional[str]: A URL to the uploaded file on the Internet Archive,
        or None if ``file_bytes`` has no name or the upload failed.

    Example:
        >>> buffer = BytesIO(b'...')
        >>> buffer.name = 'My File.mp3'
        >>> upload_file('my-item', buffer)
        'https://archive.org/download/my-item/My%20File.mp3'
    """
    # Check the name before doing any network work: without it there is no
    # way to build the result URL, and the failure message would be obscure.
    file_name = getattr(file_bytes, 'name', None)
    if not file_name:
        print('Error uploading item: file_bytes has no name attribute')
        return None

    try:
        item = get_item(identifier)
        item.upload(file_bytes, access_key=s3AccessKey, secret_key=s3Secret)
        result_url = 'https://archive.org/download/{}/{}'.format(identifier, quote(file_name))
        print('File uploaded with URL: {}'.format(result_url))
        return result_url
    except Exception as e:
        # Best-effort by design: report the problem and let the caller see None.
        print('Error uploading item: {}'.format(e))
        return None
import requests
from urllib.parse import quote
def delete_file(identifier: str, file_name: str) -> bool:
    """
    Deletes a file from an existing item on the Internet Archive.

    Sends an HTTP DELETE for the file to the archive.org S3 endpoint. Any
    exception raised along the way is caught, reported on stdout and turned
    into a ``False`` result, so nothing is raised to the caller.

    Args:
        identifier (str): The identifier of the item from which to delete the file.
        file_name (str): The name of the file to be deleted.

    Returns:
        bool: True if the server answered with status 204, False for any
        other status or if an error occurred.

    Example:
        >>> delete_file('my-item', 'My File.mp3')
        True
    """
    try:
        target = f'https://s3.us.archive.org/{identifier}/{quote(file_name)}'
        auth = {'Authorization': f'LOW {s3AccessKey}:{s3Secret}'}
        reply = requests.delete(target, headers=auth)

        # Anything other than 204 is treated as a failed delete.
        if reply.status_code != 204:
            print(f'Error deleting file: {reply.text}')
            return False

        print(f'File {file_name} deleted successfully.')
        return True
    except Exception as exc:
        print(f'Error deleting file: {exc}')
        return False
def get_all_mp3_files() -> list:
    """
    Lists every '.mp3' file found in the items uploaded by the configured account.

    Searches for items whose uploader matches the configured email, fetches
    each item, and keeps the files whose name ends with '.mp3' (the check is
    case-sensitive).

    Returns:
        list: One dictionary per mp3 file with the keys 'filename',
        'identifier' and 'url' (the download URL with the file name
        percent-encoded).

    Example:
        >>> get_all_mp3_files()
        [
            {'filename': 'My File.mp3', 'identifier': 'my-item', 'url': 'https://archive.org/download/my-item/My%20File.mp3'},
            {'filename': 'Another File.mp3', 'identifier': 'another-item', 'url': 'https://archive.org/download/another-item/Another%20File.mp3'}
        ]
    """
    url_template = 'https://archive.org/download/{}/{}'
    collected = []

    # One search hit per item uploaded by the account; each hit is then
    # fetched in full so its file list can be inspected.
    for hit in search_items('uploader:"{}"'.format(iaEmail)):
        archive_item = get_item(identifier=hit.get('identifier'))
        collected.extend(
            {
                'filename': entry.name,
                'identifier': entry.identifier,
                'url': url_template.format(entry.identifier, quote(entry.name)),
            }
            for entry in archive_item.get_files()
            if entry.name.endswith('.mp3')
        )

    return collected
def main():
    """
    Script entry point: creates the 'artist_mp3s' item via create_item.
    """
    create_item(
        collection='artist_mp3s',
        title='Artist MP3s',
        description='These are all the mp3 files for the artist biographies.',
    )
    # Listing of the uploaded mp3 files, currently disabled:
    # mp3_files = get_all_mp3_files()
    # for file in mp3_files:
    #     print(file)


if __name__ == '__main__':
    main()