-
-
Notifications
You must be signed in to change notification settings - Fork 1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
18 changed files
with
922 additions
and
414 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
__pycache__/ | ||
*.py[cod] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
#!/usr/bin/env python3 | ||
# -*- coding: UTF-8 -*- | ||
|
||
import gallery_dl | ||
|
||
# Start the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    gallery_dl.main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
__author__ = "Mike Fährmann" | ||
__copyright__ = "Copyright 2014, Mike Fährmann" | ||
|
||
__license__ = "GPLv3" | ||
__version__ = "0.4" | ||
__maintainer__ = "Mike Fährmann" | ||
__email__ = "[email protected]" | ||
|
||
import os | ||
import sys | ||
import argparse | ||
import configparser | ||
|
||
from . import extractor | ||
from . import downloader | ||
|
||
def parse_cmdline_options():
    """Parse the process command line and return the options namespace.

    The namespace carries three attributes:

    * ``config`` -- path of the configuration file
      (defaults to ``~/.config/gallery/config``)
    * ``dest``   -- destination directory, ``None`` when not given
    * ``urls``   -- list with at least one URL to download images from
    """
    parser = argparse.ArgumentParser(
        description='Download images from various sources',
    )

    # (flags, keyword settings) for every supported argument
    argument_table = (
        (("-c", "--config"), {
            "default": "~/.config/gallery/config",
            "metavar": "CFG",
            "help": "alternate configuration file",
        }),
        (("-d", "--dest"), {
            "metavar": "DEST",
            "help": "destination directory",
        }),
        (("urls",), {
            "nargs": "+",
            "metavar": "URL",
            "help": "url to download images from",
        }),
    )
    for flags, settings in argument_table:
        parser.add_argument(*flags, **settings)

    return parser.parse_args()
|
||
def parse_config_file(path):
    """Load the configuration file at *path* and return the parser object.

    A leading ``~`` in *path* is expanded to the user's home directory.
    Value interpolation is switched off, so ``%`` characters in values are
    kept verbatim, and option names keep their original case.
    """
    parser = configparser.ConfigParser(interpolation=None)
    # identity transform: do not lower-case option names
    parser.optionxform = str
    expanded_path = os.path.expanduser(path)
    parser.read(expanded_path)
    return parser
|
||
def main():
    """Entry point: download the images behind every URL on the command line.

    Reads the command-line options and the configuration file, then asks the
    extractor finder for an extractor per URL and hands it to the download
    manager. URLs that no extractor pattern matches are reported and skipped.
    """
    opts = parse_cmdline_options()
    conf = parse_config_file(opts.config)
    extf = extractor.ExtractorFinder(conf)
    dlmg = downloader.DownloadManager(opts, conf)

    for url in opts.urls:
        ex = extf.match(url)
        if ex is None:
            # match() returns None when no pattern fits; passing that on
            # would fail inside the download manager
            print("[Warning] no suitable extractor found for '{0}'".format(url))
            continue
        dlmg.add(ex)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
import os | ||
import sys | ||
import importlib | ||
|
||
class DownloadManager():
    """Download everything an extractor yields into a destination directory."""

    def __init__(self, opts, conf):
        """Store the parsed command-line options and configuration.

        opts -- namespace whose ``dest`` attribute is read
        conf -- mapping of configuration sections (e.g. a ConfigParser)
        """
        self.opts = opts
        self.conf = conf
        # cache: URL scheme -> Downloader class of the matching module
        self.downloaders = {}

    def add(self, extr):
        """Download all ``(url, filename)`` pairs produced by *extr*.

        Files are stored in ``<base>/<extr.category>/<extr.directory>``,
        which is created if necessary. Files that already exist are skipped.
        """
        dest = os.path.join(
            self._base_directory(extr), extr.category, extr.directory)
        os.makedirs(dest, exist_ok=True)

        for url, filename in extr:
            path = os.path.join(dest, filename)
            if os.path.exists(path):
                self.print_skip(path)
                continue
            dl = self.get_downloader(extr, url)
            self.print_start(path)
            tries = dl.download(url, path)
            self.print_success(path, tries)

    def _base_directory(self, extr):
        """Return the base directory to use for *extr*.

        Order of precedence: the ``dest`` command-line option, the
        ``destination`` value of the extractor's own config section, the
        ``destination`` value of the ``general`` section, and finally
        ``/tmp/``. Missing sections are tolerated.
        """
        if self.opts.dest:
            return self.opts.dest
        for section in (extr.category, "general"):
            if section in self.conf:
                dest = self.conf[section].get("destination")
                if dest is not None:
                    return dest
        return "/tmp/"

    def get_downloader(self, extr, url):
        """Return a new downloader instance for the scheme of *url*.

        The scheme is the text before ``://`` (``http`` if there is none).
        The module of that name is imported relative to this package on
        first use and its ``Downloader`` class is cached.
        """
        end = url.find("://")
        proto = url[:end] if end != -1 else "http"
        if proto not in self.downloaders:
            # import downloader
            module = importlib.import_module("."+proto, __package__)
            self.downloaders[proto] = module.Downloader
        return self.downloaders[proto](extr)

    @staticmethod
    def print_start(path):
        """Print *path* without a newline so the line can be rewritten."""
        print(path, end="")
        sys.stdout.flush()

    @staticmethod
    def print_skip(path):
        """Print *path* wrapped in the ANSI ``2`` (faint) attribute."""
        print("\033[2m", path, "\033[0m", sep="")

    @staticmethod
    def print_success(path, tries):
        """Rewrite the current line with *path* in ANSI ``1;32`` (bold green)."""
        if tries == 0:
            print("\r", end="")
        print("\r\033[1;32m", path, "\033[0m", sep="")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
import os | ||
|
||
class BasicDownloader():
    """Base class for downloaders.

    Subclasses supply ``download_impl(url, file)``, which writes the
    content of *url* to the binary file object *file* and returns the
    number of retries it needed.
    """

    # upper bound for download attempts, read by subclasses
    max_tries = 5

    def download(self, url, path):
        """Download *url* into the file at *path*.

        Returns whatever ``download_impl`` returns. If it raises, the
        partially written file is removed and the exception is re-raised.
        """
        file = open(path, "wb")
        try:
            with file:
                return self.download_impl(url, file)
        except BaseException:
            # make sure to remove file if download failed (this includes
            # KeyboardInterrupt); the file is already closed at this point
            os.unlink(path)
            raise

    @staticmethod
    def print_error(file, error, tries, max_tries=5):
        """Print *error* together with the attempt counter.

        On the first attempt the name of *file* (if it has one) is printed
        first, in ANSI ``1;31`` (bold red), over the current line.
        """
        if tries == 1 and hasattr(file, "name"):
            print("\r\033[1;31m", file.name, sep="")
        print("\033[0;31m[Error]\033[0m ", error, " (", tries, "/", max_tries, ")", sep="")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
from .common import BasicDownloader | ||
import time | ||
import requests | ||
|
||
class Downloader(BasicDownloader):
    """Downloader that fetches URLs through the extractor's HTTP session."""

    def __init__(self, extr):
        """Reuse the session object of the extractor *extr*."""
        BasicDownloader.__init__(self)
        self.session = extr.session

    def download_impl(self, url, file):
        """Fetch *url* and write the response body to *file*.

        Connection errors and non-200 responses are retried, with one
        second between attempts, up to ``self.max_tries`` attempts.

        Returns the number of failed attempts before success. A 404
        response is not retried: ``self.max_tries`` is returned and
        nothing is written.
        NOTE(review): the caller has already created the target file, so
        a 404 appears to leave an empty file behind -- confirm intent.

        Raises ``requests.exceptions.ConnectionError`` or
        ``requests.exceptions.HTTPError`` once all attempts are used up.
        """
        tries = 0
        while True:
            # try to connect to remote source
            try:
                response = self.session.get(url, stream=True, verify=True)
            except requests.exceptions.ConnectionError as e:
                tries += 1
                self.print_error(file, e, tries, self.max_tries)
                if tries >= self.max_tries:
                    raise
                time.sleep(1)
                continue

            # reject error-status-codes
            if response.status_code != requests.codes.ok:
                tries += 1
                message = 'HTTP status "{} {}"'.format(
                    response.status_code, response.reason)
                self.print_error(file, message, tries, self.max_tries)
                if response.status_code == 404:
                    return self.max_tries
                if tries >= self.max_tries:
                    response.raise_for_status()
                    # raise_for_status() only raises for 4xx/5xx; give up
                    # explicitly on any other non-200 status instead of
                    # retrying forever
                    raise requests.exceptions.HTTPError(
                        message, response=response)
                time.sleep(1)
                continue

            # everything ok -- proceed to download
            break

        for data in response.iter_content(16384):
            file.write(data)
        return tries
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from .http import Downloader |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
from .common import BasicDownloader | ||
|
||
class Downloader(BasicDownloader):
    """Downloader whose "download" is the text embedded in the URL itself."""

    def __init__(self, extr):
        """The extractor *extr* is accepted but not used."""
        super().__init__()

    def download_impl(self, url, file):
        """Write *url* minus its first seven characters to *file*.

        The skipped characters are presumably a ``text://`` scheme prefix
        (seven characters long) -- confirm against the callers.
        The text is encoded as UTF-8. Always returns 0.
        """
        payload = url[7:]
        file.write(payload.encode("utf-8"))
        return 0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
from .common import BasicExtractor | ||
from urllib.parse import unquote | ||
import re | ||
|
||
class Extractor(BasicExtractor):
    """Extractor for the images posted in a single 8chan thread."""

    # URL template of a thread page: {0} = board, {1} = thread id
    thread_url_fmt = "https://www.8chan.co/{0}/res/{1}.html"
    # groups: 1 = file path, 2 = stored name without extension,
    #         4 = full original name (title attribute, optional),
    #         5 = displayed original name
    regex = r'>File: <a href="([^"]+)">([^<]+)\.[^<]+<.*?<span class="postfilename"( title="([^"]+)")?>([^<]+)<'

    def __init__(self, match, config):
        """Take board and thread id from group 1 of *match*.

        Group 1 is split on ``/`` and must consist of exactly three
        parts; the middle part is discarded.
        """
        super().__init__(config)
        board, _, thread_id = match.group(1).split("/")
        self.board = board
        self.thread_id = thread_id
        self.category = "8chan"
        self.directory = board + "-" + thread_id

    def images(self):
        """Yield an ``(image_url, filename)`` pair per file in the thread."""
        thread_url = self.thread_url_fmt.format(self.board, self.thread_id)
        page = self.request(thread_url).text
        for found in re.finditer(self.regex, page):
            path, prefix, fullname, name = found.group(1, 2, 4, 5)
            # prefer the full name from the title attribute when present
            original_name = unquote(fullname or name)
            yield ("https://www.8chan.co" + path, prefix + "-" + original_name)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
import os | ||
import sys | ||
import re | ||
import sqlite3 | ||
import importlib | ||
|
||
class ExtractorFinder():
    """Map URLs to extractor modules through a list of regular expressions."""

    def __init__(self, config):
        """Collect ``(category, pattern)`` pairs from database and config.

        If the ``general`` section of *config* has a ``database`` entry,
        patterns are read from that SQLite file first; afterwards the
        ``regex*`` options of every config section are added. A missing
        ``general`` section is tolerated.
        """
        self.config = config
        self.match_list = list()
        if "general" in config and "database" in config["general"]:
            path = os.path.expanduser(config["general"]["database"])
            conn = sqlite3.connect(path)
            try:
                self.load_from_database(conn)
            finally:
                # the connection is only needed while loading
                conn.close()
        self.load_from_config(config)

    def match(self, url):
        """Return an extractor for *url*, or ``None`` if no pattern matches.

        Patterns are tried in insertion order. For the first hit the
        module named after the category is imported relative to this
        package and its ``Extractor`` is created from the match object
        and the stored config.
        """
        for category, regex in self.match_list:
            match = regex.match(url)
            if match:
                module = importlib.import_module("."+category, __package__)
                return module.Extractor(match, self.config)
        return None

    def load_from_database(self, db):
        """Add every pattern/category-name pair stored in connection *db*."""
        query = (
            "SELECT regex.re, category.name "
            "FROM regex JOIN category "
            "ON regex.category_id = category.id"
        )
        for row in db.execute(query):
            self.add_match(row[1], row[0])

    def load_from_config(self, conf):
        """Add every option whose name starts with ``regex``.

        The name of the section an option lives in is used as category.
        """
        for category in conf:
            for key, value in conf[category].items():
                if key.startswith("regex"):
                    self.add_match(category, value)

    def add_match(self, category, regex):
        """Compile *regex* and register it for *category*.

        Patterns that cannot be compiled are reported with a warning and
        skipped.
        """
        try:
            compiled = re.compile(regex)
        except (re.error, TypeError):
            print("[Warning] [{0}] failed to compile regular expression '{1}'"
                  .format(category, regex))
        else:
            self.match_list.append((category, compiled))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
from .common import AsyncExtractor | ||
from ..util import filename_from_url | ||
from urllib.parse import unquote | ||
|
||
class Extractor(AsyncExtractor):
    """Extractor that walks the pages of a bato.to reader, one image each."""

    def __init__(self, match, config):
        """Build the first page URL from group 1 of *match*.

        Group 1 is also used as the directory name.
        """
        AsyncExtractor.__init__(self, config)
        self.url = "https://bato.to/read/_/" + match.group(1) + "/_/1"
        self.category = "batoto"
        self.directory = match.group(1)

    def images(self):
        """Yield ``(image_url, filename)`` for each page, following links.

        Starting at ``self.url``, each page is searched for the
        ``full_image`` container; the first link after it becomes the
        next page and the first ``src`` after that is the image. The walk
        ends when a page has no next link, no image container or no image
        URL.
        """
        next_url = self.url
        while next_url:
            text = self.request(next_url).text
            pos = text.find('<div id="full_image"')
            if pos == -1:
                # no image container on this page -- nothing to extract
                return

            next_url, pos = self.extract(text, '<a href="', '"', pos)
            url, pos = self.extract(text, 'src="', '"', pos)
            if url is None:
                # extract() signals "not found" with None
                return
            name = unquote(filename_from_url(url))
            yield url, name
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
import queue | ||
import threading | ||
import requests | ||
from ..util import safe_request | ||
|
||
class BasicExtractor():
    """Base class for extractors.

    Iterating over an instance iterates over ``self.images()``, which
    subclasses have to provide.
    """

    def __init__(self, config):
        """Create a fresh HTTP session; *config* is not used here."""
        self.session = requests.Session()
        self.category = ""
        self.directory = ""

    def __iter__(self):
        return self.images()

    def request(self, url, *args, **kwargs):
        """Request *url* through ``safe_request`` using this session."""
        return safe_request(self.session, url, *args, **kwargs)

    def enable_useragent(self):
        """Send a Firefox User-Agent header with all further requests."""
        self.session.headers["User-Agent"] = "Mozilla/5.0 (X11; Linux x86_64; rv:24.0) Gecko/20100101 Firefox/24.0"

    @staticmethod
    def extract(txt, begin, end, pos=0):
        """Return the text between *begin* and *end*, searching from *pos*.

        Returns ``(text, next_pos)`` where ``next_pos`` is the index just
        behind the *end* marker, or ``(None, pos)`` if either marker is
        not found.
        """
        try:
            first = txt.index(begin, pos) + len(begin)
            last = txt.index(end, first)
        except ValueError:
            # str.index signals "not found" with ValueError
            return None, pos
        return txt[first:last], last+len(end)

    @staticmethod
    def extract_all(txt, begin, end, pos=0):
        """Like ``extract``, but the returned text includes both markers.

        Returns ``(text, next_pos)`` or ``(None, pos)`` if either marker
        is not found.
        """
        try:
            first = txt.index(begin, pos)
            last = txt.index(end, first + len(begin)) + len(end)
        except ValueError:
            # str.index signals "not found" with ValueError
            return None, pos
        return txt[first:last], last
|
||
class AsyncExtractor(BasicExtractor):
    """Extractor that produces its results in a background thread.

    ``images()`` runs in a daemon thread and feeds a queue holding at
    most five entries; iterating over the instance consumes that queue.
    ``None`` is used as the end-of-stream marker.
    """

    def __init__(self, config):
        super().__init__(config)
        self.__queue = queue.Queue(maxsize=5)
        self.__thread = threading.Thread(target=self.async_images, daemon=True)

    def __iter__(self):
        """Start the producer thread and yield its results as they arrive."""
        get = self.__queue.get
        done = self.__queue.task_done

        self.__thread.start()
        while True:
            task = get()
            if task is None:
                # end-of-stream marker put by async_images()
                return
            yield task
            done()

    def async_images(self):
        """Thread target: put every result of ``images()`` on the queue.

        Errors raised by ``images()`` are printed as a traceback. The
        end-of-stream marker is always queued afterwards so the consumer
        does not wait forever.
        """
        put = self.__queue.put
        try:
            for task in self.images():
                put(task)
        except Exception:
            import traceback
            print(traceback.format_exc())
        finally:
            put(None)
Oops, something went wrong.