Skip to content

Commit

Permalink
[bunkr] extract correct 'filename' data (#6824)
Browse files Browse the repository at this point in the history
  • Loading branch information
mikf committed Jan 14, 2025
1 parent d17a423 commit 843a39a
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 4 deletions.
10 changes: 7 additions & 3 deletions gallery_dl/extractor/bunkr.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ def __init__(self, match):
self.root = "https://" + domain

def request(self, url, **kwargs):
+ kwargs["encoding"] = "utf-8"
kwargs["allow_redirects"] = False

while True:
Expand Down Expand Up @@ -114,8 +115,7 @@ def request(self, url, **kwargs):

def fetch_album(self, album_id):
# album metadata
- page = self.request(
- self.root + "/a/" + album_id, encoding="utf-8").text
+ page = self.request(self.root + "/a/" + album_id).text
title = text.unescape(text.unescape(text.extr(
page, 'property="og:title" content="', '"')))

Expand All @@ -140,7 +140,8 @@ def _extract_files(self, items):

file = self._extract_file(url)
info = text.split_html(item)
- file["name"] = info[-3]
+ if not file["name"]:
+ file["name"] = info[-3]
file["size"] = info[-2]
file["date"] = text.parse_datetime(
info[-1], "%H:%M:%S %d/%m/%Y")
Expand All @@ -157,6 +158,8 @@ def _extract_file(self, webpage_url):
page = response.text
file_url = (text.extr(page, '<source src="', '"') or
text.extr(page, '<img src="', '"'))
+ file_name = (text.extr(page, 'property="og:title" content="', '"') or
+ text.extr(page, "<title>", " | Bunkr<"))

if not file_url:
webpage_url = text.unescape(text.rextract(
Expand All @@ -166,6 +169,7 @@ def _extract_file(self, webpage_url):

return {
"file" : text.unescape(file_url),
+ "name" : text.unescape(file_name),
"_http_headers" : {"Referer": response.url},
"_http_validate": self._validate,
}
Expand Down
8 changes: 7 additions & 1 deletion gallery_dl/extractor/lolisafe.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,13 @@ def items(self):
if "name" in file:
name = file["name"]
file["name"] = name.rpartition(".")[0] or name
- file["id"] = file["filename"].rpartition("-")[2]
+ fid = file["filename"].rpartition("-")[2]
+ if len(fid) == 12:
+ file["id"] = ""
+ file["filename"] = file["name"]
+ else:
+ file["id"] = fid
+ file["filename"] = file["name"] + "-" + fid
elif "id" in file:
file["name"] = file["filename"]
file["filename"] = "{}-{}".format(file["name"], file["id"])
Expand Down
13 changes: 13 additions & 0 deletions test/results/bunkr.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,4 +224,17 @@
"#urls" : "https://meatballs.bunkr.ru/27-03-2024-Rp-0FfrropA.mp4",
},

+ {
+ "#url" : "https://bunkr.site/f/wYGCKbGhSvuAW",
+ "#comment" : "correct 'name' from HTML (#6790)",
+ "#category": ("lolisafe", "bunkr", "media"),
+ "#class" : bunkr.BunkrMediaExtractor,
+ "#urls" : "https://kebab.bunkr.ru/80ca5405-8b8d-4f9f-8167-8b046bb9dc67.mp4",
+
+ "id" : "",
+ "name" : "0hwndshtfmj7hcbut1nd4_source",
+ "filename" : "0hwndshtfmj7hcbut1nd4_source",
+ "extension": "mp4",
+ },
+
)

0 comments on commit 843a39a

Please sign in to comment.