Skip to content

Commit

Permalink
Merge pull request #248 from ApokalyptischerTempler/download-discography-fix
Browse files Browse the repository at this point in the history

fix downloading discography
  • Loading branch information
Evolution0 authored Jan 19, 2025
2 parents 716cfd0 + a9a7241 commit 4802bdc
Showing 1 changed file with 37 additions and 2 deletions.
39 changes: 37 additions & 2 deletions bandcamp_dl/bandcamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import requests
from requests.adapters import HTTPAdapter
from urllib3.util import create_urllib3_context
from urllib.parse import urlparse, urlunparse

from bandcamp_dl import __version__
from bandcamp_dl.bandcampjson import BandcampJSON
Expand Down Expand Up @@ -245,7 +246,41 @@ def get_full_discography(self, artist: str, page_type: str) -> list:
except bs4.FeatureNotFound:
soup = bs4.BeautifulSoup(html, "html.parser")

urls = [f"https://{artist}.bandcamp.com{a['href']}" for a in soup.find_all("a", href=True)
if ("/" == a["href"].split("album")[0] or "/" == a["href"].split("track")[0])]
urls = []

for music_grid_item in soup.find_all("li", class_="music-grid-item"):
for a in music_grid_item.find_all("a", href=True):
url = a['href']
if not url.startswith('http'):
url = f"https://{artist}.bandcamp.com{a['href']}"

parsed_url = urlparse(url)
url = urlunparse(parsed_url._replace(query='', fragment=''))
urls.append(url)

data_client_items_attributes = soup.find_all(attrs={"data-client-items": True})

data_client_items = []

for data_client_items_attribute in data_client_items_attributes:
data_client_items += json.loads(
data_client_items_attribute['data-client-items'])

for album in data_client_items:
if 'page_url' in album:
page_url = album['page_url']
url = ""

if page_url.startswith('http'):
url = page_url
else:
url = f"https://{artist}.bandcamp.com{page_url}"

parsed_url = urlparse(url)
url = urlunparse(parsed_url._replace(query='', fragment=''))
urls.append(url)

self.logger.debug(f" {len(urls)} Album URLs found for {artist}.\nURLs: \n" + "\n"
.join(url for url in urls) + "\n")

return urls

0 comments on commit 4802bdc

Please sign in to comment.