Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added threading to search methods #194

Draft
wants to merge 15 commits into
base: master
Choose a base branch
from
5 changes: 4 additions & 1 deletion lyricsgenius/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ def main(args=None):
help="Specify number of songs when searching for artist")
parser.add_argument("-q", "--quiet", action="store_true",
help="Turn off the API verbosity")
parser.add_argument("-n", "--num-workers", type=int, default=1,
help="Number of threads used to get songs")
args = parser.parse_args()

# Create an instance of the Genius class
Expand All @@ -51,7 +53,8 @@ def main(args=None):
elif args.search_type == "artist":
artist = api.search_artist(args.terms[0],
max_songs=args.max_songs,
sort='popularity')
sort='popularity',
num_workers=args.num_workers)
if args.save:
if not args.quiet:
print("Saving '{a}'' lyrics...".format(a=safe_unicode(artist.name)))
Expand Down
166 changes: 118 additions & 48 deletions lyricsgenius/genius.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,17 @@

import json
import os
import queue
import re
import shutil
import time
import threading

from bs4 import BeautifulSoup

from .api import API, PublicAPI
from .types import Album, Artist, Song, Track
from .utils import clean_str, safe_unicode
from .utils import SongThread, clean_str, safe_unicode


class Genius(API, PublicAPI):
Expand Down Expand Up @@ -268,7 +270,8 @@ def song_annotations(self, song_id, text_format=None):
return all_annotations

def search_album(self, name=None, artist="",
album_id=None, get_full_info=True, text_format=None):
album_id=None, get_full_info=True,
text_format=None, num_workers=1):
"""Searches for a specific album and gets its songs.

You must pass either a :obj:`name` or an :obj:`album_id`.
Expand Down Expand Up @@ -297,6 +300,16 @@ def search_album(self, name=None, artist="",
print(album.name)

"""
def download_track(track):
    """Build a Track for one album entry and add it to ``tracks``.

    Lyrics are fetched only when the song's ``lyrics_state`` is
    ``'complete'`` and it is not flagged as instrumental; otherwise the
    Track is created with an empty lyrics string.
    """
    info = track['song']
    has_lyrics = (info['lyrics_state'] == 'complete'
                  and not info.get('instrumental'))
    lyrics = self.lyrics(song_url=info['url']) if has_lyrics else ""

    # `tracks` is the list owned by the enclosing search_album call.
    tracks.append(Track(self, track, lyrics))
msg = "You must pass either a `name` or an `album_id`."
assert any([name, album_id]), msg

Expand Down Expand Up @@ -326,7 +339,8 @@ def search_album(self, name=None, artist="",

tracks = []
next_page = 1

errors_queue = queue.Queue()
thread_pool = []
# It's unlikely for an album to have >=50 songs,
# but it's best to check
while next_page:
Expand All @@ -337,17 +351,37 @@ def search_album(self, name=None, artist="",
text_format=text_format
)
for track in tracks_list['tracks']:
song_info = track['song']
if (song_info['lyrics_state'] == 'complete'
and not song_info.get('instrumental')):
song_lyrics = self.lyrics(song_url=song_info['url'])
if num_workers != 1:
thread = SongThread(
errors_queue,
name="Thread-Track-{}".format(track['song']['id']),
target=download_track,
args=(track,)
)
thread.daemon = True
thread.start()
thread_pool.append(thread)
if len(thread_pool) == num_workers:
for thread in thread_pool:
thread.join()
thread_pool.clear()
else:
song_lyrics = ""

track = Track(self, track, song_lyrics)
tracks.append(track)
download_track(track)

next_page = tracks_list['next_page']
for thread in thread_pool:
thread.join()
try:
error = errors_queue.get(False)
except queue.Empty:
pass
else:
raise error

length = len(tracks)
tracks.sort(key=lambda track: (track.number
if track.number is not None
else length))

if album_id is None and get_full_info is True:
new_info = self.album(album_id, text_format=text_format)['album']
Expand Down Expand Up @@ -456,6 +490,7 @@ def search_artist(self, artist_name, max_songs=None,
allow_name_change=True,
artist_id=None,
include_features=False,
num_workers=1,
):
"""Searches for a specific artist and gets their songs.

Expand Down Expand Up @@ -518,6 +553,39 @@ def find_artist_id(search_term):
# Assume the top search result is the intended artist
return found_artist['id']

def download_song(song_info, index):
    """Fetch one song and try to attach it to the enclosing ``artist``.

    Args:
        song_info (dict): Song entry taken from a page of artist songs.
        index (int): Position stored on the Song as ``_index``.

    Returns nothing; results that fail the lyrics check are skipped.
    """
    # Guard clause: drop non-song results (e.g. liner notes, tracklists).
    if self.skip_non_songs and not self._result_is_lyrics(song_info):
        if self.verbose:
            title = song_info['title']
            print('"{s}" is not valid. Skipping.'.format(
                s=safe_unicode(title)))
        return

    # Only request lyrics when the entry reports them as complete.
    lyrics = (self.lyrics(song_url=song_info['url'])
              if song_info['lyrics_state'] == 'complete' else "")
    if get_full_info:
        song_info.update(self.song(song_info['id'])['song'])

    song = Song(self, song_info, lyrics)
    song._index = index

    # add_song returns None when the song is rejected for this artist.
    added = artist.add_song(song, verbose=False,
                            include_features=include_features)
    if added is not None and self.verbose:
        print('Song {n}: "{t}"'.format(n=len(artist.songs),
                                       t=safe_unicode(song.title)))

# Get the artist ID (or use the one supplied)
artist_id = artist_id if artist_id else find_artist_id(artist_name)
if not artist_id:
Expand All @@ -535,62 +603,64 @@ def find_artist_id(search_term):
artist = Artist(self, artist_info)
# Download each song by artist, stored as Song objects in Artist object
page = 1
num_songs = 0
reached_max_songs = True if max_songs == 0 else False
thread_pool = []
errors_queue = queue.Queue()
while not reached_max_songs:
songs_on_page = self.artist_songs(artist_id=artist_id,
per_page=per_page,
page=page,
sort=sort,
)

# Loop through each song on page of search results
for song_info in songs_on_page['songs']:
# Check if song is valid (e.g. contains lyrics)
if self.skip_non_songs and not self._result_is_lyrics(song_info):
valid = False
else:
valid = True

# Reject non-song results (e.g. Liner Notes, Tracklists, etc.)
if not valid:
if self.verbose:
s = song_info['title']
print('"{s}" is not valid. Skipping.'.format(
s=safe_unicode(s)))
continue

# Create the Song object from lyrics and metadata
if song_info['lyrics_state'] == 'complete':
lyrics = self.lyrics(song_url=song_info['url'])
for song in songs_on_page["songs"]:
if num_workers != 1:
thread = SongThread(
errors_queue,
name="Thread-Song-{}".format(song['id']),
target=download_song,
args=(song, num_songs)
)
thread.daemon = True
thread.start()
thread_pool.append(thread)
if len(thread_pool) == num_workers:
for thread in thread_pool:
thread.join()
thread_pool.clear()
else:
lyrics = ""
if get_full_info:
new_info = self.song(song_info['id'])['song']
song_info.update(new_info)
song = Song(self, song_info, lyrics)

# Attempt to add the Song to the Artist
result = artist.add_song(song, verbose=False,
include_features=include_features)
if result is not None and self.verbose:
print('Song {n}: "{t}"'.format(n=artist.num_songs,
t=safe_unicode(song.title)))
download_song(song, num_songs)
num_songs += 1

# Exit search if the max number of songs has been met
reached_max_songs = max_songs and artist.num_songs >= max_songs
reached_max_songs = max_songs and num_songs - 1 >= max_songs
if reached_max_songs:
if self.verbose:
print(('\nReached user-specified song limit ({m}).'
.format(m=max_songs)))
break

for thread in thread_pool:
thread.join()
try:
error = errors_queue.get(False)
except queue.Empty:
pass
else:
raise error
thread_pool.clear()

if reached_max_songs:
if self.verbose:
print(('\nReached user-specified song limit ({m}).'
.format(m=max_songs)))
break

# Move on to next page of search results
page = songs_on_page['next_page']
if page is None:
break # Exit search when last page is reached

artist.songs.sort(key=lambda x: x._index)
if self.verbose:
print('Done. Found {n} songs.'.format(n=artist.num_songs))
print('Done. Found {n} songs.'.format(n=len(artist.songs)))
return artist

def save_artists(self, artists, filename="artist_lyrics", overwrite=False,
Expand Down
5 changes: 4 additions & 1 deletion lyricsgenius/types/album.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,4 +126,7 @@ def save_lyrics(self,

def __repr__(self):
name = self.__class__.__name__
return "{}(number, song)".format(name)
return "{name}({number}, Song(id={song_id}))".format(
name=name,
number=self.number,
song_id=self.song.id)
7 changes: 3 additions & 4 deletions lyricsgenius/types/artist.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ def __init__(self, client, json_dict):
self._body = body
self._client = client
self.songs = []
self.num_songs = len(self.songs)

self.api_path = body['api_path']
self.header_image_url = body['header_image_url']
Expand Down Expand Up @@ -76,7 +75,6 @@ def add_song(self, new_song, verbose=True, include_features=False):
if (new_song.artist == self.name
or (include_features and any(new_song._body['featured_artists']))):
self.songs.append(new_song)
self.num_songs += 1
return new_song
if verbose:
print("Can't add song by {b}, artist must be {a}.".format(
Expand Down Expand Up @@ -149,6 +147,7 @@ def save_lyrics(self,

def __str__(self):
"""Return a string representation of the Artist object."""
msg = "{name}, {num} songs".format(name=self.name, num=self.num_songs)
msg = msg[:-1] if self.num_songs == 1 else msg
num_songs = len(self.songs)
msg = "{name}, {num} songs".format(name=self.name, num=num_songs)
msg = msg[:-1] if num_songs == 1 else msg
return msg
16 changes: 16 additions & 0 deletions lyricsgenius/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,29 @@

import re
import os
import threading
import sys
import unicodedata
from datetime import datetime
from string import punctuation
from urllib.parse import parse_qs, urlparse


class SongThread(threading.Thread):
    """Thread that hands its target's exception to a shared queue.

    An exception raised by the target is put on ``errors_queue`` instead
    of propagating out of ``run``, so the code that joins the thread can
    retrieve it from the queue afterwards.

    Args:
        errors_queue: Object with a ``put(item)`` method (e.g.
            ``queue.Queue``) that receives any ``Exception`` raised by
            the target.
        **kwargs: Passed unchanged to ``threading.Thread.__init__``
            (``target``, ``args``, ``kwargs``, ``name``, ...).
    """

    def __init__(self, errors_queue, **kwargs):
        super().__init__(**kwargs)
        self.errors_queue = errors_queue

    def run(self):
        """Call the target, recording any ``Exception`` it raises."""
        try:
            # Identity check, not truthiness: a callable that happens to be
            # falsy (e.g. defines ``__len__`` returning 0) must still run.
            if self._target is not None:
                self._target(*self._args, **self._kwargs)
        except Exception as e:
            self.errors_queue.put(e)
        finally:
            # Drop the references once the target has run.
            del self._target, self._args, self._kwargs


def auth_from_environment():
"""Gets credentials from environment variables.

Expand Down
4 changes: 2 additions & 2 deletions tests/test_artist.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def test_name(self):
def test_add_song_from_same_artist(self):
msg = "The new song was not added to the artist object."
self.artist.add_song(genius.search_song(self.new_song, self.artist_name))
self.assertEqual(self.artist.num_songs, self.max_songs + 1, msg)
self.assertEqual(len(self.artist.songs), self.max_songs + 1, msg)

def test_song(self):
msg = "Song was not in artist's songs."
Expand All @@ -51,7 +51,7 @@ def test_song(self):
def test_add_song_from_different_artist(self):
msg = "A song from a different artist was incorrectly allowed to be added."
self.artist.add_song(genius.search_song("These Days", "Jackson Browne"))
self.assertEqual(self.artist.num_songs, self.max_songs, msg)
self.assertEqual(len(self.artist.songs), self.max_songs, msg)

def test_artist_with_includes_features(self):
# The artist did not get songs returned that they were featured in.
Expand Down