diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 39876dd..d0a3cbf 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.5.1 +current_version = 0.6.0 commit = True tag = False parse = ^ diff --git a/.gitignore b/.gitignore index 0bb44e1..9c2d329 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,8 @@ __pycache__/ # ebooks *.epub +*.mobi +*.pdf # C extensions *.so @@ -143,5 +145,6 @@ dmypy.json # project-extras *.txt +*.lst *.sqlite *.json diff --git a/README.md b/README.md index 1184f23..f864468 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,7 @@ fichub_cli metadata --input-db "urls - 2022-01-29 T000558.sqlite" --export-db - To download the ebook along with the metadata ``` -fichub_cli metadata -i urls.txt --download-ebook epub +fichub_cli metadata -i urls.txt --download-ebook epub,mobi ``` - To get all story urls found from a page. Currently supports archiveofourown.org only. diff --git a/fichub_cli_metadata/__init__.py b/fichub_cli_metadata/__init__.py index 9aabbd9..d414a65 100644 --- a/fichub_cli_metadata/__init__.py +++ b/fichub_cli_metadata/__init__.py @@ -13,6 +13,6 @@ # limitations under the License. # __version__ at the top to prevent ImportError: ... partially initialized module ... -__version__ = "0.5.1" +__version__ = "0.6.0" from .cli import app # entry_point diff --git a/fichub_cli_metadata/cli.py b/fichub_cli_metadata/cli.py index 4a26a0a..4888558 100644 --- a/fichub_cli_metadata/cli.py +++ b/fichub_cli_metadata/cli.py @@ -57,7 +57,7 @@ def metadata( "", "-o", "--out-dir", help="Path to the Output directory (default: Current Directory)"), download_ebook: str = typer.Option( - "", "--download-ebook", help="Download the ebook as well. Specify the format: epub (default), mobi, pdf or html"), + "", "--download-ebook", help="Download the ebook as well. Specify the format, comma separated if multiple: epub (default), mobi, pdf or html"), fetch_urls: str = typer.Option( "", help="Fetch all story urls found from a page. Currently supports archiveofourown.org only"), @@ -124,7 +124,7 @@ def metadata( if not download_ebook == "": format_type = get_format_type(download_ebook) else: - format_type = None + format_type = [] if input and not update_db: fic = FetchData(debug=debug, automated=automated, format_type=format_type, @@ -159,9 +159,7 @@ def metadata( if fic.exit_status == 1: typer.echo( Fore.RED + - "\nThe CLI ran into some errors! Check " + Style.RESET_ALL + - Fore.YELLOW + "err.log" + Style.RESET_ALL + Fore.RED + - " in the current directory for urls!" + Style.RESET_ALL) + "\nThe CLI ran into some errors! Check the console for the log messages!" + Style.RESET_ALL) if os.path.exists("output.log"): rm_output_log = typer.confirm( diff --git a/fichub_cli_metadata/utils/crud.py b/fichub_cli_metadata/utils/crud.py index a56b005..f7094d1 100644 --- a/fichub_cli_metadata/utils/crud.py +++ b/fichub_cli_metadata/utils/crud.py @@ -24,8 +24,9 @@ from platformdirs import PlatformDirs from . import models -from .processing import process_extraMeta, get_ins_query, sql_to_json +from .processing import get_ins_query, sql_to_json from .logging import db_not_found_log +from fichub_cli.utils.processing import process_extendedMeta app_dirs = PlatformDirs("fichub_cli", "fichub") @@ -58,7 +59,6 @@ def insert_data(db: Session, item: dict, debug: bool): def update_data(db: Session, item: dict, debug: bool): """ Execute update query for the db """ - try: with open(os.path.join(app_dirs.user_data_dir, "config.json"), 'r') as f: config = json.load(f) @@ -78,27 +78,29 @@ def update_data(db: Session, item: dict, debug: bool): tqdm.write(Fore.GREEN + "Adding metadata to the database.") else: - rated, language, genre, characters, reviews, favs, follows = process_extraMeta( - item['extraMeta']) db.query(models.Metadata).filter( models.Metadata.source == item['source']). \ update( { models.Metadata.fichub_id: item['id'], + models.Metadata.fic_id: process_extendedMeta(item,'id'), models.Metadata.title: item['title'], models.Metadata.author: item['author'], + models.Metadata.author_id: item['authorLocalId'], + models.Metadata.author_url: item['authorUrl'], models.Metadata.chapters: item['chapters'], models.Metadata.created: item['created'], models.Metadata.description: item['description'], - models.Metadata.rated: rated, - models.Metadata.language: language, - models.Metadata.genre: genre, - models.Metadata.characters: characters, - models.Metadata.reviews: reviews, - models.Metadata.favs: favs, - models.Metadata.follows: follows, + models.Metadata.rated: process_extendedMeta(item,'rated'), + models.Metadata.language: process_extendedMeta(item,'language'), + models.Metadata.genre: process_extendedMeta(item,'genres'), + models.Metadata.characters: process_extendedMeta(item,'characters'), + models.Metadata.reviews: process_extendedMeta(item,'reviews'), + models.Metadata.favorites: process_extendedMeta(item,'favorites'), + models.Metadata.follows: process_extendedMeta(item,'follows'), models.Metadata.status: item['status'], models.Metadata.words: item['words'], + models.Metadata.fandom: process_extendedMeta(item,'raw_fandom'), models.Metadata.fic_last_updated: datetime.strptime(item['updated'], r'%Y-%m-%dT%H:%M:%S').strftime( config['fic_up_time_format']), models.Metadata.db_last_updated: datetime.now().astimezone().strftime(config['db_up_time_format']), @@ -196,6 +198,59 @@ def add_db_last_updated_column(db: Session, db_backup, debug: bool): db.execute("DROP TABLE TempFichubMetadata;") db.commit() +def add_rawExtendedMeta_columns(db: Session, db_backup, debug: bool): + """ To add fic_id, author_id, author_url, fandom columns + """ + cols_list = ['fic_id','author_id','author_url','fandom'] + for col in cols_list: + col_exists = False + try: + db.execute(f"SELECT {col} from fichub_metadata;") + col_exists = True + except OperationalError as e: + if debug: + logger.error(e) + pass + if not col_exists: + tqdm.write( + Fore.GREEN + f"{col} column not found! Migrating the database.") + # backup the db before migrating the data + db_backup("pre.migration") + + if debug: + logger.info(f"Migration: adding {col} column") + tqdm.write(Fore.GREEN + f"Migration: adding {col} column") + + db.execute(f"ALTER TABLE fichub_metadata ADD {col} TEXT DEFAULT '';") + db.commit() + + + +def rename_favs_column(db: Session, db_backup, debug: bool): + """ To rename favs column to favorites + """ + + col_exists = False + try: + db.execute("SELECT favorites from fichub_metadata;") + col_exists = True + except OperationalError as e: + if debug: + logger.error(e) + pass + if not col_exists: + tqdm.write( + Fore.GREEN + "Database Schema changes detected! Migrating the database.") + # backup the db before migrating the data + db_backup("pre.migration") + + if debug: + logger.info("Migration: renaming favs column to favorites") + tqdm.write(Fore.GREEN + "Migration: renaming favs column to favorites") + + db.execute("ALTER TABLE fichub_metadata RENAME COLUMN favs TO favorites;") + db.commit() + def drop_TempFichubMetadata(db: Session): try: diff --git a/fichub_cli_metadata/utils/fetch_data.py b/fichub_cli_metadata/utils/fetch_data.py index e940ba7..7285faf 100644 --- a/fichub_cli_metadata/utils/fetch_data.py +++ b/fichub_cli_metadata/utils/fetch_data.py @@ -12,12 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from fichub_cli_metadata import __version__ as plugin_version -from fichub_cli.utils.processing import check_url, save_data, \ - urls_preprocessing, check_output_log, build_changelog -from fichub_cli.utils.logging import download_processing_log, verbose_log -from .processing import init_database, get_db, object_as_dict,\ - prompt_user_contact from . import models, crud import os import sys @@ -32,13 +26,21 @@ import re import requests from bs4 import BeautifulSoup +import traceback from sqlalchemy.exc import OperationalError from sqlalchemy.orm import Session -from .fichub import FicHub +from fichub_cli.utils.fichub import FicHub from .logging import meta_fetched_log, db_not_found_log +from fichub_cli_metadata import __version__ as plugin_version +from fichub_cli.utils.processing import check_url, save_data, \ + urls_preprocessing, build_changelog +from fichub_cli.utils.logging import download_processing_log, verbose_log +from .processing import init_database, get_db, object_as_dict,\ + prompt_user_contact + bar_format = "{l_bar}{bar}| {n_fmt}/{total_fmt}, {rate_fmt}{postfix}, ETA: {remaining}" console = Console() @@ -135,17 +137,15 @@ def save_metadata(self, input: str): try: # if --download-ebook flag used - if self.format_type is not None: + if self.format_type: self.exit_status, self.url_exit_status = save_data( - self.out_dir, fic.file_name, - fic.download_url, self.debug, self.force, - fic.cache_hash, self.exit_status, - self.automated) + self.out_dir, fic.files, self.debug, self.force, + self.exit_status, self.automated) # save the data to db - if fic.fic_metadata: + if fic.files["meta"]: meta_fetched_log(self.debug, url) - self.save_to_db(fic.fic_metadata) + self.save_to_db(fic.files["meta"]) with open("output.log", "a") as file: file.write(f"{url}\n") @@ -167,7 +167,9 @@ def save_metadata(self, input: str): pbar.update(1) # if fic doesnt exist or the data is not fetched by the API yet - except AttributeError: + except Exception as e: + if self.debug: + logger.error(str(traceback.format_exc())) with open("err.log", "a") as file: file.write(url.strip()+"\n") self.exit_status = 1 @@ -209,7 +211,7 @@ def save_to_db(self, item): models.Base.metadata.create_all(bind=self.engine) except OperationalError as e: if self.debug: - logger.info(Fore.RED + str(e)) + logger.error(Fore.RED + str(e)) db_not_found_log(self.debug, self.db_file) sys.exit(1) @@ -258,10 +260,11 @@ def update_metadata(self): urls_input.append(row_dict['source']) try: - urls = check_output_log(urls_input, self.debug) - + urls, _ = urls_preprocessing(urls_input, self.debug) # if output.log doesnt exist, when run 1st time - except FileNotFoundError: + except FileNotFoundError as e: + if self.debug: + logger.error(str(traceback.format_exc())) urls = urls_input downloaded_urls, no_updates_urls, err_urls = [], [], [] @@ -281,18 +284,16 @@ def update_metadata(self): try: # if --download-ebook flag used - if self.format_type is not None: + if self.format_type: self.exit_status, self.url_exit_status = save_data( - self.out_dir, fic.file_name, - fic.download_url, self.debug, self.force, - fic.cache_hash, self.exit_status, - self.automated) + self.out_dir, fic.files, self.debug, self.force, + self.exit_status, self.automated) # update the metadata - if fic.fic_metadata: + if fic.files["meta"]: meta_fetched_log(self.debug, url) self.exit_status, self.url_exit_status = crud.update_data( - self.db, fic.fic_metadata, self.debug) + self.db, fic.files["meta"], self.debug) with open("output.log", "a") as file: file.write(f"{url}\n") @@ -313,7 +314,9 @@ def update_metadata(self): pbar.update(1) # if fic doesnt exist or the data is not fetched by the API yet - except AttributeError: + except Exception as e: + if self.debug: + logger.error(str(traceback.format_exc())) with open("err.log", "a") as file: file.write(url+"\n") err_urls.append(url) @@ -377,7 +380,11 @@ def run_migrations(self): crud.add_fichub_id_column(self.db, self.db_backup, self.debug) crud.add_db_last_updated_column( self.db, self.db_backup, self.debug) - + crud.add_rawExtendedMeta_columns( + self.db, self.db_backup, self.debug) + crud.rename_favs_column( + self.db, self.db_backup, self.debug) + except OperationalError as e: if self.debug: logger.info(Fore.RED + str(e)) diff --git a/fichub_cli_metadata/utils/fichub.py b/fichub_cli_metadata/utils/fichub.py deleted file mode 100644 index 1126cde..0000000 --- a/fichub_cli_metadata/utils/fichub.py +++ /dev/null @@ -1,125 +0,0 @@ -# Copyright 2022 Arbaaz Laskar - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import requests -from requests.adapters import HTTPAdapter - -from colorama import Fore, Style -from tqdm import tqdm -from loguru import logger -import time -import re - -from fichub_cli.utils.fichub import retry_strategy, FicHub as Fichub_Base -from fichub_cli import __version__ as core_version -from fichub_cli_metadata import __version__ as plugin_version - -headers = { - 'User-Agent': f'fichub_cli_metadata/{plugin_version} (fichub_cli: {core_version})' -} - - -class FicHub(Fichub_Base): - def __init__(self, debug, automated, exit_status): - self.debug = debug - self.automated = automated - self.exit_status = exit_status - adapter = HTTPAdapter(max_retries=retry_strategy) - self.http = requests.Session() - self.http.mount("https://", adapter) - self.http.mount("http://", adapter) - - def get_fic_metadata(self, url: str, format_type: int = 0): - """ **OVERRIDING FUNCTION**\n - Sends GET request to Fichub API to fetch the metadata - """ - params = {'q': url} - if self.automated: # for internal testing - params['automated'] = 'true' - if self.debug: - logger.debug( - "--automated flag was passed. Internal Testing mode is on.") - - for _ in range(2): - try: - response = self.http.get( - "https://fichub.net/api/v0/epub", params=params, - allow_redirects=True, headers=headers, timeout=(6.1, 300) - ) - if self.debug: - logger.debug( - f"GET: {response.status_code}: {response.url}") - if self.automated: - logger.debug( - f"Headers: {response.request.headers}") - break - except (ConnectionError, TimeoutError, Exception) as e: - if self.debug: - logger.error(str(e)) - tqdm.write("\n" + Fore.RED + str(e) + Style.RESET_ALL + - Fore.GREEN + "\nWill retry in 3s!" + - Style.RESET_ALL) - time.sleep(3) - - try: - self.response = response.json() - self.fic_metadata = self.response['meta'] - if format_type == 0: - cache_url = self.response['epub_url'] - self.cache_hash = ( - re.search(r"\?h=(.*)", self.response['epub_url'])).group(1) - self.file_format = ".epub" - - elif format_type == 1: - cache_url = self.response['mobi_url'] - self.cache_hash = ( - re.search(r"\?h=(.*)", self.response['epub_url'])).group(1) - self.file_format = ".mobi" - - elif format_type == 2: - cache_url = self.response['pdf_url'] - self.cache_hash = ( - re.search(r"\?h=(.*)", self.response['epub_url'])).group(1) - self.file_format = ".pdf" - - elif format_type == 3: - cache_url = self.response['html_url'] - self.cache_hash = ( - re.search(r"\?h=(.*)", self.response['epub_url'])).group(1) - self.file_format = ".zip" - - else: # break the function if format_type is None - return - - self.file_name = self.response['epub_url'].split( - "/")[4].split("?")[0] - self.file_name = self.file_name.replace(".epub", self.file_format) - self.download_url = "https://fichub.net"+cache_url - - # Error: 'epub_url' - # Reason: Unsupported URL - except (KeyError, UnboundLocalError) as e: - if self.debug: - logger.error(str(e)) - - self.exit_status = 1 - if self.debug: - logger.error( - f"Skipping unsupported URL: {url}") - tqdm.write( - Fore.RED + f"\nSkipping unsupported URL: {url}" + - Style.RESET_ALL + Fore.CYAN + - "\nTo see the supported site list, use " + Fore.YELLOW + - "fichub_cli -ss" + Style.RESET_ALL + Fore.CYAN + - "\nReport the error if the URL is supported!\n") diff --git a/fichub_cli_metadata/utils/models.py b/fichub_cli_metadata/utils/models.py index 8a0e1e6..a32fab9 100644 --- a/fichub_cli_metadata/utils/models.py +++ b/fichub_cli_metadata/utils/models.py @@ -22,9 +22,12 @@ class Metadata(Base): __tablename__ = "fichub_metadata" id = Column(Integer, primary_key=True) + fic_id = Column(Integer) fichub_id = Column(String) title = Column(String) author = Column(String, index=True) + author_id = Column(Integer) + author_url = Column(String) chapters = Column(Integer) created = Column(String) description = Column(String) @@ -33,10 +36,11 @@ class Metadata(Base): genre = Column(String) characters = Column(String) reviews = Column(Integer) - favs = Column(Integer) + favorites = Column(Integer) follows = Column(Integer) status = Column(String) words = Column(Integer) + fandom = Column(String) fic_last_updated = Column(String) db_last_updated = Column(String) source = Column(String) diff --git a/fichub_cli_metadata/utils/processing.py b/fichub_cli_metadata/utils/processing.py index 86c27c3..9dba30d 100644 --- a/fichub_cli_metadata/utils/processing.py +++ b/fichub_cli_metadata/utils/processing.py @@ -21,6 +21,7 @@ from sqlalchemy import create_engine, inspect from sqlalchemy.orm import sessionmaker from platformdirs import PlatformDirs +from fichub_cli.utils.processing import process_extendedMeta from . import models app_dirs = PlatformDirs("fichub_cli", "fichub") @@ -44,68 +45,6 @@ def get_db(SessionLocal): db.close() -def process_extraMeta(extraMeta: str): - """ Process the extraMetadata string and return - fields like language, genre etc - """ - try: - extraMeta = extraMeta.split(' - ') - except AttributeError: - tqdm.write(Fore.RED + - "'extraMetadata' key not found in the API response. Adding Null for missing fields.") - extraMeta = [''] - pass - - for x in extraMeta: - if x.strip().startswith("Rated:"): - rated = x.replace('Rated:', '').strip() - break - else: - rated = None - - for x in extraMeta: - if x.strip().startswith("Language:"): - language = x.replace('Language:', '').strip() - break - else: - language = None - - for x in extraMeta: - if x.strip().startswith("Genre:"): - genre = x.replace('Genre:', '').strip() - break - else: - genre = None - for x in extraMeta: - if x.strip().startswith("Characters:"): - characters = x.replace('Characters:', '').strip() - break - else: - characters = None - for x in extraMeta: - if x.strip().startswith("Reviews:"): - reviews = x.replace('Reviews:', '').strip() - break - else: - reviews = None - - for x in extraMeta: - if x.strip().startswith("Favs:"): - favs = x.replace('Favs:', '').strip() - break - else: - favs = None - - for x in extraMeta: - if x.strip().startswith("Follows:"): - follows = x.replace('Follows:', '').strip() - break - else: - follows = None - - return rated, language, genre, characters, reviews, favs, follows - - def get_ins_query(item: dict): """ Return the insert query for the db model """ @@ -118,25 +57,26 @@ def get_ins_query(item: dict): Fore.GREEN + "Run `fichub_cli --config-init` to initialize the CLI config") exit(1) - rated, language, genre, characters, reviews, favs, follows = process_extraMeta( - item['extraMeta']) - query = models.Metadata( fichub_id=item['id'], + fic_id = process_extendedMeta(item,'id'), title=item['title'], author=item['author'], + author_id=item['authorLocalId'], + author_url=item['authorUrl'], chapters=item['chapters'], created=item['created'], description=item['description'], - rated=rated, - language=language, - genre=genre, - characters=characters, - reviews=reviews, - favs=favs, - follows=follows, + rated=process_extendedMeta(item,'rated'), + language=process_extendedMeta(item,'language'), + genre=process_extendedMeta(item,'genres'), + characters=process_extendedMeta(item,'characters'), + reviews=process_extendedMeta(item,'reviews'), + favorites=process_extendedMeta(item,'favorites'), + follows=process_extendedMeta(item,'follows'), status=item['status'], words=item['words'], + fandom=process_extendedMeta(item,'raw_fandom'), fic_last_updated=datetime.strptime(item['updated'], r'%Y-%m-%dT%H:%M:%S').strftime( config['fic_up_time_format']), db_last_updated=datetime.now().astimezone().strftime( diff --git a/setup.py b/setup.py index 4af08ca..d3fb086 100644 --- a/setup.py +++ b/setup.py @@ -10,21 +10,22 @@ description="A metadata plugin for fetching Metadata from the Fichub API for the fichub-cli", long_description=long_description, long_description_content_type="text/markdown", - version='0.5.1', + version='0.6.0', license='Apache License', url="https://github.com/fichub-cli-contrib/fichub-cli-metadata", packages=find_packages( include=['fichub_cli_metadata', 'fichub_cli_metadata.*']), include_package_data=True, install_requires=[ - 'fichub-cli>=0.7.0', + 'fichub-cli>=0.8.2', 'rich>=10.4.0', 'sqlalchemy>=1.4.31' ], - entry_points=''' - [console_scripts] - fichub_cli_metadata=fichub_cli_metadata.cli:app - ''', + entry_points= { + 'console_scripts': [ + 'fichub_cli_metadata=fichub_cli_metadata.cli:app' + ] + }, classifiers=[ "Programming Language :: Python :: 3", "License :: OSI Approved :: Apache Software License", diff --git a/tests/test_cli.py b/tests/test_cli.py index c6acf30..3a1f2dc 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -22,7 +22,7 @@ def test_cli_url_input(tmpdir): runner = CliRunner() with runner.isolated_filesystem(): result = runner.invoke(app, [ - '-ai', 'https://www.fanfiction.net/s/12933896/1/Things-you-cannot-leave-behind', '-d']) + '-ai', 'https://www.fanfiction.net/s/11783284/1/Doppelgängland', '-d']) assert not result.exception assert result.exit_code == 0 @@ -35,4 +35,4 @@ def test_cli_version(): assert not result.exception assert result.exit_code == 0 - assert result.output.strip() == 'fichub-cli-metadata: v0.5.1' + assert result.output.strip() == 'fichub-cli-metadata: v0.6.0'