Skip to content

Commit

Permalink
Multiple ebook formats & ebook_name config (#5)
Browse files Browse the repository at this point in the history
* Bump version: 0.5.1 → 0.5.2

Added the feature to download multiple ebook formats simultaneously

* fix: Error when --download-ebook flag is false

* Bump version: 0.5.2 → 0.5.3

Added fic_id, author_id, author_url & fandom columns and renamed favs column to favorites

* cleanup: dead code

* fix: URL encoding issue

* fix: File hash issue

Refactored the code in `fichub.py` to save the hash & download_url
for the fic correctly

* refactor: Removed fichub.py override

Moved the `response["meta"]` code to the core CLI so the override
is not needed anymore.
The plugin now depends on fichub-cli>=0.8.2

* fix: KeyError 'meta' for fics which do not exist

Changed the exception handler from catching only `AttributeError`
to catching all exceptions

* refactor: Added traceback to the error msgs

* Bump version: 0.5.3 → 0.5.4

Added priority-based processing for rawExtendedMeta & extraMeta;
the sources are tried in this order:
rawExtendedMeta >> extraMeta >> None

* fix: Import error

* fix: genres keyname

* Bump version: 0.5.4 → 0.6.0
  • Loading branch information
arzkar authored Jan 28, 2023
1 parent 8aa7ca3 commit e427fdb
Show file tree
Hide file tree
Showing 12 changed files with 136 additions and 253 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.5.1
current_version = 0.6.0
commit = True
tag = False
parse = ^
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ __pycache__/

# ebooks
*.epub
*.mobi
*.pdf

# C extensions
*.so
Expand Down Expand Up @@ -143,5 +145,6 @@ dmypy.json

# project-extras
*.txt
*.lst
*.sqlite
*.json
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ fichub_cli metadata --input-db "urls - 2022-01-29 T000558.sqlite" --export-db
- To download the ebook along with the metadata

```
fichub_cli metadata -i urls.txt --download-ebook epub
fichub_cli metadata -i urls.txt --download-ebook epub,mobi
```

- To get all story urls found from a page. Currently supports archiveofourown.org only.
Expand Down
2 changes: 1 addition & 1 deletion fichub_cli_metadata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@
# limitations under the License.

# __version__ at the top to prevent ImportError: ... partially initialized module ...
__version__ = "0.5.1"
__version__ = "0.6.0"

from .cli import app # entry_point
8 changes: 3 additions & 5 deletions fichub_cli_metadata/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def metadata(
"", "-o", "--out-dir", help="Path to the Output directory (default: Current Directory)"),

download_ebook: str = typer.Option(
"", "--download-ebook", help="Download the ebook as well. Specify the format: epub (default), mobi, pdf or html"),
"", "--download-ebook", help="Download the ebook as well. Specify the format, comma separated if multiple: epub (default), mobi, pdf or html"),

fetch_urls: str = typer.Option(
"", help="Fetch all story urls found from a page. Currently supports archiveofourown.org only"),
Expand Down Expand Up @@ -124,7 +124,7 @@ def metadata(
if not download_ebook == "":
format_type = get_format_type(download_ebook)
else:
format_type = None
format_type = []

if input and not update_db:
fic = FetchData(debug=debug, automated=automated, format_type=format_type,
Expand Down Expand Up @@ -159,9 +159,7 @@ def metadata(
if fic.exit_status == 1:
typer.echo(
Fore.RED +
"\nThe CLI ran into some errors! Check " + Style.RESET_ALL +
Fore.YELLOW + "err.log" + Style.RESET_ALL + Fore.RED +
" in the current directory for urls!" + Style.RESET_ALL)
"\nThe CLI ran into some errors! Check the console for the log messages!" + Style.RESET_ALL)

if os.path.exists("output.log"):
rm_output_log = typer.confirm(
Expand Down
77 changes: 66 additions & 11 deletions fichub_cli_metadata/utils/crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@
from platformdirs import PlatformDirs

from . import models
from .processing import process_extraMeta, get_ins_query, sql_to_json
from .processing import get_ins_query, sql_to_json
from .logging import db_not_found_log
from fichub_cli.utils.processing import process_extendedMeta

app_dirs = PlatformDirs("fichub_cli", "fichub")

Expand Down Expand Up @@ -58,7 +59,6 @@ def insert_data(db: Session, item: dict, debug: bool):
def update_data(db: Session, item: dict, debug: bool):
""" Execute update query for the db
"""

try:
with open(os.path.join(app_dirs.user_data_dir, "config.json"), 'r') as f:
config = json.load(f)
Expand All @@ -78,27 +78,29 @@ def update_data(db: Session, item: dict, debug: bool):
tqdm.write(Fore.GREEN +
"Adding metadata to the database.")
else:
rated, language, genre, characters, reviews, favs, follows = process_extraMeta(
item['extraMeta'])
db.query(models.Metadata).filter(
models.Metadata.source == item['source']). \
update(
{
models.Metadata.fichub_id: item['id'],
models.Metadata.fic_id: process_extendedMeta(item,'id'),
models.Metadata.title: item['title'],
models.Metadata.author: item['author'],
models.Metadata.author_id: item['authorLocalId'],
models.Metadata.author_url: item['authorUrl'],
models.Metadata.chapters: item['chapters'],
models.Metadata.created: item['created'],
models.Metadata.description: item['description'],
models.Metadata.rated: rated,
models.Metadata.language: language,
models.Metadata.genre: genre,
models.Metadata.characters: characters,
models.Metadata.reviews: reviews,
models.Metadata.favs: favs,
models.Metadata.follows: follows,
models.Metadata.rated: process_extendedMeta(item,'rated'),
models.Metadata.language: process_extendedMeta(item,'language'),
models.Metadata.genre: process_extendedMeta(item,'genres'),
models.Metadata.characters: process_extendedMeta(item,'characters'),
models.Metadata.reviews: process_extendedMeta(item,'reviews'),
models.Metadata.favorites: process_extendedMeta(item,'favorites'),
models.Metadata.follows: process_extendedMeta(item,'follows'),
models.Metadata.status: item['status'],
models.Metadata.words: item['words'],
models.Metadata.fandom: process_extendedMeta(item,'raw_fandom'),
models.Metadata.fic_last_updated: datetime.strptime(item['updated'], r'%Y-%m-%dT%H:%M:%S').strftime(
config['fic_up_time_format']),
models.Metadata.db_last_updated: datetime.now().astimezone().strftime(config['db_up_time_format']),
Expand Down Expand Up @@ -196,6 +198,59 @@ def add_db_last_updated_column(db: Session, db_backup, debug: bool):
db.execute("DROP TABLE TempFichubMetadata;")
db.commit()

def add_rawExtendedMeta_columns(db: Session, db_backup, debug: bool):
    """ Migration: make sure the fic_id, author_id, author_url & fandom
    columns are present in the fichub_metadata table.

    Every column is probed with a SELECT. An OperationalError from the
    probe is treated as "column missing": db_backup("pre.migration") is
    called and the column is then added as TEXT with an empty-string
    default, followed by a commit.
    """
    for column in ('fic_id', 'author_id', 'author_url', 'fandom'):
        try:
            db.execute(f"SELECT {column} from fichub_metadata;")
        except OperationalError as err:
            # the probe failed, fall through to the migration below
            if debug:
                logger.error(err)
        else:
            # the column is already there, nothing to migrate
            continue

        tqdm.write(
            Fore.GREEN + f"{column} column not found! Migrating the database.")

        # take the backup before the schema is touched
        db_backup("pre.migration")

        migration_msg = f"Migration: adding {column} column"
        if debug:
            logger.info(migration_msg)
        tqdm.write(Fore.GREEN + migration_msg)

        db.execute(
            f"ALTER TABLE fichub_metadata ADD {column} TEXT DEFAULT '';")
        db.commit()



def rename_favs_column(db: Session, db_backup, debug: bool):
    """ Migration: rename the favs column of the fichub_metadata table
    to favorites.

    The favorites column is probed with a SELECT. If the probe succeeds
    there is nothing to do. An OperationalError from the probe is treated
    as "not renamed yet": db_backup("pre.migration") is called, the
    column is renamed and the change is committed.
    """
    try:
        db.execute("SELECT favorites from fichub_metadata;")
    except OperationalError as err:
        # the probe failed, fall through to the migration below
        if debug:
            logger.error(err)
    else:
        # favorites already exists, no migration needed
        return

    tqdm.write(
        Fore.GREEN + "Database Schema changes detected! Migrating the database.")

    # take the backup before the schema is touched
    db_backup("pre.migration")

    migration_msg = "Migration: renaming favs column to favorites"
    if debug:
        logger.info(migration_msg)
    tqdm.write(Fore.GREEN + migration_msg)

    db.execute("ALTER TABLE fichub_metadata RENAME COLUMN favs TO favorites;")
    db.commit()


def drop_TempFichubMetadata(db: Session):
try:
Expand Down
63 changes: 35 additions & 28 deletions fichub_cli_metadata/utils/fetch_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from fichub_cli_metadata import __version__ as plugin_version
from fichub_cli.utils.processing import check_url, save_data, \
urls_preprocessing, check_output_log, build_changelog
from fichub_cli.utils.logging import download_processing_log, verbose_log
from .processing import init_database, get_db, object_as_dict,\
prompt_user_contact
from . import models, crud
import os
import sys
Expand All @@ -32,13 +26,21 @@
import re
import requests
from bs4 import BeautifulSoup
import traceback

from sqlalchemy.exc import OperationalError
from sqlalchemy.orm import Session

from .fichub import FicHub
from fichub_cli.utils.fichub import FicHub
from .logging import meta_fetched_log, db_not_found_log

from fichub_cli_metadata import __version__ as plugin_version
from fichub_cli.utils.processing import check_url, save_data, \
urls_preprocessing, build_changelog
from fichub_cli.utils.logging import download_processing_log, verbose_log
from .processing import init_database, get_db, object_as_dict,\
prompt_user_contact


bar_format = "{l_bar}{bar}| {n_fmt}/{total_fmt}, {rate_fmt}{postfix}, ETA: {remaining}"
console = Console()
Expand Down Expand Up @@ -135,17 +137,15 @@ def save_metadata(self, input: str):

try:
# if --download-ebook flag used
if self.format_type is not None:
if self.format_type:
self.exit_status, self.url_exit_status = save_data(
self.out_dir, fic.file_name,
fic.download_url, self.debug, self.force,
fic.cache_hash, self.exit_status,
self.automated)
self.out_dir, fic.files, self.debug, self.force,
self.exit_status, self.automated)

# save the data to db
if fic.fic_metadata:
if fic.files["meta"]:
meta_fetched_log(self.debug, url)
self.save_to_db(fic.fic_metadata)
self.save_to_db(fic.files["meta"])

with open("output.log", "a") as file:
file.write(f"{url}\n")
Expand All @@ -167,7 +167,9 @@ def save_metadata(self, input: str):
pbar.update(1)

# if fic doesnt exist or the data is not fetched by the API yet
except AttributeError:
except Exception as e:
if self.debug:
logger.error(str(traceback.format_exc()))
with open("err.log", "a") as file:
file.write(url.strip()+"\n")
self.exit_status = 1
Expand Down Expand Up @@ -209,7 +211,7 @@ def save_to_db(self, item):
models.Base.metadata.create_all(bind=self.engine)
except OperationalError as e:
if self.debug:
logger.info(Fore.RED + str(e))
logger.error(Fore.RED + str(e))
db_not_found_log(self.debug, self.db_file)
sys.exit(1)

Expand Down Expand Up @@ -258,10 +260,11 @@ def update_metadata(self):
urls_input.append(row_dict['source'])

try:
urls = check_output_log(urls_input, self.debug)

urls, _ = urls_preprocessing(urls_input, self.debug)
# if output.log doesnt exist, when run 1st time
except FileNotFoundError:
except FileNotFoundError as e:
if self.debug:
logger.error(str(traceback.format_exc()))
urls = urls_input

downloaded_urls, no_updates_urls, err_urls = [], [], []
Expand All @@ -281,18 +284,16 @@ def update_metadata(self):

try:
# if --download-ebook flag used
if self.format_type is not None:
if self.format_type:
self.exit_status, self.url_exit_status = save_data(
self.out_dir, fic.file_name,
fic.download_url, self.debug, self.force,
fic.cache_hash, self.exit_status,
self.automated)
self.out_dir, fic.files, self.debug, self.force,
self.exit_status, self.automated)

# update the metadata
if fic.fic_metadata:
if fic.files["meta"]:
meta_fetched_log(self.debug, url)
self.exit_status, self.url_exit_status = crud.update_data(
self.db, fic.fic_metadata, self.debug)
self.db, fic.files["meta"], self.debug)

with open("output.log", "a") as file:
file.write(f"{url}\n")
Expand All @@ -313,7 +314,9 @@ def update_metadata(self):
pbar.update(1)

# if fic doesnt exist or the data is not fetched by the API yet
except AttributeError:
except Exception as e:
if self.debug:
logger.error(str(traceback.format_exc()))
with open("err.log", "a") as file:
file.write(url+"\n")
err_urls.append(url)
Expand Down Expand Up @@ -377,7 +380,11 @@ def run_migrations(self):
crud.add_fichub_id_column(self.db, self.db_backup, self.debug)
crud.add_db_last_updated_column(
self.db, self.db_backup, self.debug)

crud.add_rawExtendedMeta_columns(
self.db, self.db_backup, self.debug)
crud.rename_favs_column(
self.db, self.db_backup, self.debug)

except OperationalError as e:
if self.debug:
logger.info(Fore.RED + str(e))
Expand Down
Loading

0 comments on commit e427fdb

Please sign in to comment.