Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Various file and exception handling changes #34

Merged
merged 2 commits into from
Dec 3, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -104,4 +104,11 @@ ENV/
test.py

*.idea
*.db
*.db

#Vscode
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
3 changes: 3 additions & 0 deletions config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"default_temporary_chapter_cleanup_behavior": true
}
11 changes: 7 additions & 4 deletions getify.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def clean(file_name_in, file_name_out, start):
for a in content.find_all("a"):
a.decompose()
raw.close()
file = open(file_name_out + ".xhtml", "w", encoding = "utf8")
file = open(file_name_out, "w", encoding = "utf8")
file.write('<html xmlns="http://www.w3.org/1999/xhtml">')
file.write("\n<head>")
file.write("\n<title>" + chapter_title + "</title>")
Expand Down Expand Up @@ -119,7 +119,7 @@ def cover_generator(src, starting, ending):
Sidenote: Will take a lot of time."""


def generate(html_files, novelname, author, chaptername, chapter_s, chapter_e):
def generate(html_files, novelname, author, chaptername, chapter_s, chapter_e, cleanup=True):
epub = zipfile.ZipFile(novelname + "_" + chapter_s + "-" + chapter_e + ".epub", "w")

# The first file must be named "mimetype"
Expand Down Expand Up @@ -212,5 +212,8 @@ def generate(html_files, novelname, author, chaptername, chapter_s, chapter_e):


#removes all the temporary files
for x in html_files:
os.remove(x)
if cleanup:
print("Cleaning up...")
for html_file in os.listdir(novelname):
os.remove(os.path.join(novelname, html_file))
os.rmdir(novelname)
120 changes: 74 additions & 46 deletions wuxiaworld2ebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@
import tkinter as tk
from tkinter import ttk
from urllib.error import HTTPError, URLError
import os
import json
from os import path

#Initializing Stuff

main = tk.Tk()
main.title("Wuxiaworld-2-eBook")
main.geometry("375x150")
main.geometry("535x190")
main.resizable(True, True)
app = tk.Frame(main)
app.grid()
Expand All @@ -26,54 +29,66 @@ def on_field_change(index, value, op):
pass

def button_press():
#Getting Information
generate_button.configure(state = "disabled")
s_chapter = starting_chapter.get()
reset = str(s_chapter)
e_chapter = ending_chapter.get()
booknr = book_number.get()
name = novel.get()

#Getting relevant novel Information
raw_info = []
for i in db:
if name in i[0]:
raw_info.append(i)
raw_info = raw_info[0]
if raw_info[5] == 0:
link = raw_info[1]
else:
link = raw_info[1] + str(booknr) + "-chapter-"

#Generating list with download links
bulk_list = []
for s_chapter in range(s_chapter, e_chapter + 1):
bulk_list.append(link + str(s_chapter))
s_chapter = reset

getify.cover_generator(raw_info[4], s_chapter, str(e_chapter))

#Calls function's for downloading, cleanup and managing
#a list of file name's for cleanup, ToC and packing
y = int(s_chapter)
file_list = []
for x in range(len(bulk_list)):
try:
getify.download(bulk_list[x], str(s_chapter) + ".xhtml")
except HTTPError as e:
# Return code error (e.g. 404, 501, ...)
print('URL: {}, HTTPError: {} - {}'.format(bulk_list[x], e.code, e.reason))
except URLError as e:
# Not an HTTP-specific error (e.g. connection refused)
print('URL: {}, URLError: {}'.format(bulk_list[x], e.reason))
try:
#Getting Information
generate_button.configure(state = "disabled")
s_chapter = starting_chapter.get()
reset = str(s_chapter)
e_chapter = ending_chapter.get()
cleanup = delete_chapters.get()
booknr = book_number.get()
name = novel.get()

#Getting relevant novel Information
raw_info = []
for i in db:
if name in i[0]:
raw_info.append(i)
raw_info = raw_info[0]
if raw_info[5] == 0:
link = raw_info[1]
else:
getify.clean(str(s_chapter) + ".xhtml", raw_info[2] + str(s_chapter), name)
file_list.append(raw_info[2] + str(s_chapter) + ".xhtml")
link = raw_info[1] + str(booknr) + "-chapter-"

#Generating list with download links
bulk_list = []
for s_chapter in range(s_chapter, e_chapter + 1):
bulk_list.append(link + str(s_chapter))
s_chapter = reset

getify.cover_generator(raw_info[4], s_chapter, str(e_chapter))

if not path.exists(raw_info[0]):
os.makedirs(raw_info[0])

#Calls function's for downloading, cleanup and managing
#a list of file name's for cleanup, ToC and packing
y = int(s_chapter)
file_list = []
for x in range(len(bulk_list)):
if path.exists(_get_xhtml_path(raw_info, s_chapter)):
print(_get_xhtml_path(raw_info, s_chapter), " already exists")
else:
try:
getify.download(bulk_list[x], str(s_chapter) + ".xhtml")
except HTTPError as e:
# Return code error (e.g. 404, 501, ...)
print('URL: {}, HTTPError: {} - {}'.format(bulk_list[x], e.code, e.reason))
except URLError as e:
# Not an HTTP-specific error (e.g. connection refused)
print('URL: {}, URLError: {}'.format(bulk_list[x], e.reason))
else:
getify.clean(str(s_chapter) + ".xhtml", _get_xhtml_path(raw_info, s_chapter), name)
file_list.append(_get_xhtml_path(raw_info, s_chapter))
s_chapter = int(s_chapter) + 1

getify.generate(file_list, raw_info[0], raw_info[3], raw_info[2], reset, str(e_chapter))
generate_button.configure(state = "enabled")
getify.generate(file_list, raw_info[0], raw_info[3], raw_info[2], reset, str(e_chapter), cleanup=cleanup)

finally:
generate_button.configure(state = "enabled")

# Build the path of the cleaned chapter file for a single chapter.
# The result is raw_info[0] (the directory button_press creates before
# downloading) joined with a file name made of raw_info[2], the chapter
# number converted to a string, and the extension (".xhtml" by default).
# NOTE(review): raw_info looks like one row of the 'Information' table;
# the column meanings are not visible here -- confirm against the schema.
def _get_xhtml_path(raw_info, s_chapter, extension=".xhtml"):
return path.join(raw_info[0], raw_info[2] + str(s_chapter) + extension)

#Getting information from Database
c.execute("SELECT * FROM 'Information'")
Expand All @@ -86,6 +101,11 @@ def button_press():
hasbook.append(i[0])
namelist.sort()

#Load config
config_file = open("config.json", "r")
app_config = json.loads(config_file.read())
config_file.close()

#Code for the Combobox and the label
label1 = ttk.Label(app, text = "Select Novel:")
label1.grid(column = 0, row = 0, pady = 10, sticky = "W")
Expand Down Expand Up @@ -117,9 +137,17 @@ def button_press():
ending_chapter_chosen = ttk.Entry(app, width = 5, textvariable = ending_chapter)
ending_chapter_chosen.grid(column = 1, row = 3, sticky = "W")

#Code for delete chapters
label5 = ttk.Label(app, text = "Delete temporary chapter files after download: ")
label5.grid(column = 0, row = 4, pady = 10, sticky = "W")
delete_chapters = tk.BooleanVar()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you change the default value of delete_chapters to True, so the default behaviour still deletes the temporary files?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not a fan of having all the chapters deleted by default, but it is indeed a breaking change, so I added a config.json file with a boolean that is read when the program launches, so the user can configure it at will.

The main reasons I'm not a fan of deletion by default are: 1, it stresses the wuxia servers if a user who created a book from chapter 10 to 879 has to start over to get 1-879, and 2, it can take a while to get all the chapters. I think it took 20 minutes for me to get all the currently released Overgeared chapters.

This ties into the fact that I think it could be cool to have the option to split by "volume", which would let the user choose to split the book into parts of, say, 300 chapters each. Allowing them to keep the temp chapters would let them "complete" a volume once more chapters come out.

Of course, by keeping a cache of chapters you lose the chance of getting a "revised" chapter with a typo correction and whatnot, but such revisions are probably so infrequent that people won't care.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, I think I never downloaded a chapter twice except for testing. I just assumed that this would only be practical for a few people and would clutter up the hard drive for the rest.

But the config file should be the best solution.

delete_chapters.set(app_config["default_temporary_chapter_cleanup_behavior"])
delete_chapters_chosen = ttk.Checkbutton(app, variable = delete_chapters, offvalue=False, onvalue=True)
delete_chapters_chosen.grid(column =1, row = 4, sticky = "W")

#Code for "Generate" button
generate_button = ttk.Button(app, text = "Generate", command = button_press)
generate_button.grid(column = 1, row = 4, sticky = "E")
generate_button.grid(column = 1, row = 5, sticky = "E")



Expand Down