-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
15 changed files
with
537 additions
and
0 deletions.
There are no files selected for viewing
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
import os | ||
import html | ||
import pickle | ||
import numpy as np | ||
import xml.etree.cElementTree as ElementTree | ||
|
||
""" | ||
- all points: | ||
>> [[x1, y1, e1], ..., [xn, yn, en]] | ||
- indexed values | ||
>> [h1, ... hn] | ||
""" | ||
|
||
|
||
def distance(p1, p2, axis=None):
    """Return the Euclidean distance between point arrays ``p1`` and ``p2``.

    With ``axis=None`` the difference is reduced over every element;
    pass ``axis=1`` to get one distance per row of stacked points.
    """
    squared_diff = np.square(p1 - p2)
    return np.sqrt(squared_diff.sum(axis=axis))
|
||
|
||
def clear_middle(pts, max_dist=1500):
    """Drop interior points that are implausibly far from both neighbours.

    Parameters
    ----------
    pts : np.ndarray of shape (n, 3)
        Rows of ``[x, y, end_of_stroke]`` points.
    max_dist : float, optional
        A point is removed when the sum of its distances to the previous
        and next point (in the *original* sequence) exceeds this value.
        Default 1500 preserves the previously hard-coded threshold.

    Returns
    -------
    np.ndarray
        A new array with the outlier rows removed; the first and last
        points are always kept.
    """
    to_remove = set()
    for i in range(1, len(pts) - 1):
        p1, p2, p3 = pts[i - 1: i + 2, :2]
        # Distance from the previous point plus distance to the next one;
        # an outlier sits far from both of its original neighbours.
        total = np.linalg.norm(p2 - p1) + np.linalg.norm(p3 - p2)
        if total > max_dist:
            to_remove.add(i)
    return np.array([p for i, p in enumerate(pts) if i not in to_remove])
|
||
|
||
def separate(pts, gap=600):
    """Split a point sequence wherever consecutive points are far apart.

    Parameters
    ----------
    pts : np.ndarray of shape (n, 3)
        Rows of ``[x, y, end_of_stroke]`` points.
    gap : float, optional
        A split is made between points ``i`` and ``i+1`` when their
        distance (over all three columns, matching the original code)
        exceeds this value. Default 600 preserves the previously
        hard-coded threshold.

    Returns
    -------
    list of np.ndarray
        Consecutive segments of ``pts``; their concatenation is ``pts``.
    """
    seps = [i + 1 for i in range(len(pts) - 1)
            if np.linalg.norm(pts[i] - pts[i + 1]) > gap]
    return [pts[b:e] for b, e in zip([0] + seps, seps + [len(pts)])]
|
||
|
||
def main():
    """Build a handwriting dataset from IAM-OnDB-style XML stroke files.

    Walks the current directory for ``*.xml`` files, pairs each file's
    transcription lines with its pen strokes, and writes:

    - ``data/dataset.npy``     -- one float32 ``[x, y, eos]`` array per text line
    - ``data/labels.npy``      -- per-line character-index sequences
    - ``data/translation.pkl`` -- char -> int mapping (0 reserved for ``<NULL>``)
    """
    data = []
    charset = set()

    file_no = 0
    for root, dirs, files in os.walk('.'):
        for file in files:
            file_name, extension = os.path.splitext(file)
            if extension == '.xml':
                file_no += 1
                print('[{:5d}] File {} -- '.format(file_no, os.path.join(root, file)), end='')
                # NOTE(review): relies on the file-level
                # ``xml.etree.cElementTree`` import, which was removed in
                # Python 3.9 -- switch the import to ``xml.etree.ElementTree``.
                xml = ElementTree.parse(os.path.join(root, file)).getroot()
                transcription = xml.findall('Transcription')
                if not transcription:
                    # No ground-truth text in this file; nothing to pair.
                    print('skipped')
                    continue
                texts = [html.unescape(s.get('text')) for s in transcription[0].findall('TextLine')]
                points = [s.findall('Point') for s in xml.findall('StrokeSet')[0].findall('Stroke')]
                strokes = []
                mid_points = []
                for ps in points:
                    pts = np.array([[int(p.get('x')), int(p.get('y')), 0] for p in ps])
                    pts[-1, 2] = 1  # mark end-of-stroke on the last point

                    pts = clear_middle(pts)  # drop outlier points
                    if len(pts) == 0:
                        continue

                    seps = separate(pts)  # split where the pen jumped
                    for pss in seps:
                        if len(seps) > 1 and len(pss) == 1:
                            continue  # discard stray single-point fragments
                        pss[-1, 2] = 1  # each fragment ends a stroke

                        # Bounding box -> midpoint of this stroke fragment.
                        xmax, ymax = max(pss, key=lambda x: x[0])[0], max(pss, key=lambda x: x[1])[1]
                        xmin, ymin = min(pss, key=lambda x: x[0])[0], min(pss, key=lambda x: x[1])[1]

                        strokes += [pss]
                        mid_points += [[(xmax + xmin) / 2., (ymax + ymin) / 2.]]
                # Split the stroke sequence into len(texts) text lines at the
                # largest Manhattan gaps between consecutive stroke midpoints
                # (negated so argsort puts the biggest gaps first).
                distances = [-(abs(p1[0] - p2[0]) + abs(p1[1] - p2[1]))
                             for p1, p2 in zip(mid_points, mid_points[1:])]
                splits = sorted(np.argsort(distances)[:len(texts) - 1] + 1)
                lines = []
                for b, e in zip([0] + splits, splits + [len(strokes)]):
                    lines += [[p for pts in strokes[b:e] for p in pts]]
                print('lines = {:4d}; texts = {:4d}'.format(len(lines), len(texts)))
                charset |= set(''.join(texts))
                data += [(texts, lines)]
    print('data = {}; charset = ({}) {}'.format(len(data), len(charset), ''.join(sorted(charset))))

    # Character -> index table; index 0 is reserved for the <NULL> token.
    translation = {'<NULL>': 0}
    for c in ''.join(sorted(charset)):
        translation[c] = len(translation)

    def translate(txt):
        # Map every character of txt to its integer index.
        return [translation[ch] for ch in txt]

    dataset = []
    labels = []
    for texts, lines in data:
        for text, line in zip(texts, lines):
            line = np.array(line, dtype=np.float32)
            # Anchor x at 0 and centre y around its mean, per line.
            line[:, 0] = line[:, 0] - np.min(line[:, 0])
            line[:, 1] = line[:, 1] - np.mean(line[:, 1])

            dataset += [line]
            labels += [translate(text)]

    # Normalise both coordinates by the global y standard deviation.
    # (In-place division mutates the arrays already held in ``dataset``.)
    whole_data = np.concatenate(dataset, axis=0)
    std_y = np.std(whole_data[:, 1])
    for line in dataset:
        line[:, :2] /= std_y

    print('dataset = {}; labels = {}'.format(len(dataset), len(labels)))

    os.makedirs('data', exist_ok=True)
    # The per-line arrays/label lists have different lengths, so they must be
    # stored as an object array: numpy >= 1.24 raises ValueError when asked
    # to build an implicitly ragged ndarray.
    np.save(os.path.join('data', 'dataset'), np.array(dataset, dtype=object), allow_pickle=True)
    np.save(os.path.join('data', 'labels'), np.array(labels, dtype=object), allow_pickle=True)
    with open(os.path.join('data', 'translation.pkl'), 'wb') as file:
        pickle.dump(translation, file)
|
||
|
||
# Script entry point: build the dataset when run directly.
if __name__ == '__main__':
    main()
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
matplotlib | ||
numpy | ||
pillow | ||
tensorflow | ||
img2pdf |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
import flask | ||
from flask import request, jsonify | ||
import os | ||
from flask import send_file | ||
import handwrite | ||
import argparse | ||
import json | ||
|
||
# Single Flask application instance used by all route decorators below.
app = flask.Flask(__name__)

# Create some test data for our catalog in the form of a list of dictionaries.
# NOTE(review): the first record uses the key 'year_published' while the other
# two use 'published' -- presumably unintentional; confirm which key API
# consumers expect before unifying.
books = [
    {'id': 0,
     'title': 'A Fire Upon the Deep',
     'author': 'Vernor Vinge',
     'first_sentence': 'The coldsleep itself was dreamless.',
     'year_published': '1992'},
    {'id': 1,
     'title': 'The Ones Who Walk Away From Omelas',
     'author': 'Ursula K. Le Guin',
     'first_sentence': 'With a clamor of bells that set the swallows soaring, the Festival of Summer came to the city Omelas, bright-towered by the sea.',
     'published': '1973'},
    {'id': 2,
     'title': 'Dhalgren',
     'author': 'Samuel R. Delany',
     'first_sentence': 'to wound the autumnal city.',
     'published': '1975'}
]
|
||
|
||
@app.route('/', methods=['GET'])
def home():
    """Landing page: a brief HTML description of the API."""
    return ('<h1>Distant Reading Archive</h1>\n'
            '<p>A prototype API for distant reading of science fiction novels.</p>')
|
||
|
||
@app.route('/api/v1/resources/books/all', methods=['GET'])
def api_all():
    """Return the complete book catalog as a JSON array."""
    return jsonify(books)
|
||
|
||
@app.route('/api/v1/resources/books', methods=['GET'])
def api_id():
    """Look up books by the numeric ``?id=`` query parameter.

    Returns a JSON array of matching records (empty if no match), or a
    plain-text error message when the parameter is missing or not an
    integer.
    """
    # Check if an ID was provided as part of the URL; without one there is
    # nothing to look up.
    if 'id' not in request.args:
        return "Error: No id field provided. Please specify an id."

    # Previously a non-numeric id raised ValueError and produced an HTTP
    # 500; reject it explicitly instead. Avoid shadowing the builtin `id`.
    try:
        book_id = int(request.args['id'])
    except ValueError:
        return "Error: id must be an integer."

    # IDs are unique, but filtering keeps the response shape a list either way.
    results = [book for book in books if book['id'] == book_id]

    # jsonify converts the list of Python dictionaries to JSON.
    return jsonify(results)
|
||
|
||
@app.route('/api/v1/submitText', methods=['POST'])
def setTextToConvert():
    """Accept JSON ``{"text": ...}`` and render it as handwriting.

    On success responds 201 with ``{"sucess": true, "key": <result>}``;
    when ``text`` is missing, responds with ``"sucess": false`` and an
    error message.
    """
    print(request.json)

    if "text" in request.json:
        # Build the options object directly. The previous code called
        # argparse.ArgumentParser().parse_args() inside the request
        # handler, which re-parses the *server process's* sys.argv on
        # every request and calls SystemExit on any unrecognized
        # argument. The field names and defaults below mirror the old
        # parser's dest/default values exactly.
        args = argparse.Namespace(
            model_path=os.path.join('pretrained', 'model-29'),  # DL model to use
            text=request.json["text"],       # text to write
            file=None,                       # path to an input text file
            style=0,                         # handwriting style (1 to 7)
            bias=0.9,                        # more bias = less clear handwriting
            force=False,
            color_text='0,0,150',            # handwriting color, RGB
            output='./handwritten.pdf',      # output PDF path
        )
        res = handwrite.textToHandWritting(args)
        # NOTE(review): 'sucess' is misspelled and 201 is also returned on
        # the error path below -- kept as-is since existing clients may
        # depend on both; fix in a coordinated API change.
        return jsonify({'sucess': True, 'key': res}), 201
    else:
        return jsonify({'sucess': False, 'key': 'Mandatory parameter text is missing'}), 201
|
||
|
||
@app.route('/downloadPdf', methods=['GET'])
def downloadFile():
    """Serve the generated handwriting PDF as a file download."""
    # For windows you need to use drive name [ex: F:/Example.pdf]
    pdf_path = "handwritten.pdf"
    return send_file(pdf_path, as_attachment=True)
|
||
# NOTE(review): starts the Flask development server at import time (no
# __main__ guard) and binds to all interfaces (0.0.0.0) -- confirm this is
# intended for the deployment environment.
app.run(host="0.0.0.0")
Oops, something went wrong.