
Commit

initial commit version change
ayush016 authored Oct 8, 2021
1 parent ff9c770 commit 1b95800
Showing 15 changed files with 537 additions and 0 deletions.
Binary file added pages/blank_page.jpg
Binary file added pages/page1.jpg
Binary file added pages/page1.png
Binary file added pages/page2.jpg
Binary file added pages/page2.png
Binary file added pages/page3.jpg
Binary file added pages/page3.png
Binary file added pages/page4.jpg
Binary file added pages/page4.png
130 changes: 130 additions & 0 deletions preprocess.py
@@ -0,0 +1,130 @@
import os
import html
import pickle
import numpy as np
import xml.etree.cElementTree as ElementTree

"""
- all points:
>> [[x1, y1, e1], ..., [xn, yn, en]]
- indexed values
>> [h1, ... hn]
"""


def distance(p1, p2, axis=None):
    return np.sqrt(np.sum(np.square(p1 - p2), axis=axis))


def clear_middle(pts):
    to_remove = set()
    for i in range(1, len(pts) - 1):
        p1, p2, p3 = pts[i - 1: i + 2, :2]
        dist = distance(p1, p2) + distance(p2, p3)
        if dist > 1500:
            to_remove.add(i)
    npts = []
    for i in range(len(pts)):
        if i not in to_remove:
            npts += [pts[i]]
    return np.array(npts)


def separate(pts):
    seps = []
    for i in range(0, len(pts) - 1):
        if distance(pts[i], pts[i + 1]) > 600:
            seps += [i + 1]
    return [pts[b:e] for b, e in zip([0] + seps, seps + [len(pts)])]


def main():
    data = []
    charset = set()

    file_no = 0
    # Walk the working directory for the handwriting XML files (stroke sets + transcriptions).
    for root, dirs, files in os.walk('.'):
        for file in files:
            file_name, extension = os.path.splitext(file)
            if extension == '.xml':
                file_no += 1
                print('[{:5d}] File {} -- '.format(file_no, os.path.join(root, file)), end='')
                xml = ElementTree.parse(os.path.join(root, file)).getroot()
                transcription = xml.findall('Transcription')
                if not transcription:
                    print('skipped')
                    continue
                texts = [html.unescape(s.get('text')) for s in transcription[0].findall('TextLine')]
                points = [s.findall('Point') for s in xml.findall('StrokeSet')[0].findall('Stroke')]
                strokes = []
                mid_points = []
                for ps in points:
                    pts = np.array([[int(p.get('x')), int(p.get('y')), 0] for p in ps])
                    pts[-1, 2] = 1  # mark the end of the stroke

                    pts = clear_middle(pts)
                    if len(pts) == 0:
                        continue

                    seps = separate(pts)
                    for pss in seps:
                        if len(seps) > 1 and len(pss) == 1:
                            continue
                        pss[-1, 2] = 1

                        xmax, ymax = max(pss, key=lambda x: x[0])[0], max(pss, key=lambda x: x[1])[1]
                        xmin, ymin = min(pss, key=lambda x: x[0])[0], min(pss, key=lambda x: x[1])[1]

                        strokes += [pss]
                        mid_points += [[(xmax + xmin) / 2., (ymax + ymin) / 2.]]
                # Split the strokes into text lines at the largest gaps between
                # consecutive stroke mid-points (one split per transcribed line).
                distances = [-(abs(p1[0] - p2[0]) + abs(p1[1] - p2[1]))
                             for p1, p2 in zip(mid_points, mid_points[1:])]
                splits = sorted(np.argsort(distances)[:len(texts) - 1] + 1)
                lines = []
                for b, e in zip([0] + splits, splits + [len(strokes)]):
                    lines += [[p for pts in strokes[b:e] for p in pts]]
                print('lines = {:4d}; texts = {:4d}'.format(len(lines), len(texts)))
                charset |= set(''.join(texts))
                data += [(texts, lines)]
    print('data = {}; charset = ({}) {}'.format(len(data), len(charset), ''.join(sorted(charset))))

    # Map every character seen in the transcriptions to an integer label; 0 is reserved for <NULL>.
    translation = {'<NULL>': 0}
    for c in ''.join(sorted(charset)):
        translation[c] = len(translation)

    def translate(txt):
        return list(map(lambda x: translation[x], txt))

    dataset = []
    labels = []
    for texts, lines in data:
        for text, line in zip(texts, lines):
            line = np.array(line, dtype=np.float32)
            line[:, 0] = line[:, 0] - np.min(line[:, 0])
            line[:, 1] = line[:, 1] - np.mean(line[:, 1])

            dataset += [line]
            labels += [translate(text)]

    # Normalize all coordinates by the standard deviation of the y coordinate.
    whole_data = np.concatenate(dataset, axis=0)
    std_y = np.std(whole_data[:, 1])
    norm_data = []
    for line in dataset:
        line[:, :2] /= std_y
        norm_data += [line]
    dataset = norm_data

    print('dataset = {}; labels = {}'.format(len(dataset), len(labels)))

    try:
        os.makedirs('data')
    except FileExistsError:
        pass
    # Lines have different lengths, so these are saved as object arrays of per-line arrays/lists.
    np.save(os.path.join('data', 'dataset'), np.array(dataset, dtype=object))
    np.save(os.path.join('data', 'labels'), np.array(labels, dtype=object))
    with open(os.path.join('data', 'translation.pkl'), 'wb') as file:
        pickle.dump(translation, file)


if __name__ == '__main__':
    main()
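
Not part of the commit — a minimal sketch of reading back the files that preprocess.py writes, assuming the ragged object arrays are loaded with allow_pickle=True and that np.save has appended the usual .npy suffix:

import os
import pickle
import numpy as np

dataset = np.load(os.path.join('data', 'dataset.npy'), allow_pickle=True)
labels = np.load(os.path.join('data', 'labels.npy'), allow_pickle=True)
with open(os.path.join('data', 'translation.pkl'), 'rb') as f:
    translation = pickle.load(f)  # maps characters to integer labels, '<NULL>' -> 0

print(len(dataset), 'lines;', len(translation), 'characters in the charset')
print(dataset[0][:5])  # first five [x, y, end_of_stroke] points of the first line

# Invert the translation table to decode a label sequence back into text.
rev = {v: k for k, v in translation.items()}
print(''.join(rev[i] for i in labels[0]))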
Binary file added pretrained/model-29.meta
5 changes: 5 additions & 0 deletions requirements.txt
@@ -0,0 +1,5 @@
matplotlib
numpy
pillow
tensorflow
img2pdf
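
These dependencies can be installed in the usual way with pip install -r requirements.txt (ideally inside a virtual environment); no versions are pinned in this commit.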
95 changes: 95 additions & 0 deletions textToHandWritting.py
@@ -0,0 +1,95 @@
import flask
from flask import request, jsonify
import os
from flask import send_file
import handwrite
import argparse
import json

app = flask.Flask(__name__)

# Create some test data for our catalog in the form of a list of dictionaries.
books = [
    {'id': 0,
     'title': 'A Fire Upon the Deep',
     'author': 'Vernor Vinge',
     'first_sentence': 'The coldsleep itself was dreamless.',
     'year_published': '1992'},
    {'id': 1,
     'title': 'The Ones Who Walk Away From Omelas',
     'author': 'Ursula K. Le Guin',
     'first_sentence': 'With a clamor of bells that set the swallows soaring, the Festival of Summer came to the city Omelas, bright-towered by the sea.',
     'published': '1973'},
    {'id': 2,
     'title': 'Dhalgren',
     'author': 'Samuel R. Delany',
     'first_sentence': 'to wound the autumnal city.',
     'published': '1975'}
]


@app.route('/', methods=['GET'])
def home():
    return '''<h1>Distant Reading Archive</h1>
<p>A prototype API for distant reading of science fiction novels.</p>'''


@app.route('/api/v1/resources/books/all', methods=['GET'])
def api_all():
    return jsonify(books)


@app.route('/api/v1/resources/books', methods=['GET'])
def api_id():
    # Check if an ID was provided as part of the URL.
    # If ID is provided, assign it to a variable.
    # If no ID is provided, display an error in the browser.
    if 'id' in request.args:
        id = int(request.args['id'])
    else:
        return "Error: No id field provided. Please specify an id."

    # Create an empty list for our results
    results = []

    # Loop through the data and match results that fit the requested ID.
    # IDs are unique, but other fields might return many results
    for book in books:
        if book['id'] == id:
            results.append(book)

    # Use the jsonify function from Flask to convert our list of
    # Python dictionaries to the JSON format.
    return jsonify(results)


@app.route('/api/v1/submitText', methods=['POST'])
def setTextToConvert():
    print(request.json)

    if "text" in request.json:
        parser = argparse.ArgumentParser()
        parser.add_argument('--model', dest='model_path', type=str, default=os.path.join('pretrained', 'model-29'), help='(optional) DL model to use')
        parser.add_argument('--text', dest='text', type=str, help='Text to write', default=request.json["text"])
        parser.add_argument('--text-file', dest='file', type=str, default=None, help='Path to the input text file')
        parser.add_argument('--style', dest='style', type=int, default=0, help='Style of handwriting (1 to 7)')
        parser.add_argument('--bias', dest='bias', type=float, default=0.9, help='Bias in handwriting. More bias is more unclear handwriting (0.00 to 1.00)')
        parser.add_argument('--force', dest='force', action='store_true', default=False)
        parser.add_argument('--color', dest='color_text', type=str, default='0,0,150', help='Color of handwriting in RGB format')
        parser.add_argument('--output', dest='output', type=str, default='./handwritten.pdf', help='Output PDF file path and name')
        args = parser.parse_args([])  # parse only the defaults; ignore the server's own command-line arguments
        res = handwrite.textToHandWritting(args)
        return jsonify({'success': True, 'key': res}), 201
    else:
        return jsonify({'success': False, 'key': 'Mandatory parameter text is missing'}), 400


@app.route('/downloadPdf', methods=['GET'])
def downloadFile():
    # For windows you need to use drive name [ex: F:/Example.pdf]
    path = "handwritten.pdf"
    return send_file(path, as_attachment=True)

app.run(host="0.0.0.0")
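
Not part of the commit — a minimal sketch of exercising the API once the server is running on its default port 5000. It assumes the handwrite module (not included in this diff) and the pretrained checkpoint are importable, and uses only the standard library so no extra dependency is needed:

import json
import urllib.request

# Submit text to be rendered as handwriting; Flask's request.json requires the JSON content type.
req = urllib.request.Request(
    'http://localhost:5000/api/v1/submitText',
    data=json.dumps({'text': 'Hello world'}).encode('utf-8'),
    headers={'Content-Type': 'application/json'},
    method='POST')
print(urllib.request.urlopen(req).read())  # e.g. {"key": ..., "success": true}

# Then download the generated PDF from the /downloadPdf endpoint.
with urllib.request.urlopen('http://localhost:5000/downloadPdf') as resp, \
        open('handwritten_copy.pdf', 'wb') as out:
    out.write(resp.read())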