-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
15 changed files
with
537 additions
and
0 deletions.
There are no files selected for viewing
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
import os | ||
import html | ||
import pickle | ||
import numpy as np | ||
import xml.etree.cElementTree as ElementTree | ||
|
||
""" | ||
- all points: | ||
>> [[x1, y1, e1], ..., [xn, yn, en]] | ||
- indexed values | ||
>> [h1, ... hn] | ||
""" | ||
|
||
|
||
def distance(p1, p2, axis=None):
    """Return the Euclidean distance between point arrays ``p1`` and ``p2``.

    With ``axis=None`` the difference is reduced over every element;
    pass ``axis=1`` to get one distance per row of stacked points.
    """
    squared_diff = np.square(p1 - p2)
    return np.sqrt(squared_diff.sum(axis=axis))
|
||
|
||
def clear_middle(pts, max_dist=1500):
    """Drop interior points that are implausibly far from both neighbours.

    Parameters
    ----------
    pts : np.ndarray of shape (n, 3)
        Rows of ``[x, y, end_of_stroke]`` points.
    max_dist : float, optional
        A point is removed when the sum of its distances to the previous
        and next point (in the *original* sequence) exceeds this value.
        Default 1500 preserves the previously hard-coded threshold.

    Returns
    -------
    np.ndarray
        A new array with the outlier rows removed; the first and last
        points are always kept.
    """
    to_remove = set()
    for i in range(1, len(pts) - 1):
        p1, p2, p3 = pts[i - 1: i + 2, :2]
        # Distance from the previous point plus distance to the next one;
        # an outlier sits far from both of its original neighbours.
        total = np.linalg.norm(p2 - p1) + np.linalg.norm(p3 - p2)
        if total > max_dist:
            to_remove.add(i)
    return np.array([p for i, p in enumerate(pts) if i not in to_remove])
|
||
|
||
def separate(pts, gap=600):
    """Split a point sequence wherever consecutive points are far apart.

    Parameters
    ----------
    pts : np.ndarray of shape (n, 3)
        Rows of ``[x, y, end_of_stroke]`` points.
    gap : float, optional
        A split is made between points ``i`` and ``i+1`` when their
        distance (over all three columns, matching the original code)
        exceeds this value. Default 600 preserves the previously
        hard-coded threshold.

    Returns
    -------
    list of np.ndarray
        Consecutive segments of ``pts``; their concatenation is ``pts``.
    """
    seps = [i + 1 for i in range(len(pts) - 1)
            if np.linalg.norm(pts[i] - pts[i + 1]) > gap]
    return [pts[b:e] for b, e in zip([0] + seps, seps + [len(pts)])]
|
||
|
||
def main():
    """Build a handwriting dataset from IAM-OnDB-style XML stroke files.

    Walks the current directory for ``*.xml`` files, pairs each file's
    transcription lines with its pen strokes, and writes:

    - ``data/dataset.npy``     -- one float32 ``[x, y, eos]`` array per text line
    - ``data/labels.npy``      -- per-line character-index sequences
    - ``data/translation.pkl`` -- char -> int mapping (0 reserved for ``<NULL>``)
    """
    data = []
    charset = set()

    file_no = 0
    for root, dirs, files in os.walk('.'):
        for file in files:
            file_name, extension = os.path.splitext(file)
            if extension == '.xml':
                file_no += 1
                print('[{:5d}] File {} -- '.format(file_no, os.path.join(root, file)), end='')
                # NOTE(review): relies on the file-level
                # ``xml.etree.cElementTree`` import, which was removed in
                # Python 3.9 -- switch the import to ``xml.etree.ElementTree``.
                xml = ElementTree.parse(os.path.join(root, file)).getroot()
                transcription = xml.findall('Transcription')
                if not transcription:
                    # No ground-truth text in this file; nothing to pair.
                    print('skipped')
                    continue
                texts = [html.unescape(s.get('text')) for s in transcription[0].findall('TextLine')]
                points = [s.findall('Point') for s in xml.findall('StrokeSet')[0].findall('Stroke')]
                strokes = []
                mid_points = []
                for ps in points:
                    pts = np.array([[int(p.get('x')), int(p.get('y')), 0] for p in ps])
                    pts[-1, 2] = 1  # mark end-of-stroke on the last point

                    pts = clear_middle(pts)  # drop outlier points
                    if len(pts) == 0:
                        continue

                    seps = separate(pts)  # split where the pen jumped
                    for pss in seps:
                        if len(seps) > 1 and len(pss) == 1:
                            continue  # discard stray single-point fragments
                        pss[-1, 2] = 1  # each fragment ends a stroke

                        # Bounding box -> midpoint of this stroke fragment.
                        xmax, ymax = max(pss, key=lambda x: x[0])[0], max(pss, key=lambda x: x[1])[1]
                        xmin, ymin = min(pss, key=lambda x: x[0])[0], min(pss, key=lambda x: x[1])[1]

                        strokes += [pss]
                        mid_points += [[(xmax + xmin) / 2., (ymax + ymin) / 2.]]
                # Split the stroke sequence into len(texts) text lines at the
                # largest Manhattan gaps between consecutive stroke midpoints
                # (negated so argsort puts the biggest gaps first).
                distances = [-(abs(p1[0] - p2[0]) + abs(p1[1] - p2[1]))
                             for p1, p2 in zip(mid_points, mid_points[1:])]
                splits = sorted(np.argsort(distances)[:len(texts) - 1] + 1)
                lines = []
                for b, e in zip([0] + splits, splits + [len(strokes)]):
                    lines += [[p for pts in strokes[b:e] for p in pts]]
                print('lines = {:4d}; texts = {:4d}'.format(len(lines), len(texts)))
                charset |= set(''.join(texts))
                data += [(texts, lines)]
    print('data = {}; charset = ({}) {}'.format(len(data), len(charset), ''.join(sorted(charset))))

    # Character -> index table; index 0 is reserved for the <NULL> token.
    translation = {'<NULL>': 0}
    for c in ''.join(sorted(charset)):
        translation[c] = len(translation)

    def translate(txt):
        # Map every character of txt to its integer index.
        return [translation[ch] for ch in txt]

    dataset = []
    labels = []
    for texts, lines in data:
        for text, line in zip(texts, lines):
            line = np.array(line, dtype=np.float32)
            # Anchor x at 0 and centre y around its mean, per line.
            line[:, 0] = line[:, 0] - np.min(line[:, 0])
            line[:, 1] = line[:, 1] - np.mean(line[:, 1])

            dataset += [line]
            labels += [translate(text)]

    # Normalise both coordinates by the global y standard deviation.
    # (In-place division mutates the arrays already held in ``dataset``.)
    whole_data = np.concatenate(dataset, axis=0)
    std_y = np.std(whole_data[:, 1])
    for line in dataset:
        line[:, :2] /= std_y

    print('dataset = {}; labels = {}'.format(len(dataset), len(labels)))

    os.makedirs('data', exist_ok=True)
    # The per-line arrays/label lists have different lengths, so they must be
    # stored as an object array: numpy >= 1.24 raises ValueError when asked
    # to build an implicitly ragged ndarray.
    np.save(os.path.join('data', 'dataset'), np.array(dataset, dtype=object), allow_pickle=True)
    np.save(os.path.join('data', 'labels'), np.array(labels, dtype=object), allow_pickle=True)
    with open(os.path.join('data', 'translation.pkl'), 'wb') as file:
        pickle.dump(translation, file)
|
||
|
||
# Script entry point: build the dataset when run directly.
if __name__ == '__main__':
    main()
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
matplotlib | ||
numpy | ||
pillow | ||
tensorflow | ||
img2pdf |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
import flask | ||
from flask import request, jsonify | ||
import os | ||
from flask import send_file | ||
import handwrite | ||
import argparse | ||
import json | ||
|
||
# Single Flask application instance used by all route decorators below.
app = flask.Flask(__name__)

# Create some test data for our catalog in the form of a list of dictionaries.
# NOTE(review): the first record uses the key 'year_published' while the other
# two use 'published' -- presumably unintentional; confirm which key API
# consumers expect before unifying.
books = [
    {'id': 0,
     'title': 'A Fire Upon the Deep',
     'author': 'Vernor Vinge',
     'first_sentence': 'The coldsleep itself was dreamless.',
     'year_published': '1992'},
    {'id': 1,
     'title': 'The Ones Who Walk Away From Omelas',
     'author': 'Ursula K. Le Guin',
     'first_sentence': 'With a clamor of bells that set the swallows soaring, the Festival of Summer came to the city Omelas, bright-towered by the sea.',
     'published': '1973'},
    {'id': 2,
     'title': 'Dhalgren',
     'author': 'Samuel R. Delany',
     'first_sentence': 'to wound the autumnal city.',
     'published': '1975'}
]
|
||
|
||
@app.route('/', methods=['GET'])
def home():
    """Landing page: a brief HTML description of the API."""
    return ('<h1>Distant Reading Archive</h1>\n'
            '<p>A prototype API for distant reading of science fiction novels.</p>')
|
||
|
||
@app.route('/api/v1/resources/books/all', methods=['GET'])
def api_all():
    """Return the complete book catalog as a JSON array."""
    return jsonify(books)
|
||
|
||
@app.route('/api/v1/resources/books', methods=['GET'])
def api_id():
    """Look up books by the numeric ``?id=`` query parameter.

    Returns a JSON array of matching records (empty if no match), or a
    plain-text error message when the parameter is missing or not an
    integer.
    """
    # Check if an ID was provided as part of the URL; without one there is
    # nothing to look up.
    if 'id' not in request.args:
        return "Error: No id field provided. Please specify an id."

    # Previously a non-numeric id raised ValueError and produced an HTTP
    # 500; reject it explicitly instead. Avoid shadowing the builtin `id`.
    try:
        book_id = int(request.args['id'])
    except ValueError:
        return "Error: id must be an integer."

    # IDs are unique, but filtering keeps the response shape a list either way.
    results = [book for book in books if book['id'] == book_id]

    # jsonify converts the list of Python dictionaries to JSON.
    return jsonify(results)
|
||
|
||
@app.route('/api/v1/submitText', methods=['POST'])
def setTextToConvert():
    """Accept JSON ``{"text": ...}`` and render it as handwriting.

    On success responds 201 with ``{"sucess": true, "key": <result>}``;
    when ``text`` is missing, responds with ``"sucess": false`` and an
    error message.
    """
    print(request.json)

    if "text" in request.json:
        # Build the options object directly. The previous code called
        # argparse.ArgumentParser().parse_args() inside the request
        # handler, which re-parses the *server process's* sys.argv on
        # every request and calls SystemExit on any unrecognized
        # argument. The field names and defaults below mirror the old
        # parser's dest/default values exactly.
        args = argparse.Namespace(
            model_path=os.path.join('pretrained', 'model-29'),  # DL model to use
            text=request.json["text"],       # text to write
            file=None,                       # path to an input text file
            style=0,                         # handwriting style (1 to 7)
            bias=0.9,                        # more bias = less clear handwriting
            force=False,
            color_text='0,0,150',            # handwriting color, RGB
            output='./handwritten.pdf',      # output PDF path
        )
        res = handwrite.textToHandWritting(args)
        # NOTE(review): 'sucess' is misspelled and 201 is also returned on
        # the error path below -- kept as-is since existing clients may
        # depend on both; fix in a coordinated API change.
        return jsonify({'sucess': True, 'key': res}), 201
    else:
        return jsonify({'sucess': False, 'key': 'Mandatory parameter text is missing'}), 201
|
||
|
||
@app.route('/downloadPdf', methods=['GET'])
def downloadFile():
    """Serve the generated handwriting PDF as a file download."""
    # For windows you need to use drive name [ex: F:/Example.pdf]
    pdf_path = "handwritten.pdf"
    return send_file(pdf_path, as_attachment=True)
|
||
# NOTE(review): starts the Flask development server at import time (no
# __main__ guard) and binds to all interfaces (0.0.0.0) -- confirm this is
# intended for the deployment environment.
app.run(host="0.0.0.0")
Oops, something went wrong.