Skip to content

Commit

Permalink
big update: preprocessing
Browse files Browse the repository at this point in the history
  • Loading branch information
Dcosthephalump committed Apr 2, 2024
1 parent d95641d commit b2203be
Show file tree
Hide file tree
Showing 13 changed files with 2,243 additions and 62 deletions.
14 changes: 10 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
# glyptodon

Glyptodon is a manuscript annotation tool that uses [Dash](https://dash.plotly.com/) and [Plotly](https://plotly.com/python/).
Glyptodon is a manuscript annotation tool that uses [Dash](https://dash.plotly.com/), [Plotly](https://plotly.com/python/), [OpenCV](https://opencv.org/), and [NumPy](https://numpy.org/). The purpose of this tool is to create and utilize physical document transcriptions for the digital humanities.

At present, it can accept manuscript images for most image formats and manuscript transcriptions in xml.
At present, it can accept manuscript images in most image formats. It cannot yet use transcriptions uploaded in XML, but it should be able to in a future update.

It cannot use the transcriptions uploaded in xml yet, but it should be able to in future updates.
There is an algorithm for line-level segmentation and preprocessing that will be integrated into the Dash GUI in later updates.

The purpose of this tool is to create and utilize physical document transcriptions for the digital humanities.
The manuscripts/datasets that this algorithm was designed with in mind are:
- [HPGTR](https://github.com/vivianpl/HPGTR/tree/main)
- [EPARCHOS](https://zenodo.org/records/4095301)
- Stavronikita Monastery Greek Handwritten Document Collection:
- [No. 53](https://zenodo.org/records/5595669)
- [No. 79](https://zenodo.org/records/5578136)
- [No. 114](https://zenodo.org/records/5578251)
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ dependencies:
- dash-bootstrap-components # For website styling
- pandas
- numpy
- matplotlib
- pip
- pip:
# App library
Expand Down
25 changes: 13 additions & 12 deletions glyptodon/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,17 @@
import re
import os

# %% ../nbs/07_app.ipynb 8
# %% ../nbs/07_app.ipynb 6
selectionKey, selectionLayout = createSelectionLayout()
print(selectionKey)

centuries, informationLayout = createInformationLayout()

annotationLayout = createAnnotationLayout()

exportLayout = createExportLayout()

# %% ../nbs/07_app.ipynb 10
# %% ../nbs/07_app.ipynb 8
app = Dash(
external_stylesheets=[dbc.themes.BOOTSTRAP]
)
Expand Down Expand Up @@ -77,7 +78,7 @@
]
)

# %% ../nbs/07_app.ipynb 14
# %% ../nbs/07_app.ipynb 10
newManuscript = False
selectedManuscript = selectionKey[
"Stavronikita Monastery Greek handwritten document Collection no.53"
Expand Down Expand Up @@ -128,7 +129,7 @@ def selectManuscript(work):
]
return work, author, language, country, city, institution, centuriesValue, {"display": "none"}

# %% ../nbs/07_app.ipynb 17
# %% ../nbs/07_app.ipynb 12
@callback(
Output("tabs-object","value", allow_duplicate=True),
Input("finalize-selection", "n_clicks"),
Expand All @@ -137,7 +138,7 @@ def selectManuscript(work):
def finalizeSelectionCallback(clicks):
return "information"

# %% ../nbs/07_app.ipynb 19
# %% ../nbs/07_app.ipynb 14
@callback(
Output("tabs-object", "value", allow_duplicate=True),
Output("manuscript-select", "value"),
Expand Down Expand Up @@ -246,7 +247,7 @@ def saveNContinuteCallback(

return "annotation", manSelectVal, manSelectOpts, dropdownOptions, dropdownOptions[0]["value"]

# %% ../nbs/07_app.ipynb 22
# %% ../nbs/07_app.ipynb 16
@callback(
Output("annotation-figure", "figure"),
Input("page-selector", "value"),
Expand Down Expand Up @@ -301,7 +302,7 @@ def pageSelectorCallback(path):

return fig

# %% ../nbs/07_app.ipynb 24
# %% ../nbs/07_app.ipynb 18
@callback(
Output("dummy-output","children", allow_duplicate=True),
Input("save-shapes", "n_clicks"),
Expand Down Expand Up @@ -367,7 +368,7 @@ def saveShapesCallback(clicks, shapes, path):
dummy = ["1","2","3"]
return dummy

# %% ../nbs/07_app.ipynb 26
# %% ../nbs/07_app.ipynb 20
@callback(
Output("annotation-text-area","value"),
Input("annotation-figure", "relayoutData"),
Expand Down Expand Up @@ -402,7 +403,7 @@ def lineNumberCallback(shapes, currentText):

return newValue

# %% ../nbs/07_app.ipynb 28
# %% ../nbs/07_app.ipynb 22
@callback(
Output("dummy-output", "children", allow_duplicate=True),
Input("save-annotation", "n_clicks"),
Expand Down Expand Up @@ -484,7 +485,7 @@ def saveAnnotationCallback(clicks, shapes, path, currentText):
dummy = ["1", "2", "3"]
return dummy

# %% ../nbs/07_app.ipynb 30
# %% ../nbs/07_app.ipynb 24
@callback(
Output("tabs-object", "value", allow_duplicate=True),
Input("next-tab", "n_clicks"),
Expand All @@ -493,7 +494,7 @@ def saveAnnotationCallback(clicks, shapes, path, currentText):
def nextTabCallback(clicks):
return "export"

# %% ../nbs/07_app.ipynb 32
# %% ../nbs/07_app.ipynb 26
@callback(
Output("export-download", "data"),
Input("export-button", "n_clicks"),
Expand All @@ -506,6 +507,6 @@ def exportManuscriptCallback(clicks, name, options):
path = zipManuscript(options, selectedManuscript[0], name)
return dcc.send_file(path)

# %% ../nbs/07_app.ipynb 34
# %% ../nbs/07_app.ipynb 28
if __name__ == "__main__":
app.run(debug=True)
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Work:Stavronikita Monastery Greek handwritten document Collection no.53
Author:
Language:Greek
Country:Greece
City:Mount Athos
Institution:Stavronikita Monastery
Work:None
Author:None
Language:None
Country:None
City:None
Institution:None
Centuries:14th Century
38 changes: 20 additions & 18 deletions nbs/01_manuscriptFiles.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 1,
"metadata": {
"tags": []
},
Expand Down Expand Up @@ -117,22 +117,20 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 24,
"metadata": {
"tags": []
},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'dictToList' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[9], line 12\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Testing on a real world example\u001b[39;00m\n\u001b[1;32m 2\u001b[0m Stav53 \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 3\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mWork\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mStavronikita Monastery Greek handwritten document Collection no.53\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 4\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mAuthor\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mCenturies\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m14th Century\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 10\u001b[0m }\n\u001b[0;32m---> 12\u001b[0m \u001b[43mcreateManuscriptDirectory\u001b[49m\u001b[43m(\u001b[49m\u001b[43mStav53\u001b[49m\u001b[43m)\u001b[49m\n",
"Cell \u001b[0;32mIn[2], line 30\u001b[0m, in \u001b[0;36mcreateManuscriptDirectory\u001b[0;34m(metadata)\u001b[0m\n\u001b[1;32m 27\u001b[0m f \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(title \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m.cfg\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mw\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 29\u001b[0m \u001b[38;5;66;03m# Writes relevant metadata to file\u001b[39;00m\n\u001b[0;32m---> 30\u001b[0m printable \u001b[38;5;241m=\u001b[39m \u001b[43mdictToList\u001b[49m(metadata)\n\u001b[1;32m 31\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m data \u001b[38;5;129;01min\u001b[39;00m printable:\n\u001b[1;32m 32\u001b[0m f\u001b[38;5;241m.\u001b[39mwrite(data \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n",
"\u001b[0;31mNameError\u001b[0m: name 'dictToList' is not defined"
]
"data": {
"text/plain": [
"'/home/dc/glyptodon/glyptodon/manuscripts/stvrnktmnstrygrkcllctnn53'"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
Expand Down Expand Up @@ -161,7 +159,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 12,
"metadata": {
"tags": []
},
Expand Down Expand Up @@ -749,8 +747,10 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"execution_count": 41,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"#| export\n",
Expand All @@ -770,13 +770,15 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"execution_count": 43,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"#| hide\n",
"\n",
"directory = '/home/dc/glyptodon/manuscripts/stvrnktmnstrygrkcllctnn.53'\n",
"directory = '/home/dc/glyptodon/glyptodon/manuscripts/stvrnktmnstrygrkcllctnn.53'\n",
"information = {'Work': 'Stavronikita Monastery Greek handwritten document Collection no.53',\n",
" 'Author': 'Anonymous',\n",
" 'Language': 'Greek',\n",
Expand Down
6 changes: 3 additions & 3 deletions nbs/02_selection.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -48,7 +48,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 3,
"metadata": {
"tags": []
},
Expand All @@ -75,7 +75,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
Expand Down
2 changes: 1 addition & 1 deletion nbs/05_classes.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
"version": "3.11.3"
}
},
"nbformat": 4,
Expand Down
Loading

0 comments on commit b2203be

Please sign in to comment.