Skip to content

Commit

Permalink
Tokenize
Browse files Browse the repository at this point in the history
smut-ny committed May 27, 2018
0 parents commit af0bcc4
Showing 8 changed files with 90,412 additions and 0 deletions.
Binary file added __pycache__/tokenize.cpython-36.pyc
Binary file not shown.
Binary file added __pycache__/verticalization.cpython-36.pyc
Binary file not shown.
Binary file added __pycache__/vocabulary.cpython-36.pyc
Binary file not shown.
29 changes: 29 additions & 0 deletions anottate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
''' NOTE: annotation does not work yet, because separate regexes are needed for verticalized and for non-verticalized text
def an_word(text=None):
    """Wrap every word in ``<w>...</w>`` tags.

    Called with no argument (original behaviour), it rewrites the
    module-level buffer ``op`` in place. When *text* is given, it
    annotates and returns that string instead, leaving ``op`` alone.

    Bug fix: the original pattern ``\\w*`` also matches the empty
    string between words, injecting spurious empty ``<w></w>`` tags;
    ``\\w+`` matches only actual words.
    """
    global op
    if text is not None:
        return re.sub(r"(\w+)", r"<w>\1</w>", text)
    op = re.sub(r"(\w+)", r"<w>\1</w>", op)
def an_s(text=None):
    """Wrap every sentence in ``<s>...</s>`` tags.

    A sentence is a maximal run of characters without sentence-ending
    punctuation, followed by one of ``.``, ``!`` or ``?``.

    Called with no argument (original behaviour), it rewrites the
    module-level buffer ``op`` in place. When *text* is given, it
    annotates and returns that string instead, leaving ``op`` alone.

    Bug fix: the original pattern had unbalanced parentheses (a
    ``re.error`` at runtime) and was not a raw string, so ``\\b`` was a
    literal backspace character. Reconstructed here as a simple,
    working sentence matcher.
    """
    global op
    if text is not None:
        return re.sub(r"([^.!?]+[.!?])", r"<s>\1</s>", text)
    op = re.sub(r"([^.!?]+[.!?])", r"<s>\1</s>", op)
def anottate():
    """Interactively choose an annotation level and apply it.

    Prompts on stdin: ``"W"`` runs word-level annotation, ``"S"``
    sentence-level, ``"B"`` both, and any other answer performs no
    annotation. In every case ``output()`` (defined elsewhere in this
    module) is called once to write the result.

    Bug fixes: removed the stray ``Y`` after the ``def`` colon (a
    syntax error), and turned the independent ``if`` checks into an
    ``if``/``elif`` chain — previously an answer of ``"W"`` or ``"S"``
    also fell through to the final ``else``, printing "No annotation"
    and calling ``output()`` a second time.
    """
    a = input("Annotation on: Word level (W), Sentence level (S) or both (B)? If changed your mind write anything else.")
    if a == "W":
        print("Word level annotation")
        an_word()
    elif a == "S":
        print("Sentence level annotation")
        an_s()
    elif a == "B":
        print("Word and sentence level annotation")
        an_word()
        an_s()
    else:
        print("No annotation")
    output()
'''
8,197 changes: 8,197 additions & 0 deletions input/text.txt

Large diffs are not rendered by default.

Loading

0 comments on commit af0bcc4

Please sign in to comment.