Skip to content

Commit

Permalink
added preprocesors
Browse files Browse the repository at this point in the history
  • Loading branch information
privacyrespected committed Jan 19, 2022
1 parent d54a7a7 commit 3ed65a4
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 2 deletions.
3 changes: 2 additions & 1 deletion alpha_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@
'chatterbot.preprocessors.clean_whitespace', #cleans any wrong spacing with regex
'chatterbot.preprocessors.unescape_html', #Convert escaped html characters into unescaped html characters
'chatterbot.preprocessors.convert_to_ascii',
'preprocessors.clean_words.clean_words'
'preprocessors.clean_words.clean_words',
'preprocessors.clean_punctuations.clean_punctuations'
]
)

Expand Down
30 changes: 30 additions & 0 deletions preprocessors/clean_punctuations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import py_compile
import re
# Single characters that clean_punctuations() strips from a statement's text.
# Kept as a list of one-character strings, in the original order.
puncuations = list("`~!#$%^&*()-+?.,")

def clean_punctuations(statement):
    """Remove every entry of ``puncuations`` from ``statement.text``.

    Args:
        statement: Object exposing a string ``text`` attribute. It is
            modified in place.

    Returns:
        The same ``statement`` object, with the listed punctuation removed
        from its ``text``.
    """
    cleaned = statement.text
    # str.replace leaves the string untouched when the mark is absent, so no
    # membership check is needed before each replacement.
    for mark in puncuations:
        cleaned = cleaned.replace(mark, "")
    statement.text = cleaned
    return statement
1 change: 0 additions & 1 deletion preprocessors/clean_words.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import re
#this file imports the regex module and cleans any extra words that are preventing the processing of this program

#this is the list of the words that will be removed during processing
Expand Down

0 comments on commit 3ed65a4

Please sign in to comment.