diff --git a/alpha_main.py b/alpha_main.py
index 27b57c2..e5cc75d 100644
--- a/alpha_main.py
+++ b/alpha_main.py
@@ -34,7 +34,8 @@
         'chatterbot.preprocessors.clean_whitespace', #cleans any wrong spacing with regex
         'chatterbot.preprocessors.unescape_html', #Convert escaped html characters into unescaped html characters
         'chatterbot.preprocessors.convert_to_ascii',
-        'preprocessors.clean_words.clean_words'
+        'preprocessors.clean_words.clean_words',
+        'preprocessors.clean_punctuations.clean_punctuations'
     ]
 )
 
diff --git a/preprocessors/clean_punctuations.py b/preprocessors/clean_punctuations.py
new file mode 100644
index 0000000..68b596e
--- /dev/null
+++ b/preprocessors/clean_punctuations.py
@@ -0,0 +1,37 @@
+"""Preprocessor that strips punctuation characters from a statement's text."""
+import py_compile
+import re
+
+# Characters removed from a statement's text by clean_punctuations().
+puncuations = [
+    "`",
+    "~",
+    "!",
+    "#",
+    "$",
+    "%",
+    "^",
+    "&",
+    "*",
+    "(",
+    ")",
+    "-",
+    "+",
+    "?",
+    ".",
+    ",",
+]
+
+# Built once at import time so each statement is cleaned in a single pass.
+_PUNCTUATION_TABLE = str.maketrans("", "", "".join(puncuations))
+
+
+def clean_punctuations(statement):
+    """Remove every character listed in ``puncuations`` from ``statement.text``.
+
+    :param statement: object with a string ``text`` attribute (presumably a
+        ChatterBot statement); its text is rewritten in place.
+    :returns: the same ``statement`` object that was passed in.
+    """
+    statement.text = statement.text.translate(_PUNCTUATION_TABLE)
+    return statement
diff --git a/preprocessors/clean_words.py b/preprocessors/clean_words.py
index 50f1e1a..e784d8a 100644
--- a/preprocessors/clean_words.py
+++ b/preprocessors/clean_words.py
@@ -1,4 +1,3 @@
-import re
 #this file imports the regex and cleans any extra words that is preventing the processing of this program
 
 #this is the list of the words that will be removed during processing