From 028c90181025327910bf63bf92331eeb9f7abe17 Mon Sep 17 00:00:00 2001
From: rn grapenthin <rg@nmt.edu>
Date: Thu, 4 Jun 2015 15:05:17 -0600
Subject: [PATCH] first commit

---
 README.md     |  1 +
 check_repeats | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+)
 create mode 100644 README.md
 create mode 100755 check_repeats
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..f4e6867
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+# util
diff --git a/check_repeats b/check_repeats
new file mode 100755
index 0000000..8271bee
--- /dev/null
+++ b/check_repeats
@@ -0,0 +1,38 @@
+#!/bin/tcsh
+# (use sh2doc.pl to auto generate HTML doc)
+#
+##BRIEF
+# check textfile for repeated words using grep 
+#
+##AUTHOR
+# Ronni Grapenthin
+#
+##DATE
+# version 2011-01-27
+#
+##DETAILS
+# check textfile for repeated words using grep 
+# (regexp partly blatantly stolen off the web: http://www.codeproject.com/kb/dotnet/RegexTutorial.aspx?fid=136362&df=90&mpp=25&noise=3&sort=Position&view=Quick&fr=226)
+# prints the line number and highlights the repeated words; matching is case-insensitive
+#
+# USAGE:
+#
+# usage: check_repeats <text-file>
+#
+##CHANGELOG
+# 2010-06-13, ronni: First version.
+# 2011-01-27, ronni: added checking over linebreaks. Apparently this should be
+#                    possible and easier using sed, but I couldn't get it to work.
+#                    Instead I iterate over the file again; that's inefficient, but
+#                    the output might be clearer.
+
+echo "Checking for repeated words in a line of ${1}:"
+grep -Ein --color  "\b(\w+)\b\s*\1\b" "$1"   # -n: print line numbers, -i: ignore case; "$1" quoted so paths with spaces work
+echo " "
+# Pass 2: pair each line with its successor so repeats across a line break match. BEGIN's getline already consumed line 1, so the pair is (FNR-1, FNR).
+echo "Checking for repeated words over linebreaks of ${1}:"
+awk 'BEGIN{getline l;} {combined=l " " $0; printf(" %.5d - %.5d: %s\n",FNR-1,FNR, combined); l=$0;}' "$1" | grep -Ei --color  "\b(\w+)\b\s*\1\b"
+echo " "
+echo "Done."
+
+#thank you very much!