From 028c90181025327910bf63bf92331eeb9f7abe17 Mon Sep 17 00:00:00 2001
From: rn grapenthin
Date: Thu, 4 Jun 2015 15:05:17 -0600
Subject: [PATCH] first commit

---
 README.md     |  1 +
 check_repeats | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+)
 create mode 100644 README.md
 create mode 100755 check_repeats

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..f4e6867
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+# util
diff --git a/check_repeats b/check_repeats
new file mode 100755
index 0000000..8271bee
--- /dev/null
+++ b/check_repeats
@@ -0,0 +1,57 @@
+#!/bin/tcsh
+# (use sh2doc.pl to auto generate HTML doc)
+#
+##BRIEF
+# check textfile for repeated words using grep
+#
+##AUTHOR
+# Ronni Grapenthin
+#
+##DATE
+# version 2011-01-27
+#
+##DETAILS
+# check textfile for repeated words using grep
+# (regexp partly blatantly stolen off the web: http://www.codeproject.com/kb/dotnet/RegexTutorial.aspx?fid=136362&df=90&mpp=25&noise=3&sort=Position&view=Quick&fr=226)
+# gives line number and marks repeated words, doesn't care about cases
+#
+# Pass 1 greps every line on its own. Pass 2 joins each pair of neighbouring
+# lines with awk and greps the joined text, so a word repeated across a
+# linebreak is found; repeats inside a single line show up in pass 2 as well.
+#
+# USAGE:
+#
+# usage: check_repeats <textfile>
+#
+##CHANGELOG
+# 2010-06-13, ronni: First version.
+# 2011-01-27, ronni: added checking over linebreaks, apparently this should be
+#                    possible and easier using sed; couldn't get it to work though
+#                    I iterate over the file again, that's inefficient, but the output
+#                    might be clearer.
+
+# Refuse to run without exactly one readable file: with no file operand grep
+# would read standard input instead.
+if ($#argv != 1) then
+    echo "usage: check_repeats <textfile>" > /dev/stderr
+    exit 1
+endif
+if (! -r "$1") then
+    echo "check_repeats: cannot read '$1'" > /dev/stderr
+    exit 1
+endif
+
+echo "Checking for repeated words in a line of ${1}:"
+# -E extended regexp, -i ignore case, -n prefix matches with their line number;
+# \1 requires the word captured by (\w+) to occur again right after it.
+grep -Ein --color -- "\b(\w+)\b\s*\1\b" "$1"
+echo " "
+
+echo "Checking for repeated words over linebreaks of ${1}:"
+# The getline in BEGIN consumes line 1, so the main rule first runs on line 2:
+# l then holds line FNR-1 and $0 holds line FNR, which is what gets printed.
+awk 'BEGIN{getline l;} {combined=l " " $0; printf(" %.5d - %.5d: %s\n",FNR-1,FNR, combined); l=$0;}' "$1" | grep -Ei --color "\b(\w+)\b\s*\1\b"
+echo " "
+echo "Done."
+
+#thank you very much!