-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathreduce.py
executable file
·93 lines (77 loc) · 2.95 KB
/
reduce.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/usr/bin/python
# Reduce removes all the duplicate entries in a raw csv file and outputs a csv
# file of the strongest detected signal of each access point.
from argparse import ArgumentParser
import sys
import re
def isValidUtf8(string):
    """Return True if *string* is valid UTF-8, False otherwise.

    Byte strings are checked by decoding them as UTF-8.  Text (unicode)
    strings are checked by encoding them to UTF-8, which can fail for text
    that has no UTF-8 representation (for example lone surrogates on
    Python 3).

    Args:
        string: The byte string or text string to check.

    Returns:
        bool: True when the check succeeds, False when it raises a
        Unicode error.
    """
    try:
        if isinstance(string, bytes):
            string.decode('utf-8')
        else:
            # Text has no meaningful ``decode``; calling it on Python 2 would
            # first do an implicit ASCII encode that raises for non-ASCII
            # characters, so validate by encoding instead.
            string.encode('utf-8')
    except UnicodeError:
        # Covers both UnicodeDecodeError and UnicodeEncodeError.
        return False
    return True
# A valid row has exactly this many comma-separated columns.
EXPECTED_COLUMNS = 11
# Index of the column that holds the signal intensity being compared.
SIGNAL_COLUMN = 5
# Six hex pairs, optionally separated by a consistent '-' or ':' delimiter.
# Matched against the lower-cased first column.
MAC_PATTERN = re.compile(r"[0-9a-f]{2}([-:]?)[0-9a-f]{2}(\1[0-9a-f]{2}){4}$")


def _parseSignal(text):
    """Return *text* converted to an int, or None if it is not an integer."""
    try:
        return int(text)
    except ValueError:
        return None


def reduceEntries(lines):
    """Collapse duplicate access-point rows, keeping the strongest signal.

    Each line is split on commas and every field is right-stripped.  Two rows
    describe the same access point when their first column (MAC address) and
    second column (SSID) are both equal.  For duplicates, the row whose
    signal column holds the larger integer is kept; when either value is not
    an integer, or the values are equal, the row seen first is kept.

    Args:
        lines: Iterable of raw CSV data lines (header lines already removed).

    Returns:
        list: One list of column strings per access point, in the order each
        access point was first seen.
    """
    strongest = []
    # Maps (MAC, SSID) to the row's index in ``strongest`` so duplicates are
    # found without rescanning every row kept so far.
    position = {}
    for line in lines:
        entry = [field.rstrip() for field in line.split(',')]
        # Rows that do not have exactly 11 columns, or whose first column is
        # not a MAC address, are skipped.
        # NOTE(review): assumes malformed rows were meant to be dropped rather
        # than copied to the output - confirm.
        if len(entry) != EXPECTED_COLUMNS:
            continue
        if not MAC_PATTERN.match(entry[0].lower()):
            continue

        key = (entry[0], entry[1])
        if key not in position:
            position[key] = len(strongest)
            strongest.append(entry)
            continue

        # Fields are strings, so they must be parsed before the intensities
        # can be compared numerically.
        index = position[key]
        candidate = _parseSignal(entry[SIGNAL_COLUMN])
        current = _parseSignal(strongest[index][SIGNAL_COLUMN])
        if candidate is not None and current is not None and candidate > current:
            strongest[index] = entry
    return strongest


def main(argv=None):
    """Read the input CSV, reduce it, and write the result to the output CSV.

    The first two lines of the input are treated as headers and copied to the
    output (with trailing whitespace removed from the end of the header
    text).  Rows whose first or second column is not valid UTF-8 are not
    written.

    Args:
        argv: Argument list to parse; defaults to the process command line.

    Raises:
        SystemExit: With the usage message and a non-zero status when the
        input or output file option is missing.
    """
    parser = ArgumentParser()
    parser.add_argument("-i", "--input", dest="inputFile",
                        help="Input file to process", metavar="FILE")
    parser.add_argument("-o", "--output", dest="outputFile",
                        help="output file from reduce", metavar="FILE")
    args = parser.parse_args(argv)

    # sys.exit(message) prints to stderr and exits with a failure status.
    if args.inputFile is None:
        sys.exit("Please provide an input file with -i flag.")
    if args.outputFile is None:
        sys.exit("Please provide an output file with -o flag.")

    with open(args.inputFile, "r") as source:
        # Skip the first two lines because they're headers; tolerate files
        # that are shorter than that instead of raising StopIteration.
        header = next(source, "") + next(source, "")
        accessPoints = reduceEntries(source)

    # Write the reduced rows to the output file.
    with open(args.outputFile, 'w') as target:
        target.write(header.rstrip())
        target.write('\n')
        for ap in accessPoints:
            if isValidUtf8(ap[0]) and isValidUtf8(ap[1]):
                # join keeps every column, including trailing empty ones.
                target.write(",".join(ap))
                target.write('\n')


if __name__ == "__main__":
    main()