-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.go
157 lines (124 loc) · 3.77 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
package main
import (
"bufio"
"errors"
"fmt"
"io"
"os"
"regexp"
"sort"
"strings"
"github.com/labstack/gommon/log"
)
// SequenceCount pairs a word sequence with the number of times it was seen.
type SequenceCount struct {
// Sequence is the space-joined run of words that was counted.
Sequence string
// Count is how many times Sequence occurred in the processed input.
Count int
}
// Program-wide settings and the message prefixes used when building errors.
const (
// MaxSequences is the upper bound returned by getMaxSequences, and therefore
// the largest number of sequences printHighestSequences will print.
MaxSequences = 100
// FilePath is the file read when no command-line arguments are given.
FilePath = "input.txt"
// ErrFileScanning is the message prefix for a failure while scanning input.
ErrFileScanning = "Error occurred while scanning the file"
// ErrParsingRegexp is the message prefix for a regular-expression compilation failure.
ErrParsingRegexp = "Error occurred while parsing the regular expression"
// ErrFileOpening is the message prefix for a failure to open the input file.
ErrFileOpening = "Error occurred while opening the file"
)
// main counts the word sequences in the input, orders them by how often they
// occur, and prints the most frequent ones, one "<sequence> <count>" per line.
//
// When the input cannot be processed the error is logged and the program exits
// with status 1, so shells and scripts can tell a failed run apart from an
// input that simply contained no sequences.
func main() {
	sequences, err := processSequenceCounts()
	if err != nil {
		log.Print(err)
		os.Exit(1)
	}
	sortSequencesByCount(sequences)
	printHighestSequences(sequences)
}
// printHighestSequences writes the leading entries of sequences to standard
// output, one "<sequence> <count>" pair per line. The number of entries printed
// is decided by getMaxSequences; the slice is expected to be sorted already.
func printHighestSequences(sequences []SequenceCount) {
	limit := getMaxSequences(len(sequences))
	for idx, entry := range sequences {
		// limit is negative for an empty slice, but then the range body
		// never runs, so nothing is printed.
		if idx >= limit {
			break
		}
		fmt.Println(entry.Sequence, entry.Count)
	}
}
// processSequenceCounts tallies the word sequences of the input and returns
// them as an unordered slice.
//
// The input is the file named on the command line when isFileFromStdin reports
// that arguments were supplied, and FilePath otherwise. On failure the returned
// slice is nil and the error from the reader is passed through unchanged.
func processSequenceCounts() ([]SequenceCount, error) {
	tally := make(map[string]int)

	var readErr error
	switch {
	case isFileFromStdin():
		readErr = processFileFromStdin(tally)
	default:
		readErr = processFile(FilePath, tally)
	}
	if readErr != nil {
		return nil, readErr
	}

	// Map iteration order is unspecified, so the result carries no ordering.
	var result []SequenceCount
	for text, occurrences := range tally {
		result = append(result, SequenceCount{Sequence: text, Count: occurrences})
	}
	return result, nil
}
// processFileFromStdin counts the sequences of the file named by the second
// command-line argument (os.Args[2]) into sequenceCounts.
//
// Despite its name it does not read standard input; it only picks the file
// name from the argument list and delegates to processFile.
//
// It returns an error, instead of panicking with an index-out-of-range, when
// fewer than two arguments were supplied.
func processFileFromStdin(sequenceCounts map[string]int) error {
	const fileArgIndex = 2
	if len(os.Args) <= fileArgIndex {
		return errors.New("missing input file name: expected it as the second command-line argument")
	}
	return processFile(os.Args[fileArgIndex], sequenceCounts)
}
// processFile processes the sequence count from a file.
func processFile(filePath string, sequenceCounts map[string]int) error {
file, err := os.Open(filePath)
if err != nil {
return errors.New(fmt.Sprintf("%s: %s\n", ErrFileOpening, err))
}
defer file.Close()
return processSequenceCountFromFile(file, sequenceCounts)
}
// processSequenceCountFromFile processes sequence count from a reader.
func processSequenceCountFromFile(file io.Reader, sequenceCounts map[string]int) error {
scanner := bufio.NewScanner(file)
sequenceQueue := make([]string, 0, 3)
for scanner.Scan() {
line := scanner.Text()
line = strings.ToLower(line)
regexCompile, err := regexp.Compile(`\\[nrt]|[^a-zA-Z\s]`)
if err != nil {
return errors.New(fmt.Sprintf("%s: %s\n", ErrParsingRegexp, err))
}
lineWithoutSpecialCharacters := regexCompile.ReplaceAllString(line, " ")
words := strings.Fields(lineWithoutSpecialCharacters)
for _, word := range words {
sequenceQueue = append(sequenceQueue, word)
if len(sequenceQueue) > 3 {
sequenceQueue = sequenceQueue[1:]
}
if len(sequenceQueue) == 3 {
sequence := strings.Join(sequenceQueue, " ")
sequenceCounts[sequence]++
}
}
}
if err := scanner.Err(); err != nil {
return errors.New(fmt.Sprintf("%s: %s\n", ErrFileScanning, err))
}
return nil
}
// getMaxSequences clamps sequencesCount to MaxSequences.
//
// It returns MaxSequences when sequencesCount is at least that large, -1 when
// sequencesCount is zero, and sequencesCount itself in every other case.
func getMaxSequences(sequencesCount int) int {
	switch {
	case sequencesCount >= MaxSequences:
		return MaxSequences
	case sequencesCount == 0:
		// Callers that loop with "i < result" perform no iterations.
		return -1
	default:
		return sequencesCount
	}
}
// isFileFromStdin reports whether the program was started with at least one
// command-line argument besides its own name. It inspects os.Args only; it
// does not look at standard input.
func isFileFromStdin() bool {
	return len(os.Args) > 1
}
// sortSequencesByCount sorts sequences in place by Count, highest first.
//
// Entries with the same Count are ordered by Sequence in ascending order.
// sort.Slice is not stable and the entries originate from map iteration, so
// without this tie-break the order of equal counts would differ from run to
// run.
func sortSequencesByCount(sequences []SequenceCount) {
	sort.Slice(sequences, func(i, j int) bool {
		if sequences[i].Count != sequences[j].Count {
			return sequences[i].Count > sequences[j].Count
		}
		return sequences[i].Sequence < sequences[j].Sequence
	})
}