-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfrequencyParser.cpp
72 lines (63 loc) · 2.88 KB
/
frequencyParser.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
//
// Created by Developer on 25.01.2021.
//
#include "frequencyParser.h"
#include <algorithm>
#include <iostream>
#include <chrono>
namespace freq {
/**
 * @brief Counts case-insensitive word occurrences in the text read from @p stream.
 *
 * The whole stream is first read into memory. A word is a maximal run of ASCII
 * letters (a-z, A-Z); every other byte acts as a separator. Each word is
 * lower-cased before it is counted. Timing diagnostics for the read phase and
 * the tokenizing phase are printed to std::cout.
 *
 * @param stream Input stream; it is consumed until exhaustion.
 * @return Map from lower-cased word to its number of occurrences. If the stream
 *         is not in a good state on entry, an error message is printed to
 *         std::cout and an empty map is returned.
 */
std::unordered_map<std::string, size_t> parse(std::istream &stream) {
    auto start = std::chrono::high_resolution_clock::now();
    std::string const symbols{"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"};
    std::unordered_map<std::string, size_t> wordsCount;
    if (!stream.good()) {
        std::cout << "error occurred in input stream" << std::endl;
        return wordsCount;
    }
    std::string str(std::istreambuf_iterator<char>(stream), {});
    auto end = std::chrono::high_resolution_clock::now();
    std::cout << "stream to string: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << " milliseconds" << std::endl;
    start = std::chrono::high_resolution_clock::now();
    std::string word; // reused across iterations so its capacity is recycled
    size_t beg = 0;
    size_t pos = 0;
    while ((beg = str.find_first_of(symbols, pos)) != std::string::npos) {
        // pos becomes npos when the word runs to the end of the text; assign()
        // then clamps the length and the next find_first_of(…, npos) ends the loop.
        pos = str.find_first_not_of(symbols, beg + 1);
        word.assign(str, beg, pos - beg);
        // Wrap tolower in a lambda: taking the address of a standard-library
        // function is not portable, and the argument must be an unsigned char value.
        std::transform(word.begin(), word.end(), word.begin(),
                       [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
        // operator[] value-initializes a missing counter to 0, so a single
        // lookup handles both the first and every later occurrence.
        ++wordsCount[word];
    }
    end = std::chrono::high_resolution_clock::now();
    std::cout << "string to words: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << " milliseconds" << std::endl;
    return wordsCount;
}
/// Strict weak ordering for (word, count) entries: entries with a larger count
/// come first; entries with equal counts are ordered by ascending word.
struct comparator {
    bool operator()(const std::pair<std::string, size_t> &lhs, const std::pair<std::string, size_t> &rhs) const {
        // Differing counts decide the order on their own; the word is only a tie-breaker.
        return lhs.second != rhs.second ? lhs.second > rhs.second
                                        : lhs.first < rhs.first;
    }
};
/**
 * @brief Writes the dictionary to @p stream as "<count> <word>" lines.
 *
 * Entries are copied into a vector and sorted with `comparator` before being
 * written. The stream is flushed once after the last entry. The elapsed time
 * is printed to std::cout.
 *
 * @param map    Word -> occurrence count dictionary to write.
 * @param stream Destination stream. If it is not in a good state on entry, or
 *               if @p map is empty, a message is printed to std::cout and
 *               nothing is written.
 */
void saveSortedDictionary(const std::unordered_map<std::string, size_t> &map, std::ostream &stream) {
    auto start = std::chrono::high_resolution_clock::now();
    if (!stream.good()) {
        std::cout << "error occurred in output stream" << std::endl;
        return;
    }
    if (map.empty()) {
        std::cout << "the dictionary is empty, nothing to write" << std::endl;
        return;
    }
    // unordered_map has no ordering of its own, so sort a copy of the entries.
    std::vector<std::pair<std::string, size_t>> v(map.begin(), map.end());
    std::sort(v.begin(), v.end(), comparator());
    for (const auto &[word, count] : v) {
        // '\n' rather than std::endl: std::endl would flush the stream for
        // every single entry, which is needlessly slow for large dictionaries.
        stream << count << ' ' << word << '\n';
    }
    // One flush at the end keeps the "output is flushed on return" behavior.
    stream.flush();
    auto end = std::chrono::high_resolution_clock::now();
    std::cout << "words to stream: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << " milliseconds" << std::endl;
}
}