From fcc9010a35bb4f88b3f5d5cd8522ecbb92f7f334 Mon Sep 17 00:00:00 2001 From: Gopi M Tatiraju Date: Mon, 24 Aug 2020 14:42:25 +0530 Subject: [PATCH 01/12] Create convert.cpp --- dataset_utils/convert.cpp | 93 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 dataset_utils/convert.cpp diff --git a/dataset_utils/convert.cpp b/dataset_utils/convert.cpp new file mode 100644 index 00000000..5c921494 --- /dev/null +++ b/dataset_utils/convert.cpp @@ -0,0 +1,93 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +using namespace boost::property_tree; +using namespace boost; + +class Convert +{ + void csvxmlHelper(std::string path, std::string to) + { + //static int ctr; + static std::unordered_map fileNames; + std::vector tags; + std::vector rows; + std::ifstream file(path); + std::string line; + + auto tokenize = [&](std::string line) + { + std::vector col_names; + + tokenizer > tk(line, escaped_list_separator()); + for (tokenizer >::iterator i(tk.begin()); i != tk.end(); ++i) + col_names.push_back(*i); + + return col_names; + }; + + auto create_XML = [&](std::vector& tags, std::vector rows) + { + static int ctr; + ptree XMLobjectL; + std::string tag, value; + + for (auto i : boost::combine(tags, rows)) + { + //tag contains tags, value contains corresponding values + boost::tie(tag, value) = i; + XMLobjectL.put("annotation.object." + tag, value); + } + + write_xml(std::to_string(ctr) + ".xml", XMLobjectL, std::locale(), + xml_writer_make_settings(' ', 1u)); + + ctr++; + }; + + auto create_JSON = [&](std::vector& tags, std::vector rows) + { + static int ctr; + ptree XMLobjectL; + std::string tag, value; + + for (auto i : boost::combine(tags, rows)) + { + //tag contains tags, value contains corresponding values + boost::tie(tag, value) = i; + XMLobjectL.put("annotation.object." + tag, value); + } + + write_json(std::to_string(ctr) + ".json", XMLobjectL); + ctr++; + }; + + std::getline(file, line); + tags = tokenize(line); + + if (to == "xml") + while (std::getline(file, line)) + create_XML(tags, tokenize(line)); + + else if (to == "json") + while (std::getline(file, line)) + create_JSON(tags, tokenize(line)); + + } + +public: + void convert(std::string path, std::string to) + { + csvxmlHelper(path, to); + } +}; From 41a1cbd4358d1b8831161c46e1e1550c7684035b Mon Sep 17 00:00:00 2001 From: Gopi M Tatiraju Date: Mon, 24 Aug 2020 14:44:31 +0530 Subject: [PATCH 02/12] Update convert.cpp --- dataset_utils/convert.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/dataset_utils/convert.cpp b/dataset_utils/convert.cpp index 5c921494..d63fac6e 100644 --- a/dataset_utils/convert.cpp +++ b/dataset_utils/convert.cpp @@ -91,3 +91,12 @@ class Convert csvxmlHelper(path, to); } }; + +// How To use +/*int main() { + Convert foo; + + foo.convert("path_to_csv_file.csv", "xml"); + foo.convert("path_to_csv_file.csv", "json"); + +}*/ From 415c58e592ca73af0fecdf37e051568aa0b2a8a9 Mon Sep 17 00:00:00 2001 From: Gopi M Tatiraju Date: Mon, 24 Aug 2020 14:45:55 +0530 Subject: [PATCH 03/12] Create Readme.md --- dataset_utils/Readme.md | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 dataset_utils/Readme.md diff --git a/dataset_utils/Readme.md b/dataset_utils/Readme.md new file mode 100644 index 00000000..14c793ad --- /dev/null +++ b/dataset_utils/Readme.md @@ -0,0 +1,8 @@ +# Dataset Utils + +This directory contains utility functions related to Datasets. + +Current Implemented features + +* Convert CSV files to JSON +* Convert CSV files to XML From 291eb42ab106a8b8cef155cc77257e0ab3724597 Mon Sep 17 00:00:00 2001 From: Gopi M Tatiraju Date: Mon, 24 Aug 2020 14:47:07 +0530 Subject: [PATCH 04/12] Update Readme.md --- dataset_utils/Readme.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dataset_utils/Readme.md b/dataset_utils/Readme.md index 14c793ad..ad5c9261 100644 --- a/dataset_utils/Readme.md +++ b/dataset_utils/Readme.md @@ -4,5 +4,6 @@ This directory contains utility functions related to Datasets. Current Implemented features -* Convert CSV files to JSON -* Convert CSV files to XML +1. [Issue](https://github.com/mlpack/models/issues/22) + * Convert CSV files to JSON + * Convert CSV files to XML From 7a82b379682e34dfad9f9ad0d8cecacefab8e738 Mon Sep 17 00:00:00 2001 From: Gopi M Tatiraju Date: Mon, 24 Aug 2020 14:47:46 +0530 Subject: [PATCH 05/12] Update Readme.md --- dataset_utils/Readme.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dataset_utils/Readme.md b/dataset_utils/Readme.md index ad5c9261..41396633 100644 --- a/dataset_utils/Readme.md +++ b/dataset_utils/Readme.md @@ -4,6 +4,5 @@ This directory contains utility functions related to Datasets. Current Implemented features -1. [Issue](https://github.com/mlpack/models/issues/22) - * Convert CSV files to JSON - * Convert CSV files to XML + * Convert CSV files to JSON([Issue](https://github.com/mlpack/models/issues/22)) + * Convert CSV files to XML([Issue](https://github.com/mlpack/models/issues/22)) From a84b331d473b5e33014274204676908e101bf894 Mon Sep 17 00:00:00 2001 From: Gopi M Tatiraju Date: Wed, 26 Aug 2020 16:50:23 +0530 Subject: [PATCH 06/12] Update convert.cpp --- dataset_utils/convert.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dataset_utils/convert.cpp b/dataset_utils/convert.cpp index d63fac6e..3cc720d0 100644 --- a/dataset_utils/convert.cpp +++ b/dataset_utils/convert.cpp @@ -76,13 +76,17 @@ class Convert tags = tokenize(line); if (to == "xml") + { while (std::getline(file, line)) create_XML(tags, tokenize(line)); + } else if (to == "json") + { while (std::getline(file, line)) create_JSON(tags, tokenize(line)); + } public: From f5e75132042f363feeef8a9911da21e57f9309a6 Mon Sep 17 00:00:00 2001 From: Gopi M Tatiraju Date: Wed, 26 Aug 2020 16:52:02 +0530 Subject: [PATCH 07/12] Update convert.cpp --- dataset_utils/convert.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataset_utils/convert.cpp b/dataset_utils/convert.cpp index 3cc720d0..9901a824 100644 --- a/dataset_utils/convert.cpp +++ b/dataset_utils/convert.cpp @@ -86,7 +86,7 @@ class Convert while (std::getline(file, line)) create_JSON(tags, tokenize(line)); - + } } public: From a2dbf02801ab5944487fc855a7e581f918e0a36b Mon Sep 17 00:00:00 2001 From: Gopi M Tatiraju Date: Wed, 26 Aug 2020 17:06:46 +0530 Subject: [PATCH 08/12] Delete convert.cpp --- dataset_utils/convert.cpp | 106 -------------------------------------- 1 file changed, 106 deletions(-) delete mode 100644 dataset_utils/convert.cpp diff --git a/dataset_utils/convert.cpp b/dataset_utils/convert.cpp deleted file mode 100644 index 9901a824..00000000 --- a/dataset_utils/convert.cpp +++ /dev/null @@ -1,106 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -using namespace boost::property_tree; -using namespace boost; - -class Convert -{ - void csvxmlHelper(std::string path, std::string to) - { - //static int ctr; - static std::unordered_map fileNames; - std::vector tags; - std::vector rows; - std::ifstream file(path); - std::string line; - - auto tokenize = [&](std::string line) - { - std::vector col_names; - - tokenizer > tk(line, escaped_list_separator()); - for (tokenizer >::iterator i(tk.begin()); i != tk.end(); ++i) - col_names.push_back(*i); - - return col_names; - }; - - auto create_XML = [&](std::vector& tags, std::vector rows) - { - static int ctr; - ptree XMLobjectL; - std::string tag, value; - - for (auto i : boost::combine(tags, rows)) - { - //tag contains tags, value contains corresponding values - boost::tie(tag, value) = i; - XMLobjectL.put("annotation.object." + tag, value); - } - - write_xml(std::to_string(ctr) + ".xml", XMLobjectL, std::locale(), - xml_writer_make_settings(' ', 1u)); - - ctr++; - }; - - auto create_JSON = [&](std::vector& tags, std::vector rows) - { - static int ctr; - ptree XMLobjectL; - std::string tag, value; - - for (auto i : boost::combine(tags, rows)) - { - //tag contains tags, value contains corresponding values - boost::tie(tag, value) = i; - XMLobjectL.put("annotation.object." + tag, value); - } - - write_json(std::to_string(ctr) + ".json", XMLobjectL); - ctr++; - }; - - std::getline(file, line); - tags = tokenize(line); - - if (to == "xml") - { - while (std::getline(file, line)) - create_XML(tags, tokenize(line)); - - } - else if (to == "json") - { - while (std::getline(file, line)) - create_JSON(tags, tokenize(line)); - - } - } - -public: - void convert(std::string path, std::string to) - { - csvxmlHelper(path, to); - } -}; - -// How To use -/*int main() { - Convert foo; - - foo.convert("path_to_csv_file.csv", "xml"); - foo.convert("path_to_csv_file.csv", "json"); - -}*/ From 196521f045fa907099a1706ef89e8301925c7c53 Mon Sep 17 00:00:00 2001 From: Gopi M Tatiraju Date: Wed, 26 Aug 2020 17:07:06 +0530 Subject: [PATCH 09/12] Add files via upload --- dataset_utils/convert.cpp | 104 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 dataset_utils/convert.cpp diff --git a/dataset_utils/convert.cpp b/dataset_utils/convert.cpp new file mode 100644 index 00000000..c3e477c7 --- /dev/null +++ b/dataset_utils/convert.cpp @@ -0,0 +1,104 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +using namespace boost::property_tree; +using namespace boost; + +class Convert +{ + void csvxmlHelper(std::string path, std::string to) + { + //static int ctr; + static std::unordered_map fileNames; + std::vector tags; + std::vector rows; + std::ifstream file(path); + std::string line; + + auto tokenize = [&](std::string line) + { + std::vector col_names; + + tokenizer > tk(line, escaped_list_separator()); + for (tokenizer >::iterator i(tk.begin()); i != tk.end(); ++i) + col_names.push_back(*i); + + return col_names; + }; + + auto create_XML = [&](std::vector& tags, std::vector rows) + { + static int ctr; + ptree XMLobjectL; + std::string tag, value; + + for (auto i : boost::combine(tags, rows)) + { + //tag contains tags, value contains corresponding values + boost::tie(tag, value) = i; + XMLobjectL.put("annotation.object." + tag, value); + } + + write_xml(std::to_string(ctr) + ".xml", XMLobjectL, std::locale(), xml_writer_make_settings(' ', 1u)); + + ctr++; + }; + + auto create_JSON = [&](std::vector& tags, std::vector rows) + { + static int ctr; + ptree XMLobjectL; + std::string tag, value; + + for (auto i : boost::combine(tags, rows)) + { + //tag contains tags, value contains corresponding values + boost::tie(tag, value) = i; + XMLobjectL.put("annotation.object." + tag, value); + } + write_json(std::to_string(ctr) + ".json", XMLobjectL); + ctr++; + }; + + std::getline(file, line); + tags = tokenize(line); + + if (to == "xml") + { + while (std::getline(file, line)) + create_XML(tags, tokenize(line)); + + } + else if (to == "json") + { + while (std::getline(file, line)) + create_JSON(tags, tokenize(line)); + + } + } + +public: + void convert(std::string path, std::string to) + { + csvxmlHelper(path, to); + } +}; + +// How To use +/*int main() { + Convert foo; + + foo.convert("path_to_csv_file.csv", "xml"); + foo.convert("path_to_csv_file.csv", "json"); + +}*/ From 93ddab1a2e25f2c518c08b05cb65676fbd8ad793 Mon Sep 17 00:00:00 2001 From: Gopi M Tatiraju Date: Wed, 2 Sep 2020 11:33:53 +0530 Subject: [PATCH 10/12] Update convert.cpp --- dataset_utils/convert.cpp | 106 +++++++++++++++++++------------------- 1 file changed, 54 insertions(+), 52 deletions(-) diff --git a/dataset_utils/convert.cpp b/dataset_utils/convert.cpp index c3e477c7..8899c5ff 100644 --- a/dataset_utils/convert.cpp +++ b/dataset_utils/convert.cpp @@ -16,89 +16,91 @@ using namespace boost; class Convert { - void csvxmlHelper(std::string path, std::string to) + auto tokenize(std::string& line) { - //static int ctr; - static std::unordered_map fileNames; - std::vector tags; - std::vector rows; - std::ifstream file(path); - std::string line; + std::vector col_names; - auto tokenize = [&](std::string line) - { - std::vector col_names; + tokenizer > tk(line, escaped_list_separator()); + for (tokenizer >::iterator i(tk.begin()); i != tk.end(); ++i) + col_names.push_back(*i); - tokenizer > tk(line, escaped_list_separator()); - for (tokenizer >::iterator i(tk.begin()); i != tk.end(); ++i) - col_names.push_back(*i); + return col_names; + } - return col_names; - }; + auto create_XML(std::vector& tags, std::vector rows) + { + static int ctr; + ptree XMLobjectL; + std::string tag, value; - auto create_XML = [&](std::vector& tags, std::vector rows) + for (auto i : boost::combine(tags, rows)) { - static int ctr; - ptree XMLobjectL; - std::string tag, value; - - for (auto i : boost::combine(tags, rows)) - { - //tag contains tags, value contains corresponding values - boost::tie(tag, value) = i; - XMLobjectL.put("annotation.object." + tag, value); - } + //tag contains tags, value contains corresponding values + boost::tie(tag, value) = i; + XMLobjectL.put("annotation.object." + tag, value); + } - write_xml(std::to_string(ctr) + ".xml", XMLobjectL, std::locale(), xml_writer_make_settings(' ', 1u)); + write_xml(std::to_string(ctr) + ".xml", XMLobjectL, std::locale(), + xml_writer_make_settings(' ', 1u)); + ctr++; + } - ctr++; - }; + auto create_JSON(std::vector& tags, std::vector rows) + { + static int ctr; + ptree XMLobjectL; + std::string tag, value; - auto create_JSON = [&](std::vector& tags, std::vector rows) + for (auto i : boost::combine(tags, rows)) { - static int ctr; - ptree XMLobjectL; - std::string tag, value; + //tag contains tags, value contains corresponding values + boost::tie(tag, value) = i; + XMLobjectL.put("annotation.object." + tag, value); + } - for (auto i : boost::combine(tags, rows)) - { - //tag contains tags, value contains corresponding values - boost::tie(tag, value) = i; - XMLobjectL.put("annotation.object." + tag, value); - } - write_json(std::to_string(ctr) + ".json", XMLobjectL); - ctr++; - }; + write_json(std::to_string(ctr) + ".json", XMLobjectL); + ctr++; + } + void convertHelper(std::string path, std::string to) + { + //static int ctr; + static std::unordered_map fileNames; + std::vector tags; + std::vector rows; + std::ifstream file(path); + std::string line; + std::vector col_names; std::getline(file, line); tags = tokenize(line); - + if (to == "xml") { while (std::getline(file, line)) + { create_XML(tags, tokenize(line)); - + } } else if (to == "json") { while (std::getline(file, line)) + { create_JSON(tags, tokenize(line)); - + } } } public: void convert(std::string path, std::string to) { - csvxmlHelper(path, to); + convertHelper(path, to); } }; -// How To use -/*int main() { +// How to invoke +/* +int main() { Convert foo; - - foo.convert("path_to_csv_file.csv", "xml"); - foo.convert("path_to_csv_file.csv", "json"); - + foo.convert("path_to_csv.csv", "xml"); + foo.convert("path_to_csv.csv", "json"); }*/ From 164ded5c351d69b4aec26ff67569c5ac6f41d178 Mon Sep 17 00:00:00 2001 From: Gopi M Tatiraju Date: Wed, 2 Sep 2020 11:34:54 +0530 Subject: [PATCH 11/12] Update convert.cpp --- dataset_utils/convert.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dataset_utils/convert.cpp b/dataset_utils/convert.cpp index 8899c5ff..cf9a714b 100644 --- a/dataset_utils/convert.cpp +++ b/dataset_utils/convert.cpp @@ -71,6 +71,7 @@ class Convert std::ifstream file(path); std::string line; std::vector col_names; + std::getline(file, line); tags = tokenize(line); From 31f4899a1c21613dc43e9a75baba128e61872533 Mon Sep 17 00:00:00 2001 From: Gopi M Tatiraju Date: Wed, 2 Sep 2020 12:17:53 +0530 Subject: [PATCH 12/12] Update dataset_utils/convert.cpp Co-authored-by: kartikdutt18 <39593019+kartikdutt18@users.noreply.github.com> --- dataset_utils/convert.cpp | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/dataset_utils/convert.cpp b/dataset_utils/convert.cpp index cf9a714b..0aca549d 100644 --- a/dataset_utils/convert.cpp +++ b/dataset_utils/convert.cpp @@ -16,16 +16,14 @@ using namespace boost; class Convert { - auto tokenize(std::string& line) - { - std::vector col_names; - - tokenizer > tk(line, escaped_list_separator()); - for (tokenizer >::iterator i(tk.begin()); i != tk.end(); ++i) - col_names.push_back(*i); - - return col_names; - } + auto tokenize(std::string& line) + { + std::vector col_names; + tokenizer > tk(line, escaped_list_separator()); + for (tokenizer >::iterator i(tk.begin()); i != tk.end(); ++i) + col_names.push_back(*i); + return col_names; + } auto create_XML(std::vector& tags, std::vector rows) {