From 26211f022c6e238635805375d551435f2c3f7450 Mon Sep 17 00:00:00 2001 From: Ephraim Chukwu Date: Mon, 18 Nov 2024 21:57:37 +0100 Subject: [PATCH] Word Count (#304) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Added introduction for strings * Added blurb for strings * Changed to review * One change * Change wordings for clarity * Enums Data Type * Update:Changes on Enums Data Type * Update:Changes on Enums Data Type * New:Changes on Enums Data Type * Reorder sentences in Enum Traits section * Match basics concept * reorder enum & match basics * amend to requested changes * minor changes * nit: code update * Apply suggestions from code review * traits concept * correction to traits concept * MD047 lint: fixed * few correction on traits * wordy exercise * pass checks * Wordy: update fmt * wordy: formatted * wordy: formatted * wordy: formatted * wordy: error handling. * Update exercises/practice/wordy/src/lib.cairo * Update difficulty to 4 * rename max->num in parse_int * Exercises: Word count * Exercises: Word count * word count:changes requested * word-count: new changes * word-count: new changes * word-count: new changes * word-count: new changes * word-count: new changes * word-count: new changes * redesign the split function * build passes * remove dup and update * formatted: new * remove dup * assert_unordered->assert_unordered_eq + verify both are subsets of each other * Check if prev. and next are alphanumeric --------- Co-authored-by: Nenad Misić Co-authored-by: Nenad --- config.json | 8 + .../practice/word-count/.docs/instructions.md | 47 ++++ .../practice/word-count/.docs/introduction.md | 8 + .../practice/word-count/.meta/config.json | 21 ++ .../practice/word-count/.meta/example.cairo | 102 +++++++ .../practice/word-count/.meta/tests.toml | 57 ++++ exercises/practice/word-count/Scarb.toml | 7 + exercises/practice/word-count/src/lib.cairo | 9 + .../word-count/tests/word_count.cairo | 254 ++++++++++++++++++ 9 files changed, 513 insertions(+) create mode 100644 exercises/practice/word-count/.docs/instructions.md create mode 100644 exercises/practice/word-count/.docs/introduction.md create mode 100644 exercises/practice/word-count/.meta/config.json create mode 100644 exercises/practice/word-count/.meta/example.cairo create mode 100644 exercises/practice/word-count/.meta/tests.toml create mode 100644 exercises/practice/word-count/Scarb.toml create mode 100644 exercises/practice/word-count/src/lib.cairo create mode 100644 exercises/practice/word-count/tests/word_count.cairo diff --git a/config.json b/config.json index 75aa6aad..e13242f4 100644 --- a/config.json +++ b/config.json @@ -820,6 +820,14 @@ "prerequisites": [], "difficulty": 4 }, + { + "slug": "word-count", + "name": "Word Count", + "uuid": "5fded933-439a-4faa-bfb6-18ec7b7c8469", + "practices": [], + "prerequisites": [], + "difficulty": 4 + }, { "slug": "binary-search-tree", "name": "Binary Search Tree", diff --git a/exercises/practice/word-count/.docs/instructions.md b/exercises/practice/word-count/.docs/instructions.md new file mode 100644 index 00000000..064393c8 --- /dev/null +++ b/exercises/practice/word-count/.docs/instructions.md @@ -0,0 +1,47 @@ +# Instructions + +Your task is to count how many times each word occurs in a subtitle of a drama. + +The subtitles from these dramas use only ASCII characters. + +The characters often speak in casual English, using contractions like _they're_ or _it's_. +Though these contractions come from two words (e.g. _we are_), the contraction (_we're_) is considered a single word. + +Words can be separated by any form of punctuation (e.g. ":", "!", or "?") or whitespace (e.g. "\t", "\n", or " "). +The only punctuation that does not separate words is the apostrophe in contractions. + +Numbers are considered words. +If the subtitles say _It costs 100 dollars._ then _100_ will be its own word. + +Words are case insensitive. +For example, the word _you_ occurs three times in the following sentence: + +> You come back, you hear me? DO YOU HEAR ME? + +The ordering of the word counts in the results doesn't matter. + +Here's an example that incorporates several of the elements discussed above: + +- simple words +- contractions +- numbers +- case insensitive words +- punctuation (including apostrophes) to separate words +- different forms of whitespace to separate words + +`"That's the password: 'PASSWORD 123'!", cried the Special Agent.\nSo I fled.` + +The mapping for this subtitle would be: + +```text +123: 1 +agent: 1 +cried: 1 +fled: 1 +i: 1 +password: 2 +so: 1 +special: 1 +that's: 1 +the: 2 +``` diff --git a/exercises/practice/word-count/.docs/introduction.md b/exercises/practice/word-count/.docs/introduction.md new file mode 100644 index 00000000..1654508e --- /dev/null +++ b/exercises/practice/word-count/.docs/introduction.md @@ -0,0 +1,8 @@ +# Introduction + +You teach English as a foreign language to high school students. + +You've decided to base your entire curriculum on TV shows. +You need to analyze which words are used, and how often they're repeated. + +This will let you choose the simplest shows to start with, and to gradually increase the difficulty as time passes. diff --git a/exercises/practice/word-count/.meta/config.json b/exercises/practice/word-count/.meta/config.json new file mode 100644 index 00000000..a390fa4f --- /dev/null +++ b/exercises/practice/word-count/.meta/config.json @@ -0,0 +1,21 @@ +{ + "authors": [ + "Ephraim-nonso" + ], + "files": { + "solution": [ + "src/lib.cairo" + ], + "test": [ + "tests/word_count.cairo" + ], + "example": [ + ".meta/example.cairo" + ], + "invalidator": [ + "Scarb.toml" + ] + }, + "blurb": "Given a phrase, count the occurrences of each word in that phrase.", + "source": "This is a classic toy problem, but we were reminded of it by seeing it in the Go Tour." +} diff --git a/exercises/practice/word-count/.meta/example.cairo b/exercises/practice/word-count/.meta/example.cairo new file mode 100644 index 00000000..4712f0fa --- /dev/null +++ b/exercises/practice/word-count/.meta/example.cairo @@ -0,0 +1,102 @@ +#[derive(Debug, PartialEq, Clone, Drop)] +pub struct WordResult { + pub word: ByteArray, + pub count: u64, +} + +pub fn count_words(phrase: ByteArray) -> Span { + let mut results: Array = ArrayTrait::new(); + let words = split_phrase_into_words(phrase); + + let mut i = 0; + while i < words.len() { + let mut found = false; + + let mut j = 0; + while j < results.len() { + if results[j].word == words[i] { + let updated_result = WordResult { + word: results[j].word.clone(), count: *results[j].count + 1, + }; + + results = remove_index_from_array(results, j); + results.append(updated_result); + found = true; + break; + } + j += 1; + }; + + if !found { + let word_and_count = WordResult { word: words[i].clone(), count: 1 }; + results.append(word_and_count); + } + + i += 1; + }; + + results.span() +} + +fn remove_index_from_array(arr: Array, index: u32) -> Array { + let mut new_arr: Array = ArrayTrait::new(); + + let mut i = 0; + while i < arr.len() { + if i != index { + new_arr.append(arr[i].clone()); + } + i += 1; + }; + + new_arr +} + +fn split_phrase_into_words(phrase: ByteArray) -> Array { + let mut words: Array = ArrayTrait::new(); + let mut current_word = ""; + + let mut i = 0; + while i < phrase.len() { + let lower_case = to_lowercase(phrase[i]); + + if is_alphanumeric_or_apostrophe(lower_case) { + if !is_apostrophe(lower_case) + || (i > 0 && i < phrase.len() + - 1 && is_alphanumeric(phrase[i - 1]) && is_alphanumeric(phrase[i + 1])) { + current_word.append_byte(lower_case); + } + } else if current_word.len() > 0 { + words.append(current_word.clone()); + current_word = ""; + } + + i += 1; + }; + + if current_word.len() > 0 { + words.append(current_word); + } + + words +} + +fn is_alphanumeric_or_apostrophe(ch: u8) -> bool { + is_alphanumeric(ch) || is_apostrophe(ch) +} + +fn is_alphanumeric(ch: u8) -> bool { + ('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') +} + +fn is_apostrophe(ch: u8) -> bool { + ch == '\'' +} + +fn to_lowercase(ch: u8) -> u8 { + if 'A' <= ch && ch <= 'Z' { + ch + 32 + } else { + ch + } +} diff --git a/exercises/practice/word-count/.meta/tests.toml b/exercises/practice/word-count/.meta/tests.toml new file mode 100644 index 00000000..1be425b3 --- /dev/null +++ b/exercises/practice/word-count/.meta/tests.toml @@ -0,0 +1,57 @@ +# This is an auto-generated file. +# +# Regenerating this file via `configlet sync` will: +# - Recreate every `description` key/value pair +# - Recreate every `reimplements` key/value pair, where they exist in problem-specifications +# - Remove any `include = true` key/value pair (an omitted `include` key implies inclusion) +# - Preserve any other key/value pair +# +# As user-added comments (using the # character) will be removed when this file +# is regenerated, comments can be added via a `comment` key. + +[61559d5f-2cad-48fb-af53-d3973a9ee9ef] +description = "count one word" + +[5abd53a3-1aed-43a4-a15a-29f88c09cbbd] +description = "count one of each word" + +[2a3091e5-952e-4099-9fac-8f85d9655c0e] +description = "multiple occurrences of a word" + +[e81877ae-d4da-4af4-931c-d923cd621ca6] +description = "handles cramped lists" + +[7349f682-9707-47c0-a9af-be56e1e7ff30] +description = "handles expanded lists" + +[a514a0f2-8589-4279-8892-887f76a14c82] +description = "ignore punctuation" + +[d2e5cee6-d2ec-497b-bdc9-3ebe092ce55e] +description = "include numbers" + +[dac6bc6a-21ae-4954-945d-d7f716392dbf] +description = "normalize case" + +[4185a902-bdb0-4074-864c-f416e42a0f19] +description = "with apostrophes" +include = false + +[4ff6c7d7-fcfc-43ef-b8e7-34ff1837a2d3] +description = "with apostrophes" +reimplements = "4185a902-bdb0-4074-864c-f416e42a0f19" + +[be72af2b-8afe-4337-b151-b297202e4a7b] +description = "with quotations" + +[8d6815fe-8a51-4a65-96f9-2fb3f6dc6ed6] +description = "substrings from the beginning" + +[c5f4ef26-f3f7-4725-b314-855c04fb4c13] +description = "multiple spaces not detected as a word" + +[50176e8a-fe8e-4f4c-b6b6-aa9cf8f20360] +description = "alternating word separators not detected as a word" + +[6d00f1db-901c-4bec-9829-d20eb3044557] +description = "quotation for word with apostrophe" diff --git a/exercises/practice/word-count/Scarb.toml b/exercises/practice/word-count/Scarb.toml new file mode 100644 index 00000000..fa54cf30 --- /dev/null +++ b/exercises/practice/word-count/Scarb.toml @@ -0,0 +1,7 @@ +[package] +name = "word_count" +version = "0.1.0" +edition = "2024_07" + +[dev-dependencies] +cairo_test = "2.8.2" diff --git a/exercises/practice/word-count/src/lib.cairo b/exercises/practice/word-count/src/lib.cairo new file mode 100644 index 00000000..9e46cc2b --- /dev/null +++ b/exercises/practice/word-count/src/lib.cairo @@ -0,0 +1,9 @@ +#[derive(Debug, PartialEq, Clone, Drop)] +pub struct WordResult { + pub word: ByteArray, + pub count: u64, +} + +pub fn count_words(phrase: ByteArray) -> Span { + panic!("implement `count_words`") +} diff --git a/exercises/practice/word-count/tests/word_count.cairo b/exercises/practice/word-count/tests/word_count.cairo new file mode 100644 index 00000000..920a19bd --- /dev/null +++ b/exercises/practice/word-count/tests/word_count.cairo @@ -0,0 +1,254 @@ +use word_count::{count_words, WordResult}; + +#[test] +fn count_one_word() { + let input = "word"; + let mut output = count_words(input); + + let expected = array![WordResult { word: "word", count: 1 }].span(); + assert_unordered_eq(output, expected); +} + +#[test] +#[ignore] +fn count_one_of_each_word() { + let input = "one of each"; + let mut output = count_words(input); + + let expected = array![ + WordResult { word: "of", count: 1 }, + WordResult { word: "each", count: 1 }, + WordResult { word: "one", count: 1 } + ] + .span(); + assert_unordered_eq(output, expected); +} + +#[test] +#[ignore] +fn multiple_occurrences_of_a_word() { + let input = "one fish two fish red fish blue fish"; + let mut output = count_words(input); + + let expected = array![ + WordResult { word: "one", count: 1 }, + WordResult { word: "two", count: 1 }, + WordResult { word: "red", count: 1 }, + WordResult { word: "blue", count: 1 }, + WordResult { word: "fish", count: 4 } + ] + .span(); + assert_unordered_eq(output, expected); +} + +#[test] +#[ignore] +fn handles_cramped_lists() { + let input = "one,two,three"; + let mut output = count_words(input); + + let expected = array![ + WordResult { word: "one", count: 1 }, + WordResult { word: "two", count: 1 }, + WordResult { word: "three", count: 1 } + ] + .span(); + assert_unordered_eq(output, expected); +} + +#[test] +#[ignore] +fn handles_expanded_lists() { + let input = "one,\ntwo,\nthree"; + let mut output = count_words(input); + + let expected = array![ + WordResult { word: "one", count: 1 }, + WordResult { word: "two", count: 1 }, + WordResult { word: "three", count: 1 } + ] + .span(); + assert_unordered_eq(output, expected); +} + +#[test] +#[ignore] +fn ignore_punctuation() { + let input = "car: carpet as java: javascript!!&@$%^&"; + let mut output = count_words(input); + + let expected = array![ + WordResult { word: "car", count: 1 }, + WordResult { word: "carpet", count: 1 }, + WordResult { word: "as", count: 1 }, + WordResult { word: "java", count: 1 }, + WordResult { word: "javascript", count: 1 } + ] + .span(); + assert_unordered_eq(output, expected); +} + +#[test] +#[ignore] +fn include_numbers() { + let input = "testing, 1, 2 testing"; + let mut output = count_words(input); + + let expected = array![ + WordResult { word: "testing", count: 2 }, + WordResult { word: "1", count: 1 }, + WordResult { word: "2", count: 1 }, + ] + .span(); + + assert_unordered_eq(output, expected); +} + +#[test] +#[ignore] +fn normalize_case() { + let input = "go Go GO Stop stop"; + let mut output = count_words(input); + + let expected = array![ + WordResult { word: "go", count: 3 }, WordResult { word: "stop", count: 2 } + ] + .span(); + assert_unordered_eq(output, expected); +} + +#[test] +#[ignore] +fn with_apostrophes() { + let input = "'First: don't laugh. Then: don't cry. You're getting it.'"; + let mut output = count_words(input); + + let expected = array![ + WordResult { word: "first", count: 1 }, + WordResult { word: "laugh", count: 1 }, + WordResult { word: "then", count: 1 }, + WordResult { word: "don't", count: 2 }, + WordResult { word: "cry", count: 1 }, + WordResult { word: "you're", count: 1 }, + WordResult { word: "getting", count: 1 }, + WordResult { word: "it", count: 1 }, + ] + .span(); + assert_unordered_eq(output, expected); +} + +#[test] +#[ignore] +fn with_quotations() { + let input = "Joe can't tell between 'large' and large."; + let mut output = count_words(input); + + let expected = array![ + WordResult { word: "joe", count: 1 }, + WordResult { word: "can't", count: 1 }, + WordResult { word: "tell", count: 1 }, + WordResult { word: "between", count: 1 }, + WordResult { word: "and", count: 1 }, + WordResult { word: "large", count: 2 } + ] + .span(); + assert_unordered_eq(output, expected); +} + +#[test] +#[ignore] +fn substrings_from_the_beginning() { + let input = "Joe can't tell between app, apple and a."; + let mut output = count_words(input); + + let expected = array![ + WordResult { word: "joe", count: 1 }, + WordResult { word: "can't", count: 1 }, + WordResult { word: "tell", count: 1 }, + WordResult { word: "between", count: 1 }, + WordResult { word: "app", count: 1 }, + WordResult { word: "apple", count: 1 }, + WordResult { word: "and", count: 1 }, + WordResult { word: "a", count: 1 } + ] + .span(); + assert_unordered_eq(output, expected); +} + +#[test] +#[ignore] +fn multiple_spaces_not_detected_as_a_word() { + let input = " multiple whitespaces"; + let mut output = count_words(input); + + let expected = array![ + WordResult { word: "multiple", count: 1 }, WordResult { word: "whitespaces", count: 1 } + ] + .span(); + assert_unordered_eq(output, expected); +} + +#[test] +#[ignore] +fn alternating_word_separators_not_detected_as_a_word() { + let input = ",\n,one,\n ,two \n 'three'"; + let mut output = count_words(input); + + let expected = array![ + WordResult { word: "one", count: 1 }, + WordResult { word: "two", count: 1 }, + WordResult { word: "three", count: 1 } + ] + .span(); + assert_unordered_eq(output, expected); +} + +#[test] +#[ignore] +fn quotation_for_word_with_apostrophe() { + let input = "can, can't, 'can't'"; + let mut output = count_words(input); + + let expected = array![ + WordResult { word: "can", count: 1 }, WordResult { word: "can't", count: 2 } + ] + .span(); + assert_unordered_eq(output, expected); +} + + +// helper function. +fn assert_unordered_eq(span1: Span, span2: Span) { + // `span1` should be subset of `span2` + for item in span1 { + let mut found = false; + for other_item in span2 { + if item == other_item { + found = true; + break; + } + }; + assert!( + found, + "assertion failed: `(left == right)`\n left: `{:?}`,\n right `{:?}`", + span1, + span2 + ); + }; + // and `span2` should be subset of `span1` + for item in span2 { + let mut found = false; + for other_item in span1 { + if item == other_item { + found = true; + break; + } + }; + assert!( + found, + "assertion failed: `(left == right)`\n left: `{:?}`,\n right `{:?}`", + span1, + span2 + ); + } +}