From e7c5f54109a2cc6f748e5b8e5970f3924f15ea40 Mon Sep 17 00:00:00 2001 From: Nenad Date: Thu, 4 Jul 2024 11:20:26 +0200 Subject: [PATCH 1/6] add protein-translation exercise --- concepts/dictionaries/.meta/config.json | 7 + concepts/dictionaries/about.md | 1 + concepts/dictionaries/introduction.md | 1 + concepts/dictionaries/links.json | 1 + config.json | 22 +- .../protein-translation/.docs/instructions.md | 45 +++ .../protein-translation/.meta/config.json | 19 ++ .../protein-translation/.meta/example.cairo | 78 +++++ .../protein-translation/.meta/tests.toml | 105 +++++++ .../practice/protein-translation/Scarb.toml | 4 + .../protein-translation/src/lib.cairo | 78 +++++ .../protein-translation/src/tests.cairo | 291 ++++++++++++++++++ 12 files changed, 651 insertions(+), 1 deletion(-) create mode 100644 concepts/dictionaries/.meta/config.json create mode 100644 concepts/dictionaries/about.md create mode 100644 concepts/dictionaries/introduction.md create mode 100644 concepts/dictionaries/links.json create mode 100644 exercises/practice/protein-translation/.docs/instructions.md create mode 100644 exercises/practice/protein-translation/.meta/config.json create mode 100644 exercises/practice/protein-translation/.meta/example.cairo create mode 100644 exercises/practice/protein-translation/.meta/tests.toml create mode 100644 exercises/practice/protein-translation/Scarb.toml create mode 100644 exercises/practice/protein-translation/src/lib.cairo create mode 100644 exercises/practice/protein-translation/src/tests.cairo diff --git a/concepts/dictionaries/.meta/config.json b/concepts/dictionaries/.meta/config.json new file mode 100644 index 00000000..52914445 --- /dev/null +++ b/concepts/dictionaries/.meta/config.json @@ -0,0 +1,7 @@ +{ + "blurb": "", + "authors": [ + "misicnenad" + ], + "contributors": [] +} diff --git a/concepts/dictionaries/about.md b/concepts/dictionaries/about.md new file mode 100644 index 00000000..8c032762 --- /dev/null +++ b/concepts/dictionaries/about.md @@ -0,0 +1 @@ +# Dictionaries diff --git a/concepts/dictionaries/introduction.md b/concepts/dictionaries/introduction.md new file mode 100644 index 00000000..e10b99d0 --- /dev/null +++ b/concepts/dictionaries/introduction.md @@ -0,0 +1 @@ +# Introduction diff --git a/concepts/dictionaries/links.json b/concepts/dictionaries/links.json new file mode 100644 index 00000000..fe51488c --- /dev/null +++ b/concepts/dictionaries/links.json @@ -0,0 +1 @@ +[] diff --git a/config.json b/config.json index 5ce32b61..2a7dd86b 100644 --- a/config.json +++ b/config.json @@ -226,9 +226,24 @@ "slug": "sublist", "name": "Sublist", "uuid": "e62b1bb4-fb37-4dfe-ad81-e0fa5c494ba3", - "practices": ["arrays", "enums", "generics"], + "practices": [ + "arrays", + "enums", + "generics" + ], "prerequisites": [], "difficulty": 3 + }, + { + "slug": "protein-translation", + "name": "Protein Translation", + "uuid": "d42e7a29-64b9-478b-8b2a-44fd002391fa", + "practices": [ + "arrays", + "dictionaries" + ], + "prerequisites": [], + "difficulty": 6 } ], "foregone": [ @@ -257,6 +272,11 @@ "slug": "generics", "name": "Generic Types" }, + { + "uuid": "7e7b772a-2b3e-47d7-96ef-807a4d5b3545", + "slug": "dictionaries", + "name": "Dictionaries" + }, { "uuid": "de254443-42ee-4787-bc1e-3d0d2039c4f5", "slug": "booleans", diff --git a/exercises/practice/protein-translation/.docs/instructions.md b/exercises/practice/protein-translation/.docs/instructions.md new file mode 100644 index 00000000..7dc34d2e --- /dev/null +++ b/exercises/practice/protein-translation/.docs/instructions.md @@ -0,0 +1,45 @@ +# Instructions + +Translate RNA sequences into proteins. + +RNA can be broken into three nucleotide sequences called codons, and then translated to a polypeptide like so: + +RNA: `"AUGUUUUCU"` => translates to + +Codons: `"AUG", "UUU", "UCU"` +=> which become a polypeptide with the following sequence => + +Protein: `"Methionine", "Phenylalanine", "Serine"` + +There are 64 codons which in turn correspond to 20 amino acids; however, all of the codon sequences and resulting amino acids are not important in this exercise. +If it works for one codon, the program should work for all of them. +However, feel free to expand the list in the test suite to include them all. + +There are also three terminating codons (also known as 'STOP' codons); if any of these codons are encountered (by the ribosome), all translation ends and the protein is terminated. + +All subsequent codons after are ignored, like this: + +RNA: `"AUGUUUUCUUAAAUG"` => + +Codons: `"AUG", "UUU", "UCU", "UAA", "AUG"` => + +Protein: `"Methionine", "Phenylalanine", "Serine"` + +Note the stop codon `"UAA"` terminates the translation and the final methionine is not translated into the protein sequence. + +Below are the codons and resulting Amino Acids needed for the exercise. + +| Codon | Protein | +| :----------------- | :------------ | +| AUG | Methionine | +| UUU, UUC | Phenylalanine | +| UUA, UUG | Leucine | +| UCU, UCC, UCA, UCG | Serine | +| UAU, UAC | Tyrosine | +| UGU, UGC | Cysteine | +| UGG | Tryptophan | +| UAA, UAG, UGA | STOP | + +Learn more about [protein translation on Wikipedia][protein-translation]. + +[protein-translation]: https://en.wikipedia.org/wiki/Translation_(biology) diff --git a/exercises/practice/protein-translation/.meta/config.json b/exercises/practice/protein-translation/.meta/config.json new file mode 100644 index 00000000..8dfffb2e --- /dev/null +++ b/exercises/practice/protein-translation/.meta/config.json @@ -0,0 +1,19 @@ +{ + "authors": [ + "misicnenad" + ], + "files": { + "solution": [ + "src/lib.cairo", + "Scarb.toml" + ], + "test": [ + "src/tests.cairo" + ], + "example": [ + ".meta/example.cairo" + ] + }, + "blurb": "Translate RNA sequences into proteins.", + "source": "Tyler Long" +} diff --git a/exercises/practice/protein-translation/.meta/example.cairo b/exercises/practice/protein-translation/.meta/example.cairo new file mode 100644 index 00000000..cdded1b9 --- /dev/null +++ b/exercises/practice/protein-translation/.meta/example.cairo @@ -0,0 +1,78 @@ +use core::num::traits::zero::Zero; + +#[derive(Destruct)] +pub struct CodonsInfo { + actual_codons: Felt252Dict, +} + +pub fn parse(pairs: Array<(felt252, felt252)>) -> CodonsInfo { + let mut pairs = pairs; + let mut actual_codons: Felt252Dict = Default::default(); + while let Option::Some((codon, name)) = pairs + .pop_front() { + actual_codons.insert(codon, name); + }; + CodonsInfo { actual_codons, } +} + +const TWO_POW_8: u32 = 0x100; +const TWO_POW_16: u32 = 0x10000; + +#[generate_trait] +impl CodonsInfoImpl of CodonsInfoTrait { + fn name_for(ref self: CodonsInfo, codon: felt252) -> felt252 { + self.actual_codons.get(codon) + } + + fn of_rna(ref self: CodonsInfo, strand: ByteArray) -> Option> { + let mut result: Array = array![]; + + let len = strand.len(); + let mut stopped = false; + let mut invalid = false; + let mut i = 0; + while i < len { + let mut codon: u32 = strand[i].into() * TWO_POW_16; + i += 1; + if let Option::Some(char) = strand.at(i) { + codon += char.into() * TWO_POW_8; + } else { + invalid = true; + break; + } + i += 1; + if let Option::Some(char) = strand.at(i) { + codon += char.into(); + } else { + invalid = true; + break; + } + + let name = self.name_for(codon.into()); + if name.is_zero() { + break; + } else if name == 'stop codon' { + stopped = true; + break; + } + result.append(name); + + i += 1; + }; + + if i < len { + if stopped { + return Option::Some(result); + } else { + return Option::None; + } + } else if invalid { + return Option::None; + } else { + return Option::Some(result); + } + } +} + +#[cfg(test)] +mod tests; diff --git a/exercises/practice/protein-translation/.meta/tests.toml b/exercises/practice/protein-translation/.meta/tests.toml new file mode 100644 index 00000000..de680e39 --- /dev/null +++ b/exercises/practice/protein-translation/.meta/tests.toml @@ -0,0 +1,105 @@ +# This is an auto-generated file. +# +# Regenerating this file via `configlet sync` will: +# - Recreate every `description` key/value pair +# - Recreate every `reimplements` key/value pair, where they exist in problem-specifications +# - Remove any `include = true` key/value pair (an omitted `include` key implies inclusion) +# - Preserve any other key/value pair +# +# As user-added comments (using the # character) will be removed when this file +# is regenerated, comments can be added via a `comment` key. + +[2c44f7bf-ba20-43f7-a3bf-f2219c0c3f98] +description = "Empty RNA sequence results in no proteins" + +[96d3d44f-34a2-4db4-84cd-fff523e069be] +description = "Methionine RNA sequence" + +[1b4c56d8-d69f-44eb-be0e-7b17546143d9] +description = "Phenylalanine RNA sequence 1" + +[81b53646-bd57-4732-b2cb-6b1880e36d11] +description = "Phenylalanine RNA sequence 2" + +[42f69d4f-19d2-4d2c-a8b0-f0ae9ee1b6b4] +description = "Leucine RNA sequence 1" + +[ac5edadd-08ed-40a3-b2b9-d82bb50424c4] +description = "Leucine RNA sequence 2" + +[8bc36e22-f984-44c3-9f6b-ee5d4e73f120] +description = "Serine RNA sequence 1" + +[5c3fa5da-4268-44e5-9f4b-f016ccf90131] +description = "Serine RNA sequence 2" + +[00579891-b594-42b4-96dc-7ff8bf519606] +description = "Serine RNA sequence 3" + +[08c61c3b-fa34-4950-8c4a-133945570ef6] +description = "Serine RNA sequence 4" + +[54e1e7d8-63c0-456d-91d2-062c72f8eef5] +description = "Tyrosine RNA sequence 1" + +[47bcfba2-9d72-46ad-bbce-22f7666b7eb1] +description = "Tyrosine RNA sequence 2" + +[3a691829-fe72-43a7-8c8e-1bd083163f72] +description = "Cysteine RNA sequence 1" + +[1b6f8a26-ca2f-43b8-8262-3ee446021767] +description = "Cysteine RNA sequence 2" + +[1e91c1eb-02c0-48a0-9e35-168ad0cb5f39] +description = "Tryptophan RNA sequence" + +[e547af0b-aeab-49c7-9f13-801773a73557] +description = "STOP codon RNA sequence 1" + +[67640947-ff02-4f23-a2ef-816f8a2ba72e] +description = "STOP codon RNA sequence 2" + +[9c2ad527-ebc9-4ace-808b-2b6447cb54cb] +description = "STOP codon RNA sequence 3" + +[f4d9d8ee-00a8-47bf-a1e3-1641d4428e54] +description = "Sequence of two protein codons translates into proteins" + +[dd22eef3-b4f1-4ad6-bb0b-27093c090a9d] +description = "Sequence of two different protein codons translates into proteins" + +[d0f295df-fb70-425c-946c-ec2ec185388e] +description = "Translate RNA strand into correct protein list" + +[e30e8505-97ec-4e5f-a73e-5726a1faa1f4] +description = "Translation stops if STOP codon at beginning of sequence" + +[5358a20b-6f4c-4893-bce4-f929001710f3] +description = "Translation stops if STOP codon at end of two-codon sequence" + +[ba16703a-1a55-482f-bb07-b21eef5093a3] +description = "Translation stops if STOP codon at end of three-codon sequence" + +[4089bb5a-d5b4-4e71-b79e-b8d1f14a2911] +description = "Translation stops if STOP codon in middle of three-codon sequence" + +[2c2a2a60-401f-4a80-b977-e0715b23b93d] +description = "Translation stops if STOP codon in middle of six-codon sequence" + +[f6f92714-769f-4187-9524-e353e8a41a80] +description = "Sequence of two non-STOP codons does not translate to a STOP codon" + +[1e75ea2a-f907-4994-ae5c-118632a1cb0f] +description = "Non-existing codon can't translate" +include = false + +[9eac93f3-627a-4c90-8653-6d0a0595bc6f] +description = "Unknown amino acids, not part of a codon, can't translate" +reimplements = "1e75ea2a-f907-4994-ae5c-118632a1cb0f" + +[9d73899f-e68e-4291-b1e2-7bf87c00f024] +description = "Incomplete RNA sequence can't translate" + +[43945cf7-9968-402d-ab9f-b8a28750b050] +description = "Incomplete RNA sequence can translate if valid until a STOP codon" diff --git a/exercises/practice/protein-translation/Scarb.toml b/exercises/practice/protein-translation/Scarb.toml new file mode 100644 index 00000000..c0ebae10 --- /dev/null +++ b/exercises/practice/protein-translation/Scarb.toml @@ -0,0 +1,4 @@ +[package] +name = "protein_translation" +version = "0.1.0" +edition = "2023_11" diff --git a/exercises/practice/protein-translation/src/lib.cairo b/exercises/practice/protein-translation/src/lib.cairo new file mode 100644 index 00000000..cdded1b9 --- /dev/null +++ b/exercises/practice/protein-translation/src/lib.cairo @@ -0,0 +1,78 @@ +use core::num::traits::zero::Zero; + +#[derive(Destruct)] +pub struct CodonsInfo { + actual_codons: Felt252Dict, +} + +pub fn parse(pairs: Array<(felt252, felt252)>) -> CodonsInfo { + let mut pairs = pairs; + let mut actual_codons: Felt252Dict = Default::default(); + while let Option::Some((codon, name)) = pairs + .pop_front() { + actual_codons.insert(codon, name); + }; + CodonsInfo { actual_codons, } +} + +const TWO_POW_8: u32 = 0x100; +const TWO_POW_16: u32 = 0x10000; + +#[generate_trait] +impl CodonsInfoImpl of CodonsInfoTrait { + fn name_for(ref self: CodonsInfo, codon: felt252) -> felt252 { + self.actual_codons.get(codon) + } + + fn of_rna(ref self: CodonsInfo, strand: ByteArray) -> Option> { + let mut result: Array = array![]; + + let len = strand.len(); + let mut stopped = false; + let mut invalid = false; + let mut i = 0; + while i < len { + let mut codon: u32 = strand[i].into() * TWO_POW_16; + i += 1; + if let Option::Some(char) = strand.at(i) { + codon += char.into() * TWO_POW_8; + } else { + invalid = true; + break; + } + i += 1; + if let Option::Some(char) = strand.at(i) { + codon += char.into(); + } else { + invalid = true; + break; + } + + let name = self.name_for(codon.into()); + if name.is_zero() { + break; + } else if name == 'stop codon' { + stopped = true; + break; + } + result.append(name); + + i += 1; + }; + + if i < len { + if stopped { + return Option::Some(result); + } else { + return Option::None; + } + } else if invalid { + return Option::None; + } else { + return Option::Some(result); + } + } +} + +#[cfg(test)] +mod tests; diff --git a/exercises/practice/protein-translation/src/tests.cairo b/exercises/practice/protein-translation/src/tests.cairo new file mode 100644 index 00000000..671e59a4 --- /dev/null +++ b/exercises/practice/protein-translation/src/tests.cairo @@ -0,0 +1,291 @@ +use protein_translation::{parse, CodonsInfoTrait}; + +#[test] +fn methionine() { + let mut info = parse(make_pairs()); + assert_eq!(info.name_for('AUG'), 'methionine'); +} + +#[test] +fn cysteine_tgt() { + let mut info = parse(make_pairs()); + assert_eq!(info.name_for('UGU'), 'cysteine'); +} + +#[test] +fn stop() { + let mut info = parse(make_pairs()); + assert_eq!(info.name_for('UAA'), 'stop codon'); +} + +#[test] +fn valine() { + let mut info = parse(make_pairs()); + assert_eq!(info.name_for('GUU'), 'valine'); +} + +#[test] +fn isoleucine() { + let mut info = parse(make_pairs()); + assert_eq!(info.name_for('AUU'), 'isoleucine'); +} + +#[test] +fn arginine_cga() { + let mut info = parse(make_pairs()); + assert_eq!(info.name_for('CGA'), 'arginine'); +} + +#[test] +fn arginine_aga() { + let mut info = parse(make_pairs()); + assert_eq!(info.name_for('AGA'), 'arginine'); +} + +#[test] +fn arginine_agg() { + let mut info = parse(make_pairs()); + assert_eq!(info.name_for('AGG'), 'arginine'); +} + +#[test] +fn empty_is_invalid() { + let mut info = parse(make_pairs()); + assert_eq!(info.name_for(''), ''); +} + +#[test] +fn x_is_not_shorthand_so_is_invalid() { + let mut info = parse(make_pairs()); + assert_eq!(info.name_for('VWX'), ''); +} + +#[test] +fn too_short_is_invalid() { + let mut info = parse(make_pairs()); + assert_eq!(info.name_for('AU'), ''); +} + +#[test] +fn too_long_is_invalid() { + let mut info = parse(make_pairs()); + assert_eq!(info.name_for('ATTA'), ''); +} + +#[test] +fn empty_rna_sequence_results_in_no_proteins() { + let mut info = parse(make_pairs()); + assert_eq!(info.of_rna(""), Option::Some(array![]),); +} + +#[test] +fn methionine_rna_sequence() { + let mut info = parse(make_pairs()); + assert_eq!(info.of_rna("AUG"), Option::Some(array!['methionine'])); +} + +#[test] +fn phenylalanine_rna_sequence_1() { + let mut info = parse(make_pairs()); + assert_eq!(info.of_rna("UUU"), Option::Some(array!['phenylalanine']),); +} + +#[test] +fn phenylalanine_rna_sequence_2() { + let mut info = parse(make_pairs()); + assert_eq!(info.of_rna("UUC"), Option::Some(array!['phenylalanine'])); +} + +#[test] +fn leucine_rna_sequence_1() { + let mut info = parse(make_pairs()); + assert_eq!(info.of_rna("UUA"), Option::Some(array!['leucine'])); +} + +#[test] +fn leucine_rna_sequence_2() { + let mut info = parse(make_pairs()); + assert_eq!(info.of_rna("UUG"), Option::Some(array!['leucine'])); +} + +#[test] +fn serine_rna_sequence_1() { + let mut info = parse(make_pairs()); + assert_eq!(info.of_rna("UCU"), Option::Some(array!['serine'])); +} + +#[test] +fn serine_rna_sequence_2() { + let mut info = parse(make_pairs()); + assert_eq!(info.of_rna("UCC"), Option::Some(array!['serine'])); +} + +#[test] +fn serine_rna_sequence_3() { + let mut info = parse(make_pairs()); + assert_eq!(info.of_rna("UCA"), Option::Some(array!['serine'])); +} + +#[test] +fn serine_rna_sequence_4() { + let mut info = parse(make_pairs()); + assert_eq!(info.of_rna("UCG"), Option::Some(array!['serine'])); +} + +#[test] +fn tyrosine_rna_sequence_1() { + let mut info = parse(make_pairs()); + assert_eq!(info.of_rna("UAU"), Option::Some(array!['tyrosine'])); +} + +#[test] +fn tyrosine_rna_sequence_2() { + let mut info = parse(make_pairs()); + assert_eq!(info.of_rna("UAC"), Option::Some(array!['tyrosine'])); +} + +#[test] +fn cysteine_rna_sequence_1() { + let mut info = parse(make_pairs()); + assert_eq!(info.of_rna("UGU"), Option::Some(array!['cysteine'])); +} + +#[test] +fn cysteine_rna_sequence_2() { + let mut info = parse(make_pairs()); + assert_eq!(info.of_rna("UGC"), Option::Some(array!['cysteine'])); +} + +#[test] +fn tryptophan_rna_sequence() { + let mut info = parse(make_pairs()); + assert_eq!(info.of_rna("UGG"), Option::Some(array!['tryptophan'])); +} + +#[test] +fn stop_codon_rna_sequence_1() { + let mut info = parse(make_pairs()); + assert_eq!(info.of_rna("UAA"), Option::Some(array![]),); +} + +#[test] +fn stop_codon_rna_sequence_2() { + let mut info = parse(make_pairs()); + assert_eq!(info.of_rna("UAG"), Option::Some(array![]),); +} + +#[test] +fn stop_codon_rna_sequence_3() { + let mut info = parse(make_pairs()); + assert_eq!(info.of_rna("UGA"), Option::Some(array![]),); +} + +#[test] +fn sequence_of_two_protein_codons_translates_into_proteins() { + let mut info = parse(make_pairs()); + assert_eq!(info.of_rna("UUUUUU"), Option::Some(array!['phenylalanine', 'phenylalanine']),); +} + +#[test] +fn sequence_of_two_different_protein_codons_translates_into_proteins() { + let mut info = parse(make_pairs()); + assert_eq!(info.of_rna("UUAUUG"), Option::Some(array!['leucine', 'leucine']),); +} + +#[test] +fn translate_rna_strand_into_correct_protein_list() { + let mut info = parse(make_pairs()); + assert_eq!( + info.of_rna("AUGUUUUGG"), Option::Some(array!['methionine', 'phenylalanine', 'tryptophan']), + ); +} + +#[test] +fn translation_stops_if_stop_codon_at_beginning_of_sequence() { + let mut info = parse(make_pairs()); + assert_eq!(info.of_rna("UAGUGG"), Option::Some(array![]),); +} + +#[test] +fn translation_stops_if_stop_codon_at_end_of_two_codon_sequence() { + let mut info = parse(make_pairs()); + assert_eq!(info.of_rna("UGGUAG"), Option::Some(array!['tryptophan']),); +} + +#[test] +fn translation_stops_if_stop_codon_at_end_of_three_codon_sequence() { + let mut info = parse(make_pairs()); + assert_eq!(info.of_rna("AUGUUUUAA"), Option::Some(array!['methionine', 'phenylalanine']),); +} + +#[test] +fn translation_stops_if_stop_codon_in_middle_of_three_codon_sequence() { + let mut info = parse(make_pairs()); + assert_eq!(info.of_rna("UGGUAGUGG"), Option::Some(array!['tryptophan']),); +} + +#[test] +fn translation_stops_if_stop_codon_in_middle_of_six_codon_sequence() { + let mut info = parse(make_pairs()); + assert_eq!( + info.of_rna("UGGUGUUAUUAAUGGUUU"), + Option::Some(array!['tryptophan', 'cysteine', 'tyrosine']), + ); +} + +#[test] +fn unknown_amino_acids_not_part_of_a_codon_can_t_translate() { + let mut info = parse(make_pairs()); + let none: Option> = Option::None; + assert_eq!(info.of_rna("XYZ"), none); +} + +#[test] +fn incomplete_rna_sequence_can_t_translate() { + let mut info = parse(make_pairs()); + let none: Option> = Option::None; + assert_eq!(info.of_rna("AUGU"), none); +} + +#[test] +fn incomplete_rna_sequence_can_translate_if_valid_until_a_stop_codon() { + let mut info = parse(make_pairs()); + assert_eq!( + info.of_rna("UUCUUCUAAUGGU"), Option::Some(array!['phenylalanine', 'phenylalanine']), + ); +} + +// The input data constructor. Returns a list of codon, name pairs. +fn make_pairs() -> Array<(felt252, felt252)> { + let mut grouped: Array<(felt252, Array)> = array![ + ('isoleucine', array!['AUU', 'AUC', 'AUA']), + ('valine', array!['GUU', 'GUC', 'GUA', 'GUG']), + ('phenylalanine', array!['UUU', 'UUC']), + ('methionine', array!['AUG']), + ('cysteine', array!['UGU', 'UGC']), + ('alanine', array!['GCU', 'GCC', 'GCA', 'GCG']), + ('glycine', array!['GGU', 'GGC', 'GGA', 'GGG']), + ('proline', array!['CCU', 'CCC', 'CCA', 'CCG']), + ('threonine', array!['ACU', 'ACC', 'ACA', 'ACG']), + ('serine', array!['UCU', 'UCC', 'UCA', 'UCG']), + ('tyrosine', array!['UAU', 'UAC']), + ('tryptophan', array!['UGG']), + ('glutamine', array!['CAA', 'CAG']), + ('asparagine', array!['AAU', 'AAC']), + ('histidine', array!['CAU', 'CAC']), + ('glutamic acid', array!['GAA', 'GAG']), + ('aspartic acid', array!['GAU', 'GAC']), + ('lysine', array!['AAA', 'AAG']), + ('arginine', array!['CGU', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG']), + ('leucine', array!['UUA', 'UUG']), + ('stop codon', array!['UAA', 'UAG', 'UGA']), + ]; + let mut pairs = ArrayTrait::<(felt252, felt252)>::new(); + while let Option::Some((name, mut codons)) = grouped + .pop_front() { + while let Option::Some(codon) = codons.pop_front() { + pairs.append((codon, name)); + }; + }; + pairs +} From 2f6c421d722c2b0e6d20cf8201e3f60a5be15e7c Mon Sep 17 00:00:00 2001 From: Nenad Date: Thu, 4 Jul 2024 12:46:09 +0200 Subject: [PATCH 2/6] refactor the logic (more understandable) --- .../protein-translation/.meta/example.cairo | 84 ++++++++++--------- .../protein-translation/src/lib.cairo | 84 ++++++++++--------- 2 files changed, 86 insertions(+), 82 deletions(-) diff --git a/exercises/practice/protein-translation/.meta/example.cairo b/exercises/practice/protein-translation/.meta/example.cairo index cdded1b9..1b7592b1 100644 --- a/exercises/practice/protein-translation/.meta/example.cairo +++ b/exercises/practice/protein-translation/.meta/example.cairo @@ -1,11 +1,17 @@ use core::num::traits::zero::Zero; #[derive(Destruct)] -pub struct CodonsInfo { +struct CodonsInfo { actual_codons: Felt252Dict, } -pub fn parse(pairs: Array<(felt252, felt252)>) -> CodonsInfo { +enum TranslateResult { + Invalid, + Stopped, + Ok +} + +fn parse(pairs: Array<(felt252, felt252)>) -> CodonsInfo { let mut pairs = pairs; let mut actual_codons: Felt252Dict = Default::default(); while let Option::Some((codon, name)) = pairs @@ -15,9 +21,6 @@ pub fn parse(pairs: Array<(felt252, felt252)>) -> CodonsInfo { CodonsInfo { actual_codons, } } -const TWO_POW_8: u32 = 0x100; -const TWO_POW_16: u32 = 0x10000; - #[generate_trait] impl CodonsInfoImpl of CodonsInfoTrait { fn name_for(ref self: CodonsInfo, codon: felt252) -> felt252 { @@ -27,49 +30,48 @@ impl CodonsInfoImpl of CodonsInfoTrait { fn of_rna(ref self: CodonsInfo, strand: ByteArray) -> Option> { let mut result: Array = array![]; - let len = strand.len(); - let mut stopped = false; - let mut invalid = false; - let mut i = 0; - while i < len { - let mut codon: u32 = strand[i].into() * TWO_POW_16; - i += 1; - if let Option::Some(char) = strand.at(i) { - codon += char.into() * TWO_POW_8; - } else { - invalid = true; - break; + let mut codon_index = 0; + let translate_result = loop { + if codon_index == strand.len() { + break TranslateResult::Ok; } - i += 1; - if let Option::Some(char) = strand.at(i) { - codon += char.into(); + + if let Option::Some(codon) = strand.codon_from(codon_index) { + let name = self.name_for(codon); + if name.is_zero() { + break TranslateResult::Invalid; + } else if name == 'stop codon' { + break TranslateResult::Stopped; + } + + result.append(name); + codon_index += 3; } else { - invalid = true; - break; + break TranslateResult::Invalid; } + }; - let name = self.name_for(codon.into()); - if name.is_zero() { - break; - } else if name == 'stop codon' { - stopped = true; - break; - } - result.append(name); + match translate_result { + TranslateResult::Invalid => Option::None, + _ => Option::Some(result) + } + } +} - i += 1; - }; +const TWO_POW_8: u32 = 0x100; +const TWO_POW_16: u32 = 0x10000; - if i < len { - if stopped { - return Option::Some(result); - } else { - return Option::None; - } - } else if invalid { - return Option::None; +#[generate_trait] +impl ByteArrayChunk of ChunkTrait { + fn codon_from(self: @ByteArray, from: usize) -> Option { + if let Option::Some(char) = self.at(from + 2) { + let chunk = char.into() + + self[from + + 1].into() * TWO_POW_8 + + self[from].into() * TWO_POW_16; + Option::Some(chunk.into()) } else { - return Option::Some(result); + Option::None } } } diff --git a/exercises/practice/protein-translation/src/lib.cairo b/exercises/practice/protein-translation/src/lib.cairo index cdded1b9..1b7592b1 100644 --- a/exercises/practice/protein-translation/src/lib.cairo +++ b/exercises/practice/protein-translation/src/lib.cairo @@ -1,11 +1,17 @@ use core::num::traits::zero::Zero; #[derive(Destruct)] -pub struct CodonsInfo { +struct CodonsInfo { actual_codons: Felt252Dict, } -pub fn parse(pairs: Array<(felt252, felt252)>) -> CodonsInfo { +enum TranslateResult { + Invalid, + Stopped, + Ok +} + +fn parse(pairs: Array<(felt252, felt252)>) -> CodonsInfo { let mut pairs = pairs; let mut actual_codons: Felt252Dict = Default::default(); while let Option::Some((codon, name)) = pairs @@ -15,9 +21,6 @@ pub fn parse(pairs: Array<(felt252, felt252)>) -> CodonsInfo { CodonsInfo { actual_codons, } } -const TWO_POW_8: u32 = 0x100; -const TWO_POW_16: u32 = 0x10000; - #[generate_trait] impl CodonsInfoImpl of CodonsInfoTrait { fn name_for(ref self: CodonsInfo, codon: felt252) -> felt252 { @@ -27,49 +30,48 @@ impl CodonsInfoImpl of CodonsInfoTrait { fn of_rna(ref self: CodonsInfo, strand: ByteArray) -> Option> { let mut result: Array = array![]; - let len = strand.len(); - let mut stopped = false; - let mut invalid = false; - let mut i = 0; - while i < len { - let mut codon: u32 = strand[i].into() * TWO_POW_16; - i += 1; - if let Option::Some(char) = strand.at(i) { - codon += char.into() * TWO_POW_8; - } else { - invalid = true; - break; + let mut codon_index = 0; + let translate_result = loop { + if codon_index == strand.len() { + break TranslateResult::Ok; } - i += 1; - if let Option::Some(char) = strand.at(i) { - codon += char.into(); + + if let Option::Some(codon) = strand.codon_from(codon_index) { + let name = self.name_for(codon); + if name.is_zero() { + break TranslateResult::Invalid; + } else if name == 'stop codon' { + break TranslateResult::Stopped; + } + + result.append(name); + codon_index += 3; } else { - invalid = true; - break; + break TranslateResult::Invalid; } + }; - let name = self.name_for(codon.into()); - if name.is_zero() { - break; - } else if name == 'stop codon' { - stopped = true; - break; - } - result.append(name); + match translate_result { + TranslateResult::Invalid => Option::None, + _ => Option::Some(result) + } + } +} - i += 1; - }; +const TWO_POW_8: u32 = 0x100; +const TWO_POW_16: u32 = 0x10000; - if i < len { - if stopped { - return Option::Some(result); - } else { - return Option::None; - } - } else if invalid { - return Option::None; +#[generate_trait] +impl ByteArrayChunk of ChunkTrait { + fn codon_from(self: @ByteArray, from: usize) -> Option { + if let Option::Some(char) = self.at(from + 2) { + let chunk = char.into() + + self[from + + 1].into() * TWO_POW_8 + + self[from].into() * TWO_POW_16; + Option::Some(chunk.into()) } else { - return Option::Some(result); + Option::None } } } From 5bceb739a3955cbd95e20284d357220d5445e766 Mon Sep 17 00:00:00 2001 From: Nenad Date: Thu, 4 Jul 2024 12:56:15 +0200 Subject: [PATCH 3/6] use bytearrays for names --- config.json | 8 +- .../protein-translation/.meta/example.cairo | 25 ++-- .../protein-translation/src/lib.cairo | 25 ++-- .../protein-translation/src/tests.cairo | 128 +++++++++--------- 4 files changed, 100 insertions(+), 86 deletions(-) diff --git a/config.json b/config.json index 2a7dd86b..c08ca405 100644 --- a/config.json +++ b/config.json @@ -239,11 +239,13 @@ "name": "Protein Translation", "uuid": "d42e7a29-64b9-478b-8b2a-44fd002391fa", "practices": [ - "arrays", - "dictionaries" + "dictionaries", + "strings", + "felts", + "type-conversion" ], "prerequisites": [], - "difficulty": 6 + "difficulty": 8 } ], "foregone": [ diff --git a/exercises/practice/protein-translation/.meta/example.cairo b/exercises/practice/protein-translation/.meta/example.cairo index 1b7592b1..f55eb518 100644 --- a/exercises/practice/protein-translation/.meta/example.cairo +++ b/exercises/practice/protein-translation/.meta/example.cairo @@ -1,8 +1,9 @@ +use core::dict::Felt252DictEntryTrait; use core::num::traits::zero::Zero; #[derive(Destruct)] struct CodonsInfo { - actual_codons: Felt252Dict, + actual_codons: Felt252Dict>, } enum TranslateResult { @@ -11,24 +12,28 @@ enum TranslateResult { Ok } -fn parse(pairs: Array<(felt252, felt252)>) -> CodonsInfo { +fn parse(pairs: Array<(felt252, ByteArray)>) -> CodonsInfo { let mut pairs = pairs; - let mut actual_codons: Felt252Dict = Default::default(); + let mut actual_codons: Felt252Dict> = Default::default(); while let Option::Some((codon, name)) = pairs .pop_front() { - actual_codons.insert(codon, name); + actual_codons.insert(codon, NullableTrait::new(name)); }; CodonsInfo { actual_codons, } } #[generate_trait] impl CodonsInfoImpl of CodonsInfoTrait { - fn name_for(ref self: CodonsInfo, codon: felt252) -> felt252 { - self.actual_codons.get(codon) + fn name_for(ref self: CodonsInfo, codon: felt252) -> ByteArray { + let (entry, _name) = self.actual_codons.entry(codon); + let name = _name.deref_or(""); + let res = name.clone(); + self.actual_codons = entry.finalize(NullableTrait::new(name)); + res } - fn of_rna(ref self: CodonsInfo, strand: ByteArray) -> Option> { - let mut result: Array = array![]; + fn of_rna(ref self: CodonsInfo, strand: ByteArray) -> Option> { + let mut result: Array = array![]; let mut codon_index = 0; let translate_result = loop { @@ -38,9 +43,9 @@ impl CodonsInfoImpl of CodonsInfoTrait { if let Option::Some(codon) = strand.codon_from(codon_index) { let name = self.name_for(codon); - if name.is_zero() { + if name == "" { break TranslateResult::Invalid; - } else if name == 'stop codon' { + } else if name == "stop codon" { break TranslateResult::Stopped; } diff --git a/exercises/practice/protein-translation/src/lib.cairo b/exercises/practice/protein-translation/src/lib.cairo index 1b7592b1..f55eb518 100644 --- a/exercises/practice/protein-translation/src/lib.cairo +++ b/exercises/practice/protein-translation/src/lib.cairo @@ -1,8 +1,9 @@ +use core::dict::Felt252DictEntryTrait; use core::num::traits::zero::Zero; #[derive(Destruct)] struct CodonsInfo { - actual_codons: Felt252Dict, + actual_codons: Felt252Dict>, } enum TranslateResult { @@ -11,24 +12,28 @@ enum TranslateResult { Ok } -fn parse(pairs: Array<(felt252, felt252)>) -> CodonsInfo { +fn parse(pairs: Array<(felt252, ByteArray)>) -> CodonsInfo { let mut pairs = pairs; - let mut actual_codons: Felt252Dict = Default::default(); + let mut actual_codons: Felt252Dict> = Default::default(); while let Option::Some((codon, name)) = pairs .pop_front() { - actual_codons.insert(codon, name); + actual_codons.insert(codon, NullableTrait::new(name)); }; CodonsInfo { actual_codons, } } #[generate_trait] impl CodonsInfoImpl of CodonsInfoTrait { - fn name_for(ref self: CodonsInfo, codon: felt252) -> felt252 { - self.actual_codons.get(codon) + fn name_for(ref self: CodonsInfo, codon: felt252) -> ByteArray { + let (entry, _name) = self.actual_codons.entry(codon); + let name = _name.deref_or(""); + let res = name.clone(); + self.actual_codons = entry.finalize(NullableTrait::new(name)); + res } - fn of_rna(ref self: CodonsInfo, strand: ByteArray) -> Option> { - let mut result: Array = array![]; + fn of_rna(ref self: CodonsInfo, strand: ByteArray) -> Option> { + let mut result: Array = array![]; let mut codon_index = 0; let translate_result = loop { @@ -38,9 +43,9 @@ impl CodonsInfoImpl of CodonsInfoTrait { if let Option::Some(codon) = strand.codon_from(codon_index) { let name = self.name_for(codon); - if name.is_zero() { + if name == "" { break TranslateResult::Invalid; - } else if name == 'stop codon' { + } else if name == "stop codon" { break TranslateResult::Stopped; } diff --git a/exercises/practice/protein-translation/src/tests.cairo b/exercises/practice/protein-translation/src/tests.cairo index 671e59a4..f14f5159 100644 --- a/exercises/practice/protein-translation/src/tests.cairo +++ b/exercises/practice/protein-translation/src/tests.cairo @@ -1,75 +1,76 @@ +use core::clone::Clone; use protein_translation::{parse, CodonsInfoTrait}; #[test] fn methionine() { let mut info = parse(make_pairs()); - assert_eq!(info.name_for('AUG'), 'methionine'); + assert_eq!(info.name_for('AUG'), "methionine"); } #[test] fn cysteine_tgt() { let mut info = parse(make_pairs()); - assert_eq!(info.name_for('UGU'), 'cysteine'); + assert_eq!(info.name_for('UGU'), "cysteine"); } #[test] fn stop() { let mut info = parse(make_pairs()); - assert_eq!(info.name_for('UAA'), 'stop codon'); + assert_eq!(info.name_for('UAA'), "stop codon"); } #[test] fn valine() { let mut info = parse(make_pairs()); - assert_eq!(info.name_for('GUU'), 'valine'); + assert_eq!(info.name_for('GUU'), "valine"); } #[test] fn isoleucine() { let mut info = parse(make_pairs()); - assert_eq!(info.name_for('AUU'), 'isoleucine'); + assert_eq!(info.name_for('AUU'), "isoleucine"); } #[test] fn arginine_cga() { let mut info = parse(make_pairs()); - assert_eq!(info.name_for('CGA'), 'arginine'); + assert_eq!(info.name_for('CGA'), "arginine"); } #[test] fn arginine_aga() { let mut info = parse(make_pairs()); - assert_eq!(info.name_for('AGA'), 'arginine'); + assert_eq!(info.name_for('AGA'), "arginine"); } #[test] fn arginine_agg() { let mut info = parse(make_pairs()); - assert_eq!(info.name_for('AGG'), 'arginine'); + assert_eq!(info.name_for('AGG'), "arginine"); } #[test] fn empty_is_invalid() { let mut info = parse(make_pairs()); - assert_eq!(info.name_for(''), ''); + assert_eq!(info.name_for(''), ""); } #[test] fn x_is_not_shorthand_so_is_invalid() { let mut info = parse(make_pairs()); - assert_eq!(info.name_for('VWX'), ''); + assert_eq!(info.name_for('VWX'), ""); } #[test] fn too_short_is_invalid() { let mut info = parse(make_pairs()); - assert_eq!(info.name_for('AU'), ''); + assert_eq!(info.name_for('AU'), ""); } #[test] fn too_long_is_invalid() { let mut info = parse(make_pairs()); - assert_eq!(info.name_for('ATTA'), ''); + assert_eq!(info.name_for('ATTA'), ""); } #[test] @@ -81,85 +82,85 @@ fn empty_rna_sequence_results_in_no_proteins() { #[test] fn methionine_rna_sequence() { let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("AUG"), Option::Some(array!['methionine'])); + assert_eq!(info.of_rna("AUG"), Option::Some(array!["methionine"])); } #[test] fn phenylalanine_rna_sequence_1() { let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UUU"), Option::Some(array!['phenylalanine']),); + assert_eq!(info.of_rna("UUU"), Option::Some(array!["phenylalanine"]),); } #[test] fn phenylalanine_rna_sequence_2() { let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UUC"), Option::Some(array!['phenylalanine'])); + assert_eq!(info.of_rna("UUC"), Option::Some(array!["phenylalanine"])); } #[test] fn leucine_rna_sequence_1() { let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UUA"), Option::Some(array!['leucine'])); + assert_eq!(info.of_rna("UUA"), Option::Some(array!["leucine"])); } #[test] fn leucine_rna_sequence_2() { let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UUG"), Option::Some(array!['leucine'])); + assert_eq!(info.of_rna("UUG"), Option::Some(array!["leucine"])); } #[test] fn serine_rna_sequence_1() { let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UCU"), Option::Some(array!['serine'])); + assert_eq!(info.of_rna("UCU"), Option::Some(array!["serine"])); } #[test] fn serine_rna_sequence_2() { let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UCC"), Option::Some(array!['serine'])); + assert_eq!(info.of_rna("UCC"), Option::Some(array!["serine"])); } #[test] fn serine_rna_sequence_3() { let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UCA"), Option::Some(array!['serine'])); + assert_eq!(info.of_rna("UCA"), Option::Some(array!["serine"])); } #[test] fn serine_rna_sequence_4() { let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UCG"), Option::Some(array!['serine'])); + assert_eq!(info.of_rna("UCG"), Option::Some(array!["serine"])); } #[test] fn tyrosine_rna_sequence_1() { let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UAU"), Option::Some(array!['tyrosine'])); + assert_eq!(info.of_rna("UAU"), Option::Some(array!["tyrosine"])); } #[test] fn tyrosine_rna_sequence_2() { let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UAC"), Option::Some(array!['tyrosine'])); + assert_eq!(info.of_rna("UAC"), Option::Some(array!["tyrosine"])); } #[test] fn cysteine_rna_sequence_1() { let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UGU"), Option::Some(array!['cysteine'])); + assert_eq!(info.of_rna("UGU"), Option::Some(array!["cysteine"])); } #[test] fn cysteine_rna_sequence_2() { let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UGC"), Option::Some(array!['cysteine'])); + assert_eq!(info.of_rna("UGC"), Option::Some(array!["cysteine"])); } #[test] fn tryptophan_rna_sequence() { let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UGG"), Option::Some(array!['tryptophan'])); + assert_eq!(info.of_rna("UGG"), Option::Some(array!["tryptophan"])); } #[test] @@ -183,20 +184,20 @@ fn stop_codon_rna_sequence_3() { #[test] fn sequence_of_two_protein_codons_translates_into_proteins() { let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UUUUUU"), Option::Some(array!['phenylalanine', 'phenylalanine']),); + assert_eq!(info.of_rna("UUUUUU"), Option::Some(array!["phenylalanine", "phenylalanine"]),); } #[test] fn sequence_of_two_different_protein_codons_translates_into_proteins() { let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UUAUUG"), Option::Some(array!['leucine', 'leucine']),); + assert_eq!(info.of_rna("UUAUUG"), Option::Some(array!["leucine", "leucine"]),); } #[test] fn translate_rna_strand_into_correct_protein_list() { let mut info = parse(make_pairs()); assert_eq!( - info.of_rna("AUGUUUUGG"), Option::Some(array!['methionine', 'phenylalanine', 'tryptophan']), + info.of_rna("AUGUUUUGG"), Option::Some(array!["methionine", "phenylalanine", "tryptophan"]), ); } @@ -209,19 +210,19 @@ fn translation_stops_if_stop_codon_at_beginning_of_sequence() { #[test] fn translation_stops_if_stop_codon_at_end_of_two_codon_sequence() { let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UGGUAG"), Option::Some(array!['tryptophan']),); + assert_eq!(info.of_rna("UGGUAG"), Option::Some(array!["tryptophan"]),); } #[test] fn translation_stops_if_stop_codon_at_end_of_three_codon_sequence() { let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("AUGUUUUAA"), Option::Some(array!['methionine', 'phenylalanine']),); + assert_eq!(info.of_rna("AUGUUUUAA"), Option::Some(array!["methionine", "phenylalanine"]),); } #[test] fn translation_stops_if_stop_codon_in_middle_of_three_codon_sequence() { let mut info = parse(make_pairs()); - assert_eq!(info.of_rna("UGGUAGUGG"), Option::Some(array!['tryptophan']),); + assert_eq!(info.of_rna("UGGUAGUGG"), Option::Some(array!["tryptophan"]),); } #[test] @@ -229,21 +230,21 @@ fn translation_stops_if_stop_codon_in_middle_of_six_codon_sequence() { let mut info = parse(make_pairs()); assert_eq!( info.of_rna("UGGUGUUAUUAAUGGUUU"), - Option::Some(array!['tryptophan', 'cysteine', 'tyrosine']), + Option::Some(array!["tryptophan", "cysteine", "tyrosine"]), ); } #[test] fn unknown_amino_acids_not_part_of_a_codon_can_t_translate() { let mut info = parse(make_pairs()); - let none: Option> = Option::None; + let none: Option> = Option::None; assert_eq!(info.of_rna("XYZ"), none); } #[test] fn incomplete_rna_sequence_can_t_translate() { let mut info = parse(make_pairs()); - let none: Option> = Option::None; + let none: Option> = Option::None; assert_eq!(info.of_rna("AUGU"), none); } @@ -251,41 +252,42 @@ fn incomplete_rna_sequence_can_t_translate() { fn incomplete_rna_sequence_can_translate_if_valid_until_a_stop_codon() { let mut info = parse(make_pairs()); assert_eq!( - info.of_rna("UUCUUCUAAUGGU"), Option::Some(array!['phenylalanine', 'phenylalanine']), + info.of_rna("UUCUUCUAAUGGU"), Option::Some(array!["phenylalanine", "phenylalanine"]), ); } // The input data constructor. Returns a list of codon, name pairs. -fn make_pairs() -> Array<(felt252, felt252)> { - let mut grouped: Array<(felt252, Array)> = array![ - ('isoleucine', array!['AUU', 'AUC', 'AUA']), - ('valine', array!['GUU', 'GUC', 'GUA', 'GUG']), - ('phenylalanine', array!['UUU', 'UUC']), - ('methionine', array!['AUG']), - ('cysteine', array!['UGU', 'UGC']), - ('alanine', array!['GCU', 'GCC', 'GCA', 'GCG']), - ('glycine', array!['GGU', 'GGC', 'GGA', 'GGG']), - ('proline', array!['CCU', 'CCC', 'CCA', 'CCG']), - ('threonine', array!['ACU', 'ACC', 'ACA', 'ACG']), - ('serine', array!['UCU', 'UCC', 'UCA', 'UCG']), - ('tyrosine', array!['UAU', 'UAC']), - ('tryptophan', array!['UGG']), - ('glutamine', array!['CAA', 'CAG']), - ('asparagine', array!['AAU', 'AAC']), - ('histidine', array!['CAU', 'CAC']), - ('glutamic acid', array!['GAA', 'GAG']), - ('aspartic acid', array!['GAU', 'GAC']), - ('lysine', array!['AAA', 'AAG']), - ('arginine', array!['CGU', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG']), - ('leucine', array!['UUA', 'UUG']), - ('stop codon', array!['UAA', 'UAG', 'UGA']), +fn make_pairs() -> Array<(felt252, ByteArray)> { + let mut grouped: Array<(ByteArray, Array)> = array![ + ("isoleucine", array!['AUU', 'AUC', 'AUA']), + ("valine", array!['GUU', 'GUC', 'GUA', 'GUG']), + ("phenylalanine", array!['UUU', 'UUC']), + ("methionine", array!['AUG']), + ("cysteine", array!['UGU', 'UGC']), + ("alanine", array!['GCU', 'GCC', 'GCA', 'GCG']), + ("glycine", array!['GGU', 'GGC', 'GGA', 'GGG']), + ("proline", array!['CCU', 'CCC', 'CCA', 'CCG']), + ("threonine", array!['ACU', 'ACC', 'ACA', 'ACG']), + ("serine", array!['UCU', 'UCC', 'UCA', 'UCG']), + ("tyrosine", array!['UAU', 'UAC']), + ("tryptophan", array!['UGG']), + ("glutamine", array!['CAA', 'CAG']), + ("asparagine", array!['AAU', 'AAC']), + ("histidine", array!['CAU', 'CAC']), + ("glutamic acid", array!['GAA', 'GAG']), + ("aspartic acid", array!['GAU', 'GAC']), + ("lysine", array!['AAA', 'AAG']), + ("arginine", array!['CGU', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG']), + ("leucine", array!['UUA', 'UUG']), + ("stop codon", array!['UAA', 'UAG', 'UGA']), ]; - let mut pairs = ArrayTrait::<(felt252, felt252)>::new(); + let mut pairs = ArrayTrait::<(felt252, ByteArray)>::new(); while let Option::Some((name, mut codons)) = grouped .pop_front() { - while let Option::Some(codon) = codons.pop_front() { - pairs.append((codon, name)); - }; + while let Option::Some(codon) = codons + .pop_front() { + pairs.append((codon, name.clone())); + }; }; pairs } From b10d3cec82e9a1c4a78103e561e8eb6c8ae82693 Mon Sep 17 00:00:00 2001 From: Nenad Date: Thu, 4 Jul 2024 12:58:35 +0200 Subject: [PATCH 4/6] codon_from->codon_chunk --- exercises/practice/protein-translation/src/lib.cairo | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exercises/practice/protein-translation/src/lib.cairo b/exercises/practice/protein-translation/src/lib.cairo index f55eb518..0e8279e2 100644 --- a/exercises/practice/protein-translation/src/lib.cairo +++ b/exercises/practice/protein-translation/src/lib.cairo @@ -41,7 +41,7 @@ impl CodonsInfoImpl of CodonsInfoTrait { break TranslateResult::Ok; } - if let Option::Some(codon) = strand.codon_from(codon_index) { + if let Option::Some(codon) = strand.codon_chunk(codon_index) { let name = self.name_for(codon); if name == "" { break TranslateResult::Invalid; @@ -68,7 +68,7 @@ const TWO_POW_16: u32 = 0x10000; #[generate_trait] impl ByteArrayChunk of ChunkTrait { - fn codon_from(self: @ByteArray, from: usize) -> Option { + fn codon_chunk(self: @ByteArray, from: usize) -> Option { if let Option::Some(char) = self.at(from + 2) { let chunk = char.into() + self[from From 48e1807d37a29abbcc795d56b1a84e04cfde6521 Mon Sep 17 00:00:00 2001 From: Nenad Date: Thu, 4 Jul 2024 13:23:34 +0200 Subject: [PATCH 5/6] add scaffold + add explanation for codon_chunk --- .../protein-translation/.meta/example.cairo | 34 ++++++-- .../protein-translation/src/lib.cairo | 85 +++++++------------ 2 files changed, 60 insertions(+), 59 deletions(-) diff --git a/exercises/practice/protein-translation/.meta/example.cairo b/exercises/practice/protein-translation/.meta/example.cairo index f55eb518..55b7026c 100644 --- a/exercises/practice/protein-translation/.meta/example.cairo +++ b/exercises/practice/protein-translation/.meta/example.cairo @@ -41,7 +41,7 @@ impl CodonsInfoImpl of CodonsInfoTrait { break TranslateResult::Ok; } - if let Option::Some(codon) = strand.codon_from(codon_index) { + if let Option::Some(codon) = strand.codon_chunk(codon_index) { let name = self.name_for(codon); if name == "" { break TranslateResult::Invalid; @@ -67,14 +67,38 @@ const TWO_POW_8: u32 = 0x100; const TWO_POW_16: u32 = 0x10000; #[generate_trait] -impl ByteArrayChunk of ChunkTrait { - fn codon_from(self: @ByteArray, from: usize) -> Option { +impl CodonChunk of CodonChunkTrait { + /// Given a ByteArray, extracts a codon from a given index `from`. + /// Needs to extract 3 ByteArray characters and convert them to the appropriate + /// felt252 value. It does this by taking the characters' byte value and moving + /// their bits to the left depending on their position in the codon. + /// + /// Example: + /// 1. Method call: "AUG".codon_chunk(0) + /// 2. Chars and their byte (hex) values: + /// - "A" = 0x41 + /// - "U" = 0x55 + /// - "G" = 0x47 + /// 3. "A" is the leftmost character, so we "move" it 2 bytes to the left by + /// multiplying it by 2^16 (hex value: 0x10000) + /// 4. "U" is the middle character, so we "move" it 1 byte to the left by + /// multiplying it by 2^8 (hex value: 0x100) + /// 5. "G" is the rightmost character, so we leave it in place + /// 6. Codon = "A" * 2^16 + "U" * 2^8 + "G" + /// = 0x41 * 0x10000 + 0x55 * 0x100 * 0x47 + /// = 0x415547 + /// 7. (41)(55)(47) are hex values for (A)(U)(G) + /// + /// Returns: + /// - Option::Some(codon) -> if the extraction was successful + /// - Option::None -> if the ByteArray was too short from the given index + fn codon_chunk(self: @ByteArray, from: usize) -> Option { if let Option::Some(char) = self.at(from + 2) { - let chunk = char.into() + let codon = char.into() + self[from + 1].into() * TWO_POW_8 + self[from].into() * TWO_POW_16; - Option::Some(chunk.into()) + Option::Some(codon.into()) } else { Option::None } diff --git a/exercises/practice/protein-translation/src/lib.cairo b/exercises/practice/protein-translation/src/lib.cairo index 0e8279e2..55814ef2 100644 --- a/exercises/practice/protein-translation/src/lib.cairo +++ b/exercises/practice/protein-translation/src/lib.cairo @@ -1,65 +1,18 @@ -use core::dict::Felt252DictEntryTrait; -use core::num::traits::zero::Zero; - #[derive(Destruct)] -struct CodonsInfo { - actual_codons: Felt252Dict>, -} - -enum TranslateResult { - Invalid, - Stopped, - Ok -} +struct CodonsInfo {} fn parse(pairs: Array<(felt252, ByteArray)>) -> CodonsInfo { - let mut pairs = pairs; - let mut actual_codons: Felt252Dict> = Default::default(); - while let Option::Some((codon, name)) = pairs - .pop_front() { - actual_codons.insert(codon, NullableTrait::new(name)); - }; - CodonsInfo { actual_codons, } + panic!() } #[generate_trait] impl CodonsInfoImpl of CodonsInfoTrait { fn name_for(ref self: CodonsInfo, codon: felt252) -> ByteArray { - let (entry, _name) = self.actual_codons.entry(codon); - let name = _name.deref_or(""); - let res = name.clone(); - self.actual_codons = entry.finalize(NullableTrait::new(name)); - res + panic!() } fn of_rna(ref self: CodonsInfo, strand: ByteArray) -> Option> { - let mut result: Array = array![]; - - let mut codon_index = 0; - let translate_result = loop { - if codon_index == strand.len() { - break TranslateResult::Ok; - } - - if let Option::Some(codon) = strand.codon_chunk(codon_index) { - let name = self.name_for(codon); - if name == "" { - break TranslateResult::Invalid; - } else if name == "stop codon" { - break TranslateResult::Stopped; - } - - result.append(name); - codon_index += 3; - } else { - break TranslateResult::Invalid; - } - }; - - match translate_result { - TranslateResult::Invalid => Option::None, - _ => Option::Some(result) - } + panic!() } } @@ -67,14 +20,38 @@ const TWO_POW_8: u32 = 0x100; const TWO_POW_16: u32 = 0x10000; #[generate_trait] -impl ByteArrayChunk of ChunkTrait { +impl CodonChunk of CodonChunkTrait { + /// Given a ByteArray, extracts a codon from a given index `from`. + /// Needs to extract 3 ByteArray characters and convert them to the appropriate + /// felt252 value. It does this by taking the characters' byte value and moving + /// their bits to the left depending on their position in the codon. + /// + /// Example: + /// 1. Method call: "AUG".codon_chunk(0) + /// 2. Chars and their byte (hex) values: + /// - "A" = 0x41 + /// - "U" = 0x55 + /// - "G" = 0x47 + /// 3. "A" is the leftmost character, so we "move" it 2 bytes to the left by + /// multiplying it by 2^16 (hex value: 0x10000) + /// 4. "U" is the middle character, so we "move" it 1 byte to the left by + /// multiplying it by 2^8 (hex value: 0x100) + /// 5. "G" is the rightmost character, so we leave it in place + /// 6. Codon = "A" * 2^16 + "U" * 2^8 + "G" + /// = 0x41 * 0x10000 + 0x55 * 0x100 * 0x47 + /// = 0x415547 + /// 7. (41)(55)(47) are hex values for (A)(U)(G) + /// + /// Returns: + /// - Option::Some(codon) -> if the extraction was successful + /// - Option::None -> if the ByteArray was too short from the given index fn codon_chunk(self: @ByteArray, from: usize) -> Option { if let Option::Some(char) = self.at(from + 2) { - let chunk = char.into() + let codon = char.into() + self[from + 1].into() * TWO_POW_8 + self[from].into() * TWO_POW_16; - Option::Some(chunk.into()) + Option::Some(codon.into()) } else { Option::None } From c8fa6159dcfd898cf01385dbc2dd4ac9246d50d9 Mon Sep 17 00:00:00 2001 From: Nenad Date: Thu, 4 Jul 2024 13:24:53 +0200 Subject: [PATCH 6/6] Remove Clone from tests --- exercises/practice/protein-translation/src/tests.cairo | 1 - 1 file changed, 1 deletion(-) diff --git a/exercises/practice/protein-translation/src/tests.cairo b/exercises/practice/protein-translation/src/tests.cairo index f14f5159..498bcc09 100644 --- a/exercises/practice/protein-translation/src/tests.cairo +++ b/exercises/practice/protein-translation/src/tests.cairo @@ -1,4 +1,3 @@ -use core::clone::Clone; use protein_translation::{parse, CodonsInfoTrait}; #[test]