Skip to content

Commit

Permalink
add megamash stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
Koeng101 committed Jan 6, 2024
1 parent 9e2b323 commit 3cb55d2
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 16 deletions.
19 changes: 4 additions & 15 deletions lib/align/megamash/megamash.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/koeng101/dnadesign/lib/transform"
)

// StandardizedCompressedDNA returns the CompressedDNA byte string for sequence.
func StandardizedCompressedDNA(sequence string) []byte {
var deterministicSequence string
reverseComplement := transform.ReverseComplement(sequence)
Expand Down Expand Up @@ -60,19 +61,7 @@ func MakeMegamashMap(sequences []string, kmerSize uint) MegamashMap {
// Add it to megamashMap
megamashMap = append(megamashMap, uniqueKmerMap)
}
// Finally, go back through and make a final megamashMap without
// all those falses.
var finalMegamashMap MegamashMap
for _, singleMegamashMap := range megamashMap {
finalMap := make(map[string]bool)
for kmerBase64, value := range singleMegamashMap {
if value {
finalMap[kmerBase64] = true
}
}
finalMegamashMap = append(finalMegamashMap, finalMap)
}
return finalMegamashMap
return megamashMap
}

func (m *MegamashMap) Score(sequence string) []float64 {
Expand Down Expand Up @@ -102,8 +91,8 @@ out:
for i := 0; i <= len(sequence)-kmerSize; i++ {
kmerBytes := StandardizedCompressedDNA(sequence[i : i+kmerSize])
kmerBase64 := base64.StdEncoding.EncodeToString(kmerBytes)
_, ok := sequenceMap[kmerBase64]
if ok {
unique, ok := sequenceMap[kmerBase64]
if ok && unique {
matchedKmers++
}
}
Expand Down
12 changes: 11 additions & 1 deletion lib/align/megamash/megamash_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,14 @@ package megamash

import (
"testing"

"github.com/koeng101/dnadesign/lib/random"
)

func TestCompressDNA(t *testing.T) {
// Define test cases
longDna, _ := random.DNASequence(300, 0)
longerDna, _ := random.DNASequence(66000, 0)
tests := []struct {
name string
dna string
Expand All @@ -15,14 +19,16 @@ func TestCompressDNA(t *testing.T) {
{"Empty", "", 2, 0x00},
{"Short", "ATGC", 3, 0x00},
{"Medium", "ATGCGTATGCCGTAGC", 6, 0x00},
{"Long", longDna, 78, 0x01},
{"Longest", longerDna, 16505, 0x02},
// Add more test cases for longer sequences and edge cases
}

for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
compressed := CompressDNA(tc.dna)
if len(compressed) != tc.expectedLen {
t.Errorf("CompressDNA() with input %s, expected length %d, got %d", tc.dna, tc.expectedLen, len(compressed))
t.Errorf("CompressDNA() with input %s, expected length %d, got %d", "", tc.expectedLen, len(compressed))
}
if compressed[0] != tc.expectedFlag {
t.Errorf("CompressDNA() with input %s, expected flag %b, got %b", tc.dna, tc.expectedFlag, compressed[0])
Expand All @@ -32,6 +38,8 @@ func TestCompressDNA(t *testing.T) {
}

func TestDecompressDNA(t *testing.T) {
longDna, _ := random.DNASequence(300, 0)
longerDna, _ := random.DNASequence(66000, 0)
// Define test cases
tests := []struct {
name string
Expand All @@ -41,6 +49,8 @@ func TestDecompressDNA(t *testing.T) {
{"Empty", "", ""},
{"Short", "ATGC", "ATGC"},
{"Medium", "ATGCGTATGCCGTAGC", "ATGCGTATGCCGTAGC"},
{"Long", longDna, longDna},
{"Longest", longerDna, longerDna},
// Add more test cases as needed
}

Expand Down

0 comments on commit 3cb55d2

Please sign in to comment.