diff --git a/internal/staticanalysis/analyze.go b/internal/staticanalysis/analyze.go
index ed9a5dcb..9d8dbea7 100644
--- a/internal/staticanalysis/analyze.go
+++ b/internal/staticanalysis/analyze.go
@@ -30,10 +30,6 @@ func enumeratePackageFiles(extractDir string) ([]string, error) {
 	return paths, err
 }
 
-func getPathInArchive(path, extractDir string) string {
-	return strings.TrimPrefix(path, extractDir+string(os.PathSeparator))
-}
-
 /*
 AnalyzePackageFiles walks a tree of extracted package files and runs the
 analysis tasks listed in analysisTasks to produce the result data.
 */
@@ -76,14 +72,13 @@ func AnalyzePackageFiles(extractDir string, jsParserConfig parsing.ParserConfig,
 	result := Result{}
 
-	archivePath := map[string]string{}
-	for _, path := range fileList {
-		archivePath[path] = getPathInArchive(path, extractDir)
+	getPathInArchive := func(absolutePath string) string {
+		return strings.TrimPrefix(absolutePath, extractDir+string(os.PathSeparator))
 	}
 
 	if runTask[Basic] {
 		log.Info("run basic analysis")
-		basicData, err := GetBasicData(fileList, archivePath)
+		basicData, err := GetBasicData(fileList, getPathInArchive)
 		if err != nil {
 			log.Error("static analysis error", log.Label("task", string(Basic)), "error", err)
 		} else {
@@ -101,8 +96,8 @@ func AnalyzePackageFiles(extractDir string, jsParserConfig parsing.ParserConfig,
 			log.Error("static analysis error", log.Label("task", string(Parsing)), "error", err)
 		} else {
 			// change absolute path in parsingResults to package-relative path
-			for _, parseResult := range parsingResults {
-				parseResult.Filename = archivePath[parseResult.Filename]
+			for i, r := range parsingResults {
+				parsingResults[i].Filename = getPathInArchive(r.Filename)
 			}
 			result.ParsingData = parsingResults
 		}
diff --git a/internal/staticanalysis/basic_data.go b/internal/staticanalysis/basic_data.go
index 04c23ac3..adc499ed 100644
--- a/internal/staticanalysis/basic_data.go
+++ b/internal/staticanalysis/basic_data.go
@@ -107,7 +107,7 @@ some files should not prevent the analysis of other files.
 
 pathInArchive maps the absolute paths in fileList to relative paths in the
 package archive, to use for results.
 */
-func GetBasicData(fileList []string, pathInArchive map[string]string) (*BasicPackageData, error) {
+func GetBasicData(fileList []string, pathInArchive func(absolutePath string) string) (*BasicPackageData, error) {
 	// First, run file in batch processing mode to get all the file types at once.
 	// Then, file size, hash and line lengths can be done in a simple loop
@@ -124,7 +124,7 @@ func GetBasicData(fileList []string, pathInArchive map[string]string) (*BasicPac
 	}
 
 	for index, filePath := range fileList {
-		archivePath := pathInArchive[filePath]
+		archivePath := pathInArchive(filePath)
 		fileType := fileTypes[index]
 
 		var fileSize int64
diff --git a/internal/staticanalysis/basic_data_test.go b/internal/staticanalysis/basic_data_test.go
index 22311b66..ee14a924 100644
--- a/internal/staticanalysis/basic_data_test.go
+++ b/internal/staticanalysis/basic_data_test.go
@@ -4,55 +4,98 @@ import (
 	"os"
 	"path/filepath"
 	"reflect"
+	"strings"
 	"testing"
+
+	"github.com/ossf/package-analysis/internal/utils"
+	"github.com/ossf/package-analysis/internal/utils/valuecounts"
 )
 
-func TestGetFileTypes(t *testing.T) {
-	testDir := t.TempDir()
-	fileName1 := filepath.Join(testDir, "test1.txt")
-	fileName2 := filepath.Join(testDir, "test2.txt")
+type testFile struct {
+	filename     string
+	contents     []byte
+	contentsHash string
+	fileType     string
+	lineLengths  valuecounts.ValueCounts
+}
 
-	if err := os.WriteFile(fileName1, []byte("hello test 1!\n"), 0o666); err != nil {
-		t.Fatalf("failed to write test file 1: %v", err)
-	}
-	if err := os.WriteFile(fileName2, []byte("#! /bin/bash\necho 'Hello test 2'\n"), 0o666); err != nil {
-		t.Fatalf("failed to write test file 2: %v", err)
-	}
+var testFiles = []testFile{
+	{
+		filename:     "test1.txt",
+		contents:     []byte("hello test 1!\n"),
+		contentsHash: "sha256:bd96959573979235b87180b0b7513c7f1d5cbf046b263f366f2f10fe1b966494",
+		fileType:     "ASCII text",
+		lineLengths:  valuecounts.Count([]int{13}),
+	},
+	{
+		filename:     "test2.txt",
+		contents:     []byte("#! /bin/bash\necho 'Hello test 2'\n"),
+		contentsHash: "sha256:6179db3c673ceddcdbd384116ae4d301d64e65fc2686db9ba64945677a5a893c",
+		fileType:     "Bourne-Again shell script, ASCII text executable",
+		lineLengths:  valuecounts.Count([]int{12, 19}),
+	},
+}
 
+func TestGetBasicData(t *testing.T) {
 	tests := []struct {
-		name     string
-		fileList []string
-		want     []string
-		wantErr  bool
+		name    string
+		files   []testFile
+		wantErr bool
 	}{
 		{
-			name:     "test no files",
-			fileList: []string{},
-			want:     []string{},
-			wantErr:  false,
+			name:    "test no files",
+			files:   nil,
+			wantErr: false,
 		},
 		{
-			name:     "test one file",
-			fileList: []string{fileName1},
-			want:     []string{"ASCII text"},
-			wantErr:  false,
+			name:    "test one file",
+			files:   []testFile{testFiles[0]},
+			wantErr: false,
 		},
 		{
-			name:     "test two files",
-			fileList: []string{fileName1, fileName2},
-			want:     []string{"ASCII text", "Bourne-Again shell script, ASCII text executable"},
-			wantErr:  false,
+			name:    "test two files",
+			files:   []testFile{testFiles[0], testFiles[1]},
+			wantErr: false,
 		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			got, err := getFileTypes(tt.fileList)
+			testDir := t.TempDir()
+			paths := utils.Transform(tt.files, func(f testFile) string {
+				return filepath.Join(testDir, f.filename)
+			})
+
+			for i := range tt.files {
+				if err := os.WriteFile(paths[i], tt.files[i].contents, 0o666); err != nil {
+					t.Fatalf("failed to write test file %d: %v", i, err)
+				}
+			}
+
+			getArchivePath := func(absolutePath string) string {
+				return strings.TrimPrefix(absolutePath, testDir+string(os.PathSeparator))
+			}
+
+			got, err := GetBasicData(paths, getArchivePath)
 			if (err != nil) != tt.wantErr {
 				t.Errorf("getFileTypes() error = %v, wantErr %v", err, tt.wantErr)
 				return
 			}
-			if !reflect.DeepEqual(got, tt.want) {
-				t.Errorf("getFileTypes() got = %#v, want %#v", got, tt.want)
+
+			wantData := utils.Transform(tt.files, func(f testFile) BasicFileData {
+				return BasicFileData{
+					Filename:    f.filename,
+					FileType:    f.fileType,
+					Size:        int64(len(f.contents)),
+					Hash:        f.contentsHash,
+					LineLengths: f.lineLengths,
+				}
+			})
+
+			gotData := got.Files
+
+			if !reflect.DeepEqual(gotData, wantData) {
+				t.Errorf("TestGetBasicData() data mismatch:\n"+
+					"== got == \n%v\n== want ==\n%v", got, wantData)
 			}
 		})
 	}
diff --git a/internal/staticanalysis/obfuscation/file_signals_test.go b/internal/staticanalysis/obfuscation/file_signals_test.go
index f6f22baa..e443af4e 100644
--- a/internal/staticanalysis/obfuscation/file_signals_test.go
+++ b/internal/staticanalysis/obfuscation/file_signals_test.go
@@ -29,8 +29,8 @@ var fileSignalsTestCases = []fileSignalsTestCase{
 			FloatLiterals:  []token.Float{},
 		},
 		expectedSignals: FileSignals{
-			StringLengths:         valuecounts.ValueCounts{5: 1},
-			IdentifierLengths:     valuecounts.ValueCounts{1: 1},
+			StringLengths:         valuecounts.Count([]int{5}),
+			IdentifierLengths:     valuecounts.Count([]int{1}),
 			SuspiciousIdentifiers: []SuspiciousIdentifier{{Name: "a", Rule: "single"}},
 			EscapedStrings:        []EscapedString{},
 			Base64Strings:         []string{},
@@ -61,8 +61,8 @@ var fileSignalsTestCases = []fileSignalsTestCase{
 			FloatLiterals:  []token.Float{},
 		},
 		expectedSignals: FileSignals{
-			StringLengths:     valuecounts.ValueCounts{5: 2},
-			IdentifierLengths: valuecounts.ValueCounts{1: 3, 4: 1},
+			StringLengths:     valuecounts.Count([]int{5, 5}),
+			IdentifierLengths: valuecounts.Count([]int{4, 1, 1, 1}),
 			SuspiciousIdentifiers: []SuspiciousIdentifier{
 				{Name: "a", Rule: "single"},
 				{Name: "b", Rule: "single"},
@@ -76,6 +76,41 @@ var fileSignalsTestCases = []fileSignalsTestCase{
 			URLs:                  []string{},
 		},
 	},
+	{
+		name: "one of everything",
+		parseData: parsing.SingleResult{
+			Identifiers: []token.Identifier{
+				{Name: "_0x12414124", Type: token.Variable},
+				{Name: "a", Type: token.Parameter},
+				{Name: "d1912931", Type: token.Parameter},
+			},
+			StringLiterals: []token.String{
+				{Value: "hello@email.me", Raw: `"hello@email.me"`},
+				{Value: "https://this.is.a.website.com", Raw: `"https://this.is.a.website.com"`},
+				{Value: "aGVsbG8gd29ybGQK", Raw: `"aGVsbG8gd29ybGQK"`},
+				{Value: "8.8.8.8", Raw: `"8.8.8.8"`},
+				{Value: "e3fc:234a:2341::abcd", Raw: `"e3fc:234a:2341::abcd"`},
+				{Value: "0x21323492394", Raw: `"0x21323492394"`},
+			},
+			IntLiterals:   []token.Int{},
+			FloatLiterals: []token.Float{},
+		},
+		expectedSignals: FileSignals{
+			IdentifierLengths: valuecounts.Count([]int{11, 1, 8}),
+			StringLengths:     valuecounts.Count([]int{14, 29, 16, 7, 20, 13}),
+			SuspiciousIdentifiers: []SuspiciousIdentifier{
+				{Name: "_0x12414124", Rule: "hex"},
+				{Name: "a", Rule: "single"},
+				{Name: "d1912931", Rule: "numeric"},
+			},
+			EscapedStrings: []EscapedString{},
+			Base64Strings:  []string{"aGVsbG8gd29ybGQK"},
+			EmailAddresses: []string{"hello@email.me"},
+			HexStrings:     []string{"21323492394"},
+			IPAddresses:    []string{"8.8.8.8", "e3fc:234a:2341::abcd"},
+			URLs:           []string{"https://this.is.a.website.com"},
+		},
+	},
 }
 
 func TestComputeSignals(t *testing.T) {
diff --git a/internal/staticanalysis/obfuscation/stats/sample_statistics.go b/internal/staticanalysis/obfuscation/stats/sample_statistics.go
index 36bae5e7..4758f198 100644
--- a/internal/staticanalysis/obfuscation/stats/sample_statistics.go
+++ b/internal/staticanalysis/obfuscation/stats/sample_statistics.go
@@ -8,7 +8,6 @@ import (
 	"golang.org/x/exp/slices"
 
 	"github.com/ossf/package-analysis/internal/utils"
-	"github.com/ossf/package-analysis/internal/utils/valuecounts"
 )
 
 type RealNumber interface {
@@ -209,11 +208,3 @@ func Summarise[T RealNumber](sample []T) SampleStatistics {
 	q := quartiles(sample)
 	return SampleStatistics{Size: l, Mean: m, Variance: v, Skewness: s, Quartiles: q}
 }
-
-func CountDistinct(sample []int) valuecounts.ValueCounts {
-	counts := valuecounts.New()
-	for _, t := range sample {
-		counts[t] += 1
-	}
-	return counts
-}
diff --git a/internal/staticanalysis/obfuscation/stats/sample_statistics_test.go b/internal/staticanalysis/obfuscation/stats/sample_statistics_test.go
index 315fa81a..d97ce6ad 100644
--- a/internal/staticanalysis/obfuscation/stats/sample_statistics_test.go
+++ b/internal/staticanalysis/obfuscation/stats/sample_statistics_test.go
@@ -2,10 +2,7 @@ package stats
 
 import (
 	"math"
-	"reflect"
 	"testing"
-
-	"github.com/ossf/package-analysis/internal/utils/valuecounts"
 )
 
 func TestSummary(t *testing.T) {
@@ -115,30 +112,3 @@ func TestSummary7(t *testing.T) {
 		t.Errorf("Expected summary: %v\nactual summary %v\n", expected, actual)
 	}
 }
-
-func TestCountDistinct1(t *testing.T) {
-	data := []int{1, 2, 3, 4, 3, 2, 1, 2}
-	actual := CountDistinct(data)
-	expected := valuecounts.ValueCounts{1: 2, 2: 3, 3: 2, 4: 1}
-	if !reflect.DeepEqual(actual, expected) {
-		t.Errorf("Expected counts: %v\nactual counts %v\n", expected, actual)
-	}
-}
-
-func TestCountDistinct2(t *testing.T) {
-	data := []int{1}
-	actual := CountDistinct(data)
-	expected := valuecounts.ValueCounts{1: 1}
-	if !reflect.DeepEqual(actual, expected) {
-		t.Errorf("Expected counts: %v\nactual counts %v\n", expected, actual)
-	}
-}
-
-func TestCountDistinct3(t *testing.T) {
-	data := []int{}
-	actual := CountDistinct(data)
-	expected := valuecounts.ValueCounts{}
-	if !reflect.DeepEqual(actual, expected) {
-		t.Errorf("Expected counts: %v\nactual counts %v\n", expected, actual)
-	}
-}
diff --git a/internal/staticanalysis/obfuscation/stringentropy/string_entropy.go b/internal/staticanalysis/obfuscation/stringentropy/string_entropy.go
index 48549d2d..7bec4dc4 100644
--- a/internal/staticanalysis/obfuscation/stringentropy/string_entropy.go
+++ b/internal/staticanalysis/obfuscation/stringentropy/string_entropy.go
@@ -6,7 +6,7 @@ import (
 )
 
 /*
-CalculateEntropy calculates entropy of a string S of characters over an alphabet A, which is defined as
+Calculate finds the entropy of a string S of characters over an alphabet A, which is defined as
 
 E(S) = - sum(i in A) { (p(i)) * log(p(i)) },
 
@@ -24,7 +24,7 @@ the entropy approaches 0.
 Reference: https://link.springer.com/chapter/10.1007/978-3-642-10509-8_19
 */
-func CalculateEntropy(s string, prob map[rune]float64) float64 {
+func Calculate(s string, prob map[rune]float64) float64 {
 	if len(s) == 0 {
 		return 0
 	}
@@ -46,7 +46,7 @@ func CalculateEntropy(s string, prob map[rune]float64) float64 {
 }
 
 /*
-CalculateNormalisedEntropy returns the string entropy normalised by the log of the length of the string.
+CalculateNormalised returns the string entropy normalised by the log of the length of the string.
 This quantity is used because for log(N) is the maximum possible entropy out of all strings with length N,
 where N > 0. Special cases are empty strings (0) and single character strings (1).
 As a formula:
@@ -59,7 +59,7 @@ As a formula:
 */
 // TODO does this make sense when a general probability structure is used?
 // TODO calculate max string entropy for a given set of character counts.
-func CalculateNormalisedEntropy(s string, prob map[rune]float64) float64 {
+func CalculateNormalised(s string, prob map[rune]float64) float64 {
 	length := utf8.RuneCountInString(s)
 	switch length {
 	case 0:
@@ -67,7 +67,7 @@ func CalculateNormalisedEntropy(s string, prob map[rune]float64) float64 {
 	case 1:
 		return 1
 	default:
-		return CalculateEntropy(s, prob) / math.Log(float64(length))
+		return Calculate(s, prob) / math.Log(float64(length))
 	}
 }
diff --git a/internal/staticanalysis/obfuscation/stringentropy/string_entropy_test.go b/internal/staticanalysis/obfuscation/stringentropy/string_entropy_test.go
index 334e4913..70585f73 100644
--- a/internal/staticanalysis/obfuscation/stringentropy/string_entropy_test.go
+++ b/internal/staticanalysis/obfuscation/stringentropy/string_entropy_test.go
@@ -24,7 +24,7 @@ func TestStringEntropy(t *testing.T) {
 		{"aaA", -2*2.0/3.0*math.Log(2.0/3.0) - math.Log(1.0/3.0)/3.0},
 	}
 	for index, test := range testCases {
-		actual := CalculateEntropy(test.s, nil)
+		actual := Calculate(test.s, nil)
 		if !utils.FloatEquals(test.expected, actual, tolerance) {
 			t.Errorf("Test case %d failed (str: %s, expected: %f, actual: %f\n",
 				index+1, test.s, test.expected, actual)
@@ -53,7 +53,7 @@ func TestStringEntropyWithFixedProbs(t *testing.T) {
 		{" a \n", -a * math.Log(a)},
 	}
 	for index, test := range testCases {
-		actual := CalculateEntropy(test.s, probs)
+		actual := Calculate(test.s, probs)
 		if !utils.FloatEquals(test.expected, actual, tolerance) {
 			t.Errorf("Test case %d failed (str: %s, expected: %f, actual: %f\n",
 				index+1, test.s, test.expected, actual)
@@ -73,7 +73,7 @@ func TestStringEntropyRatio(t *testing.T) {
 		{"aaA", (-2*2.0*math.Log(2.0/3.0) - math.Log(1.0/3.0)) / (3.0 * math.Log(3))},
 	}
 	for index, test := range testCases {
-		actual := CalculateNormalisedEntropy(test.s, nil)
+		actual := CalculateNormalised(test.s, nil)
 		if !utils.FloatEquals(test.expected, actual, tolerance) {
 			t.Errorf("Test case %d failed (str: %s, expected: %f, actual: %f\n",
 				index+1, test.s, test.expected, actual)
diff --git a/internal/staticanalysis/parsing/analyze.go b/internal/staticanalysis/parsing/analyze.go
index 82039eb5..bee7c58d 100644
--- a/internal/staticanalysis/parsing/analyze.go
+++ b/internal/staticanalysis/parsing/analyze.go
@@ -49,7 +49,7 @@ func processJsData(filename string, fileData singleParseData) *SingleResult {
 	}
 
 	for _, c := range fileData.Comments {
-		result.Comments = append(result.Comments, token.Comment{Value: c.Data})
+		result.Comments = append(result.Comments, token.Comment{Text: c.Data})
 	}
 	return result
 }
@@ -62,11 +62,11 @@ func computeEntropy(parseResults []*SingleResult) {
 	var identifiers []string
 
 	for _, result := range parseResults {
-		for _, sl := range result.StringLiterals {
-			strings = append(strings, sl.Value)
+		for _, str := range result.StringLiterals {
+			strings = append(strings, str.Value)
 		}
-		for _, id := range result.Identifiers {
-			identifiers = append(identifiers, id.Name)
+		for _, ident := range result.Identifiers {
+			identifiers = append(identifiers, ident.Name)
 		}
 	}
 
@@ -74,11 +74,11 @@ func computeEntropy(parseResults []*SingleResult) {
 	identifierCharDistribution := stringentropy.CharacterProbabilities(identifiers)
 
 	for _, result := range parseResults {
-		for _, sl := range result.StringLiterals {
-			sl.Entropy = stringentropy.CalculateEntropy(sl.Value, stringLiteralCharDistribution)
+		for i := range result.StringLiterals {
+			result.StringLiterals[i].ComputeEntropy(stringLiteralCharDistribution)
 		}
-		for _, id := range result.Identifiers {
-			id.Entropy = stringentropy.CalculateEntropy(id.Name, identifierCharDistribution)
+		for i := range result.Identifiers {
+			result.Identifiers[i].ComputeEntropy(identifierCharDistribution)
 		}
 	}
 }
diff --git a/internal/staticanalysis/parsing/analyze_test.go b/internal/staticanalysis/parsing/analyze_test.go
index edadee36..3a3f279a 100644
--- a/internal/staticanalysis/parsing/analyze_test.go
+++ b/internal/staticanalysis/parsing/analyze_test.go
@@ -4,18 +4,27 @@ import (
 	"reflect"
 	"testing"
 
-	"github.com/ossf/package-analysis/internal/log"
 	"github.com/ossf/package-analysis/internal/staticanalysis/externalcmd"
+	"github.com/ossf/package-analysis/internal/staticanalysis/obfuscation/stringentropy"
 	"github.com/ossf/package-analysis/internal/staticanalysis/token"
 )
 
-type collectDataTestCase struct {
+type analyzeTestcase struct {
 	name         string
 	jsSource     string
 	expectedData SingleResult
 }
 
-var collectDataTestCases = []collectDataTestCase{
+var literalCharProbs = []map[rune]float64{
+	stringentropy.CharacterProbabilities([]string{"hello"}),
+	stringentropy.CharacterProbabilities([]string{"hello", "apple"}),
+}
+var identifierCharProbs = []map[rune]float64{
+	stringentropy.CharacterProbabilities([]string{"a"}),
+	stringentropy.CharacterProbabilities([]string{"test", "a", "b", "c"}),
+}
+
+var analyzeTestcases = []analyzeTestcase{
 	{
 		name: "simple 1",
 		jsSource: `
@@ -23,10 +32,10 @@ var a = "hello"
 `,
 		expectedData: SingleResult{
 			Identifiers: []token.Identifier{
-				{Name: "a", Type: token.Variable},
+				{Name: "a", Type: token.Variable, Entropy: stringentropy.Calculate("a", identifierCharProbs[0])},
 			},
 			StringLiterals: []token.String{
-				{Value: "hello", Raw: `"hello"`},
+				{Value: "hello", Raw: `"hello"`, Entropy: stringentropy.Calculate("hello", literalCharProbs[0])},
 			},
 			IntLiterals:   []token.Int{},
 			FloatLiterals: []token.Float{},
@@ -47,14 +56,14 @@ function test(a, b = 2) {
 `,
 		expectedData: SingleResult{
 			Identifiers: []token.Identifier{
-				{Name: "test", Type: token.Function},
-				{Name: "a", Type: token.Parameter},
-				{Name: "b", Type: token.Parameter},
-				{Name: "c", Type: token.Variable},
+				{Name: "test", Type: token.Function, Entropy: stringentropy.Calculate("test", identifierCharProbs[1])},
+				{Name: "a", Type: token.Parameter, Entropy: stringentropy.Calculate("a", identifierCharProbs[1])},
+				{Name: "b", Type: token.Parameter, Entropy: stringentropy.Calculate("b", identifierCharProbs[1])},
+				{Name: "c", Type: token.Variable, Entropy: stringentropy.Calculate("c", identifierCharProbs[1])},
 			},
 			StringLiterals: []token.String{
-				{Value: "hello", Raw: `"hello"`},
-				{Value: "apple", Raw: `"apple"`},
+				{Value: "hello", Raw: `"hello"`, Entropy: stringentropy.Calculate("hello", literalCharProbs[1])},
+				{Value: "apple", Raw: `"apple"`, Entropy: stringentropy.Calculate("apple", literalCharProbs[1])},
 			},
 			IntLiterals: []token.Int{
 				{Value: 2, Raw: "2"},
@@ -66,17 +75,13 @@ function test(a, b = 2) {
 	},
 }
 
-func init() {
-	log.Initialize("")
-}
-
-func TestCollectData(t *testing.T) {
+func TestAnalyze(t *testing.T) {
 	parserConfig, err := InitParser(t.TempDir())
 	if err != nil {
 		t.Fatalf("failed to init parser: %v", err)
 	}
 
-	for _, tt := range collectDataTestCases {
+	for _, tt := range analyzeTestcases {
 		t.Run(tt.name, func(t *testing.T) {
 			result, err := Analyze(parserConfig, externalcmd.StringInput(tt.jsSource), false)
 			if err != nil {
diff --git a/internal/staticanalysis/parsing/result.go b/internal/staticanalysis/parsing/result.go
index 6781041b..ce06bdc2 100644
--- a/internal/staticanalysis/parsing/result.go
+++ b/internal/staticanalysis/parsing/result.go
@@ -17,6 +17,7 @@ type SingleResult struct {
 	IntLiterals    []token.Int     `json:"int_literals"`
 	FloatLiterals  []token.Float   `json:"float_literals"`
 	Comments       []token.Comment `json:"comments"`
+	// future: external function calls / references (e.g. eval)
 }
 
 func (r SingleResult) String() string {
diff --git a/internal/staticanalysis/token/tokens.go b/internal/staticanalysis/token/tokens.go
index b10a9a7a..2c767f75 100644
--- a/internal/staticanalysis/token/tokens.go
+++ b/internal/staticanalysis/token/tokens.go
@@ -1,27 +1,41 @@
 package token
 
+import "github.com/ossf/package-analysis/internal/staticanalysis/obfuscation/stringentropy"
+
 type Identifier struct {
-	Name    string
-	Type    IdentifierType
-	Entropy float64
+	Name    string         `json:"name"`
+	Type    IdentifierType `json:"type"`
+	Entropy float64        `json:"entropy"`
 }
 
-type Comment struct {
-	Value string
+// ComputeEntropy computes the entropy of this identifier under the given
+// character distribution and sets its Entropy field to the result value
+func (i *Identifier) ComputeEntropy(probs map[rune]float64) {
+	i.Entropy = stringentropy.Calculate(i.Name, probs)
 }
 
 type String struct {
-	Value   string
-	Raw     string
-	Entropy float64
+	Value   string  `json:"value"`
+	Raw     string  `json:"raw"`
+	Entropy float64 `json:"entropy"`
+}
+
+// ComputeEntropy computes the entropy of this string literal under the given
+// character distribution and sets its Entropy field to the result value
+func (s *String) ComputeEntropy(probs map[rune]float64) {
+	s.Entropy = stringentropy.Calculate(s.Value, probs)
 }
 
 type Int struct {
-	Value int64
-	Raw   string
+	Value int64  `json:"value"`
+	Raw   string `json:"raw"`
 }
 
 type Float struct {
-	Value float64
-	Raw   string
+	Value float64 `json:"value"`
+	Raw   string  `json:"raw"`
+}
+
+type Comment struct {
+	Text string `json:"text"`
 }
diff --git a/internal/utils/valuecounts/value_counts.go b/internal/utils/valuecounts/value_counts.go
index a1d20053..3d035fcd 100644
--- a/internal/utils/valuecounts/value_counts.go
+++ b/internal/utils/valuecounts/value_counts.go
@@ -12,7 +12,9 @@ import (
 // ValueCounts stores unordered counts of integer values as a map
 // from value (int) to count (int). It can be serialized to JSON
 // as an array of (value, count) pairs.
-type ValueCounts map[int]int
+type ValueCounts struct {
+	data map[int]int
+}
 
 // Aside: I know using 'value' to refer to map keys is not great, but the
 // other names I came up with like 'size' and 'length' were all usage-specific.
@@ -23,23 +25,41 @@ type Pair struct {
 	Count int `json:"count"`
 }
 
+// New creates a new empty ValueCounts object
 func New() ValueCounts {
-	return ValueCounts{}
+	return ValueCounts{
+		data: map[int]int{},
+	}
+}
+
+// FromMap creates a new ValueCounts object and initialises its counts from the given map
+func FromMap(data map[int]int) ValueCounts {
+	vc := New()
+	for value, count := range data {
+		vc.data[value] = count
+	}
+	return vc
 }
 
 // Count produces a new ValueCounts by counting repetitions of values in the input data
 func Count(data []int) ValueCounts {
 	vc := New()
 	for _, value := range data {
-		vc[value] += 1
+		vc.data[value] += 1
 	}
 	return vc
 }
 
+// Len returns the number of values stored by this ValueCounts.
+// It is equivalent to the length of the slice returned by ToPairs()
+func (vc ValueCounts) Len() int {
+	return len(vc.data)
+}
+
 // String() returns a string representation of this ValueCounts
 // with values sorted in ascending order
 func (vc ValueCounts) String() string {
-	pairStrings := make([]string, 0, len(vc))
+	pairStrings := make([]string, 0, len(vc.data))
 	for _, pair := range vc.ToPairs() {
 		pairStrings = append(pairStrings, fmt.Sprintf("%d: %d", pair.Value, pair.Count))
 	}
@@ -50,14 +70,14 @@
 // The values are sorted in increasing order so that the output is deterministic.
 // If this ValueCounts is empty, returns an empty slice.
 func (vc ValueCounts) ToPairs() []Pair {
-	pairs := make([]Pair, 0, len(vc))
+	pairs := make([]Pair, 0, len(vc.data))
 
 	// sort the values so that the output is in a deterministic order
-	values := maps.Keys(vc)
+	values := maps.Keys(vc.data)
 	slices.Sort(values)
 
 	for _, value := range values {
-		count := vc[value]
+		count := vc.data[value]
 		pairs = append(pairs, Pair{Value: value, Count: count})
 	}
@@ -71,10 +91,10 @@ func FromPairs(pairs []Pair) (ValueCounts, error) {
 	valueCounts := New()
 
 	for _, item := range pairs {
-		if _, seen := valueCounts[item.Value]; seen {
-			return nil, fmt.Errorf("value occurs multiple times: %d", item.Value)
+		if _, seen := valueCounts.data[item.Value]; seen {
+			return ValueCounts{}, fmt.Errorf("value occurs multiple times: %d", item.Value)
 		}
-		valueCounts[item.Value] = item.Count
+		valueCounts.data[item.Value] = item.Count
 	}
 
 	return valueCounts, nil
diff --git a/internal/utils/valuecounts/value_counts_test.go b/internal/utils/valuecounts/value_counts_test.go
index e1e1f66b..5bcab0e5 100644
--- a/internal/utils/valuecounts/value_counts_test.go
+++ b/internal/utils/valuecounts/value_counts_test.go
@@ -7,7 +7,7 @@ import (
 	"github.com/ossf/package-analysis/internal/utils"
 )
 
-func TestCountData_ToValueCountPairs(t *testing.T) {
+func TestValueCounts_ToValueCountPairs(t *testing.T) {
 	tests := []struct {
 		name string
 		vc   ValueCounts
 		want []Pair
 	}{
 		{
 			"nil",
-			nil,
+			New(),
 			[]Pair{},
 		},
 		{
 			"empty",
-			ValueCounts{},
+			New(),
 			[]Pair{},
 		},
 		{
 			"single item",
-			ValueCounts{0: 1},
+			FromMap(map[int]int{0: 1}),
 			[]Pair{{0, 1}},
 		},
 		{
 			"multiple items",
-			ValueCounts{0: 1, 1: 2, 2: 3},
+			FromMap(map[int]int{0: 1, 1: 2, 2: 3}),
 			[]Pair{{0, 1}, {1, 2}, {2, 3}},
 		},
 	}
@@ -53,31 +53,31 @@ func TestFromValueCountPairs(t *testing.T) {
 		{
 			"nil",
 			nil,
-			ValueCounts{},
+			New(),
 			false,
 		},
 		{
 			"empty non-nil",
 			[]Pair{},
-			ValueCounts{},
+			New(),
 			false,
 		},
 		{
 			"single item",
 			[]Pair{{0, 1}},
-			ValueCounts{0: 1},
+			FromMap(map[int]int{0: 1}),
 			false,
 		},
 		{
 			"multiple items",
 			[]Pair{{0, 1}, {1, 2}},
-			ValueCounts{0: 1, 1: 2},
+			FromMap(map[int]int{0: 1, 1: 2}),
 			false,
 		},
 		{
 			"repeated items",
 			[]Pair{{0, 1}, {0, 1}},
-			nil,
+			New(),
 			true,
 		},
 	}
@@ -88,8 +88,11 @@ func TestFromValueCountPairs(t *testing.T) {
 				t.Errorf("FromPairs() error = %v, wantErr %v", err, tt.wantErr)
 				return
 			}
+			if err != nil {
+				return
+			}
 			if !reflect.DeepEqual(got, tt.want) {
-				t.Errorf("FromPairs() got = %v, want %v", got, tt.want)
+				t.Errorf("FromPairs() got %v, want %v", got, tt.want)
 			}
 		})
 	}
@@ -104,25 +107,25 @@ func TestCountData_MarshalJSON(t *testing.T) {
 	}{
 		{
 			"nil",
-			nil,
+			ValueCounts{},
 			"[]",
 			false,
 		},
 		{
 			"empty",
-			ValueCounts{},
+			New(),
 			"[]",
 			false,
 		},
 		{
 			"single item",
-			ValueCounts{0: 1},
+			FromMap(map[int]int{0: 1}),
 			`[ {"value": 0, "count": 1} ]`,
 			false,
 		},
 		{
 			"multiple items",
-			ValueCounts{0: 1, 1: 2, 2: 3},
+			FromMap(map[int]int{0: 1, 1: 2, 2: 3}),
 			`[ {"value":0, "count": 1}, {"value": 1, "count": 2}, {"value": 2, "count": 3} ]`,
 			false,
 		},
@@ -139,7 +142,7 @@ func TestCountData_MarshalJSON(t *testing.T) {
 			if equal, err := utils.JSONEquals(gotBytes, []byte(tt.want)); err != nil {
 				t.Errorf("MarshalJSON() error decoding JSON: %v", err)
 			} else if !equal {
-				t.Errorf("MarshalJSON() got = %s, want %s", got, tt.want)
+				t.Errorf("MarshalJSON() got %s, want %s", got, tt.want)
 			}
 		})
 	}
@@ -155,25 +158,25 @@ func TestCountData_UnmarshalJSON(t *testing.T) {
 		{
 			"null",
 			"null",
-			ValueCounts{},
+			New(),
 			false,
 		},
 		{
 			"empty",
 			"[]",
-			ValueCounts{},
+			New(),
 			false,
 		},
 		{
 			"single item",
 			`[{"value": 0, "count": 1}]`,
-			ValueCounts{0: 1},
+			FromMap(map[int]int{0: 1}),
 			false,
 		},
 		{
 			"multiple items",
 			`[{"value":0,"count":1},{"value":1,"count":2},{"value":2,"count":3}]`,
-			ValueCounts{0: 1, 1: 2, 2: 3},
+			FromMap(map[int]int{0: 1, 1: 2, 2: 3}),
 			false,
 		},
 		// TODO: Add test cases.
@@ -192,3 +195,72 @@ func TestCountData_UnmarshalJSON(t *testing.T) {
 		})
 	}
 }
+
+func TestFromMap(t *testing.T) {
+	tests := []struct {
+		name string
+		data map[int]int
+		want ValueCounts
+	}{
+		{
+			"nil",
+			nil,
+			New(),
+		},
+		{
+			"empty",
+			map[int]int{},
+			New(),
+		},
+		{
+			"basic",
+			map[int]int{-1: 210, 10: 102, 0: 34, 3: 0},
+			ValueCounts{
+				data: map[int]int{-1: 210, 0: 34, 3: 0, 10: 102},
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := FromMap(tt.data); !reflect.DeepEqual(got, tt.want) {
+				t.Errorf("FromMap() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func TestCount(t *testing.T) {
+	tests := []struct {
+		name string
+		data []int
+		want ValueCounts
+	}{
+		{
+			"nil",
+			nil,
+			New(),
+		},
+		{
+			"empty",
+			[]int{},
+			New(),
+		},
+		{
+			"single",
+			[]int{1},
+			FromMap(map[int]int{1: 1}),
+		},
+		{
+			"multiple",
+			[]int{1, 2, 3, 4, 3, 2, 1, 2},
+			FromMap(map[int]int{1: 2, 2: 3, 3: 2, 4: 1}),
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := Count(tt.data); !reflect.DeepEqual(got, tt.want) {
+				t.Errorf("Count() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
diff --git a/scripts/format-static-analysis-json.py b/scripts/format-static-analysis-json.py
index be7d5267..877b9f9d 100755
--- a/scripts/format-static-analysis-json.py
+++ b/scripts/format-static-analysis-json.py
@@ -14,50 +14,30 @@
 # Changes JSON structs that are formatted like:
 # {
-#   "Name": "...",
-#   "Type": "..."
+#   "key1": ...,
+#   "key2": ...
 # }
 # into ones like
-# { "Name": "...", "Type": "..." }
-name_type_substitution = (
-    re.compile('{$\\n^\\s*"Name": ?"(.*)",$\\n^\\s*"Type": ?"(.*)"$\\n^\\s*}', re.MULTILINE),
-    '{ "Name": "\\1", "Type": "\\2" }'
+# { "key1": ..., "key2": ... }
+struct_pair_substitution = (
+    re.compile('{$\\n^\\s*"(.+)": ?(.*),$\\n^\\s*"(.+)": ?(.*)$\\n^\\s*}', re.MULTILINE),
+    '{ "\\1": \\2, "\\3": \\4 }'
 )
 
 # Changes JSON structs that are formatted like:
 # {
-#   "Value": ..., (may not be a string)
-#   "Raw": "..."
+#   "key1": ...,
+#   "key2": ...,
+#   "key3": ...
 # }
 # into ones like
-# { "Value": ..., "Raw": "..." }
-value_raw_substitution = (
-    re.compile('{$\\n^\\s*"Value": ?(.*),$\\n^\\s*"Raw": ?"(.*)"$\\n^\\s*}', re.MULTILINE),
-    '{ "Value": \\1, "Raw": "\\2" }'
+# { "key1": ..., "key2": ..., "key3": ... }
+struct_triple_substitution = (
+    re.compile('{$\\n^\\s*"(.+)": ?(.*),$\\n^\\s*"(.+)": ?(.*),$\\n^\\s*"(.+)": ?(.*)$\\n^\\s*}', re.MULTILINE),
+    '{ "\\1": \\2, "\\3": \\4, "\\5": \\6 }'
 )
 
-# Changes JSON arrays that are formatted like:
-# "Quartiles": [
-#   0.1762,
-#   1.3075,
-#   1.4424,
-#   1.4766,
-#   1.6646
-# ]
-# into ones like
-# "Quartiles": [ 0.1762, 1.3075, 1.4424, 1.4766, 1.6646 ]
-quartile_substitution = (
-    re.compile('"Quartiles": \\[$\\n'
-               '^\\s*(\\d+\\.?\\d*),$\\n'
-               '^\\s*(\\d+\\.?\\d*),$\\n'
-               '^\\s*(\\d+\\.?\\d*),$\\n'
-               '^\\s*(\\d+\\.?\\d*),$\\n'
-               '^\\s*(\\d+\\.?\\d*)$\\n'
-               '^\\s*]', re.MULTILINE),
-    '"Quartiles": [ \\1, \\2, \\3, \\4, \\5 ]'
-)
-
-all_substitutions = (name_type_substitution, value_raw_substitution, quartile_substitution)
+all_substitutions = (struct_pair_substitution, struct_triple_substitution)
 
 # Pretty prints a JSON object with newlines and indentation, then applies