(core) adjust alloyList datasets to dataList

amkrajewski · Feb 28, 2024 · 0c9b88a · 0c9b88a
1 parent 901a3b7
commit 0c9b88a
Show file tree

Hide file tree

Showing 10 changed files with 14 additions and 14 deletions.
diff --git a/benchmarks/docs.nim b/benchmarks/docs.nim
@@ -1,7 +1,7 @@
 ## # Benchmarking
 ## The key performance advantage of nimCSO comes from how it handles checking how many datapoints would have to be removed if a given set of elements were removed. You can quickly compare performance (speed and memory usage) of nimCSO to other approaches based on (a) native ``Python`` sets and (b) a well-optimized ``NumPy`` implementation. 
 ## 
-## In the ``benchmarks`` directory, you will find 3 scripts, which will automatically ingest the example dataset we ship (``alloyList.txt``) with 2,150 data points and try to remove all entries containing elements from a fixed set of 5 ("Fe", "Cr", "Ni", "Co", "Al", "Ti"). This is repeated thousands of times to get a good average, but should not take more than several seconds on a modern machine.
+## In the ``benchmarks`` directory, you will find 3 scripts, which will automatically ingest the example dataset we ship (``dataList.txt``) with 2,150 data points and try to remove all entries containing elements from a fixed set of 6 ("Fe", "Cr", "Ni", "Co", "Al", "Ti"). This is repeated thousands of times to get a good average, but should not take more than several seconds on a modern machine.
 ## 
 ## - ``nimcso.nim`` - The ``nimCSO`` implementation based around ``BitArray``s. From the root of the project, you can run it with a simple:
 ##   ```cmd

diff --git a/benchmarks/nimcso.nim b/benchmarks/nimcso.nim
@@ -27,7 +27,7 @@ block:
     let elapsed = (epochTime() - t0) * 10
     let elapsedStr = elapsed.formatFloat(format = ffDecimal, precision = 1)
     styledEcho "CPU Time [per dataset evaluation] ", styleBright, fgGreen, elapsedStr, "μs", resetStyle
-    let elapsedPC = elapsed / nimcso.alloyN * 1000
+    let elapsedPC = elapsed / nimcso.dataN * 1000
     let elapsedStrPC = elapsedPC.formatFloat(format = ffDecimal, precision = 1)
     styledEcho "CPU Time [per comparison] ", styleBright, fgGreen, elapsedStrPC, "ns\n", resetStyle
 
@@ -49,6 +49,6 @@ block:
     let elapsed = (epochTime() - t0) * 10
     let elapsedStr = elapsed.formatFloat(format = ffDecimal, precision = 3)
     styledEcho "CPU Time [per dataset evaluation] ", styleBright, fgGreen, elapsedStr, "μs", resetStyle
-    let elapsedPC = elapsed / nimcso.alloyN * 1000
+    let elapsedPC = elapsed / nimcso.dataN * 1000
     let elapsedStrPC = elapsedPC.formatFloat(format = ffDecimal, precision = 3)
     styledEcho "CPU Time [per comparison] ", styleBright, fgGreen, elapsedStrPC, "ns", resetStyle
diff --git a/benchmarks/pythonNative.py b/benchmarks/pythonNative.py
@@ -1,9 +1,9 @@
 import time
 import sys
 
-with open('alloyList.txt', 'r') as f:
-    alloyList = f.read().splitlines()
-elementalList = [s.split(',') for s in alloyList]
+with open('dataList.txt', 'r') as f:
+    dataList = f.read().splitlines()
+elementalList = [s.split(',') for s in dataList]
 
 memoryUsed = sys.getsizeof(elementalList) + \
     sum(sys.getsizeof(row) for row in elementalList) + \

diff --git a/benchmarks/pythonNumPy.py b/benchmarks/pythonNumPy.py
@@ -2,9 +2,9 @@
 import sys
 import numpy as np
 
-with open('alloyList.txt', 'r') as f:
-    alloyList = f.read().splitlines()
-elementalList = [s.split(',') for s in alloyList]
+with open('dataList.txt', 'r') as f:
+    dataList = f.read().splitlines()
+elementalList = [s.split(',') for s in dataList]
 
 assert len(elementalList)==2150
 elementOrder = ["Fe", "Cr", "Ni", "Co", "Al", "Ti", "Nb", "Cu", "Mo", "Ta", "Zr", "V", "Hf", "W", "Mn", "Si", "Re", "B", "Ru", "C", "Sn", "Mg", "Zn", "Li", "O", "Y", "Pd", "N", "Ca", "Ir", "Sc", "Ge", "Be", "Ag", "Nd", "S", "Ga"]

diff --git a/config.yaml b/config.yaml
@@ -1,6 +1,6 @@
 taskName: Default Task
-taskDescription: Optimization of a set of indivisual chemical elements for a large set ot high entropy alloys
-datasetPath: alloyList.txt
+taskDescription: Optimization of a set of individual chemical elements for a large set of High Entropy Alloys from the ULTERA database
+datasetPath: dataList.txt
 
 elementOrder:
 - Fe

diff --git a/config_rhea.yaml b/config_rhea.yaml
@@ -1,6 +1,6 @@
 taskName: RCCA_Senkov2018
 taskDescription: RCCA Palette in Senkov 2018 Review
-datasetPath: alloyList.txt
+datasetPath: dataList.txt
 
 elementOrder:
 - Mo

diff --git a/alloyList.txt → dataList.txt b/alloyList.txt → dataList.txt
diff --git a/tests/config.yaml b/tests/config.yaml
@@ -1,6 +1,6 @@
 taskName: Test1
 taskDescription: Reduced Palette in Senkov 2018 Review
-datasetPath: tests/testAlloyList1.txt
+datasetPath: tests/testDataList.txt
 
 elementOrder:
 - Mo

diff --git a/tests/docs.nim b/tests/docs.nim
@@ -7,6 +7,6 @@
 ## nim c -f -r -d:release -d:configPath=tests/config.yaml tests/runAll
 ## ```
 ## 
-## which, as one can see, uses the ``test/config.yaml`` file to configure the tests for a smaller set of elements (to reduce runtime) and a custom alloy data file ``tests/testAlloyList1.txt``,
+## which, as one can see, uses the ``tests/config.yaml`` file to configure the tests for a smaller set of elements (to reduce runtime) and a custom data file ``tests/testDataList.txt``,
 ## which includes some elements like unobtanium (``Ub``) to verify filtering works as expected.
 ## 
diff --git a/tests/testAlloyList1.txt → tests/testDataList.txt b/tests/testAlloyList1.txt → tests/testDataList.txt