Merge pull request #20 from data-intuitive/develop
Bump LuciusCore dependency to 4.1.2
Grifs authored Feb 10, 2023
2 parents 46300ec + 3d61cb5 commit 755d1c9
Showing 4 changed files with 20 additions and 9 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -14,6 +14,7 @@ There's still a lot of work to be done on this (version numbers don't reflect ev
| 5.1.1 | 4.1.1 | 0.11.1 | 2.4.7 |
| 5.1.2 | 4.1.1 | 0.11.1 | 2.4.7 |
| 5.1.3 | 4.1.1 | 0.11.1 | 2.4.7 |
+| 5.1.4 | 4.1.2 | 0.11.1 | 2.4.7 |

# API Documentation

4 changes: 2 additions & 2 deletions build.sbt
@@ -2,15 +2,15 @@ name := "LuciusAPI"

import aether.AetherKeys._

ThisBuild / version := "5.1.3"
ThisBuild / version := "5.1.4"

scalaVersion := "2.11.12"

resolvers += Resolver.githubPackages("data-intuitive")
resolvers += "Artifactory" at "https://sparkjobserver.jfrog.io/artifactory/jobserver/"

libraryDependencies ++= Seq(
"com.data-intuitive" %% "luciuscore" % "4.1.1",
"com.data-intuitive" %% "luciuscore" % "4.1.2",
"spark.jobserver" %% "job-server-api" % "0.11.1" % "provided",
"spark.jobserver" %% "job-server-extras" % "0.11.1" % "provided",
"org.scalactic" %% "scalactic" % "3.0.7" % "test" ,
10 changes: 10 additions & 0 deletions src/main/scala/com/dataintuitive/luciusapi/Common.scala
@@ -312,6 +312,16 @@ object Common extends Serializable {
.getOrElse(Map.empty)
}

+/**
+ * To make sure Lucius handles heavy load and slow response times,
+ * it's important to dev/test with larger datasets than a small dev dataset allows.
+ * This parameter sets how many times the input data is duplicated, so the apparent dataset is larger.
+ */
+def paramMultiplicity(config: Config): Int = {
+  Try(config.getString("multiplicity").toInt)
+    .getOrElse(1)
+}

}

}
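For context, a minimal sketch of how the new `multiplicity` parameter is read, assuming Typesafe Config (the `Config` type used here); the helper is inlined as a stand-in for `Common.paramMultiplicity` above, and the config values are hypothetical:

```scala
import com.typesafe.config.{Config, ConfigFactory}
import scala.util.Try

// Stand-in for Common.paramMultiplicity, inlined for illustration.
def paramMultiplicity(config: Config): Int =
  Try(config.getString("multiplicity").toInt).getOrElse(1)

// A job config that requests 4x duplication (hypothetical value).
val withParam = ConfigFactory.parseString("""multiplicity = "4" """)
assert(paramMultiplicity(withParam) == 4)

// A config without the parameter falls back to 1, i.e. no duplication.
assert(paramMultiplicity(ConfigFactory.empty()) == 1)
```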
14 changes: 7 additions & 7 deletions src/main/scala/com/dataintuitive/luciusapi/initialize.scala
@@ -36,7 +36,8 @@ object initialize extends SparkSessionJob with NamedObjectSupport {
dbVersion: String,
partitions: Int,
storageLevel: StorageLevel,
-geneDataTypes: Map[String, String])
+geneDataTypes: Map[String, String],
+multiplicity: Int)
type JobOutput = collection.Map[String, Any]

override def validate(sparkSession: SparkSession,
@@ -49,8 +50,9 @@
val partitions = paramPartitions(config)
val storageLevel = paramStorageLevel(config)
val geneDataTypes = paramGeneDataTypes(config)
+val multiplicity = paramMultiplicity(config)

-withGood(db, genes) { JobData(_, _, dbVersion, partitions, storageLevel, geneDataTypes) }
+withGood(db, genes) { JobData(_, _, dbVersion, partitions, storageLevel, geneDataTypes, multiplicity) }

}

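As an aside, a minimal sketch of the accumulating-validation pattern behind `withGood`, assuming Scalactic's `Accumulation` (which the jobserver API builds on); `positive` is a made-up validator for illustration:

```scala
import org.scalactic._
import Accumulation._

// Hypothetical validator: Good when positive, Bad with a message otherwise.
def positive(n: Int): Int Or One[ErrorMessage] =
  if (n > 0) Good(n) else Bad(One(s"$n is not positive"))

// withGood applies the function only if every input is Good;
// otherwise it accumulates all error messages.
val ok = withGood(positive(3), positive(5)) { _ + _ }   // Good(8)
val ko = withGood(positive(-1), positive(0)) { _ + _ }  // Bad: both messages accumulated
```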
@@ -94,10 +96,6 @@

val thisVersion = state.state.filter(_.version.major.toString == majorVersion)

-println(outputs)
-println(state)
-println(thisVersion)

val parquets = thisVersion.map(_.obj.toString).map(
sparkSession.read
.schema(Encoders.product[Perturbation].schema) // This assists parquet file reading so that it is more independent of our current Perturbation format.
@@ -111,7 +109,9 @@
case (parquet, _) => parquet.as[Perturbation]
}
}
-val db = dbRaws.reduce(_ union _).repartition(data.partitions)
+val db_single = dbRaws.reduce(_ union _)
+
+val db = (1 to data.multiplicity).map(_ => db_single).reduce(_ union _).repartition(data.partitions)

val dbNamedDataset = NamedDataSet[Perturbation](db, forceComputation = true, storageLevel = data.storageLevel)

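For reference, a minimal sketch of the explicit-schema parquet read used above, assuming a local SparkSession and a simplified stand-in for `Perturbation`; the path is hypothetical:

```scala
import org.apache.spark.sql.{Encoders, SparkSession}

// Simplified stand-in for the real Perturbation model.
case class Perturbation(id: String, value: Double)

val spark = SparkSession.builder().master("local[*]").appName("schema-demo").getOrCreate()
import spark.implicits._

// Deriving the read schema from the case class decouples the read from the
// schema stored in the files, which is what the inline comment above means
// by being more independent of the current Perturbation format.
val db = spark.read
  .schema(Encoders.product[Perturbation].schema)
  .parquet("/path/to/perturbations.parquet") // hypothetical location
  .as[Perturbation]
```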

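And a minimal sketch of what the multiplicity union does, assuming a local SparkSession; the numbers are illustrative:

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").appName("multiplicity-demo").getOrCreate()
import spark.implicits._

val base = Seq(1, 2, 3).toDS()
val multiplicity = 4 // illustrative value

// Unioning the dataset with itself `multiplicity` times inflates the
// apparent dataset size, which is the point of the new parameter:
val inflated = (1 to multiplicity).map(_ => base).reduce(_ union _)
assert(inflated.count() == base.count() * multiplicity) // 12 rows
```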