Merge pull request #20 from data-intuitive/develop
Bump LuciusCore dependency to 4.1.2
Grifs authored Feb 10, 2023
2 parents 46300ec + 3d61cb5 commit 755d1c9
Showing 4 changed files with 20 additions and 9 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -14,6 +14,7 @@ There's still a lot of work to be done on this (version numbers don't reflect ev
| 5.1.1 | 4.1.1 | 0.11.1 | 2.4.7 |
| 5.1.2 | 4.1.1 | 0.11.1 | 2.4.7 |
| 5.1.3 | 4.1.1 | 0.11.1 | 2.4.7 |
+| 5.1.4 | 4.1.2 | 0.11.1 | 2.4.7 |

# API Documentation

4 changes: 2 additions & 2 deletions build.sbt
@@ -2,15 +2,15 @@ name := "LuciusAPI"

import aether.AetherKeys._

ThisBuild / version := "5.1.3"
ThisBuild / version := "5.1.4"

scalaVersion := "2.11.12"

resolvers += Resolver.githubPackages("data-intuitive")
resolvers += "Artifactory" at "https://sparkjobserver.jfrog.io/artifactory/jobserver/"

libraryDependencies ++= Seq(
"com.data-intuitive" %% "luciuscore" % "4.1.1",
"com.data-intuitive" %% "luciuscore" % "4.1.2",
"spark.jobserver" %% "job-server-api" % "0.11.1" % "provided",
"spark.jobserver" %% "job-server-extras" % "0.11.1" % "provided",
"org.scalactic" %% "scalactic" % "3.0.7" % "test" ,
10 changes: 10 additions & 0 deletions src/main/scala/com/dataintuitive/luciusapi/Common.scala
@@ -312,6 +312,16 @@ object Common extends Serializable {
.getOrElse(Map.empty)
}

+/**
+ * To make sure Lucius handles heavy load and slow response times,
+ * it's important to dev/test with larger datasets than a small dev dataset allows.
+ * This parameter sets how many times the input data is duplicated, so the apparent dataset is larger.
+ */
+def paramMultiplicity(config: Config): Int = {
+  Try(config.getString("multiplicity").toInt)
+    .getOrElse(1)
+}

}

}
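For context, a minimal sketch of how the new `multiplicity` parameter is read, assuming Typesafe Config (the `Config` type used here); the helper is inlined as a stand-in for `Common.paramMultiplicity` above, and the config values are hypothetical:

```scala
import com.typesafe.config.{Config, ConfigFactory}
import scala.util.Try

// Stand-in for Common.paramMultiplicity, inlined for illustration.
def paramMultiplicity(config: Config): Int =
  Try(config.getString("multiplicity").toInt).getOrElse(1)

// A job config that requests 4x duplication (hypothetical value).
val withParam = ConfigFactory.parseString("""multiplicity = "4" """)
assert(paramMultiplicity(withParam) == 4)

// A config without the parameter falls back to 1, i.e. no duplication.
assert(paramMultiplicity(ConfigFactory.empty()) == 1)
```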
14 changes: 7 additions & 7 deletions src/main/scala/com/dataintuitive/luciusapi/initialize.scala
@@ -36,7 +36,8 @@ object initialize extends SparkSessionJob with NamedObjectSupport {
dbVersion: String,
partitions: Int,
storageLevel: StorageLevel,
-geneDataTypes: Map[String, String])
+geneDataTypes: Map[String, String],
+multiplicity: Int)
type JobOutput = collection.Map[String, Any]

override def validate(sparkSession: SparkSession,
@@ -49,8 +50,9 @@
val partitions = paramPartitions(config)
val storageLevel = paramStorageLevel(config)
val geneDataTypes = paramGeneDataTypes(config)
+val multiplicity = paramMultiplicity(config)

-withGood(db, genes) { JobData(_, _, dbVersion, partitions, storageLevel, geneDataTypes) }
+withGood(db, genes) { JobData(_, _, dbVersion, partitions, storageLevel, geneDataTypes, multiplicity) }

}

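As an aside, a minimal sketch of the accumulating-validation pattern behind `withGood`, assuming Scalactic's `Accumulation` (which the jobserver API builds on); `positive` is a made-up validator for illustration:

```scala
import org.scalactic._
import Accumulation._

// Hypothetical validator: Good when positive, Bad with a message otherwise.
def positive(n: Int): Int Or One[ErrorMessage] =
  if (n > 0) Good(n) else Bad(One(s"$n is not positive"))

// withGood applies the function only if every input is Good;
// otherwise it accumulates all error messages.
val ok = withGood(positive(3), positive(5)) { _ + _ }   // Good(8)
val ko = withGood(positive(-1), positive(0)) { _ + _ }  // Bad: both messages accumulated
```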
@@ -94,10 +96,6 @@

val thisVersion = state.state.filter(_.version.major.toString == majorVersion)

-println(outputs)
-println(state)
-println(thisVersion)

val parquets = thisVersion.map(_.obj.toString).map(
sparkSession.read
.schema(Encoders.product[Perturbation].schema) // This assists parquet file reading so that it is more independent of our current Perturbation format.
@@ -111,7 +109,9 @@
case (parquet, _) => parquet.as[Perturbation]
}
}
-val db = dbRaws.reduce(_ union _).repartition(data.partitions)
+val db_single = dbRaws.reduce(_ union _)
+
+val db = (1 to data.multiplicity).map(_ => db_single).reduce(_ union _).repartition(data.partitions)

val dbNamedDataset = NamedDataSet[Perturbation](db, forceComputation = true, storageLevel = data.storageLevel)

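For reference, a minimal sketch of the explicit-schema parquet read used above, assuming a local SparkSession and a simplified stand-in for `Perturbation`; the path is hypothetical:

```scala
import org.apache.spark.sql.{Encoders, SparkSession}

// Simplified stand-in for the real Perturbation model.
case class Perturbation(id: String, value: Double)

val spark = SparkSession.builder().master("local[*]").appName("schema-demo").getOrCreate()
import spark.implicits._

// Deriving the read schema from the case class decouples the read from the
// schema stored in the files, which is what the inline comment above means
// by being more independent of the current Perturbation format.
val db = spark.read
  .schema(Encoders.product[Perturbation].schema)
  .parquet("/path/to/perturbations.parquet") // hypothetical location
  .as[Perturbation]
```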

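And a minimal sketch of what the multiplicity union does, assuming a local SparkSession; the numbers are illustrative:

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").appName("multiplicity-demo").getOrCreate()
import spark.implicits._

val base = Seq(1, 2, 3).toDS()
val multiplicity = 4 // illustrative value

// Unioning the dataset with itself `multiplicity` times inflates the
// apparent dataset size, which is the point of the new parameter:
val inflated = (1 to multiplicity).map(_ => base).reduce(_ union _)
assert(inflated.count() == base.count() * multiplicity) // 12 rows
```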