Commit eb205db: Cleanup

clairemcginty committed Sep 19, 2024
1 parent: 23bcbd9
Showing 3 changed files with 2 additions and 188 deletions.
jmh/src/test/scala/magnolify/jmh/MagnolifyBench.scala: 2 additions & 104 deletions
@@ -16,18 +16,13 @@

package magnolify.jmh

-import magnolify.parquet.ParquetType.WriteSupport
-import magnolify.parquet.{MagnolifyParquetProperties, ParquetType}

import java.util.concurrent.TimeUnit

import magnolify.scalacheck.auto._
import magnolify.test.Simple._
-import org.apache.hadoop.conf.Configuration
import org.scalacheck._
import org.openjdk.jmh.annotations._

-import scala.jdk.CollectionConverters._

object MagnolifyBench {
  val seed: rng.Seed = rng.Seed(0)
  val prms: Gen.Parameters = Gen.Parameters.default
@@ -92,103 +87,6 @@ class AvroBench {
  @Benchmark def avroSchema: Schema = AvroType[Nested].schema
}

-@State(Scope.Benchmark)
-class ParquetReadState(pt: ParquetType[Nested]) {
-  import org.apache.parquet.io._
-  import org.apache.parquet.column.impl.ColumnWriteStoreV1
-  import org.apache.parquet.column.ParquetProperties
-  import org.apache.parquet.hadoop.api.InitContext
-
-  var reader: RecordReader[Nested] = null
-
-  @Setup(Level.Invocation)
-  def setup(): Unit = {
-    // Write page
-    val columnIO = new ColumnIOFactory(true).getColumnIO(pt.schema)
-    val memPageStore = new ParquetInMemoryPageStore(1)
-    val columns = new ColumnWriteStoreV1(
-      pt.schema,
-      memPageStore,
-      ParquetProperties.builder.withPageSize(800).withDictionaryEncoding(false).build
-    )
-    val writeSupport = pt.writeSupport
-    val recordWriter = columnIO.getRecordWriter(columns)
-    writeSupport.prepareForWrite(recordWriter)
-    writeSupport.write(MagnolifyBench.nested)
-    recordWriter.flush()
-    columns.flush()
-
-    // Read and convert page
-    val conf = new Configuration()
-    val readSupport = pt.readSupport
-    reader = columnIO.getRecordReader(
-      memPageStore,
-      readSupport.prepareForRead(
-        conf,
-        new java.util.HashMap,
-        pt.schema,
-        readSupport.init(new InitContext(conf, new java.util.HashMap, pt.schema)))
-    )
-  }
-}
-
-@State(Scope.Benchmark)
-class ParquetWriteState(pt: ParquetType[Nested]) {
-  import org.apache.parquet.io._
-  import org.apache.parquet.column.impl.ColumnWriteStoreV1
-  import org.apache.parquet.column.ParquetProperties
-
-  var writer: WriteSupport[Nested] = null
-
-  @Setup(Level.Invocation)
-  def setup(): Unit = {
-    val columnIO = new ColumnIOFactory(true).getColumnIO(pt.schema)
-    val memPageStore = new ParquetInMemoryPageStore(1)
-    val columns = new ColumnWriteStoreV1(
-      pt.schema,
-      memPageStore,
-      ParquetProperties.builder.withPageSize(800).withDictionaryEncoding(false).build
-    )
-    val writeSupport = pt.writeSupport
-    val recordWriter = columnIO.getRecordWriter(columns)
-    writeSupport.prepareForWrite(recordWriter)
-    this.writer = writeSupport
-  }
-}
-
-object ParquetStates {
-  def confWithGroupedArraysProp(propValue: Boolean): Configuration = {
-    val conf = new Configuration()
-    conf.setBoolean(MagnolifyParquetProperties.WriteGroupedArrays, propValue)
-    conf
-  }
-  class DefaultParquetReadState extends ParquetReadState(ParquetType[Nested](confWithGroupedArraysProp(false)))
-  class DefaultParquetWriteState extends ParquetWriteState(ParquetType[Nested](confWithGroupedArraysProp(false)))
-
-  class ParquetAvroCompatReadState extends ParquetReadState(ParquetType[Nested](confWithGroupedArraysProp(true)))
-  class ParquetAvroCompatWriteState extends ParquetWriteState(ParquetType[Nested](confWithGroupedArraysProp(true)))
-}
-
-@BenchmarkMode(Array(Mode.AverageTime))
-@OutputTimeUnit(TimeUnit.NANOSECONDS)
-@State(Scope.Thread)
-class ParquetBench {
-  import MagnolifyBench._
-
-  @Benchmark def parquetWrite(state: ParquetStates.DefaultParquetWriteState): Unit = state.writer.write(nested)
-  @Benchmark def parquetRead(state: ParquetStates.DefaultParquetReadState): Nested = state.reader.read()
-}
-
-@BenchmarkMode(Array(Mode.AverageTime))
-@OutputTimeUnit(TimeUnit.NANOSECONDS)
-@State(Scope.Thread)
-class ParquetAvroCompatBench {
-  import MagnolifyBench._
-
-  @Benchmark def parquetWrite(state: ParquetStates.ParquetAvroCompatWriteState): Unit = state.writer.write(nested)
-  @Benchmark def parquetRead(state: ParquetStates.ParquetAvroCompatReadState): Nested = state.reader.read()
-}
-
@BenchmarkMode(Array(Mode.AverageTime))
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Thread)
@@ -259,7 +157,7 @@ class ExampleBench {
  private val exampleNested = implicitly[Arbitrary[ExampleNested]].arbitrary(prms, seed).get
  private val example = exampleType.to(exampleNested).build()
  @Benchmark def exampleTo: Example.Builder = exampleType.to(exampleNested)
-  @Benchmark def exampleFrom: ExampleNested = exampleType.from(example.getFeatures.getFeatureMap.asScala.toMap)
+  @Benchmark def exampleFrom: ExampleNested = exampleType.from(example)
}

// Collections are not supported
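
The updated exampleFrom benchmark passes the built Example straight to the converter rather than first extracting its feature map, which is also why the scala.jdk.CollectionConverters._ import above was dropped. A minimal round-trip sketch under that reading; the Point case class is illustrative, not from the benchmark, and assumes magnolify's built-in ExampleField instance for Long:

import magnolify.tensorflow._
import org.tensorflow.example.Example

case class Point(x: Long, y: Long)

// Derived converter, as with exampleType in ExampleBench above.
val pointType: ExampleType[Point] = ExampleType[Point]

// to(...) yields an Example.Builder (see the exampleTo benchmark).
val ex: Example = pointType.to(Point(1L, 2L)).build()

// from(...) now accepts the Example directly; previously the benchmark
// passed example.getFeatures.getFeatureMap.asScala.toMap.
val p: Point = pointType.from(ex)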
jmh/src/test/scala/magnolify/jmh/ParquetInMemoryPageStore.scala: 0 additions & 77 deletions

This file was deleted.
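Per its usage in the benchmark states removed above, this file supplied the in-memory Parquet page store (ParquetInMemoryPageStore) that backed the deleted read and write benchmarks.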

parquet/src/main/scala/magnolify/parquet/MagnolifyParquetProperties.scala: 0 additions & 7 deletions
@@ -31,11 +31,4 @@ object MagnolifyParquetProperties {

  val ReadTypeKey = "parquet.type.read.type"
  val WriteTypeKey = "parquet.type.write.type"

-  // Hash any Configuration values that might affect schema creation to use as part of Schema cache key
-  private[parquet] def hashValues(conf: Configuration): Int =
-    Option(conf.get(WriteGroupedArrays))
-      .map(_.toBoolean)
-      .getOrElse(WriteGroupedArraysDefault)
-      .hashCode()
}
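
For context, the deleted hashValues folded the WriteGroupedArrays setting into magnolify-parquet's schema cache key, falling back to WriteGroupedArraysDefault when the property was unset. Setting that property mirrors the ParquetStates.confWithGroupedArraysProp helper removed from MagnolifyBench.scala; a minimal sketch, assuming a case class whose ParquetField instance derives as usual (the Record type here is illustrative):

import org.apache.hadoop.conf.Configuration
import magnolify.parquet.{MagnolifyParquetProperties, ParquetType}

// Illustrative record type; any case class with a repeated field works.
case class Record(xs: List[Int])

// Enable grouped (Avro-compatible) array encoding via the Hadoop Configuration,
// as ParquetStates.confWithGroupedArraysProp did before this cleanup.
val conf = new Configuration()
conf.setBoolean(MagnolifyParquetProperties.WriteGroupedArrays, true)
val pt: ParquetType[Record] = ParquetType[Record](conf)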
