Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add GroupBy variable converter in Jupyter #663

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ else cast()

/**
 * Reinterprets this row as a [DataRow] of [T] using a plain `as` cast.
 *
 * The type argument [T] is not checked at runtime; no schema verification is performed here.
 */
public fun <T> AnyRow.cast(): DataRow<T> = this as DataRow<T>

/**
 * Reinterprets this [GroupBy] as `GroupBy<T, G>` using a plain `as` cast.
 *
 * The type arguments [T] and [G] are not checked at runtime; no schema verification is performed here.
 */
public fun <T, G> GroupBy<*, *>.cast(): GroupBy<T, G> = this as GroupBy<T, G>

/**
 * Casts the frame returned by `df()` to [T], forwarding [verify], and returns the row at index 0 of the result.
 *
 * NOTE(review): the result is always row 0 of the cast frame — confirm that `df()` yields a frame
 * in which this row sits at index 0.
 *
 * @param verify Passed through to the frame-level `cast<T>(verify)` call; defaults to `true`.
 */
public inline fun <reified T> AnyRow.cast(verify: Boolean = true): DataRow<T> = df().cast<T>(verify)[0]

/**
 * Reinterprets this column as a [DataColumn] of [T] using a plain `as` cast.
 *
 * The type argument [T] is not checked at runtime; values are not inspected.
 */
public fun <T> AnyCol.cast(): DataColumn<T> = this as DataColumn<T>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import org.jetbrains.kotlinx.dataframe.codeGen.Marker
import org.jetbrains.kotlinx.dataframe.codeGen.MarkerVisibility
import org.jetbrains.kotlinx.dataframe.codeGen.MarkersExtractor
import org.jetbrains.kotlinx.dataframe.codeGen.NameNormalizer
import org.jetbrains.kotlinx.dataframe.codeGen.ProvidedCodeConverter
import org.jetbrains.kotlinx.dataframe.impl.codeGen.CodeGeneratorImpl
import org.jetbrains.kotlinx.dataframe.impl.codeGen.FullyQualifiedNames
import org.jetbrains.kotlinx.dataframe.impl.codeGen.ShortNames
Expand All @@ -22,7 +23,7 @@ public enum class InterfaceGenerationMode {
None;
}

public data class CodeGenResult(val code: CodeWithConverter, val newMarkers: List<Marker>)
public data class CodeGenResult(val code: CodeWithConverter<ProvidedCodeConverter>, val newMarkers: List<Marker>)

public interface CodeGenerator : ExtensionsCodeGenerator {

Expand All @@ -43,7 +44,7 @@ public interface CodeGenerator : ExtensionsCodeGenerator {
interfaceMode: InterfaceGenerationMode,
extensionProperties: Boolean,
readDfMethod: DefaultReadDfMethod? = null,
): CodeWithConverter
): CodeWithConverter<ProvidedCodeConverter>

public companion object {
public fun create(useFqNames: Boolean = true): CodeGenerator {
Expand All @@ -61,7 +62,7 @@ internal fun CodeGenerator.generate(
markerClass: KClass<*>,
interfaceMode: InterfaceGenerationMode,
extensionProperties: Boolean,
): CodeWithConverter = generate(
): CodeWithConverter<ProvidedCodeConverter> = generate(
MarkersExtractor.get(markerClass),
interfaceMode,
extensionProperties
Expand All @@ -70,4 +71,4 @@ internal fun CodeGenerator.generate(
public inline fun <reified T> CodeGenerator.generate(
interfaceMode: InterfaceGenerationMode,
extensionProperties: Boolean,
): CodeWithConverter = generate(T::class, interfaceMode, extensionProperties)
): CodeWithConverter<ProvidedCodeConverter> = generate(T::class, interfaceMode, extensionProperties)
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,19 @@ import org.jetbrains.kotlinx.jupyter.api.VariableName
* Class representing generated code declarations for a [Marker].
*
* @param declarations The generated code.
* @param converter Optional converter for the [Marker], such as a [org.jetbrains.kotlinx.dataframe.api.cast], often used for Jupyter.
* Its type also lets org.jetbrains.dataframe.impl.codeGen.CodeGenerator provide additional info (the marker name) to its callers.
* At the same time, a single name doesn't make sense for GroupBy, where the code to be executed contains two declarations.
*/
public data class CodeWithConverter(val declarations: Code, val converter: (VariableName) -> Code = EmptyConverter) {
public data class CodeWithConverter<T : CodeConverter>(
val declarations: Code,
val converter: T
) {

public companion object {
public const val EmptyDeclarations: Code = ""
public val EmptyConverter: (VariableName) -> Code = { it }
public val Empty: CodeWithConverter = CodeWithConverter(EmptyDeclarations, EmptyConverter)
public val EmptyConverter: CodeConverter = CodeConverter { it }
public val Empty: CodeWithConverter<CodeConverter> = CodeWithConverter(EmptyDeclarations, EmptyConverter)
}

val hasDeclarations: Boolean get() = declarations.isNotBlank()
Expand All @@ -27,3 +32,19 @@ public data class CodeWithConverter(val declarations: Code, val converter: (Vari
else -> declarations + "\n" + converter(name)
}
}

/**
 * A function from the name of a variable to a [Code] snippet built around that name.
 *
 * [CodeWithConverter] keeps one of these next to the generated declarations and calls it with the
 * variable name to produce the code that follows those declarations, e.g. `name.cast<Marker>()`.
 */
public sealed interface CodeConverter : (VariableName) -> Code

/**
 * [CodeConverter] backed by an arbitrary function.
 *
 * Invoking the converter forwards the variable name to the wrapped function and returns its result unchanged.
 */
public class CodeConverterImpl(private val f: (VariableName) -> Code) : CodeConverter {
    override fun invoke(p1: VariableName): Code = f(p1)
}

/**
 * Factory named after the [CodeConverter] interface: wraps [f] in a [CodeConverterImpl].
 *
 * @param f Function that turns a variable name into code.
 * @return A [CodeConverter] that delegates every invocation to [f].
 */
public fun CodeConverter(f: (VariableName) -> Code): CodeConverter {
    return CodeConverterImpl(f)
}

/**
 * [CodeConverter] that renders a `cast` call to a named marker.
 *
 * For a variable name `x` the produced code is `x.cast<markerName>()`.
 *
 * @property markerName Name used as the type argument of the rendered `cast` call; exposed so callers
 * can reuse it when composing their own converters.
 */
public class ProvidedCodeConverter(public val markerName: String) : CodeConverter {
    override fun invoke(p1: VariableName): Code = "$p1.cast<$markerName>()"
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import org.jetbrains.kotlinx.dataframe.impl.codeGen.ExtensionsCodeGeneratorImpl
import org.jetbrains.kotlinx.dataframe.impl.codeGen.ShortNames

public interface ExtensionsCodeGenerator {
public fun generate(marker: IsolatedMarker): CodeWithConverter
public fun generate(marker: IsolatedMarker): CodeWithConverter<*>

public companion object {
public fun create(): ExtensionsCodeGenerator = ExtensionsCodeGeneratorImpl(ShortNames)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package org.jetbrains.dataframe.impl.codeGen

import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.AnyRow
import org.jetbrains.kotlinx.dataframe.api.GroupBy
import org.jetbrains.kotlinx.dataframe.codeGen.CodeWithConverter
import org.jetbrains.kotlinx.dataframe.impl.codeGen.ReplCodeGeneratorImpl
import org.jetbrains.kotlinx.jupyter.api.Code
Expand All @@ -13,15 +14,17 @@ internal interface ReplCodeGenerator {
fun process(
df: AnyFrame,
property: KProperty<*>? = null,
): CodeWithConverter
): CodeWithConverter<*>

fun process(
row: AnyRow,
property: KProperty<*>? = null,
): CodeWithConverter
): CodeWithConverter<*>

fun process(markerClass: KClass<*>): Code

fun process(groupBy: GroupBy<*, *>): CodeWithConverter<*>

companion object {
fun create(): ReplCodeGenerator = ReplCodeGeneratorImpl()
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import org.jetbrains.kotlinx.dataframe.codeGen.IsolatedMarker
import org.jetbrains.kotlinx.dataframe.codeGen.Marker
import org.jetbrains.kotlinx.dataframe.codeGen.MarkerVisibility
import org.jetbrains.kotlinx.dataframe.codeGen.NameNormalizer
import org.jetbrains.kotlinx.dataframe.codeGen.ProvidedCodeConverter
import org.jetbrains.kotlinx.dataframe.codeGen.SchemaProcessor
import org.jetbrains.kotlinx.dataframe.codeGen.ValidFieldName
import org.jetbrains.kotlinx.dataframe.codeGen.toNullable
Expand All @@ -43,7 +44,7 @@ internal fun Iterable<Marker>.filterRequiredForSchema(schema: DataFrameSchema) =
internal val charsToQuote = """[ `(){}\[\].<>'"/|\\!?@:;%^&*#$-]""".toRegex()

internal fun createCodeWithConverter(code: String, markerName: String) =
CodeWithConverter(code) { "$it.cast<$markerName>()" }
CodeWithConverter(code, ProvidedCodeConverter(markerName))

private val letterCategories = setOf(
CharCategory.UPPERCASE_LETTER,
Expand Down Expand Up @@ -345,7 +346,7 @@ internal open class ExtensionsCodeGeneratorImpl(
return declarations.joinToString("\n")
}

override fun generate(marker: IsolatedMarker): CodeWithConverter {
override fun generate(marker: IsolatedMarker): CodeWithConverter<*> {
val code = generateExtensionProperties(marker)
return createCodeWithConverter(code, marker.name)
}
Expand All @@ -370,7 +371,7 @@ internal class CodeGeneratorImpl(typeRendering: TypeRenderingStrategy = FullyQua
interfaceMode: InterfaceGenerationMode,
extensionProperties: Boolean,
readDfMethod: DefaultReadDfMethod?,
): CodeWithConverter {
): CodeWithConverter<ProvidedCodeConverter> {
val code = when (interfaceMode) {
NoFields, WithFields ->
generateInterface(
Expand Down Expand Up @@ -505,7 +506,7 @@ internal class CodeGeneratorImpl(typeRendering: TypeRenderingStrategy = FullyQua
}
}

public fun CodeWithConverter.toStandaloneSnippet(packageName: String, additionalImports: List<String>): String =
public fun CodeWithConverter<*>.toStandaloneSnippet(packageName: String, additionalImports: List<String>): String =
declarations.toStandaloneSnippet(packageName, additionalImports)

public fun Code.toStandaloneSnippet(packageName: String, additionalImports: List<String>): String =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,14 @@ import org.jetbrains.kotlinx.dataframe.AnyRow
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.api.GroupBy
import org.jetbrains.kotlinx.dataframe.api.schema
import org.jetbrains.kotlinx.dataframe.codeGen.CodeConverter
import org.jetbrains.kotlinx.dataframe.codeGen.CodeWithConverter
import org.jetbrains.kotlinx.dataframe.codeGen.Marker
import org.jetbrains.kotlinx.dataframe.codeGen.MarkerVisibility
import org.jetbrains.kotlinx.dataframe.codeGen.MarkersExtractor
import org.jetbrains.kotlinx.dataframe.codeGen.ProvidedCodeConverter
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
import org.jetbrains.kotlinx.jupyter.api.Code
import kotlin.reflect.KClass
Expand Down Expand Up @@ -44,9 +47,9 @@ internal class ReplCodeGeneratorImpl : ReplCodeGenerator {
else -> null
}

override fun process(row: AnyRow, property: KProperty<*>?): CodeWithConverter = process(row.df(), property)
override fun process(row: AnyRow, property: KProperty<*>?): CodeWithConverter<*> = process(row.df(), property)

override fun process(df: AnyFrame, property: KProperty<*>?): CodeWithConverter {
override fun process(df: AnyFrame, property: KProperty<*>?): CodeWithConverter<*> {
var targetSchema = df.schema()

if (property != null) {
Expand Down Expand Up @@ -78,11 +81,21 @@ internal class ReplCodeGeneratorImpl : ReplCodeGenerator {
return generate(schema = targetSchema, name = markerInterfacePrefix, isOpen = true)
}

/**
 * Generates declarations and a cast converter for a [GroupBy] variable.
 *
 * Two markers are generated, in this order: one from the schema of `groupBy.keys` (name suffix "Keys")
 * and one from the schema of `groupBy.groups` (name suffix "Groups"). Their declarations are joined with
 * a newline, and the resulting converter renders `name.cast<KeysMarker, GroupsMarker>()`.
 */
override fun process(groupBy: GroupBy<*, *>): CodeWithConverter<*> {
    val keysCode = generate(
        schema = groupBy.keys.schema(),
        name = markerInterfacePrefix + "Keys",
        isOpen = false,
    )
    val groupsCode = generate(
        schema = groupBy.groups.schema.value,
        name = markerInterfacePrefix + "Groups",
        isOpen = false,
    )

    // Both marker names are needed as type arguments of the two-parameter GroupBy cast.
    val keysMarker = keysCode.converter.markerName
    val groupsMarker = groupsCode.converter.markerName

    return CodeWithConverter(
        declarations = "${keysCode.declarations}\n${groupsCode.declarations}",
        converter = CodeConverter { "$it.cast<$keysMarker, $groupsMarker>()" },
    )
}

fun generate(
schema: DataFrameSchema,
name: String,
isOpen: Boolean,
): CodeWithConverter {
): CodeWithConverter<ProvidedCodeConverter> {
val result = generator.generate(
schema = schema,
name = name,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import org.jetbrains.kotlinx.dataframe.api.asDataFrame
import org.jetbrains.kotlinx.dataframe.api.columnsCount
import org.jetbrains.kotlinx.dataframe.api.isColumnGroup
import org.jetbrains.kotlinx.dataframe.api.name
import org.jetbrains.kotlinx.dataframe.codeGen.CodeConverter
import org.jetbrains.kotlinx.dataframe.codeGen.CodeWithConverter
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
Expand Down Expand Up @@ -64,7 +65,7 @@ internal class Integration(

val version = options["v"]

private fun KotlinKernelHost.execute(codeWithConverter: CodeWithConverter, argument: String): VariableName? {
private fun KotlinKernelHost.execute(codeWithConverter: CodeWithConverter<*>, argument: String): VariableName? {
val code = codeWithConverter.with(argument)
return if (code.isNotBlank()) {
val result = execute(code)
Expand All @@ -75,7 +76,7 @@ internal class Integration(
}

private fun KotlinKernelHost.execute(
codeWithConverter: CodeWithConverter,
codeWithConverter: CodeWithConverter<*>,
property: KProperty<*>,
type: KType,
): VariableName? {
Expand Down Expand Up @@ -148,7 +149,7 @@ internal class Integration(
codeGen: ReplCodeGenerator,
): VariableName? = if (col.isColumnGroup()) {
val codeWithConverter = codeGen.process(col.asColumnGroup().asDataFrame(), property).let { c ->
CodeWithConverter(c.declarations) { c.converter("$it.asColumnGroup()") }
CodeWithConverter(c.declarations, converter = CodeConverter { c.converter("$it.asColumnGroup()") })
}
execute(
codeWithConverter = codeWithConverter,
Expand Down Expand Up @@ -282,6 +283,7 @@ internal class Integration(
is AnyRow -> updateAnyRowVariable(instance, property, codeGen)
is AnyFrame -> updateAnyFrameVariable(instance, property, codeGen)
is ImportDataSchema -> updateImportDataSchemaVariable(instance, property)
is GroupBy<*, *> -> execute(codeGen.process(instance), property, GroupBy::class.createStarProjectedType(false))
else -> error("${instance::class} should not be handled by Dataframe field handler")
}
}
Expand All @@ -290,7 +292,8 @@ internal class Integration(
value is ColumnGroup<*> ||
value is AnyRow ||
value is AnyFrame ||
value is ImportDataSchema
value is ImportDataSchema ||
value is GroupBy<*, *>
}
})

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,22 @@ class CodeGenerationTests : DataFrameJupyterTest() {
""".checkCompilation()
}

// The snippet binds a GroupBy (grouped by a computed column named "k") to a variable and then reads `k`
// from its keys through a property accessor.
// NOTE(review): presumably checkCompilation asserts that the snippet compiles in the test notebook — confirm
// against its definition.
@Test
fun `groupBy`() {
"""
val groupBy = dataFrameOf("a")("1", "11", "2", "22").groupBy { expr { "a"<String>().length } named "k" }
groupBy.keys.k
""".checkCompilation()
}

// The snippet adds a column "newCol" to a GroupBy, binds the result to a variable, and then refers to
// `newCol` through a property accessor inside `aggregate`.
// NOTE(review): presumably checkCompilation asserts that the snippet compiles in the test notebook — confirm
// against its definition.
@Test
fun `groupBy add`() {
"""
val groupBy = dataFrameOf("a")("1", "11", "2", "22").groupBy { expr { "a"<String>().length } named "k" }.add("newCol") { 42 }
groupBy.aggregate { newCol into "newCol" }
""".checkCompilation()
}

@Test
fun `interface without body compiled correctly`() {
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ else cast()

/**
 * Reinterprets this row as a [DataRow] of [T] using a plain `as` cast.
 *
 * The type argument [T] is not checked at runtime; no schema verification is performed here.
 */
public fun <T> AnyRow.cast(): DataRow<T> = this as DataRow<T>

/**
 * Reinterprets this [GroupBy] as `GroupBy<T, G>` using a plain `as` cast.
 *
 * The type arguments [T] and [G] are not checked at runtime; no schema verification is performed here.
 */
public fun <T, G> GroupBy<*, *>.cast(): GroupBy<T, G> = this as GroupBy<T, G>

/**
 * Casts the frame returned by `df()` to [T], forwarding [verify], and returns the row at index 0 of the result.
 *
 * NOTE(review): the result is always row 0 of the cast frame — confirm that `df()` yields a frame
 * in which this row sits at index 0.
 *
 * @param verify Passed through to the frame-level `cast<T>(verify)` call; defaults to `true`.
 */
public inline fun <reified T> AnyRow.cast(verify: Boolean = true): DataRow<T> = df().cast<T>(verify)[0]

/**
 * Reinterprets this column as a [DataColumn] of [T] using a plain `as` cast.
 *
 * The type argument [T] is not checked at runtime; values are not inspected.
 */
public fun <T> AnyCol.cast(): DataColumn<T> = this as DataColumn<T>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import org.jetbrains.kotlinx.dataframe.codeGen.Marker
import org.jetbrains.kotlinx.dataframe.codeGen.MarkerVisibility
import org.jetbrains.kotlinx.dataframe.codeGen.MarkersExtractor
import org.jetbrains.kotlinx.dataframe.codeGen.NameNormalizer
import org.jetbrains.kotlinx.dataframe.codeGen.ProvidedCodeConverter
import org.jetbrains.kotlinx.dataframe.impl.codeGen.CodeGeneratorImpl
import org.jetbrains.kotlinx.dataframe.impl.codeGen.FullyQualifiedNames
import org.jetbrains.kotlinx.dataframe.impl.codeGen.ShortNames
Expand All @@ -22,7 +23,7 @@ public enum class InterfaceGenerationMode {
None;
}

public data class CodeGenResult(val code: CodeWithConverter, val newMarkers: List<Marker>)
public data class CodeGenResult(val code: CodeWithConverter<ProvidedCodeConverter>, val newMarkers: List<Marker>)

public interface CodeGenerator : ExtensionsCodeGenerator {

Expand All @@ -43,7 +44,7 @@ public interface CodeGenerator : ExtensionsCodeGenerator {
interfaceMode: InterfaceGenerationMode,
extensionProperties: Boolean,
readDfMethod: DefaultReadDfMethod? = null,
): CodeWithConverter
): CodeWithConverter<ProvidedCodeConverter>

public companion object {
public fun create(useFqNames: Boolean = true): CodeGenerator {
Expand All @@ -61,7 +62,7 @@ internal fun CodeGenerator.generate(
markerClass: KClass<*>,
interfaceMode: InterfaceGenerationMode,
extensionProperties: Boolean,
): CodeWithConverter = generate(
): CodeWithConverter<ProvidedCodeConverter> = generate(
MarkersExtractor.get(markerClass),
interfaceMode,
extensionProperties
Expand All @@ -70,4 +71,4 @@ internal fun CodeGenerator.generate(
public inline fun <reified T> CodeGenerator.generate(
interfaceMode: InterfaceGenerationMode,
extensionProperties: Boolean,
): CodeWithConverter = generate(T::class, interfaceMode, extensionProperties)
): CodeWithConverter<ProvidedCodeConverter> = generate(T::class, interfaceMode, extensionProperties)
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,19 @@ import org.jetbrains.kotlinx.jupyter.api.VariableName
* Class representing generated code declarations for a [Marker].
*
* @param declarations The generated code.
* @param converter Needs to provide additional info (name) from org.jetbrains.dataframe.impl.codeGen.CodeGenerator to its callers
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there are two @param converters now

* But at the same time name doesn't make sense for GroupBy where code to be executed contains two declarations
* @param converter Optional converter for the [Marker], such as a [org.jetbrains.kotlinx.dataframe.api.cast], often used for Jupyter.
*/
public data class CodeWithConverter(val declarations: Code, val converter: (VariableName) -> Code = EmptyConverter) {
public data class CodeWithConverter<T : CodeConverter>(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see most functions return a CodeWithConverter<*>, and since both CodeConverterImpl and ProvidedCodeConverter are essentially just a (VariableName) -> Code, do we need the generic type T?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For GroupBy I need to combine the names of two generated interfaces together for the cast, and so CodeWithConverter provides this additional info. This parameter doesn't make sense for other usages, so there we use the more generic CodeWithConverter<*>

val declarations: Code,
val converter: T
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe it's an idea to rename this variable or rework it a bit, because looking at this with new eyes, even with the documentation, I have no idea what's going on

Copy link
Collaborator Author

@koperagen koperagen Jun 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Idk, hard to tell more than expressed in the code itself. Basically CodeWithConverter is all the information that dataframe integration needs to generate except name of the variable being updated. Variable name is provided by the caller due to separation of concerns. So there's hardly any special "domain" meaning behind it

) {

public companion object {
public const val EmptyDeclarations: Code = ""
public val EmptyConverter: (VariableName) -> Code = { it }
public val Empty: CodeWithConverter = CodeWithConverter(EmptyDeclarations, EmptyConverter)
public val EmptyConverter: CodeConverter = CodeConverter { it }
public val Empty: CodeWithConverter<CodeConverter> = CodeWithConverter(EmptyDeclarations, EmptyConverter)
}

val hasDeclarations: Boolean get() = declarations.isNotBlank()
Expand All @@ -27,3 +32,19 @@ public data class CodeWithConverter(val declarations: Code, val converter: (Vari
else -> declarations + "\n" + converter(name)
}
}

/**
 * A function from the name of a variable to a [Code] snippet built around that name.
 *
 * [CodeWithConverter] keeps one of these next to the generated declarations and calls it with the
 * variable name to produce the code that follows those declarations, e.g. `name.cast<Marker>()`.
 */
public sealed interface CodeConverter : (VariableName) -> Code
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This probably needs docs to explain what it does/should do. As it's a foreign concept and essentially just a String->String, which doesn't tell much. Maybe it could steal some docs from CodeWithConverter


/**
 * [CodeConverter] backed by an arbitrary function.
 *
 * Invoking the converter forwards the variable name to the wrapped function and returns its result unchanged.
 */
public class CodeConverterImpl(private val f: (VariableName) -> Code) : CodeConverter {
    override fun invoke(p1: VariableName): Code = f(p1)
}

/**
 * Factory named after the [CodeConverter] interface: wraps [f] in a [CodeConverterImpl].
 *
 * @param f Function that turns a variable name into code.
 * @return A [CodeConverter] that delegates every invocation to [f].
 */
public fun CodeConverter(f: (VariableName) -> Code): CodeConverter {
    return CodeConverterImpl(f)
}

/**
 * [CodeConverter] that renders a `cast` call to a named marker.
 *
 * For a variable name `x` the produced code is `x.cast<markerName>()`.
 *
 * @property markerName Name used as the type argument of the rendered `cast` call; exposed so callers
 * can reuse it when composing their own converters.
 */
public class ProvidedCodeConverter(public val markerName: String) : CodeConverter {
    override fun invoke(p1: VariableName): Code = "$p1.cast<$markerName>()"
}
Loading