-
Notifications
You must be signed in to change notification settings - Fork 328
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Refine single-column Table to act as a Column #12165
Changes from all commits
d16a695
3d84772
9936506
0ea4db9
edf7b98
28b54a9
5f5fada
b9af812
d4a4617
eedc808
649ce8e
4f64b0e
4f2f8f5
7be3f68
334afcd
55d5f60
6d2158f
ad09f59
fb0f1f8
3553c1b
6ee6c9c
85e8bc1
545e4e1
3a8eb76
e2f311e
bc718a2
3bf49a2
c9ee850
18efc0e
609ae4b
0812321
fcda638
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -68,6 +68,7 @@ import project.Internal.IR.Query.Query | |
import project.Internal.IR.SQL_Expression.SQL_Expression | ||
import project.Internal.IR.SQL_Join_Kind.SQL_Join_Kind | ||
import project.Internal.SQL_Type_Reference.SQL_Type_Reference | ||
import project.Internal.Type_Refinements.DB_Table_Refinements | ||
import project.SQL_Query.SQL_Query | ||
import project.SQL_Statement.SQL_Statement | ||
import project.SQL_Type.SQL_Type | ||
|
@@ -80,9 +81,8 @@ polyglot java import java.util.UUID | |
|
||
## Represents a column-oriented table data structure backed by a database. | ||
type DB_Table | ||
## PRIVATE | ||
|
||
Represents a column-oriented table data structure backed by a database. | ||
## Internal constructor that should not be used directly. | ||
Please use `DB_Table.new` instead. | ||
|
||
Arguments: | ||
- internal_name: The name of the table. | ||
|
@@ -91,6 +91,14 @@ type DB_Table | |
- context: The context associated with this table. | ||
private Value internal_name:Text connection:(Connection | Any) (internal_columns:(Vector Internal_Column)) context:Context | ||
|
||
## The internal constructor used to construct a DB_Table instance. | ||
|
||
It can perform some additional operations, like refining the type, | ||
so it should always be preferred over calling `DB_Table.Value` directly. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How would I know this without reading this comment? Guess we need some sort of convention for module private constructors similar to what we have for module private methods. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good point, I will add a comment in There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We can also consider changing the name of the constructor from |
||
private new (internal_name:Text) connection:(Connection | Any) (internal_columns:(Vector Internal_Column)) context:Context -> DB_Table = | ||
DB_Table_Refinements.refine_table <| | ||
DB_Table.Value internal_name connection internal_columns context | ||
|
||
## GROUP Standard.Base.Metadata | ||
ICON metadata | ||
The name of the table. | ||
|
@@ -1648,7 +1656,7 @@ type DB_Table | |
problem_builder.attach_problems_before on_problems <| | ||
new_from = From_Spec.Join sql_join_kind left_setup.subquery right_setup.subquery on_expressions | ||
new_ctx = Context.for_subquery new_from . set_where_filters where_expressions | ||
DB_Table.Value new_table_name self.connection result_columns new_ctx | ||
DB_Table.new new_table_name self.connection result_columns new_ctx | ||
|
||
## ALIAS append, cartesian join | ||
GROUP Standard.Base.Calculations | ||
|
@@ -2067,7 +2075,7 @@ type DB_Table | |
input_column = Internal_Column.Value name (infer_return_type expression) expression | ||
dialect.adapt_unified_column input_column result_type infer_return_type | ||
|
||
DB_Table.Value union_alias self.connection new_columns new_ctx | ||
DB_Table.new union_alias self.connection new_columns new_ctx | ||
|
||
## ALIAS average, count, count distinct, first, group by, last, longest, maximum, mean, median, minimum, mode, percentile, shortest, standard deviation, sum, summarize, variance | ||
GROUP Standard.Base.Calculations | ||
|
@@ -2805,7 +2813,7 @@ type DB_Table | |
Arguments: | ||
- columns: The columns with which to update this table. | ||
updated_columns : Vector Internal_Column -> DB_Table | ||
updated_columns self internal_columns = DB_Table.Value self.name self.connection internal_columns self.context | ||
updated_columns self internal_columns = DB_Table.new self.name self.connection internal_columns self.context | ||
|
||
## PRIVATE | ||
|
||
|
@@ -2814,7 +2822,7 @@ type DB_Table | |
Arguments: | ||
- ctx: The new context for this table. | ||
updated_context : Context -> DB_Table | ||
updated_context self ctx = DB_Table.Value self.name self.connection self.internal_columns ctx | ||
updated_context self ctx = DB_Table.new self.name self.connection self.internal_columns ctx | ||
|
||
## PRIVATE | ||
|
||
|
@@ -2838,9 +2846,9 @@ type DB_Table | |
setup = ctx.as_subquery self.name [internal_columns] | ||
new_ctx = Context.for_subquery setup.subquery | ||
new_columns = setup.new_columns.first | ||
DB_Table.Value self.name self.connection new_columns new_ctx | ||
DB_Table.new self.name self.connection new_columns new_ctx | ||
False -> | ||
DB_Table.Value self.name self.connection internal_columns ctx | ||
DB_Table.new self.name self.connection internal_columns ctx | ||
|
||
## PRIVATE | ||
Nests a table as a subquery, using `updated_context_and_columns`, which | ||
|
@@ -2926,10 +2934,11 @@ type DB_Table | |
|
||
- `Auto_Detect`: The file format is determined by the provided file. | ||
- `Bytes` and `Plain_Text`: The Table does not support these types in | ||
the `write` function. If passed as format, an | ||
`Illegal_Argument` is raised. To write out the table as plain | ||
text, the user needs to call the `Text.from Table` method and then | ||
use the `Text.write` function. | ||
the `write` function. If passed as format, an | ||
`Illegal_Argument` is raised. To write out the table as plain | ||
text, the user needs to convert the Table to Text | ||
(e.g. using `to_delimited` method) and then use the `Text.write` | ||
function. | ||
|
||
> Example | ||
Write a database table to a CSV file. | ||
|
@@ -3216,7 +3225,7 @@ make_table connection table_name columns ctx on_problems = | |
problem_builder.report_unique_name_strategy column_names_validator | ||
# We do not want to stop the table from being fetched, so we report the issues as warnings. | ||
problem_builder.attach_problems_before on_problems <| | ||
DB_Table.Value table_name connection cols ctx | ||
DB_Table.new table_name connection cols ctx | ||
|
||
## PRIVATE | ||
By default, join on the first column, unless it's a cross join, in which | ||
|
@@ -3276,7 +3285,7 @@ make_literal_table connection column_vectors column_names alias = | |
if needs_cast.not then base_column else | ||
connection.dialect.make_cast base_column sql_type infer_type_from_database | ||
|
||
DB_Table.Value alias connection internal_columns context | ||
DB_Table.new alias connection internal_columns context | ||
|
||
## PRIVATE | ||
Many_Files_List.from (that : DB_Table) = | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
private | ||
|
||
from Standard.Base import all | ||
|
||
import project.DB_Column.DB_Column | ||
import project.DB_Table.DB_Table | ||
from project.Internal.Type_Refinements.Single_Column_DB_Table_Conversions import all | ||
|
||
refine_table (table : DB_Table) = | ||
if is_single_column table . not then table else | ||
r = table : DB_Table & DB_Column | ||
r | ||
|
||
is_single_column table:DB_Table -> Boolean = | ||
table.column_count == 1 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
private | ||
|
||
from Standard.Base import all | ||
|
||
import project.DB_Column.DB_Column | ||
import project.DB_Table.DB_Table | ||
from project.Internal.Type_Refinements.DB_Table_Refinements import is_single_column | ||
|
||
## This conversion is internal and should never be exported. | ||
DB_Column.from (that : DB_Table) -> DB_Column = | ||
Runtime.assert (is_single_column that) | ||
that.at 0 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -77,26 +77,26 @@ _merge_input_and_tables (input_table : Table) (tables_for_rows : Vector Read_Man | |
|
||
multiplicated_inputs = duplicate_rows input_table counts | ||
Runtime.assert (unified_data.row_count == multiplicated_inputs.row_count) | ||
Runtime.assert (unified_metadata.is_nothing || (unified_metadata.row_count == unified_data.row_count)) | ||
Runtime.assert ((Nothing == unified_metadata) || (unified_metadata.row_count == unified_data.row_count)) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is this change needed? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Due to #12185. Essentially -
|
||
|
||
first_pass = if unified_metadata.is_nothing then multiplicated_inputs else | ||
first_pass = if Nothing == unified_metadata then multiplicated_inputs else | ||
multiplicated_inputs.zip unified_metadata right_prefix="" | ||
first_pass.zip unified_data right_prefix="" | ||
|
||
## Unifies provided metadata tables, knowing that some tables may have no | ||
metadata - in such a case we want to insert as many Nothing rows for metadata | ||
as there are rows in the corresponding data table. | ||
_unify_metadata (tables : Vector Read_Many_As_Table_Result) (on_problems : Problem_Behavior) -> Table | Nothing = | ||
has_no_metadata = tables.all r-> r.metadata.is_nothing | ||
has_no_metadata = tables.all r-> Nothing == r.metadata | ||
if has_no_metadata then Nothing else | ||
unique = Column_Naming_Helper.in_memory.create_unique_name_strategy | ||
tables.each r-> | ||
if r.metadata.is_nothing.not then unique.mark_used r.metadata.column_names | ||
if Nothing != r.metadata then unique.mark_used r.metadata.column_names | ||
|
||
# A dummy column because we cannot create a table with 0 columns, it will be removed after union. We find a unique name for it to avoid conflicts. | ||
dummy_column_name = unique.make_unique "_Internal_Placeholder_Column_" | ||
tables_for_union = tables.map r-> | ||
if r.metadata.is_nothing.not then r.metadata else | ||
if Nothing != r.metadata then r.metadata else | ||
Table.new [Column.from_repeated_item dummy_column_name Nothing r.data.row_count] | ||
|
||
# Metadata are always merged by-name and columns that appear only in some tables are kept. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since #11777 we can simplify the error handling code, let's use it :)