Skip to content

Commit

Permalink
sql: support copy encoding utf8 syntax
Browse files Browse the repository at this point in the history
Fixes cockroachdb#114200

Adding support for the ENCODING option for COPY.
- Included ENCODING in grammar file.
- Processing parsed value and validating it is set to UTF8
- Removed Unimplemented syntax tests

Release note (sql change): Added support for the ENCODING option of
COPY, as long as the encoding of 'UTF8' is specified.
  • Loading branch information
lpessoa authored and rafiss committed Jan 29, 2024
1 parent 53e4efd commit 782efff
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 11 deletions.
2 changes: 2 additions & 0 deletions docs/generated/sql/bnf/stmt_block.bnf
Original file line number Diff line number Diff line change
Expand Up @@ -2085,6 +2085,7 @@ copy_options ::=
| 'HEADER'
| 'QUOTE' 'SCONST'
| 'ESCAPE' 'SCONST'
| 'ENCODING' 'SCONST'

copy_generic_options ::=
'DESTINATION' string_or_placeholder
Expand All @@ -2099,6 +2100,7 @@ copy_generic_options ::=
| 'HEADER' 'FALSE'
| 'QUOTE' 'SCONST'
| 'ESCAPE' 'SCONST'
| 'ENCODING' 'SCONST'

db_object_name_component ::=
name
Expand Down
12 changes: 12 additions & 0 deletions pkg/sql/copy_from.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ type copyOptions struct {
delimiter byte
format tree.CopyFormat
null string
encoding string
}

// TODO(#sql-sessions): copy all pre-condition checks from the PG code
Expand Down Expand Up @@ -184,6 +185,17 @@ func processCopyOptions(
)
}

if opts.Encoding != nil {
e, err := exprEval.String(ctx, opts.Encoding)
if err != nil {
return c, err
}
if strings.ToUpper(e) != "UTF8" {
return c, pgerror.New(pgcode.FeatureNotSupported, "only 'utf8' ENCODING is supported")
}
c.encoding = "utf8"
}

return c, nil
}

Expand Down
2 changes: 0 additions & 2 deletions pkg/sql/parser/parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,13 +348,11 @@ func TestUnimplementedSyntax(t *testing.T) {

{`COPY t FROM STDIN OIDS`, 41608, `oids`, ``},
{`COPY t FROM STDIN FREEZE`, 41608, `freeze`, ``},
{`COPY t FROM STDIN ENCODING 'utf-8'`, 41608, `encoding`, ``},
{`COPY t FROM STDIN FORCE QUOTE *`, 41608, `quote`, ``},
{`COPY t FROM STDIN FORCE NULL *`, 41608, `force_null`, ``},
{`COPY t FROM STDIN FORCE NOT NULL *`, 41608, `force_not_null`, ``},
{`COPY t FROM STDIN WITH (OIDS)`, 41608, `oids`, ``},
{`COPY t FROM STDIN (FREEZE)`, 41608, `freeze`, ``},
{`COPY t FROM STDIN WITH (ESCAPE ',', ENCODING 'utf-8')`, 41608, `encoding`, ``},
{`COPY t FROM STDIN WITH (FORCE_QUOTE) *`, 41608, `quote`, ``},
{`COPY t FROM STDIN (FORCE_NULL) *`, 41608, `force_null`, ``},
{`COPY t FROM STDIN (HEADER, FORCE_NOT_NULL) *`, 41608, `force_not_null`, ``},
Expand Down
8 changes: 4 additions & 4 deletions pkg/sql/parser/sql.y
Original file line number Diff line number Diff line change
Expand Up @@ -4216,9 +4216,9 @@ copy_options:
{
return unimplementedWithIssueDetail(sqllex, 41608, "force_null")
}
| ENCODING SCONST error
| ENCODING SCONST
{
return unimplementedWithIssueDetail(sqllex, 41608, "encoding")
$$.val = &tree.CopyOptions{Encoding: tree.NewStrVal($2)}
}

copy_generic_options:
Expand Down Expand Up @@ -4301,9 +4301,9 @@ copy_generic_options:
{
return unimplementedWithIssueDetail(sqllex, 41608, "force_null")
}
| ENCODING SCONST error
| ENCODING SCONST
{
return unimplementedWithIssueDetail(sqllex, 41608, "encoding")
$$.val = &tree.CopyOptions{Encoding: tree.NewStrVal($2)}
}

// %Help: CANCEL
Expand Down
42 changes: 37 additions & 5 deletions pkg/sql/parser/testdata/copy
Original file line number Diff line number Diff line change
Expand Up @@ -87,12 +87,12 @@ COPY t (a, b, c) FROM STDIN WITH (FORMAT CSV, DELIMITER '_', DESTINATION '_') --
COPY _ (_, _, _) FROM STDIN WITH (FORMAT CSV, DELIMITER ' ', DESTINATION 'filename') -- identifiers removed

parse
COPY t (a, b, c) FROM STDIN destination = 'filename' CSV DELIMITER ' ' ESCAPE 'x' HEADER
COPY t (a, b, c) FROM STDIN destination = 'filename' CSV DELIMITER ' ' ESCAPE 'x' HEADER ENCODING 'utf8'
----
COPY t (a, b, c) FROM STDIN WITH (FORMAT CSV, DELIMITER ' ', DESTINATION 'filename', ESCAPE 'x', HEADER true) -- normalized!
COPY t (a, b, c) FROM STDIN WITH (FORMAT CSV, DELIMITER (' '), DESTINATION ('filename'), ESCAPE ('x'), HEADER true) -- fully parenthesized
COPY t (a, b, c) FROM STDIN WITH (FORMAT CSV, DELIMITER '_', DESTINATION '_', ESCAPE '_', HEADER true) -- literals removed
COPY _ (_, _, _) FROM STDIN WITH (FORMAT CSV, DELIMITER ' ', DESTINATION 'filename', ESCAPE 'x', HEADER true) -- identifiers removed
COPY t (a, b, c) FROM STDIN WITH (FORMAT CSV, DELIMITER ' ', ENCODING 'utf8', DESTINATION 'filename', ESCAPE 'x', HEADER true) -- normalized!
COPY t (a, b, c) FROM STDIN WITH (FORMAT CSV, DELIMITER (' '), ENCODING ('utf8'), DESTINATION ('filename'), ESCAPE ('x'), HEADER true) -- fully parenthesized
COPY t (a, b, c) FROM STDIN WITH (FORMAT CSV, DELIMITER '_', ENCODING '_', DESTINATION '_', ESCAPE '_', HEADER true) -- literals removed
COPY _ (_, _, _) FROM STDIN WITH (FORMAT CSV, DELIMITER ' ', ENCODING 'utf8', DESTINATION 'filename', ESCAPE 'x', HEADER true) -- identifiers removed

parse
COPY t TO STDOUT
Expand Down Expand Up @@ -388,3 +388,35 @@ at or near "explain": syntax error
DETAIL: source SQL:
COPY (EXPLAIN SELECT * FROM t) TO STDOUT
^

parse
COPY "copytab" FROM STDIN (ENCODING 'utf8')
----
COPY copytab FROM STDIN WITH (ENCODING 'utf8') -- normalized!
COPY copytab FROM STDIN WITH (ENCODING ('utf8')) -- fully parenthesized
COPY copytab FROM STDIN WITH (ENCODING '_') -- literals removed
COPY _ FROM STDIN WITH (ENCODING 'utf8') -- identifiers removed

parse
COPY "copytab" FROM STDIN (HEADER true, ESCAPE '%', ENCODING 'utf8')
----
COPY copytab FROM STDIN WITH (ENCODING 'utf8', ESCAPE '%', HEADER true) -- normalized!
COPY copytab FROM STDIN WITH (ENCODING ('utf8'), ESCAPE ('%'), HEADER true) -- fully parenthesized
COPY copytab FROM STDIN WITH (ENCODING '_', ESCAPE '_', HEADER true) -- literals removed
COPY _ FROM STDIN WITH (ENCODING 'utf8', ESCAPE '%', HEADER true) -- identifiers removed

parse
COPY t (a, b, c) FROM STDIN destination = 'filename' CSV DELIMITER ' '
----
COPY t (a, b, c) FROM STDIN WITH (FORMAT CSV, DELIMITER ' ', DESTINATION 'filename') -- normalized!
COPY t (a, b, c) FROM STDIN WITH (FORMAT CSV, DELIMITER (' '), DESTINATION ('filename')) -- fully parenthesized
COPY t (a, b, c) FROM STDIN WITH (FORMAT CSV, DELIMITER '_', DESTINATION '_') -- literals removed
COPY _ (_, _, _) FROM STDIN WITH (FORMAT CSV, DELIMITER ' ', DESTINATION 'filename') -- identifiers removed

error
COPY "copytab" FROM STDIN (FORMAT csv, ENCODING 'abc', ENCODING 'def')
----
at or near "def": syntax error: encoding option specified multiple times
DETAIL: source SQL:
COPY "copytab" FROM STDIN (FORMAT csv, ENCODING 'abc', ENCODING 'def')
^
13 changes: 13 additions & 0 deletions pkg/sql/sem/tree/copy.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ type CopyOptions struct {
Escape *StrVal
Header bool
Quote *StrVal
Encoding *StrVal

// Additional flags are needed to keep track of whether explicit default
// values were already set.
Expand Down Expand Up @@ -117,6 +118,12 @@ func (o *CopyOptions) Format(ctx *FmtCtx) {
ctx.FormatNode(o.Delimiter)
addSep = true
}
if o.Encoding != nil {
maybeAddSep()
ctx.WriteString("ENCODING ")
ctx.FormatNode(o.Encoding)
addSep = true
}
if o.Null != nil {
maybeAddSep()
ctx.WriteString("NULL ")
Expand Down Expand Up @@ -181,6 +188,12 @@ func (o *CopyOptions) CombineWith(other *CopyOptions) error {
}
o.Delimiter = other.Delimiter
}
if other.Encoding != nil {
if o.Encoding != nil {
return pgerror.Newf(pgcode.Syntax, "encoding option specified multiple times")
}
o.Encoding = other.Encoding
}
if other.Null != nil {
if o.Null != nil {
return pgerror.Newf(pgcode.Syntax, "null option specified multiple times")
Expand Down

0 comments on commit 782efff

Please sign in to comment.