diff --git a/README.md b/README.md index 3bf90db..982c594 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,47 @@ This will create a table with the following schema and insert the rows. | 2024-02-04 03:29:55.504942 | Alice | *null* | Red | | 2024-02-04 03:29:55.504943 | Bob | 30 | *null* | +## CLI + +### Install + +```bash +go install github.com/m-mizutani/bqs/cmd/bqs@latest +``` + +### Example + +```bash +$ cat test.jsonl +{"color":"blue", "number":5, "property":{"age": 18}} +{"color":"green", "number":1, "property":{"name":"Alice"}} +$ bqs infer test.jsonl +[ + { + "name": "color", + "type": "STRING" + }, + { + "name": "number", + "type": "FLOAT" + }, + { + "fields": [ + { + "name": "name", + "type": "STRING" + }, + { + "name": "age", + "type": "FLOAT" + } + ], + "name": "property", + "type": "RECORD" + } +] +``` + ## License Apache License 2.0 diff --git a/cmd/bqs/infer.go b/cmd/bqs/infer.go new file mode 100644 index 0000000..5878371 --- /dev/null +++ b/cmd/bqs/infer.go @@ -0,0 +1,109 @@ +package main + +import ( + "encoding/json" + "io" + "os" + "path/filepath" + + "cloud.google.com/go/bigquery" + "github.com/m-mizutani/bqs" + "github.com/m-mizutani/goerr" + "github.com/urfave/cli/v2" +) + +func inferCommand() *cli.Command { + var ( + output string + ) + return &cli.Command{ + Name: "infer", + UsageText: "bqs infer [command options] [json files...]", + Description: "Infer schema from JSON data and output as BigQuery schema file. If no file is specified, read from stdin.", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "output", + Aliases: []string{"o"}, + Usage: "Output schema file path", + Value: "-", + Destination: &output, + }, + }, + Action: func(c *cli.Context) error { + var w io.Writer + if output == "-" { + w = os.Stdout + } else { + file, err := os.Create(filepath.Clean(output)) + if err != nil { + return goerr.Wrap(err, "Failed to create schema file").With("path", output) + } + defer file.Close() + w = file + } + + type reader struct { + r io.Reader + name string + } + + var readers []*reader + if c.Args().Len() == 0 { + readers = append(readers, &reader{ + r: os.Stdin, + name: "(stdin)", + }) + } else { + for _, path := range c.Args().Slice() { + file, err := os.Open(filepath.Clean(path)) + if err != nil { + return goerr.Wrap(err, "Failed to open file").With("path", path) + } + defer file.Close() + readers = append(readers, &reader{ + r: file, + name: path, + }) + } + } + + var schema bigquery.Schema + for _, reader := range readers { + logger.Debug("infer schema", "input", reader.name) + + decoder := json.NewDecoder(reader.r) + for i := 0; ; i++ { + var data any + if err := decoder.Decode(&data); err != nil { + if err == io.EOF { + break + } + return goerr.Wrap(err, "Failed to decode JSON data").With("input", reader.name) + } + + inferred, err := bqs.Infer(data) + if err != nil { + return goerr.Wrap(err, "Failed to infer schema").With("data", data).With("input", reader.name).With("line", i+1) + } + + merged, err := bqs.Merge(schema, inferred) + if err != nil { + return goerr.Wrap(err, "Failed to merge schema").With("input", reader.name).With("line", i+1) + } + + schema = merged + } + } + + raw, err := schema.ToJSONFields() + if err != nil { + return goerr.Wrap(err, "Failed to convert schema to JSON") + } + if _, err := w.Write(raw); err != nil { + return goerr.Wrap(err, "Failed to write schema").With("output", output) + } + + return nil + }, + } +} diff --git a/cmd/bqs/logger.go b/cmd/bqs/logger.go new file mode 100644 index 0000000..5b2fb74 --- /dev/null +++ b/cmd/bqs/logger.go @@ -0,0 +1,50 @@ +package main + +import ( + "errors" + "io" + "log/slog" + + "github.com/fatih/color" + "github.com/m-mizutani/clog" +) + +var logger *slog.Logger = slog.Default() + +func configureLogger(level string, w io.Writer) error { + levelMap := map[string]slog.Level{ + "debug": slog.LevelDebug, + "info": slog.LevelInfo, + "warn": slog.LevelWarn, + "error": slog.LevelError, + } + + logLevel, ok := levelMap[level] + if !ok { + return errors.New("invalid log level") + } + + handler := clog.New( + clog.WithWriter(w), + clog.WithLevel(logLevel), + // clog.WithReplaceAttr(filter), + clog.WithSource(true), + // clog.WithTimeFmt("2006-01-02 15:04:05"), + clog.WithColorMap(&clog.ColorMap{ + Level: map[slog.Level]*color.Color{ + slog.LevelDebug: color.New(color.FgGreen, color.Bold), + slog.LevelInfo: color.New(color.FgCyan, color.Bold), + slog.LevelWarn: color.New(color.FgYellow, color.Bold), + slog.LevelError: color.New(color.FgRed, color.Bold), + }, + LevelDefault: color.New(color.FgBlue, color.Bold), + Time: color.New(color.FgWhite), + Message: color.New(color.FgHiWhite), + AttrKey: color.New(color.FgHiCyan), + AttrValue: color.New(color.FgHiWhite), + }), + ) + logger = slog.New(handler) + + return nil +} diff --git a/cmd/bqs/main.go b/cmd/bqs/main.go new file mode 100644 index 0000000..f276c20 --- /dev/null +++ b/cmd/bqs/main.go @@ -0,0 +1,63 @@ +package main + +import ( + "io" + "os" + "path/filepath" + + "github.com/m-mizutani/goerr" + "github.com/urfave/cli/v2" +) + +func main() { + var ( + logLevel string + logOutput string + ) + + app := cli.App{ + Name: "bqs", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "log-level", + Category: "Log", + Aliases: []string{"l"}, + Usage: "Log level (debug, info, warn, error)", + Value: "info", + Destination: &logLevel, + }, + &cli.StringFlag{ + Name: "log-output", + Category: "Log", + Aliases: []string{"L"}, + Usage: "Log output destination, stdout('-') or file path", + Value: "-", + Destination: &logOutput, + }, + }, + + Before: func(c *cli.Context) error { + var logWriter io.Writer + switch logOutput { + case "-", "stdout": + logWriter = os.Stdout + default: + file, err := os.Create(filepath.Clean(logOutput)) + if err != nil { + return goerr.Wrap(err, "Failed to open log file") + } + logWriter = file + } + + return configureLogger(logLevel, logWriter) + }, + + Commands: []*cli.Command{ + inferCommand(), + }, + } + + if err := app.Run(os.Args); err != nil { + logger.Error("Failed", "error", err) + } +} diff --git a/go.mod b/go.mod index 2bfadd9..5593719 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,11 @@ go 1.21.0 require ( cloud.google.com/go/bigquery v1.60.0 + github.com/fatih/color v1.15.0 + github.com/m-mizutani/clog v0.0.4 + github.com/m-mizutani/goerr v0.1.8 github.com/m-mizutani/gt v0.0.8 + github.com/urfave/cli/v2 v2.27.1 google.golang.org/api v0.172.0 ) @@ -14,6 +18,7 @@ require ( cloud.google.com/go/compute/metadata v0.2.3 // indirect cloud.google.com/go/iam v1.1.7 // indirect github.com/apache/arrow/go/v14 v14.0.2 // indirect + github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/go-logr/logr v1.4.1 // indirect github.com/go-logr/stdr v1.2.2 // indirect @@ -26,9 +31,14 @@ require ( github.com/google/uuid v1.6.0 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect github.com/googleapis/gax-go/v2 v2.12.3 // indirect + github.com/k0kubun/pp/v3 v3.2.0 // indirect github.com/klauspost/compress v1.17.8 // indirect github.com/klauspost/cpuid/v2 v2.2.7 // indirect + github.com/mattn/go-colorable v0.1.13 // indirect + github.com/mattn/go-isatty v0.0.19 // indirect github.com/pierrec/lz4/v4 v4.1.21 // indirect + github.com/russross/blackfriday/v2 v2.1.0 // indirect + github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect github.com/zeebo/xxh3 v1.0.2 // indirect go.opencensus.io v0.24.0 // indirect go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.50.0 // indirect diff --git a/go.sum b/go.sum index ebd310a..89bbc29 100644 --- a/go.sum +++ b/go.sum @@ -21,6 +21,8 @@ github.com/apache/arrow/go/v14 v14.0.2/go.mod h1:u3fgh3EdgN/YQ8cVQRguVW3R+seMybF github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= +github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w= +github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -28,6 +30,8 @@ github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymF github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= +github.com/fatih/color v1.15.0 h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs= +github.com/fatih/color v1.15.0/go.mod h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= @@ -74,17 +78,30 @@ github.com/googleapis/enterprise-certificate-proxy v0.3.2 h1:Vie5ybvEvT75RniqhfF github.com/googleapis/enterprise-certificate-proxy v0.3.2/go.mod h1:VLSiSSBs/ksPL8kq3OBOQ6WRI2QnaFynd1DCjZ62+V0= github.com/googleapis/gax-go/v2 v2.12.3 h1:5/zPPDvw8Q1SuXjrqrZslrqT7dL/uJT2CQii/cLCKqA= github.com/googleapis/gax-go/v2 v2.12.3/go.mod h1:AKloxT6GtNbaLm8QTNSidHUVsHYcBHwWRvkNFJUQcS4= +github.com/k0kubun/pp/v3 v3.2.0 h1:h33hNTZ9nVFNP3u2Fsgz8JXiF5JINoZfFq4SvKJwNcs= +github.com/k0kubun/pp/v3 v3.2.0/go.mod h1:ODtJQbQcIRfAD3N+theGCV1m/CBxweERz2dapdz1EwA= github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU= github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM= github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= +github.com/m-mizutani/clog v0.0.4 h1:6hY5CzHwNS4zuJhF6puazYPtGeaEEGIbrD4Ccimyaow= +github.com/m-mizutani/clog v0.0.4/go.mod h1:a2J7BlnXOkaMQ0fNeDBG3IyyyWnCnSKYH8ltHFNDcHE= +github.com/m-mizutani/goerr v0.1.8 h1:6UtsMmOkJsaYNtAsMNLvWIteZPl1NOxpKFYK5m65vpQ= +github.com/m-mizutani/goerr v0.1.8/go.mod h1:fQkXuu06q+oLlp4FkbiTFzI/N/+WAK/Mz1W5kPZ6yzs= github.com/m-mizutani/gt v0.0.8 h1:6BB+FS9WfVmTHnRJK+GI3VQZIJBqvXG/Ci0Qz/lPXa4= github.com/m-mizutani/gt v0.0.8/go.mod h1:0MPYSfGBLmYjTduzADVmIqD58ELQ5IfBFiK/f0FmB3k= +github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= +github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= +github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= +github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= @@ -93,6 +110,10 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/urfave/cli/v2 v2.27.1 h1:8xSQ6szndafKVRmfyeUMxkNUJQMjL1F2zmsZ+qHpfho= +github.com/urfave/cli/v2 v2.27.1/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ= +github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU= +github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8= github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= @@ -143,7 +164,9 @@ golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o= golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=