From 0a5c43195afa02dc1473b2e91b91f79022f6c2f2 Mon Sep 17 00:00:00 2001 From: Ross Light Date: Sat, 2 Dec 2023 11:51:15 -0800 Subject: [PATCH] internal/repocache: new package Proof of concept that populates a SQLite database with objects from a repository. --- default.nix | 2 +- go.mod | 12 ++ go.sum | 20 ++ internal/repocache/objects/find.sql | 12 ++ internal/repocache/objects/insert.sql | 18 ++ internal/repocache/repocache.go | 269 ++++++++++++++++++++++++++ internal/repocache/repocache_test.go | 198 +++++++++++++++++++ internal/repocache/schema.sql | 93 +++++++++ internal/repocache/sync.go | 178 +++++++++++++++++ 9 files changed, 801 insertions(+), 1 deletion(-) create mode 100644 internal/repocache/objects/find.sql create mode 100644 internal/repocache/objects/insert.sql create mode 100644 internal/repocache/repocache.go create mode 100644 internal/repocache/repocache_test.go create mode 100644 internal/repocache/schema.sql create mode 100644 internal/repocache/sync.go diff --git a/default.nix b/default.nix index f46e92d..21eaccb 100644 --- a/default.nix +++ b/default.nix @@ -37,7 +37,7 @@ in buildGoModule { "-X" "main.buildCommit=${commit}" ]; - vendorHash = "sha256-56Sah030xbWsoOu8r3c3nN2UGHvQORheavebP+Z1Wc8="; + vendorHash = "sha256-S8g7BAdGyt4RznioaRO5jmLDMX4O6JZqt1ul3RKAuDQ="; nativeBuildInputs = [ pandoc installShellFiles makeWrapper ]; nativeCheckInputs = [ bash coreutils git ]; diff --git a/go.mod b/go.mod index 4261774..a4e46db 100644 --- a/go.mod +++ b/go.mod @@ -23,4 +23,16 @@ require ( golang.org/x/exp v0.0.0-20230728194245-b0cb94b80691 golang.org/x/sys v0.1.0 golang.org/x/tools v0.2.0 + zombiezen.com/go/sqlite v0.13.1 +) + +require ( + github.com/dustin/go-humanize v1.0.0 // indirect + github.com/google/uuid v1.3.0 // indirect + github.com/mattn/go-isatty v0.0.16 // indirect + github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect + modernc.org/libc v1.22.3 // indirect + modernc.org/mathutil v1.5.0 // indirect + 
modernc.org/memory v1.5.0 // indirect + modernc.org/sqlite v1.21.1 // indirect ) diff --git a/go.sum b/go.sum index 9abb538..fb7b04a 100644 --- a/go.sum +++ b/go.sum @@ -44,6 +44,8 @@ github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMn github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= +github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= @@ -102,6 +104,8 @@ github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hf github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= +github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= +github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= @@ -113,8 +117,13 @@ github.com/kisielk/gotool v1.0.0/go.mod 
h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+o github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/mattn/go-isatty v0.0.16 h1:bq3VjFmv/sOjHtdEhmkEV4x1AJtvUvOJ2PFAZ5+peKQ= +github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= @@ -233,6 +242,7 @@ golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -375,6 +385,16 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= +modernc.org/libc v1.22.3 h1:D/g6O5ftAfavceqlLOFwaZuA5KYafKwmr30A6iSqoyY= +modernc.org/libc v1.22.3/go.mod h1:MQrloYP209xa2zHome2a8HLiLm6k0UT8CoHpV74tOFw= +modernc.org/mathutil v1.5.0 h1:rV0Ko/6SfM+8G+yKiyI830l3Wuz1zRutdslNoQ0kfiQ= +modernc.org/mathutil v1.5.0/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= +modernc.org/memory v1.5.0 h1:N+/8c5rE6EqugZwHii4IFsaJ7MUhoWX07J5tC/iI5Ds= +modernc.org/memory v1.5.0/go.mod h1:PkUhL0Mugw21sHPeskwZW4D6VscE/GQJOnIpCnW6pSU= +modernc.org/sqlite v1.21.1 h1:GyDFqNnESLOhwwDRaHGdp2jKLDzpyT/rNLglX3ZkMSU= +modernc.org/sqlite v1.21.1/go.mod h1:XwQ0wZPIh1iKb5mkvCJ3szzbhk+tykC8ZWqTRTgYRwI= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= +zombiezen.com/go/sqlite v0.13.1 h1:qDzxyWWmMtSSEH5qxamqBFmqA2BLSSbtODi3ojaE02o= +zombiezen.com/go/sqlite v0.13.1/go.mod h1:Ht/5Rg3Ae2hoyh1I7gbWtWAl89CNocfqeb/aAMTkJr4= diff --git a/internal/repocache/objects/find.sql b/internal/repocache/objects/find.sql new file mode 100644 index 0000000..57f65b0 --- /dev/null +++ b/internal/repocache/objects/find.sql @@ -0,0 +1,12 @@ +select + "oid" as "oid", + "type" as "type", + "size" as "uncompressed_size", + length("content") as "compressed_size" +from "objects" +where + "sha1" = :sha1 and + "type" is not null and + "size" >= 0 and + "content" is not null +limit 1; diff --git 
a/internal/repocache/objects/insert.sql b/internal/repocache/objects/insert.sql new file mode 100644 index 0000000..a5490c9 --- /dev/null +++ b/internal/repocache/objects/insert.sql @@ -0,0 +1,18 @@ +insert into "objects" ( + "sha1", + "type", + "size", + "content" +) values ( + :sha1, + :type, + :uncompressed_size, + zeroblob(:compressed_size) +) +on conflict ("sha1") do + update set + "type" = :type, + "size" = :uncompressed_size, + "content" = zeroblob(:compressed_size) + where "size" < 0 or "type" is null or "content" is null +returning "oid" as "oid"; diff --git a/internal/repocache/repocache.go b/internal/repocache/repocache.go new file mode 100644 index 0000000..30a0f3b --- /dev/null +++ b/internal/repocache/repocache.go @@ -0,0 +1,269 @@ +// Copyright 2023 The gg Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +// Package repocache provides optimized queries over a Git repository +// using an on-disk index. 
+package repocache
+
+import (
+	"compress/zlib"
+	"context"
+	"crypto/sha1"
+	"embed"
+	"errors"
+	"fmt"
+	"io"
+	"strings"
+
+	"gg-scm.io/pkg/git/githash"
+	"gg-scm.io/pkg/git/object"
+	"zombiezen.com/go/sqlite"
+	"zombiezen.com/go/sqlite/ext/refunc"
+	"zombiezen.com/go/sqlite/sqlitex"
+)
+
+// objectsTable and contentColumn name the table and column
+// that hold compressed object content.
+// They are passed to OpenBlob for incremental blob access.
+const (
+	objectsTable  = "objects"
+	contentColumn = "content"
+)
+
+//go:embed schema.sql
+//go:embed objects/*.sql
+var sqlFiles embed.FS
+
+// appID is the SQLite application_id expected in a cache database.
+const appID int32 = 0x40a9233d
+
+// currentUserVersion is the schema version recorded in the database's
+// user_version. A database reporting any other version is rebuilt.
+const currentUserVersion = 1
+
+// ErrObjectNotFound is the error wrapped by [Cache.Cat]
+// when the requested object is not stored in the cache.
+var ErrObjectNotFound = errors.New("git object not found")
+
+// Cache represents an open connection to a cache database.
+type Cache struct {
+	conn *sqlite.Conn
+}
+
+// Open opens a cache file on disk, creating it if necessary.
+// ctx only interrupts the schema setup performed while opening;
+// the interrupt is cleared before Open returns.
+// On failure the connection is closed
+// and the error is prefixed with the path being opened.
+func Open(ctx context.Context, path string) (_ *Cache, err error) {
+	conn, err := sqlite.OpenConn(path, sqlite.OpenCreate|sqlite.OpenReadWrite)
+	if err != nil {
+		return nil, fmt.Errorf("open git repo cache %s: %w", path, err)
+	}
+	// Single cleanup point for every failure after the connection exists.
+	defer func() {
+		if err != nil {
+			conn.Close()
+			err = fmt.Errorf("open git repo cache %s: %w", path, err)
+		}
+	}()
+
+	// schema.sql uses the REGEXP operator, which refunc provides.
+	if err := refunc.Register(conn); err != nil {
+		return nil, err
+	}
+	// Issued before migrate so that it precedes the first schema write.
+	if err := sqlitex.ExecuteTransient(conn, "PRAGMA page_size = 8192;", nil); err != nil {
+		return nil, err
+	}
+
+	conn.SetInterrupt(ctx.Done())
+	if err := migrate(conn); err != nil {
+		return nil, err
+	}
+	if err := sqlitex.ExecuteTransient(conn, `PRAGMA foreign_keys = on;`, nil); err != nil {
+		return nil, err
+	}
+	conn.SetInterrupt(nil)
+	return &Cache{conn}, nil
+}
+
+// migrate brings the database to currentUserVersion
+// inside a single immediate transaction.
+// A database that already reports currentUserVersion is left as-is.
+// For any other version, all tables and views are dropped,
+// schema.sql is applied, and user_version is updated.
+func migrate(conn *sqlite.Conn) (err error) {
+	endFn, err := sqlitex.ImmediateTransaction(conn)
+	if err != nil {
+		return err
+	}
+	defer endFn(&err)
+
+	gotVersion, err := ensureAppID(conn)
+	if err != nil {
+		return err
+	}
+	if gotVersion == currentUserVersion {
+		// The schema is already in place.
+		// schema.sql consists of plain CREATE TABLE statements,
+		// so applying it again would fail on the existing tables
+		// and make every re-open of an existing cache an error.
+		return nil
+	}
+	if err := dropAllTables(conn); err != nil {
+		return err
+	}
+	if err := sqlitex.ExecuteScriptFS(conn, sqlFiles, "schema.sql", nil); err != nil {
+		return err
+	}
+	userVersionStmt := fmt.Sprintf("PRAGMA user_version = %d;", currentUserVersion)
+	return sqlitex.ExecuteTransient(conn, userVersionStmt, nil)
+}
+
+// Cat copies the content of the given object from the cache into dst.
+// If the object is not present in the cache,
+// then Cat will return an error that wraps [ErrObjectNotFound].
+// If Cat does not return an error,
+// it guarantees that the bytes written to dst match the hash.
+// Content is streamed to dst before verification finishes,
+// so dst may have received data even when Cat returns an error.
+func (c *Cache) Cat(ctx context.Context, dst io.Writer, id githash.SHA1) (_ object.Type, err error) {
+	c.conn.SetInterrupt(ctx.Done())
+	defer c.conn.SetInterrupt(nil)
+	defer sqlitex.Transaction(c.conn)(&err)
+
+	return c.cat(dst, id)
+}
+
+// stat looks up the row for id using objects/find.sql.
+// It returns the row's oid, the object type, and the uncompressed size.
+// If the query yields no row, the returned error wraps [ErrObjectNotFound].
+func (c *Cache) stat(id githash.SHA1) (oid int64, tp object.Type, uncompressedSize int64, err error) {
+	// Stays negative unless the query produces a row.
+	uncompressedSize = -1
+	err = sqlitex.ExecuteTransientFS(c.conn, sqlFiles, "objects/find.sql", &sqlitex.ExecOptions{
+		Named: map[string]any{
+			":sha1": id[:],
+		},
+		ResultFunc: func(stmt *sqlite.Stmt) error {
+			oid = stmt.GetInt64("oid")
+			tp = object.Type(stmt.GetText("type"))
+			uncompressedSize = stmt.GetInt64("uncompressed_size")
+			return nil
+		},
+	})
+	if err != nil {
+		return 0, "", 0, fmt.Errorf("read git object %v: %w", id, err)
+	}
+	if uncompressedSize < 0 {
+		return 0, "", 0, fmt.Errorf("read git object %v: %w", id, ErrObjectNotFound)
+	}
+	return oid, tp, uncompressedSize, nil
+}
+
+// cat is the savepoint-scoped implementation of [Cache.Cat].
+// It inflates the stored content into dst
+// while hashing the object prefix plus content,
+// then rejects the object if the size or the SHA-1 does not match.
+func (c *Cache) cat(dst io.Writer, id githash.SHA1) (_ object.Type, err error) {
+	defer sqlitex.Save(c.conn)(&err)
+
+	oid, tp, uncompressedSize, err := c.stat(id)
+	if err != nil {
+		return "", err
+	}
+	compressedContent, err := c.conn.OpenBlob("", objectsTable, contentColumn, oid, false)
+	if err != nil {
+		return "", fmt.Errorf("read git object %v: %w", id, err)
+	}
+	defer compressedContent.Close()
+	uncompressedContent, err := zlib.NewReader(compressedContent)
+	if err != nil {
+		return "", fmt.Errorf("read git object %v: %w", id, err)
+	}
+
+	h := sha1.New()
+	h.Write(object.AppendPrefix(nil, tp, uncompressedSize))
+	gotSize, err := io.Copy(io.MultiWriter(h, dst), uncompressedContent)
+	// Always close the inflater, but let a copy failure take precedence.
+	if closeErr := uncompressedContent.Close(); err == nil {
+		err = closeErr
+	}
+	if err != nil {
+		return "", fmt.Errorf("read git object %v: %w", id, err)
+	}
+	if gotSize != uncompressedSize {
+		return "", fmt.Errorf("read git object %v: corrupted content (advertised size was %d bytes; found %d bytes)", id, uncompressedSize, gotSize)
+	}
+	var gotHash githash.SHA1
+	h.Sum(gotHash[:0])
+	if gotHash != id {
+		return "", fmt.Errorf("read git object %v: corrupted content (hash = %v)", id, gotHash)
+	}
+	return tp, nil
+}
+
+// Close releases all resources associated with the cache connection.
+func (c *Cache) Close() error {
+	return c.conn.Close()
+}
+
+// quoteIdent returns name as a double-quoted SQL identifier,
+// doubling any embedded double quotes.
+func quoteIdent(name string) string {
+	return `"` + strings.ReplaceAll(name, `"`, `""`) + `"`
+}
+
+// dropAllTables drops every view and then every table
+// listed in sqlite_schema, inside a savepoint.
+// Names starting with "sqlite_" are skipped:
+// those are SQLite's own bookkeeping tables, which cannot be dropped.
+func dropAllTables(conn *sqlite.Conn) (err error) {
+	defer sqlitex.Save(conn)(&err)
+
+	var tables, views []string
+	const query = `SELECT "type", "name" FROM sqlite_schema ` +
+		`WHERE "type" in ('table', 'view') AND "name" NOT LIKE 'sqlite\_%' ESCAPE '\';`
+	err = sqlitex.ExecuteTransient(conn, query, &sqlitex.ExecOptions{
+		ResultFunc: func(stmt *sqlite.Stmt) error {
+			name := stmt.ColumnText(1)
+			switch stmt.ColumnText(0) {
+			case "table":
+				tables = append(tables, name)
+			case "view":
+				views = append(views, name)
+			}
+			return nil
+		},
+	})
+	if err != nil {
+		return fmt.Errorf("drop all tables: %w", err)
+	}
+	// Views go first because they may refer to the tables.
+	for _, name := range views {
+		if err := sqlitex.ExecuteTransient(conn, `DROP VIEW `+quoteIdent(name)+`;`, nil); err != nil {
+			return fmt.Errorf("drop all tables: %w", err)
+		}
+	}
+	for _, name := range tables {
+		if err := sqlitex.ExecuteTransient(conn, `DROP TABLE `+quoteIdent(name)+`;`, nil); err != nil {
+			return fmt.Errorf("drop all tables: %w", err)
+		}
+	}
+	return nil
+}
+
+// userVersion reads the database's user_version pragma.
+func userVersion(conn *sqlite.Conn) (int32, error) {
+	var version int32
+	err := sqlitex.ExecuteTransient(conn, "PRAGMA user_version;", &sqlitex.ExecOptions{
+		ResultFunc: func(stmt *sqlite.Stmt) error {
+			version = stmt.ColumnInt32(0)
+			return nil
+		},
+	})
+	if err != nil {
+		return 0, fmt.Errorf("get database user_version: %w", err)
+	}
+	return version, nil
+}
+
+// ensureAppID verifies that the database belongs to this package
+// and returns its current user_version.
+// A database is accepted if its application_id equals appID,
+// or if its application_id is zero and it has no schema objects at all.
+// On success the application_id is (re)written to appID.
+func ensureAppID(conn *sqlite.Conn) (schemaVersion int32, err error) {
+	defer sqlitex.Save(conn)(&err)
+
+	var hasSchema bool
+	err = sqlitex.ExecuteTransient(conn, "VALUES ((SELECT COUNT(*) FROM sqlite_master) > 0);", &sqlitex.ExecOptions{
+		ResultFunc: func(stmt *sqlite.Stmt) error {
+			hasSchema = stmt.ColumnInt(0) != 0
+			return nil
+		},
+	})
+	if err != nil {
+		return 0, err
+	}
+	var dbAppID int32
+	err = sqlitex.ExecuteTransient(conn, "PRAGMA application_id;", &sqlitex.ExecOptions{
+		ResultFunc: func(stmt *sqlite.Stmt) error {
+			dbAppID = stmt.ColumnInt32(0)
+			return nil
+		},
+	})
+	if err != nil {
+		return 0, err
+	}
+	isEmptyDatabase := dbAppID == 0 && !hasSchema
+	if dbAppID != appID && !isEmptyDatabase {
+		return 0, fmt.Errorf("database application_id = %#x (expected %#x)", dbAppID, appID)
+	}
+	schemaVersion, err = userVersion(conn)
+	if err != nil {
+		return 0, err
+	}
+	// Using Sprintf because PRAGMAs don't permit arbitrary expressions, and thus
+	// don't permit using parameter substitution.
+	err = sqlitex.ExecuteTransient(conn, fmt.Sprintf("PRAGMA application_id = %d;", appID), nil)
+	if err != nil {
+		return 0, err
+	}
+	return schemaVersion, nil
+}
diff --git a/internal/repocache/repocache_test.go b/internal/repocache/repocache_test.go
new file mode 100644
index 0000000..54b6177
--- /dev/null
+++ b/internal/repocache/repocache_test.go
@@ -0,0 +1,198 @@
+// Copyright 2023 The gg Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package repocache
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"path/filepath"
+	"strings"
+	"testing"
+	"time"
+
+	"gg-scm.io/pkg/git"
+	"gg-scm.io/pkg/git/object"
+	"gg-scm.io/pkg/git/packfile/client"
+	"gg-scm.io/tool/internal/filesystem"
+	"github.com/google/go-cmp/cmp"
+	"zombiezen.com/go/sqlite"
+	"zombiezen.com/go/sqlite/sqlitex"
+)
+
+// TestOpen checks that Open succeeds on a brand-new database file
+// and that it rebuilds a database carrying a different user_version.
+func TestOpen(t *testing.T) {
+	t.Run("Clean", func(t *testing.T) {
+		ctx := context.Background()
+		cache, err := Open(ctx, filepath.Join(t.TempDir(), "foo.db"))
+		if err != nil {
+			t.Fatal(err)
+		}
+		if err := cache.Close(); err != nil {
+			t.Fatal(err)
+		}
+	})
+
+	t.Run("DifferentVersion", func(t *testing.T) {
+		ctx := context.Background()
+		dbPath := filepath.Join(t.TempDir(), "foo.db")
+		// Prepare a database with the right application_id,
+		// a newer user_version, and a table Open does not know about.
+		conn, err := sqlite.OpenConn(dbPath, sqlite.OpenCreate|sqlite.OpenReadWrite)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer conn.Close()
+		err = sqlitex.ExecuteTransient(conn, fmt.Sprintf("PRAGMA application_id = %d;", appID), nil)
+		if err != nil {
+			t.Fatal(err)
+		}
+		err = sqlitex.ExecuteTransient(conn, fmt.Sprintf("PRAGMA user_version = %d;", currentUserVersion+1), nil)
+		if err != nil {
+			t.Fatal(err)
+		}
+		err = sqlitex.ExecuteTransient(conn, "CREATE TABLE testjunktable (foo);", nil)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		cache, err := Open(ctx, dbPath)
+		if err != nil {
+			t.Fatal(err)
+		}
+		if err := cache.Close(); err != nil {
+			t.Fatal(err)
+		}
+
+		// The unknown table must be gone after Open.
+		err = sqlitex.ExecuteTransient(conn, "VALUES (EXISTS(SELECT 1 FROM sqlite_schema WHERE name = 'testjunktable'));", &sqlitex.ExecOptions{
+			ResultFunc: func(stmt *sqlite.Stmt) error {
+				if stmt.ColumnBool(0) {
+					t.Error("testjunktable exists")
+				}
+				return nil
+			},
+		})
+		if err != nil {
+			t.Error(err)
+		}
+
+		// The version must have been reset to the one this package writes.
+		err = sqlitex.ExecuteTransient(conn, "PRAGMA user_version;", &sqlitex.ExecOptions{
+			ResultFunc: func(stmt *sqlite.Stmt) error {
+				if got := stmt.ColumnInt(0); got != currentUserVersion {
+					t.Errorf("user_version = %d; want %d", got, currentUserVersion)
+				}
+				return nil
+			},
+		})
+		if err != nil {
+			t.Error(err)
+		}
+	})
+}
+
+// TestCopyFrom creates a one-commit repository, copies it into a fresh cache,
+// and checks that Cat returns the commit object byte-for-byte.
+func TestCopyFrom(t *testing.T) {
+	const (
+		fileName     = "foo.txt"
+		fileContents = "Hello, World!\n"
+
+		commitMessage = "Initial import"
+		// NOTE(review): the "<email>" part of this identity looks like it was
+		// stripped when this patch was extracted; confirm against the original commit.
+		commitAuthor object.User = "Ross Light "
+	)
+	commitTime := time.Date(2023, time.December, 2, 17, 30, 0, 0, time.UTC)
+
+	ctx := context.Background()
+	gitDir := filesystem.Dir(t.TempDir())
+	g, err := git.New(git.Options{Dir: gitDir.String()})
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := g.Init(ctx, "."); err != nil {
+		t.Fatal(err)
+	}
+	err = gitDir.Apply(filesystem.Write(fileName, fileContents))
+	if err != nil {
+		t.Fatal(err)
+	}
+	err = g.Add(ctx, []git.Pathspec{git.LiteralPath(fileName)}, git.AddOptions{})
+	if err != nil {
+		t.Fatal(err)
+	}
+	err = g.Commit(ctx, commitMessage, git.CommitOptions{
+		Author:     commitAuthor,
+		AuthorTime: commitTime,
+		Committer:  commitAuthor,
+		CommitTime: commitTime,
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Compute the expected commit independently of Git
+	// and make sure it names the same object Git created.
+	fileObjectName, err := object.BlobSum(strings.NewReader(fileContents), int64(len(fileContents)))
+	if err != nil {
+		t.Fatal(err)
+	}
+	treeObjectName := object.Tree{{
+		Name:     fileName,
+		Mode:     object.ModePlain,
+		ObjectID: fileObjectName,
+	}}.SHA1()
+	commitObject := &object.Commit{
+		Tree:       treeObjectName,
+		Author:     commitAuthor,
+		AuthorTime: commitTime,
+		Committer:  commitAuthor,
+		CommitTime: commitTime,
+		Message:    commitMessage,
+	}
+	want, err := commitObject.MarshalBinary()
+	if err != nil {
+		t.Fatal(err)
+	}
+	commitObjectName := commitObject.SHA1()
+	headRev, err := g.Head(ctx)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if headRev.Commit != commitObjectName {
+		t.Fatalf("%s = %v; want %v", git.Head, headRev.Commit, commitObjectName)
+	}
+
+	cache, err := Open(ctx, filepath.Join(t.TempDir(), "foo.db"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer func() {
+		if err := cache.Close(); err != nil {
+			t.Error(err)
+		}
+	}()
+	gitClient, err := client.NewRemote(client.URLFromPath(gitDir.String()), nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := cache.CopyFrom(ctx, gitClient); err != nil {
+		t.Error("CopyFrom:", err)
+	}
+
+	got := new(bytes.Buffer)
+	gotType, err := cache.Cat(ctx, got, commitObjectName)
+	if err != nil {
+		t.Fatal("Cat:", err)
+	}
+	if wantType := object.TypeCommit; gotType != wantType {
+		t.Errorf("type = %q; want %q", gotType, wantType)
+	}
+	if diff := cmp.Diff(want, got.Bytes()); diff != "" {
+		t.Errorf("content (-want +got):\n%s", diff)
+	}
+}
diff --git a/internal/repocache/schema.sql b/internal/repocache/schema.sql
new file mode 100644
index 0000000..2794ca8
--- /dev/null
+++ b/internal/repocache/schema.sql
@@ -0,0 +1,93 @@
+create table "objects" (
+  "oid" integer
+    primary key
+    not null,
+  "sha1" blob
+    not null
+    unique
+    check (length("sha1") = 20),
+  "type" text
+    check ("type" is null or "type" in ('blob', 'tree', 'commit', 'tag')),
+  "size" integer
+    not null
+    check ("size" >= -1)
+    default -1, -- a negative size marks a row whose object is not stored yet
+  "content" blob -- zlib-compressed object body; null until stored
+) strict;
+
+create table "users" (
+  "user_id" integer
+    primary key
+    not null,
+  "user" text
+    not null
+    unique
+    check (length("user") > 0)
+) strict;
+
+create table "commits" (
+  "oid" integer
+    primary key
+    not null
+    references "objects",
+  "tree" integer
+    not null
+    references "objects",
+
+  "author" integer
+    references "users",
+  "author_timestamp" integer,
+  "author_tzoffset_mins" integer
+    check ("author_tzoffset_mins" is null or abs("author_tzoffset_mins") < 60 * 100),
+
+  "committer" integer
+    references "users",
+  "committer_timestamp" integer,
+  "committer_tzoffset_mins" integer
+    check ("committer_tzoffset_mins" is null or abs("committer_tzoffset_mins") < 60 * 100),
+
+  "message" text
+) strict; -- strict, like every other table in this schema
+
+create table "commit_parents" (
+  "oid" integer
+    not null
+    references "commits",
+  "n" integer
+    not null
+    default 0
+    check ("n" >= 0),
+  "parent" integer
+    not null
+    references "objects",
+  primary key ("oid", "n")
+) strict;
+
+create table "label_names" (
+  "label_id" integer
+    primary key
+    not null,
+  "label_name" text
+    not null
+    unique
+    check ("label_name" regexp '^\S+$') -- anchored: the whole name must be non-space
+) strict;
+
+create table "labels" (
+  "oid" integer
+    not null
+    references "commits",
+  "label_id" integer
+    not null
+    references "label_names",
+  "label_type" integer
+    not null
+    default 1
+    check ("label_type" in (-1, 0, 1)),
+  "orig_id" integer
+    not null
+    references "commits",
+  "value" text
+    check ("value" is null or "value" regexp '^\S*$'), -- anchored: no whitespace anywhere
+  primary key ("oid", "label_id")
+) strict;
diff --git a/internal/repocache/sync.go b/internal/repocache/sync.go
new file mode 100644
index 0000000..f47bd21
--- /dev/null
+++ b/internal/repocache/sync.go
@@ -0,0 +1,178 @@
+// Copyright 2023 The gg Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package repocache
+
+import (
+	"bufio"
+	"compress/zlib"
+	"context"
+	"crypto/sha1"
+	"fmt"
+	"io"
+
+	"gg-scm.io/pkg/git/githash"
+	"gg-scm.io/pkg/git/object"
+	"gg-scm.io/pkg/git/packfile"
+	"gg-scm.io/pkg/git/packfile/client"
+	"zombiezen.com/go/sqlite"
+	"zombiezen.com/go/sqlite/sqlitefile"
+	"zombiezen.com/go/sqlite/sqlitex"
+)
+
+// CopyFrom caches any objects from the remote not present in the cache.
+//
+// It pulls HEAD, branches, and tags from remote,
+// asking for a blob-less pack when the remote supports filters,
+// and stores every commit, tree, and tag found in the pack
+// inside a single immediate transaction.
+// Any error is returned with a "cache git data" prefix
+// and rolls the transaction back.
+func (c *Cache) CopyFrom(ctx context.Context, remote *client.Remote) (err error) {
+	// Registered first so it runs last and also covers a failed commit.
+	defer func() {
+		if err != nil {
+			err = fmt.Errorf("cache git data: %w", err)
+		}
+	}()
+
+	stream, err := remote.StartPull(ctx)
+	if err != nil {
+		return err
+	}
+	defer stream.Close()
+
+	refs, err := stream.ListRefs(string(githash.Head), "refs/heads/", "refs/tags/")
+	if err != nil {
+		return err
+	}
+
+	req := new(client.PullRequest)
+	// TODO(soon): Fill in req.Have.
+	if stream.Capabilities().Has(client.PullCapFilter) {
+		req.Filter = "blob:none"
+	}
+	for _, ref := range refs {
+		req.Want = append(req.Want, ref.ObjectID)
+	}
+	resp, err := stream.Negotiate(req)
+	if err != nil {
+		return err
+	}
+	defer resp.Packfile.Close()
+
+	c.conn.SetInterrupt(ctx.Done())
+	defer c.conn.SetInterrupt(nil)
+	endFn, err := sqlitex.ImmediateTransaction(c.conn)
+	if err != nil {
+		return err
+	}
+	defer endFn(&err)
+
+	// Scratch space for one object's compressed content, reused per object.
+	contentsBuf, err := sqlitefile.NewBufferSize(c.conn, syncPageSize)
+	if err != nil {
+		return err
+	}
+	defer contentsBuf.Close()
+
+	r := packfile.NewReader(bufio.NewReader(resp.Packfile))
+	h := sha1.New()
+	var prefixBuf []byte
+	var sumBuf githash.SHA1
+	var newCommits []githash.SHA1
+	insertStmt, err := sqlitex.PrepareTransientFS(c.conn, sqlFiles, "objects/insert.sql")
+	if err != nil {
+		return err
+	}
+	defer insertStmt.Finalize()
+
+	for {
+		hdr, err := r.Next()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return err
+		}
+		var tp object.Type
+		switch hdr.Type {
+		case packfile.Commit:
+			tp = object.TypeCommit
+		case packfile.Tree:
+			tp = object.TypeTree
+		case packfile.Tag:
+			tp = object.TypeTag
+		default:
+			// NOTE(review): this also skips delta entries, so a commit, tree,
+			// or tag that the remote sends deltified is not cached.
+			// Confirm whether the remote can send such entries here.
+			continue
+		}
+
+		// Hash "<type> <size>\0" plus content to obtain the object name
+		// while compressing the same bytes into contentsBuf.
+		h.Reset()
+		prefixBuf = object.AppendPrefix(prefixBuf[:0], tp, hdr.Size)
+		h.Write(prefixBuf)
+		contentsBuf.Reset()
+		zw := zlib.NewWriter(contentsBuf)
+		if _, err := io.Copy(io.MultiWriter(h, zw), r); err != nil {
+			return err
+		}
+		if err := zw.Close(); err != nil {
+			return err
+		}
+		h.Sum(sumBuf[:0])
+
+		isNew, err := insertObject(c.conn, insertStmt, sumBuf, tp, hdr.Size, contentsBuf.Len(), contentsBuf)
+		if err != nil {
+			return err
+		}
+		if isNew && tp == object.TypeCommit {
+			newCommits = append(newCommits, sumBuf)
+		}
+	}
+
+	// TODO(now): Index inserted objects.
+	_ = newCommits
+
+	return nil
+}
+
+// insertObject stores one object in the objects table inside a savepoint.
+// It runs insertStmt (objects/insert.sql) to reserve a zero-filled blob
+// of compressedSize bytes, then streams compressedReader into that blob.
+// inserted is false when the statement returns no row,
+// which is the case when a complete row for name already exists.
+// insertStmt is reset before insertObject returns successfully,
+// so the caller can reuse it for the next object.
+func insertObject(conn *sqlite.Conn, insertStmt *sqlite.Stmt, name githash.SHA1, tp object.Type, uncompressedSize, compressedSize int64, compressedReader io.Reader) (inserted bool, err error) {
+	defer func() {
+		if err != nil {
+			err = fmt.Errorf("cache %s %v: %w", tp, name, err)
+		}
+	}()
+	defer sqlitex.Save(conn)(&err)
+
+	insertStmt.SetBytes(":sha1", name[:])
+	insertStmt.SetText(":type", string(tp))
+	insertStmt.SetInt64(":uncompressed_size", uncompressedSize)
+	insertStmt.SetInt64(":compressed_size", compressedSize)
+	inserted, err = insertStmt.Step()
+	if err != nil {
+		return false, err
+	}
+	var oid int64
+	if inserted {
+		// Read the RETURNING column before Reset discards the row.
+		oid = insertStmt.GetInt64("oid")
+	}
+	if err := insertStmt.Reset(); err != nil {
+		return false, err
+	}
+	if !inserted {
+		return false, nil
+	}
+
+	contentCol, err := conn.OpenBlob("", objectsTable, contentColumn, oid, true)
+	if err != nil {
+		return false, err
+	}
+	_, copyErr := io.Copy(contentCol, compressedReader)
+	closeErr := contentCol.Close()
+	if copyErr != nil {
+		return false, copyErr
+	}
+	if closeErr != nil {
+		return false, closeErr
+	}
+	return true, nil
+}
+
+// syncPageSize is the page size of the temporary buffer
+// that holds one object's compressed content during CopyFrom.
+const syncPageSize = 32 << 10 // 32 KiB