From 54aae59837e68abf7a5794bcc7541dae118c673f Mon Sep 17 00:00:00 2001 From: Patrick DeVivo Date: Fri, 27 Nov 2020 19:19:05 -0500 Subject: [PATCH 01/20] first pass at adding ability to query the GitHub API --- cmd/root.go | 6 +- go.mod | 5 + go.sum | 4 + pkg/askgit/askgit.go | 25 +++- pkg/ghqlite/repo_iterator.go | 229 ++++++++++++++++++++++++++++++ pkg/ghqlite/repo_iterator_test.go | 35 +++++ pkg/ghqlite/repos_vtab.go | 194 +++++++++++++++++++++++++ pkg/ghqlite/repos_vtab_test.go | 3 + 8 files changed, 494 insertions(+), 7 deletions(-) create mode 100644 pkg/ghqlite/repo_iterator.go create mode 100644 pkg/ghqlite/repo_iterator_test.go create mode 100644 pkg/ghqlite/repos_vtab.go create mode 100644 pkg/ghqlite/repos_vtab_test.go diff --git a/cmd/root.go b/cmd/root.go index 80b732b3..e231424f 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -21,6 +21,7 @@ var ( useGitCLI bool cui bool presetQuery string + githubOrg string ) func init() { @@ -29,6 +30,7 @@ func init() { rootCmd.PersistentFlags().BoolVar(&useGitCLI, "use-git-cli", false, "whether to use the locally installed git command (if it's available). 
Defaults to false.") rootCmd.PersistentFlags().BoolVarP(&cui, "interactive", "i", false, "whether to run in interactive mode, which displays a terminal UI") rootCmd.PersistentFlags().StringVar(&presetQuery, "preset", "", "used to pick a preset query") + rootCmd.PersistentFlags().StringVar(&githubOrg, "github-org", "", "used to pick a preset query") } func handleError(err error) { @@ -108,7 +110,9 @@ var rootCmd = &cobra.Command{ return } ag, err := askgit.New(dir, &askgit.Options{ - UseGitCLI: useGitCLI, + UseGitCLI: useGitCLI, + GitHubOrg: githubOrg, + GitHubToken: os.Getenv("GITHUB_TOKEN"), }) handleError(err) diff --git a/go.mod b/go.mod index 0d4848a2..e87c4da4 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,8 @@ go 1.13 require ( github.com/DATA-DOG/go-sqlmock v1.5.0 github.com/gitsight/go-vcsurl v1.0.0 + github.com/google/go-github v17.0.0+incompatible + github.com/google/go-querystring v1.0.0 // indirect github.com/jroimartin/gocui v0.4.0 github.com/kr/text v0.2.0 // indirect github.com/libgit2/git2go/v30 v30.2.2 @@ -15,6 +17,9 @@ require ( github.com/olekukonko/tablewriter v0.0.4 github.com/spf13/cobra v1.1.1 golang.org/x/crypto v0.0.0-20200820211705-5c72a883971a // indirect + golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45 + golang.org/x/sync v0.0.0-20190423024810-112230192c58 + golang.org/x/time v0.0.0-20190308202827-9d24e82272b4 gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f // indirect gopkg.in/yaml.v2 v2.3.0 // indirect ) diff --git a/go.sum b/go.sum index 808e17fd..d4f92215 100644 --- a/go.sum +++ b/go.sum @@ -66,6 +66,10 @@ github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Z github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-github v17.0.0+incompatible 
h1:N0LgJ1j65A7kfXrZnUDaYCs/Sf4rEjNlfyDHW9dolSY= +github.com/google/go-github v17.0.0+incompatible/go.mod h1:zLgOLi98H3fifZn+44m+umXrS52loVEgC2AApnigrVQ= +github.com/google/go-querystring v1.0.0 h1:Xkwi/a1rcvNg1PPYe5vI8GbeBY/jrVuDX5ASuANWTrk= +github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= diff --git a/pkg/askgit/askgit.go b/pkg/askgit/askgit.go index 77017d29..815611fc 100644 --- a/pkg/askgit/askgit.go +++ b/pkg/askgit/askgit.go @@ -9,6 +9,7 @@ import ( "path" "strings" + "github.com/augmentable-dev/askgit/pkg/ghqlite" "github.com/augmentable-dev/askgit/pkg/gitqlite" "github.com/gitsight/go-vcsurl" git "github.com/libgit2/git2go/v30" @@ -42,11 +43,17 @@ func init() { if err != nil { return err } + err = conn.CreateModule("git_stats", gitqlite.NewGitStatsModule()) if err != nil { return err } + err = conn.CreateModule("github_repos", ghqlite.NewReposModule()) + if err != nil { + return err + } + err = loadHelperFuncs(conn) if err != nil { return err @@ -60,10 +67,14 @@ func init() { type AskGit struct { db *sql.DB repoPath string + options *Options } type Options struct { - UseGitCLI bool + UseGitCLI bool + GitHubToken string + GitHubOrg string + GitHubUser string } // New creates an instance of AskGit @@ -79,7 +90,7 @@ func New(repoPath string, options *Options) (*AskGit, error) { return nil, err } - g := &AskGit{db: db, repoPath: repoPath} + g := &AskGit{db: db, repoPath: repoPath, options: options} err = g.ensureTables(options) if err != nil { @@ -132,10 +143,12 @@ func (a *AskGit) ensureTables(options *Options) error { return err } - // _, err = a.db.Exec(fmt.Sprintf("CREATE VIRTUAL TABLE IF 
NOT EXISTS repos USING github_repos(%s, '%s');", os.Getenv("GITHUB_ORG"), os.Getenv("GITHUB_TOKEN"))) - // if err != nil { - // return err - // } + if a.options.GitHubOrg != "" { + _, err = a.db.Exec(fmt.Sprintf("CREATE VIRTUAL TABLE IF NOT EXISTS repos USING github_repos(%s, '%s');", a.options.GitHubOrg, a.options.GitHubToken)) + if err != nil { + return err + } + } return nil } diff --git a/pkg/ghqlite/repo_iterator.go b/pkg/ghqlite/repo_iterator.go new file mode 100644 index 00000000..255e74be --- /dev/null +++ b/pkg/ghqlite/repo_iterator.go @@ -0,0 +1,229 @@ +package ghqlite + +import ( + "context" + "net/http" + "time" + + "github.com/google/go-github/github" + "golang.org/x/oauth2" + "golang.org/x/sync/errgroup" + "golang.org/x/time/rate" +) + +// RepoIterator iterates over GitHub repositories belonging to a single owner +type RepoIterator struct { + options *RepoIteratorOptions + owner string + ownerType OwnerType + currentPages []*page + totalPages *int + pageIndex int + repoIndex int +} + +type page struct { + repos []*github.Repository + res *github.Response +} + +type OwnerType string + +// TODO this behavior might need to be split out into two separate iterators +// one for orgs, one for users +const ( + OwnerTypeOrganization OwnerType = "Organization" + OwnerTypeUser OwnerType = "User" +) + +// RepoIteratorOptions determines how the iterator should behave +type RepoIteratorOptions struct { + Client *github.Client // GitHub API client to use when making requests + PerPage int // number of repos per page, GitHub API caps it at 100 + PreloadPages int // number of pages to "preload" - i.e. 
download concurrently + RateLimiter *rate.Limiter // rate limiter to use (tune to avoid hitting the API rate limits) +} + +// we define a custom http.Transport here that removes the Accept header +// see this issue for why it needs to be done this way: https://github.com/google/go-github/issues/999 +// the header is removed as the defaults used by go-github sometimes cause 502s from the GitHub API +type noAcceptTransport struct { + originalTransport http.RoundTripper +} + +func (t *noAcceptTransport) RoundTrip(r *http.Request) (*http.Response, error) { + r.Header.Del("Accept") + return t.originalTransport.RoundTrip(r) +} + +// NewRepoIterator creates a *RepoIterator from an owner (GitHub organization or user) +// oauth token and options. If the token is an empty string, no authentication is used +// note that unauthenticated requests are subject to a more stringent rate limit from the API +func NewRepoIterator(owner string, ownerType OwnerType, token string, options *RepoIteratorOptions) *RepoIterator { + if options.Client == nil { + if token != "" { // if token is specified setup an oauth http client + ts := oauth2.StaticTokenSource( + &oauth2.Token{AccessToken: token}, + ) + tc := oauth2.NewClient(context.Background(), ts) + + tc.Transport = &noAcceptTransport{tc.Transport} + options.Client = github.NewClient(tc) + } else { + options.Client = github.NewClient(nil) + } + } + if options.PreloadPages <= 0 { + // we want to make sure this value is always at least 1 - it's the number of pages + // the iterator will fetch concurrently + options.PreloadPages = 1 + } + if options.RateLimiter == nil { + // if the rate limiter is not provided, supply a default one + // https://docs.github.com/en/free-pro-team@latest/developers/apps/rate-limits-for-github-apps + options.RateLimiter = rate.NewLimiter(rate.Every(10*time.Second), 15) + } + return &RepoIterator{options, owner, ownerType, nil, nil, 0, 0} +} + +// fetchPage retrieves a single page of repos +func (iter 
*RepoIterator) fetchPage(p int) ([]*github.Repository, *github.Response, error) { + listOpt := github.ListOptions{Page: p} + + // use the user provided per page value, if it's greater than 0 + // otherwise don't set it and use the GitHub API default + if iter.options.PerPage > 0 { + listOpt.PerPage = iter.options.PerPage + } + + switch iter.ownerType { + case OwnerTypeOrganization: + opt := &github.RepositoryListByOrgOptions{ + ListOptions: listOpt, + } + return iter.options.Client.Repositories.ListByOrg(context.Background(), iter.owner, opt) + case OwnerTypeUser: + opt := &github.RepositoryListOptions{ + ListOptions: listOpt, + } + return iter.options.Client.Repositories.List(context.Background(), iter.owner, opt) + } + + // should never reach this point + return nil, nil, nil +} + +// fetchPages retries a *set* of pages given a nextPage +// if X is the nextPage and N is the preload pages value +// this will call fetchPage N times retrieving the X+N page +func (iter *RepoIterator) fetchPages(nextPage int) error { + + // retrieve the N pages concurrently + g := new(errgroup.Group) + for p := 0; p < iter.options.PreloadPages; p++ { + + // if we already know the total number of expected pages, and we're requesting a page outside of that + // break the loop, since we've reached the end + // if a current page is nil, it indicates we're over the last page + if iter.totalPages != nil && nextPage+p > *iter.totalPages { + iter.currentPages[p] = nil + break + } + + func(p int) { + g.Go(func() error { + // apply the rate limiter here + err := iter.options.RateLimiter.Wait(context.Background()) + if err != nil { + return err + } + + // fetch the page + repos, res, err := iter.fetchPage(nextPage + p) + if err != nil { + return err + } + + // TODO remove this commented line at some point, it can be useful for debugging rate limit issues + // fmt.Println(res.Rate.Limit, res.Rate.Remaining, res.Rate.Reset.Format(time.RFC3339)) + + // if there are repos returned + // if we've 
preloaded pages beyond the end of list, responses won't have repos + if len(repos) > 0 { + // store the new page we just retrieved + // in currentPages in the right place + newPage := page{repos, res} + iter.currentPages[p] = &newPage + } + + // if the response tells us what the last page is, set it + // this is used above to check whether additional pages should be fetched + if res.LastPage != 0 { + iter.totalPages = &res.LastPage + } + + return nil + }) + }(p) + } + + return g.Wait() +} + +// Next yields the next repository in the iterator +// it should return nil, nil if the iteration is complete and there are no more repositories to retrieve +func (iter *RepoIterator) Next() (*github.Repository, error) { + + // if we are at the very beginning of the iteration, there will be no (nil) currentPages + if iter.currentPages == nil { + // initialize the currentPages the size of the number of pages to preload + iter.currentPages = make([]*page, iter.options.PreloadPages) + // fetch the first pages (starting at 1, but fetching N pages where N = number to preload) + err := iter.fetchPages(1) + if err != nil { + return nil, err + } + } + + // if the repoIndex has exceeded the number of repos held in the current page by 1 + // increment to the next page and reset the repo index + if iter.repoIndex == len(iter.currentPages[iter.pageIndex].repos) { + iter.pageIndex++ + iter.repoIndex = 0 + } + + // if we've gone over the last page, however + if iter.pageIndex == len(iter.currentPages) { + // retrieve the last page we were on (but exhausted already) + lastPage := iter.currentPages[iter.pageIndex-1] + // if the API response for this previous page indicates there's no next page + // we're at the end of the iteration, return nil + next := lastPage.res.NextPage + if next == 0 { + return nil, nil + } + + // otherwise, reset the page index and fetch the next batch of pages + iter.pageIndex = 0 + err := iter.fetchPages(next) + if err != nil { + return nil, err + } + } + + // if 
we've reached a nil page + // which is possible, as part of the batch may have exceeded the total number of pages + // we're at the end of iteration + if iter.currentPages[iter.pageIndex] == nil { + return nil, nil + } + + currentPage := iter.currentPages[iter.pageIndex] + // fmt.Println(len(currentPage.repos)) + + // finally, pull out the current repo the indices point to to be returned + currentRepo := currentPage.repos[iter.repoIndex] + iter.repoIndex++ + + return currentRepo, nil +} diff --git a/pkg/ghqlite/repo_iterator_test.go b/pkg/ghqlite/repo_iterator_test.go new file mode 100644 index 00000000..5852c85e --- /dev/null +++ b/pkg/ghqlite/repo_iterator_test.go @@ -0,0 +1,35 @@ +package ghqlite + +import ( + "fmt" + "testing" +) + +func TestRepoIterator(t *testing.T) { + testCases := []*RepoIteratorOptions{ + {PerPage: 1, PreloadPages: 1}, + {PerPage: 5, PreloadPages: 2}, + {PerPage: 100, PreloadPages: 2}, + } + + minRepos := 10 + for i, options := range testCases { + iter := NewRepoIterator("augmentable-dev", OwnerTypeOrganization, "", options) + + t.Run(fmt.Sprintf("Case#%d", i), func(t *testing.T) { + for k := 0; k < minRepos; k++ { + repo, err := iter.Next() + if err != nil { + t.Fatal(err) + } + if repo == nil { + t.Fatalf("expected at least %d repos", minRepos) + } + if repo.GetName() == "" { + t.Fatalf("expected a repo name") + } + } + }) + + } +} diff --git a/pkg/ghqlite/repos_vtab.go b/pkg/ghqlite/repos_vtab.go new file mode 100644 index 00000000..3dc12bb7 --- /dev/null +++ b/pkg/ghqlite/repos_vtab.go @@ -0,0 +1,194 @@ +package ghqlite + +import ( + "encoding/json" + "fmt" + "time" + + "github.com/google/go-github/github" + "github.com/mattn/go-sqlite3" + "golang.org/x/time/rate" +) + +type ReposModule struct{} + +func NewReposModule() *ReposModule { + return &ReposModule{} +} + +func (m *ReposModule) Create(c *sqlite3.SQLiteConn, args []string) (sqlite3.VTab, error) { + err := c.DeclareVTab(fmt.Sprintf(` + CREATE TABLE %s ( + id INT, + node_id TEXT, 
+ name TEXT, + full_name TEXT, + owner TEXT, + private BOOL, + description TEXT, + fork BOOL, + homepage TEXT, + language TEXT, + forks_count INT, + stargazers_count INT, + watchers_count INT, + size INT, + default_branch TEXT, + open_issues_count INT, + topics TEXT, + has_issues BOOL, + has_projects BOOL, + has_wiki BOOL, + has_pages BOOL, + has_downloads BOOL, + archived BOOL, + pushed_at DATETIME, + created_at DATETIME, + updated_at DATETIME, + permissions TEXT + )`, args[0])) + if err != nil { + return nil, err + } + + return &reposTable{args[3], args[4][1 : len(args[4])-1]}, nil +} + +func (m *ReposModule) Connect(c *sqlite3.SQLiteConn, args []string) (sqlite3.VTab, error) { + return m.Create(c, args) +} + +func (m *ReposModule) DestroyModule() {} + +type reposTable struct { + owner string + token string +} + +func (v *reposTable) Open() (sqlite3.VTabCursor, error) { + return &reposCursor{v, nil, nil, false}, nil +} + +func (v *reposTable) BestIndex(cst []sqlite3.InfoConstraint, ob []sqlite3.InfoOrderBy) (*sqlite3.IndexResult, error) { + return &sqlite3.IndexResult{}, nil +} + +func (v *reposTable) Disconnect() error { return nil } +func (v *reposTable) Destroy() error { return nil } + +type reposCursor struct { + table *reposTable + iter *RepoIterator + currentRepo *github.Repository + eof bool +} + +func (vc *reposCursor) Column(c *sqlite3.SQLiteContext, col int) error { + repo := vc.currentRepo + switch col { + case 0: + c.ResultInt64(repo.GetID()) + case 1: + c.ResultText(repo.GetNodeID()) + case 2: + c.ResultText(repo.GetName()) + case 3: + c.ResultText(repo.GetFullName()) + case 4: + c.ResultText(repo.GetOwner().GetLogin()) + case 5: + c.ResultBool(repo.GetPrivate()) + case 6: + c.ResultText(repo.GetDescription()) + case 7: + c.ResultBool(repo.GetFork()) + case 8: + c.ResultText(repo.GetHomepage()) + case 9: + c.ResultText(repo.GetLanguage()) + case 10: + c.ResultInt(repo.GetForksCount()) + case 11: + c.ResultInt(repo.GetStargazersCount()) + case 12: + 
c.ResultInt(repo.GetWatchersCount()) + case 13: + c.ResultInt(repo.GetSize()) + case 14: + c.ResultText(repo.GetDefaultBranch()) + case 15: + c.ResultInt(repo.GetOpenIssuesCount()) + case 16: + str, err := json.Marshal(repo.Topics) + if err != nil { + return err + } + c.ResultText(string(str)) + case 17: + c.ResultBool(repo.GetHasIssues()) + case 18: + c.ResultBool(repo.GetHasProjects()) + case 19: + c.ResultBool(repo.GetHasWiki()) + case 20: + c.ResultBool(repo.GetHasPages()) + case 21: + c.ResultBool(repo.GetHasDownloads()) + case 22: + c.ResultBool(repo.GetArchived()) + case 23: + c.ResultText(repo.PushedAt.Format(time.RFC3339Nano)) + case 24: + c.ResultText(repo.CreatedAt.Format(time.RFC3339Nano)) + case 25: + c.ResultText(repo.UpdatedAt.Format(time.RFC3339Nano)) + case 26: + str, err := json.Marshal(repo.GetPermissions()) + if err != nil { + return err + } + c.ResultText(string(str)) + } + return nil +} + +func (vc *reposCursor) Filter(idxNum int, idxStr string, vals []interface{}) error { + var rateLimiter *rate.Limiter + if vc.table.token == "" { + rateLimiter = rate.NewLimiter(rate.Every(time.Minute), 60) + } else { + rateLimiter = rate.NewLimiter(rate.Every(time.Minute), 80) + } + iter := NewRepoIterator(vc.table.owner, OwnerTypeOrganization, vc.table.token, &RepoIteratorOptions{ + PerPage: 100, + PreloadPages: 10, + RateLimiter: rateLimiter, + }) + vc.iter = iter + return vc.Next() +} + +func (vc *reposCursor) Next() error { + nextRepo, err := vc.iter.Next() + if err != nil { + return err + } + if nextRepo == nil { + vc.eof = true + return nil + } + vc.currentRepo = nextRepo + return nil +} + +func (vc *reposCursor) EOF() bool { + return vc.eof +} + +func (vc *reposCursor) Rowid() (int64, error) { + return vc.currentRepo.GetID(), nil +} + +func (vc *reposCursor) Close() error { + return nil +} diff --git a/pkg/ghqlite/repos_vtab_test.go b/pkg/ghqlite/repos_vtab_test.go new file mode 100644 index 00000000..03ffbd74 --- /dev/null +++ 
b/pkg/ghqlite/repos_vtab_test.go @@ -0,0 +1,3 @@ +package ghqlite + +// TODO From a339beb083bd35ac56f3bd4d44a88202aba4cdd6 Mon Sep 17 00:00:00 2001 From: Patrick DeVivo Date: Fri, 27 Nov 2020 19:38:48 -0500 Subject: [PATCH 02/20] start setting up some testing --- pkg/ghqlite/ghqlite_test.go | 55 ++++++++++++++++++++++++++++++++++ pkg/ghqlite/repos_vtab_test.go | 12 +++++++- pkg/gitqlite/gitqlite_test.go | 4 +++ 3 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 pkg/ghqlite/ghqlite_test.go diff --git a/pkg/ghqlite/ghqlite_test.go b/pkg/ghqlite/ghqlite_test.go new file mode 100644 index 00000000..8aafecc7 --- /dev/null +++ b/pkg/ghqlite/ghqlite_test.go @@ -0,0 +1,55 @@ +package ghqlite + +import ( + "database/sql" + "fmt" + "os" + "testing" + + "github.com/mattn/go-sqlite3" +) + +var ( + DB *sql.DB +) + +func init() { + sql.Register("ghqlite", &sqlite3.SQLiteDriver{ + ConnectHook: func(conn *sqlite3.SQLiteConn) error { + err := conn.CreateModule("github_repos", NewReposModule()) + if err != nil { + return err + } + + return nil + }, + }) +} + +func TestMain(m *testing.M) { + err := initFixtureDB() + if err != nil { + panic(err) + } + code := m.Run() + err = DB.Close() + if err != nil { + panic(err) + } + os.Exit(code) +} + +func initFixtureDB() error { + db, err := sql.Open("ghqlite", ":memory:") + if err != nil { + return err + } + + _, err = db.Exec(fmt.Sprintf("CREATE VIRTUAL TABLE IF NOT EXISTS repos USING github_repos(%s, '%s');", "augmentable-dev", os.Getenv("GITHUB_TOKEN"))) + if err != nil { + return err + } + + DB = db + return nil +} diff --git a/pkg/ghqlite/repos_vtab_test.go b/pkg/ghqlite/repos_vtab_test.go index 03ffbd74..f481bf66 100644 --- a/pkg/ghqlite/repos_vtab_test.go +++ b/pkg/ghqlite/repos_vtab_test.go @@ -1,3 +1,13 @@ package ghqlite -// TODO +import ( + "testing" +) + +func TestReposTable(t *testing.T) { + _, err := DB.Query("SELECT count(*) FROM repos") + if err != nil { + t.Fatal(err) + } + +} diff --git 
a/pkg/gitqlite/gitqlite_test.go b/pkg/gitqlite/gitqlite_test.go index fa5f6f2a..1fe260c2 100644 --- a/pkg/gitqlite/gitqlite_test.go +++ b/pkg/gitqlite/gitqlite_test.go @@ -66,6 +66,10 @@ func TestMain(m *testing.M) { } code := m.Run() close() + err = fixtureDB.Close() + if err != nil { + panic(err) + } os.Exit(code) } From 42c639633e1e8de56fc961e6722c80fabac981c5 Mon Sep 17 00:00:00 2001 From: Patrick DeVivo Date: Fri, 27 Nov 2020 22:13:58 -0500 Subject: [PATCH 03/20] Update repos_vtab_test.go --- pkg/ghqlite/repos_vtab_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/ghqlite/repos_vtab_test.go b/pkg/ghqlite/repos_vtab_test.go index f481bf66..284bce41 100644 --- a/pkg/ghqlite/repos_vtab_test.go +++ b/pkg/ghqlite/repos_vtab_test.go @@ -5,7 +5,7 @@ import ( ) func TestReposTable(t *testing.T) { - _, err := DB.Query("SELECT count(*) FROM repos") + _, err := DB.Query("SELECT * FROM repos LIMIT 5") if err != nil { t.Fatal(err) } From 22225170d57b7290903fe23b55486cd8dcc8ac4d Mon Sep 17 00:00:00 2001 From: Patrick DeVivo Date: Sat, 28 Nov 2020 09:31:36 -0500 Subject: [PATCH 04/20] rename some of the test helpers --- pkg/ghqlite/ghqlite_test.go | 33 +++++++++++++++++++++++++++++++ pkg/ghqlite/repos_vtab.go | 2 +- pkg/ghqlite/repos_vtab_test.go | 7 ++++++- pkg/gitqlite/git_branches_test.go | 4 ++-- pkg/gitqlite/git_files_test.go | 4 ++-- pkg/gitqlite/git_log_cli_test.go | 4 ++-- pkg/gitqlite/git_log_test.go | 6 +++--- pkg/gitqlite/git_stats_test.go | 8 ++++---- pkg/gitqlite/git_tags_test.go | 4 ++-- pkg/gitqlite/gitqlite_test.go | 13 ++++++------ 10 files changed, 61 insertions(+), 24 deletions(-) diff --git a/pkg/ghqlite/ghqlite_test.go b/pkg/ghqlite/ghqlite_test.go index 8aafecc7..576e73e9 100644 --- a/pkg/ghqlite/ghqlite_test.go +++ b/pkg/ghqlite/ghqlite_test.go @@ -53,3 +53,36 @@ func initFixtureDB() error { DB = db return nil } + +func GetRowContents(rows *sql.Rows) (colCount int, contents [][]string, err error) { + columns, err := 
rows.Columns() + if err != nil { + return colCount, nil, err + } + + pointers := make([]interface{}, len(columns)) + container := make([]sql.NullString, len(columns)) + + for i := range pointers { + pointers[i] = &container[i] + } + + for rows.Next() { + err = rows.Scan(pointers...) + if err != nil { + return colCount, nil, err + } + + r := make([]string, len(columns)) + for i, c := range container { + if c.Valid { + r[i] = c.String + } else { + r[i] = "NULL" + } + } + contents = append(contents, r) + } + return colCount, contents, err + +} diff --git a/pkg/ghqlite/repos_vtab.go b/pkg/ghqlite/repos_vtab.go index 3dc12bb7..ac0a6e0c 100644 --- a/pkg/ghqlite/repos_vtab.go +++ b/pkg/ghqlite/repos_vtab.go @@ -155,7 +155,7 @@ func (vc *reposCursor) Column(c *sqlite3.SQLiteContext, col int) error { func (vc *reposCursor) Filter(idxNum int, idxStr string, vals []interface{}) error { var rateLimiter *rate.Limiter if vc.table.token == "" { - rateLimiter = rate.NewLimiter(rate.Every(time.Minute), 60) + rateLimiter = rate.NewLimiter(rate.Every(time.Minute), 30) } else { rateLimiter = rate.NewLimiter(rate.Every(time.Minute), 80) } diff --git a/pkg/ghqlite/repos_vtab_test.go b/pkg/ghqlite/repos_vtab_test.go index 284bce41..4f548164 100644 --- a/pkg/ghqlite/repos_vtab_test.go +++ b/pkg/ghqlite/repos_vtab_test.go @@ -5,9 +5,14 @@ import ( ) func TestReposTable(t *testing.T) { - _, err := DB.Query("SELECT * FROM repos LIMIT 5") + rows, err := DB.Query("SELECT * FROM repos LIMIT 5") if err != nil { t.Fatal(err) } + _, contents, err := GetRowContents(rows) + if len(contents) != 5 { + t.Fatalf("expected: 5 rows, got: %d rows", len(contents)) + } + } diff --git a/pkg/gitqlite/git_branches_test.go b/pkg/gitqlite/git_branches_test.go index 7e873e15..3af5d11e 100644 --- a/pkg/gitqlite/git_branches_test.go +++ b/pkg/gitqlite/git_branches_test.go @@ -18,7 +18,7 @@ func TestBranches(t *testing.T) { t.Fatal(err) } - rowNum, contents, err := GetContents(branchRows) + rowNum, contents, err := 
GetRowContents(branchRows) if err != nil { t.Fatalf("err %d at row Number %d", err, rowNum) } @@ -60,7 +60,7 @@ func BenchmarkBranchCount(b *testing.B) { if err != nil { b.Fatal(err) } - rowNum, _, err := GetContents(rows) + rowNum, _, err := GetRowContents(rows) if err != nil { b.Fatalf("err %d at row Number %d", err, rowNum) } diff --git a/pkg/gitqlite/git_files_test.go b/pkg/gitqlite/git_files_test.go index 5dc9fd41..724fc32b 100644 --- a/pkg/gitqlite/git_files_test.go +++ b/pkg/gitqlite/git_files_test.go @@ -61,7 +61,7 @@ func TestFileColumns(t *testing.T) { t.Fatalf("expected %d columns got : %d", 6, len(columns)) } - _, contents, err := GetContents(columnQuery) + _, contents, err := GetRowContents(columnQuery) if err != nil { t.Fatal(err) } @@ -134,7 +134,7 @@ func TestFileByID(t *testing.T) { t.Fatal(err) } - _, contents, err := GetContents(rows) + _, contents, err := GetRowContents(rows) if err != nil { t.Fatal(err) } diff --git a/pkg/gitqlite/git_log_cli_test.go b/pkg/gitqlite/git_log_cli_test.go index 3016c2ca..017b68b4 100644 --- a/pkg/gitqlite/git_log_cli_test.go +++ b/pkg/gitqlite/git_log_cli_test.go @@ -52,7 +52,7 @@ func TestCommitCounts(t *testing.T) { if err != nil { t.Fatal(err) } - rowNum, contents, err := GetContents(rows) + rowNum, contents, err := GetRowContents(rows) if err != nil { t.Fatalf("err %d at row Number %d", err, rowNum) } @@ -82,7 +82,7 @@ func BenchmarkCLICommitCounts(b *testing.B) { if err != nil { b.Fatal(err) } - rowNum, _, err := GetContents(rows) + rowNum, _, err := GetRowContents(rows) if err != nil { b.Fatalf("err %d at row Number %d", err, rowNum) } diff --git a/pkg/gitqlite/git_log_test.go b/pkg/gitqlite/git_log_test.go index 9464736a..d3d15b94 100644 --- a/pkg/gitqlite/git_log_test.go +++ b/pkg/gitqlite/git_log_test.go @@ -55,7 +55,7 @@ func TestCommits(t *testing.T) { if err != nil { t.Fatal(err) } - rowNum, contents, err := GetContents(rows) + rowNum, contents, err := GetRowContents(rows) if err != nil { t.Fatalf("err 
%d at row Number %d", err, rowNum) } @@ -98,7 +98,7 @@ func TestCommitByID(t *testing.T) { } defer rows.Close() - _, contents, err := GetContents(rows) + _, contents, err := GetRowContents(rows) if err != nil { t.Fatal(err) } @@ -120,7 +120,7 @@ func BenchmarkCommitCounts(b *testing.B) { if err != nil { b.Fatal(err) } - rowNum, _, err := GetContents(rows) + rowNum, _, err := GetRowContents(rows) if err != nil { b.Fatalf("err %d at row Number %d", err, rowNum) } diff --git a/pkg/gitqlite/git_stats_test.go b/pkg/gitqlite/git_stats_test.go index 7264072d..0bde5239 100644 --- a/pkg/gitqlite/git_stats_test.go +++ b/pkg/gitqlite/git_stats_test.go @@ -32,7 +32,7 @@ func TestStatsTable(t *testing.T) { } defer rows.Close() - _, contents, err := GetContents(rows) + _, contents, err := GetRowContents(rows) if err != nil { t.Fatal(err) } @@ -50,7 +50,7 @@ func TestStatsTableCommitIDIndex(t *testing.T) { } defer rows.Close() - _, contents, err := GetContents(rows) + _, contents, err := GetRowContents(rows) if err != nil { t.Fatal(err) } @@ -94,7 +94,7 @@ func TestStatsTotals(t *testing.T) { } defer rows.Close() - _, contents, err := GetContents(rows) + _, contents, err := GetRowContents(rows) if err != nil { t.Fatal(err) } @@ -124,7 +124,7 @@ func BenchmarkStats(b *testing.B) { if err != nil { b.Fatal(err) } - rowNum, _, err := GetContents(rows) + rowNum, _, err := GetRowContents(rows) if err != nil { b.Fatalf("err %d at row Number %d", err, rowNum) } diff --git a/pkg/gitqlite/git_tags_test.go b/pkg/gitqlite/git_tags_test.go index 1ec59043..4c66433b 100644 --- a/pkg/gitqlite/git_tags_test.go +++ b/pkg/gitqlite/git_tags_test.go @@ -15,7 +15,7 @@ func TestTags(t *testing.T) { t.Fatal(err) } - rowNum, contents, err := GetContents(tagRows) + rowNum, contents, err := GetRowContents(tagRows) if err != nil { t.Fatalf("err %d at row Number %d", err, rowNum) } @@ -48,7 +48,7 @@ func BenchmarkTagsCounts(b *testing.B) { if err != nil { b.Fatal(err) } - rowNum, _, err := GetContents(rows) 
+ rowNum, _, err := GetRowContents(rows) if err != nil { b.Fatalf("err %d at row Number %d", err, rowNum) } diff --git a/pkg/gitqlite/gitqlite_test.go b/pkg/gitqlite/gitqlite_test.go index 1fe260c2..ebafd6eb 100644 --- a/pkg/gitqlite/gitqlite_test.go +++ b/pkg/gitqlite/gitqlite_test.go @@ -143,16 +143,15 @@ func GetRowsCount(rows *sql.Rows) int { return count } -func GetContents(rows *sql.Rows) (int, [][]string, error) { - count := 0 + +func GetRowContents(rows *sql.Rows) (colCount int, contents [][]string, err error) { columns, err := rows.Columns() if err != nil { - return count, nil, err + return colCount, nil, err } pointers := make([]interface{}, len(columns)) container := make([]sql.NullString, len(columns)) - var ret [][]string for i := range pointers { pointers[i] = &container[i] @@ -161,7 +160,7 @@ func GetContents(rows *sql.Rows) (int, [][]string, error) { for rows.Next() { err = rows.Scan(pointers...) if err != nil { - return count, nil, err + return colCount, nil, err } r := make([]string, len(columns)) @@ -172,8 +171,8 @@ func GetContents(rows *sql.Rows) (int, [][]string, error) { r[i] = "NULL" } } - ret = append(ret, r) + contents = append(contents, r) } - return count, ret, err + return colCount, contents, err } From 7aca057afe924a3c4a1d992114ae86e8eaf3d429 Mon Sep 17 00:00:00 2001 From: Patrick DeVivo Date: Sat, 28 Nov 2020 09:35:43 -0500 Subject: [PATCH 05/20] fix lint issue --- pkg/ghqlite/repos_vtab_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/ghqlite/repos_vtab_test.go b/pkg/ghqlite/repos_vtab_test.go index 4f548164..ed9ed79b 100644 --- a/pkg/ghqlite/repos_vtab_test.go +++ b/pkg/ghqlite/repos_vtab_test.go @@ -11,6 +11,10 @@ func TestReposTable(t *testing.T) { } _, contents, err := GetRowContents(rows) + if err != nil { + t.Fatal(err) + } + if len(contents) != 5 { t.Fatalf("expected: 5 rows, got: %d rows", len(contents)) } From 282b3b0086db5fb67efac0fbf133abaa69c6fc52 Mon Sep 17 00:00:00 2001 From: Patrick DeVivo Date: Sat, 
28 Nov 2020 09:49:57 -0500 Subject: [PATCH 06/20] allow user vs org to be specified when scanning repos --- cmd/root.go | 5 ++++- pkg/askgit/askgit.go | 16 ++++++++++++++-- pkg/ghqlite/ghqlite_test.go | 2 +- pkg/ghqlite/repos_vtab.go | 17 ++++++++++------- 4 files changed, 29 insertions(+), 11 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index e231424f..fab8a14b 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -22,6 +22,7 @@ var ( cui bool presetQuery string githubOrg string + githubUser string ) func init() { @@ -30,7 +31,8 @@ func init() { rootCmd.PersistentFlags().BoolVar(&useGitCLI, "use-git-cli", false, "whether to use the locally installed git command (if it's available). Defaults to false.") rootCmd.PersistentFlags().BoolVarP(&cui, "interactive", "i", false, "whether to run in interactive mode, which displays a terminal UI") rootCmd.PersistentFlags().StringVar(&presetQuery, "preset", "", "used to pick a preset query") - rootCmd.PersistentFlags().StringVar(&githubOrg, "github-org", "", "used to pick a preset query") + rootCmd.PersistentFlags().StringVar(&githubOrg, "github-org", "", "used to specify a GitHub *org* to query against when scanning repos") + rootCmd.PersistentFlags().StringVar(&githubUser, "github-user", "", "used to specify a GitHub *user* to query against when scanning repos") } func handleError(err error) { @@ -112,6 +114,7 @@ var rootCmd = &cobra.Command{ ag, err := askgit.New(dir, &askgit.Options{ UseGitCLI: useGitCLI, GitHubOrg: githubOrg, + GitHubUser: githubUser, GitHubToken: os.Getenv("GITHUB_TOKEN"), }) handleError(err) diff --git a/pkg/askgit/askgit.go b/pkg/askgit/askgit.go index 815611fc..a6691757 100644 --- a/pkg/askgit/askgit.go +++ b/pkg/askgit/askgit.go @@ -49,7 +49,12 @@ func init() { return err } - err = conn.CreateModule("github_repos", ghqlite.NewReposModule()) + err = conn.CreateModule("github_org_repos", ghqlite.NewReposModule(ghqlite.OwnerTypeOrganization)) + if err != nil { + return err + } + + err = 
conn.CreateModule("github_user_repos", ghqlite.NewReposModule(ghqlite.OwnerTypeUser)) if err != nil { return err } @@ -144,7 +149,14 @@ func (a *AskGit) ensureTables(options *Options) error { } if a.options.GitHubOrg != "" { - _, err = a.db.Exec(fmt.Sprintf("CREATE VIRTUAL TABLE IF NOT EXISTS repos USING github_repos(%s, '%s');", a.options.GitHubOrg, a.options.GitHubToken)) + _, err = a.db.Exec(fmt.Sprintf("CREATE VIRTUAL TABLE IF NOT EXISTS repos USING github_org_repos(%s, '%s');", a.options.GitHubOrg, a.options.GitHubToken)) + if err != nil { + return err + } + } + + if a.options.GitHubUser != "" { + _, err = a.db.Exec(fmt.Sprintf("CREATE VIRTUAL TABLE IF NOT EXISTS repos USING github_user_repos(%s, '%s');", a.options.GitHubUser, a.options.GitHubToken)) if err != nil { return err } diff --git a/pkg/ghqlite/ghqlite_test.go b/pkg/ghqlite/ghqlite_test.go index 576e73e9..a864b357 100644 --- a/pkg/ghqlite/ghqlite_test.go +++ b/pkg/ghqlite/ghqlite_test.go @@ -16,7 +16,7 @@ var ( func init() { sql.Register("ghqlite", &sqlite3.SQLiteDriver{ ConnectHook: func(conn *sqlite3.SQLiteConn) error { - err := conn.CreateModule("github_repos", NewReposModule()) + err := conn.CreateModule("github_repos", NewReposModule(OwnerTypeOrganization)) if err != nil { return err } diff --git a/pkg/ghqlite/repos_vtab.go b/pkg/ghqlite/repos_vtab.go index ac0a6e0c..ae0562bd 100644 --- a/pkg/ghqlite/repos_vtab.go +++ b/pkg/ghqlite/repos_vtab.go @@ -10,10 +10,12 @@ import ( "golang.org/x/time/rate" ) -type ReposModule struct{} +type ReposModule struct { + ownerType OwnerType +} -func NewReposModule() *ReposModule { - return &ReposModule{} +func NewReposModule(ownerType OwnerType) *ReposModule { + return &ReposModule{ownerType} } func (m *ReposModule) Create(c *sqlite3.SQLiteConn, args []string) (sqlite3.VTab, error) { @@ -51,7 +53,7 @@ func (m *ReposModule) Create(c *sqlite3.SQLiteConn, args []string) (sqlite3.VTab return nil, err } - return &reposTable{args[3], args[4][1 : len(args[4])-1]}, nil 
+ return &reposTable{args[3], m.ownerType, args[4][1 : len(args[4])-1]}, nil } func (m *ReposModule) Connect(c *sqlite3.SQLiteConn, args []string) (sqlite3.VTab, error) { @@ -61,8 +63,9 @@ func (m *ReposModule) Connect(c *sqlite3.SQLiteConn, args []string) (sqlite3.VTa func (m *ReposModule) DestroyModule() {} type reposTable struct { - owner string - token string + owner string + ownerType OwnerType + token string } func (v *reposTable) Open() (sqlite3.VTabCursor, error) { @@ -159,7 +162,7 @@ func (vc *reposCursor) Filter(idxNum int, idxStr string, vals []interface{}) err } else { rateLimiter = rate.NewLimiter(rate.Every(time.Minute), 80) } - iter := NewRepoIterator(vc.table.owner, OwnerTypeOrganization, vc.table.token, &RepoIteratorOptions{ + iter := NewRepoIterator(vc.table.owner, vc.table.ownerType, vc.table.token, &RepoIteratorOptions{ PerPage: 100, PreloadPages: 10, RateLimiter: rateLimiter, From 6899298530e8f7cccd450f553f0a5377307791a3 Mon Sep 17 00:00:00 2001 From: Patrick DeVivo Date: Sat, 28 Nov 2020 09:58:27 -0500 Subject: [PATCH 07/20] add a test case for user repos --- pkg/ghqlite/ghqlite_test.go | 14 ++++++++++++-- pkg/ghqlite/repos_vtab_test.go | 21 +++++++++++++++++++-- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/pkg/ghqlite/ghqlite_test.go b/pkg/ghqlite/ghqlite_test.go index a864b357..1c62a2d1 100644 --- a/pkg/ghqlite/ghqlite_test.go +++ b/pkg/ghqlite/ghqlite_test.go @@ -16,7 +16,12 @@ var ( func init() { sql.Register("ghqlite", &sqlite3.SQLiteDriver{ ConnectHook: func(conn *sqlite3.SQLiteConn) error { - err := conn.CreateModule("github_repos", NewReposModule(OwnerTypeOrganization)) + err := conn.CreateModule("github_org_repos", NewReposModule(OwnerTypeOrganization)) + if err != nil { + return err + } + + err = conn.CreateModule("github_user_repos", NewReposModule(OwnerTypeUser)) if err != nil { return err } @@ -45,7 +50,12 @@ func initFixtureDB() error { return err } - _, err = db.Exec(fmt.Sprintf("CREATE VIRTUAL TABLE IF NOT 
EXISTS repos USING github_repos(%s, '%s');", "augmentable-dev", os.Getenv("GITHUB_TOKEN"))) + _, err = db.Exec(fmt.Sprintf("CREATE VIRTUAL TABLE IF NOT EXISTS org_repos USING github_org_repos(%s, '%s');", "augmentable-dev", os.Getenv("GITHUB_TOKEN"))) + if err != nil { + return err + } + + _, err = db.Exec(fmt.Sprintf("CREATE VIRTUAL TABLE IF NOT EXISTS user_repos USING github_user_repos(%s, '%s');", "patrickdevivo", os.Getenv("GITHUB_TOKEN"))) if err != nil { return err } diff --git a/pkg/ghqlite/repos_vtab_test.go b/pkg/ghqlite/repos_vtab_test.go index ed9ed79b..e39757ba 100644 --- a/pkg/ghqlite/repos_vtab_test.go +++ b/pkg/ghqlite/repos_vtab_test.go @@ -4,8 +4,25 @@ import ( "testing" ) -func TestReposTable(t *testing.T) { - rows, err := DB.Query("SELECT * FROM repos LIMIT 5") +func TestOrgReposTable(t *testing.T) { + rows, err := DB.Query("SELECT * FROM org_repos LIMIT 5") + if err != nil { + t.Fatal(err) + } + + _, contents, err := GetRowContents(rows) + if err != nil { + t.Fatal(err) + } + + if len(contents) != 5 { + t.Fatalf("expected: 5 rows, got: %d rows", len(contents)) + } + +} + +func TestUserReposTable(t *testing.T) { + rows, err := DB.Query("SELECT * FROM user_repos LIMIT 5") if err != nil { t.Fatal(err) } From 137317e0e42841df22ae31d3d4e49745fd09325b Mon Sep 17 00:00:00 2001 From: Patrick DeVivo Date: Sat, 28 Nov 2020 12:45:24 -0500 Subject: [PATCH 08/20] add some notes to the README --- README.md | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 57 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index fa0cc226..fcdc906b 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,9 @@ It can execute queries that look like: -- how many commits have been authored by user@email.com? SELECT count(*) FROM commits WHERE author_email = 'user@email.com' ``` + +There's also preliminary support for executing queries against the GitHub API. + More in-depth examples and documentation can be found below. 
## Installation @@ -101,7 +104,11 @@ See `-h` for all the options. ### Tables -#### `commits` +#### Local Git Repository + +When a repo is specified (either by the `--repo` flag or from the current directory), the following tables are available to query. + +##### `commits` Similar to `git log`, the `commits` table includes all commits in the history of the currently checked out commit. @@ -120,7 +127,7 @@ Similar to `git log`, the `commits` table includes all commits in the history of | parent_count | INT | | tree_id | TEXT | -#### `files` +##### `files` The `files` table iterates over _ALL_ the files in a commit history, by default from what's checked out in the repository. The full table is every file in every tree of a commit history. @@ -136,7 +143,7 @@ Use the `commit_id` column to filter for files that belong to the work tree of a | executable | BOOL | -#### `branches` +##### `branches` | Column | Type | |--------|------| @@ -145,7 +152,7 @@ Use the `commit_id` column to filter for files that belong to the work tree of a | target | TEXT | | head | BOOL | -#### `tags` +##### `tags` | Column | Type | |--------------|------| @@ -158,7 +165,7 @@ Use the `commit_id` column to filter for files that belong to the work tree of a | message | TEXT | | target_type | TEXT | -#### `stats` +##### `stats` | Column | Type | |-----------|------| @@ -167,6 +174,51 @@ Use the `commit_id` column to filter for files that belong to the work tree of a | additions | INT | | deletions | INT | +#### GitHub Tables + +**This functionality is under development and likely to change** + +The following GitHub tables execute API requests to retrieve data during query execution. +As such, you should ensure the `GITHUB_TOKEN` environment variable is set so that authenticated API requests are made. +Unauthenticated API requests (no `GITHUB_TOKEN` set) are subject to a stricter rate limit by GitHub, and may take longer to execute (as query execution will try to respect the rate limit). 
+ +##### `repos` + +This table will only be available if either `--github-org` or `--github-user` are provided. +If both are provided, `--github-org` will be used. +Each specifies the GitHub org or user to query repositories from when scanning the `repos` table. +In other words, this table returns the repositories belonging to a given GitHub organzation or user. + +| Column | Type | +|-------------------|----------| +| id | INT | +| node_id | TEXT | +| name | TEXT | +| full_name | TEXT | +| owner | TEXT | +| private | BOOL | +| description | TEXT | +| fork | BOOL | +| homepage | TEXT | +| language | TEXT | +| forks_count | INT | +| stargazers_count | INT | +| watchers_count | INT | +| size | INT | +| default_branch | TEXT | +| open_issues_count | INT | +| topics | TEXT | +| has_issues | BOOL | +| has_projects | BOOL | +| has_wiki | BOOL | +| has_pages | BOOL | +| has_downloads | BOOL | +| archived | BOOL | +| pushed_at | DATETIME | +| created_at | DATETIME | +| updated_at | DATETIME | +| permissions | TEXT | + ### Example Queries This will return all commits in the history of the currently checked out branch/commit of the repo. From 43d88d87c20ae34ab19901ef03f283d349915929 Mon Sep 17 00:00:00 2001 From: Patrick DeVivo Date: Sat, 28 Nov 2020 12:50:45 -0500 Subject: [PATCH 09/20] some wording changes --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index fcdc906b..c338a373 100644 --- a/README.md +++ b/README.md @@ -178,9 +178,9 @@ Use the `commit_id` column to filter for files that belong to the work tree of a **This functionality is under development and likely to change** -The following GitHub tables execute API requests to retrieve data during query execution. -As such, you should ensure the `GITHUB_TOKEN` environment variable is set so that authenticated API requests are made. 
-Unauthenticated API requests (no `GITHUB_TOKEN` set) are subject to a stricter rate limit by GitHub, and may take longer to execute (as query execution will try to respect the rate limit). +The following GitHub tables make GitHub API requests to retrieve data during query execution. +As such, you should ensure the `GITHUB_TOKEN` environment variable is set so that API requests are authenticated. +Unauthenticated API requests (no `GITHUB_TOKEN` set) are subject to a stricter rate limit by GitHub, and may take much longer to execute (as query execution will try to respect the applicable rate limit). ##### `repos` From 96a175585a0e75a7d319c84a8bea6f8464d0f66c Mon Sep 17 00:00:00 2001 From: Patrick DeVivo Date: Sat, 28 Nov 2020 12:55:11 -0500 Subject: [PATCH 10/20] more wording --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index c338a373..765b3af9 100644 --- a/README.md +++ b/README.md @@ -178,16 +178,16 @@ Use the `commit_id` column to filter for files that belong to the work tree of a **This functionality is under development and likely to change** -The following GitHub tables make GitHub API requests to retrieve data during query execution. +The following tables make GitHub API requests to retrieve data during query execution. As such, you should ensure the `GITHUB_TOKEN` environment variable is set so that API requests are authenticated. -Unauthenticated API requests (no `GITHUB_TOKEN` set) are subject to a stricter rate limit by GitHub, and may take much longer to execute (as query execution will try to respect the applicable rate limit). +Unauthenticated API requests (no `GITHUB_TOKEN`) are subject to a stricter rate limit by GitHub, and may take longer to execute (query execution will try to respect the applicable rate limit). ##### `repos` This table will only be available if either `--github-org` or `--github-user` are provided. If both are provided, `--github-org` will be used. 
-Each specifies the GitHub org or user to query repositories from when scanning the `repos` table. -In other words, this table returns the repositories belonging to a given GitHub organzation or user. +Each specifies the org or user to query repositories from when scanning the `repos` table. +In other words, this table returns the repositories belonging to a GitHub organization or user, with the following schema: | Column | Type | |-------------------|----------| From acebd07ef13a1584a95ba0530a1d65cbbf4128a2 Mon Sep 17 00:00:00 2001 From: Patrick DeVivo Date: Sat, 28 Nov 2020 13:03:47 -0500 Subject: [PATCH 11/20] fix up how interactive mode accesses askgit instance --- cmd/root.go | 10 ++++++---- pkg/tui/conf.go | 9 ++++----- pkg/tui/keybind.go | 4 ---- pkg/tui/tui.go | 19 +++++++------------ 4 files changed, 17 insertions(+), 25 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index fab8a14b..f279c1ca 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -107,10 +107,7 @@ var rootCmd = &cobra.Command{ if err != nil { handleError(err) } - if cui { - tui.RunGUI(repo, dir, query) - return - } + ag, err := askgit.New(dir, &askgit.Options{ UseGitCLI: useGitCLI, GitHubOrg: githubOrg, @@ -119,6 +116,11 @@ var rootCmd = &cobra.Command{ }) handleError(err) + if cui { + tui.RunGUI(ag, query) + return + } + rows, err := ag.DB().Query(query) handleError(err) diff --git a/pkg/tui/conf.go b/pkg/tui/conf.go index 86cdebd4..86808063 100644 --- a/pkg/tui/conf.go +++ b/pkg/tui/conf.go @@ -12,11 +12,10 @@ var ( FROM commits GROUP BY author_email ORDER BY count(*) DESC`, - "author-stats": `SELECT - count(*) AS commits, SUM(additions) AS additions, SUM(deletions) AS deletions, author_email - FROM commits - GROUP BY author_email - ORDER BY commits`, + "author-stats": `SELECT count(DISTINCT commits.id) AS commits, SUM(additions) AS additions, SUM(deletions) AS deletions, author_email + FROM commits LEFT JOIN stats ON commits.id = stats.commit_id + WHERE commits.parent_count < 2 + GROUP BY 
author_email ORDER BY commits`, "author-commits-dow": `SELECT count(*) AS commits, diff --git a/pkg/tui/keybind.go b/pkg/tui/keybind.go index 7ad525a3..2d53b77f 100644 --- a/pkg/tui/keybind.go +++ b/pkg/tui/keybind.go @@ -133,10 +133,6 @@ func RunQuery(g *gocui.Gui, v *gocui.View) error { return err } query = input.Buffer() - ag, err := askgit.New(repoPath, &askgit.Options{}) - if err != nil { - return err - } start := time.Now() rows, err := ag.DB().Query(query) if err != nil { diff --git a/pkg/tui/tui.go b/pkg/tui/tui.go index 213c970d..98cb4bd5 100644 --- a/pkg/tui/tui.go +++ b/pkg/tui/tui.go @@ -10,10 +10,10 @@ import ( ) var ( - active = 0 - query = "" - repoPath = "" - usrInpt = "" + active = 0 + query = "" + usrInpt = "" + ag *askgit.AskGit ) func layout(g *gocui.Gui) error { @@ -46,11 +46,7 @@ func layout(g *gocui.Gui) error { return err } v.Title = "Info" - git, err := askgit.New(repoPath, &askgit.Options{}) - if err != nil { - return err - } - err = DisplayInformation(g, git, 0) + err = DisplayInformation(g, ag, 0) if err != nil { return err } @@ -83,15 +79,14 @@ func test(g *gocui.Gui, v *gocui.View) error { func quit(g *gocui.Gui, v *gocui.View) error { return gocui.ErrQuit } -func RunGUI(repo string, directory string, q string) { +func RunGUI(askgitInstance *askgit.AskGit, q string) { g, err := gocui.NewGui(gocui.OutputNormal) if err != nil { log.Panicln(err) } defer g.Close() query = q - repoPath = directory - usrInpt = repo + ag = askgitInstance g.Highlight = true g.Cursor = true g.SelFgColor = gocui.ColorGreen From 58dc5a0c565f29f5a969e209fb45617280f747ed Mon Sep 17 00:00:00 2001 From: Patrick DeVivo Date: Sat, 28 Nov 2020 13:12:51 -0500 Subject: [PATCH 12/20] move the stats table further up in the README --- README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 765b3af9..de493c6e 100644 --- a/README.md +++ b/README.md @@ -127,6 +127,15 @@ Similar to `git log`, the `commits` table 
includes all commits in the history of | parent_count | INT | | tree_id | TEXT | +##### `stats` + +| Column | Type | +|-----------|------| +| commit_id | TEXT | +| file | TEXT | +| additions | INT | +| deletions | INT | + ##### `files` The `files` table iterates over _ALL_ the files in a commit history, by default from what's checked out in the repository. @@ -165,15 +174,6 @@ Use the `commit_id` column to filter for files that belong to the work tree of a | message | TEXT | | target_type | TEXT | -##### `stats` - -| Column | Type | -|-----------|------| -| commit_id | TEXT | -| file | TEXT | -| additions | INT | -| deletions | INT | - #### GitHub Tables **This functionality is under development and likely to change** From 120292dbfc2eee9682b4c59cb4971a4fc8767ec3 Mon Sep 17 00:00:00 2001 From: Patrick DeVivo Date: Sat, 28 Nov 2020 14:13:58 -0500 Subject: [PATCH 13/20] add a TODO --- pkg/askgit/askgit.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pkg/askgit/askgit.go b/pkg/askgit/askgit.go index a6691757..8d7d7830 100644 --- a/pkg/askgit/askgit.go +++ b/pkg/askgit/askgit.go @@ -84,6 +84,12 @@ type Options struct { // New creates an instance of AskGit func New(repoPath string, options *Options) (*AskGit, error) { + // TODO with the addition of the GitHub API virtual tables, repoPath should no longer be required for creating + // an *AskGit instance, as the caller may just be interested in querying against the GitHub API (or some other + // to-be-defined virtual table that doesn't need a repo on disk).
+ // This should be reformulated, as it means currently the askgit command requires a local git repo, even if the query + // only executes against the GitHub API + // see https://github.com/mattn/go-sqlite3/issues/204 // also mentioned in the FAQ of the README: https://github.com/mattn/go-sqlite3#faq db, err := sql.Open("askgit", fmt.Sprintf("file:%x?mode=memory&cache=shared", md5.Sum([]byte(repoPath)))) From 72b1ced37cfcc6bf103038362554a8080f0db153 Mon Sep 17 00:00:00 2001 From: Patrick DeVivo Date: Sat, 28 Nov 2020 14:14:29 -0500 Subject: [PATCH 14/20] include the json1 extension, since some columns in the GitHub API tables return JSON strings --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 40edefb1..156f8222 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -gotags = "sqlite_vtable,static,system_libgit2" +gotags = "sqlite_vtable,sqlite_json1,static,system_libgit2" vet: go vet -v -tags=$(gotags) ./... From 6bd744ade03dbc593f6263a70ae7da78ab50f844 Mon Sep 17 00:00:00 2001 From: Patrick DeVivo Date: Sat, 12 Dec 2020 19:08:07 -0500 Subject: [PATCH 15/20] upgrade libgit2/git2go, add and amend github vtabs, misc fixes lots going on in this one...
--- .github/workflows/test.yml | 2 +- .gitignore | 1 + .vscode/settings.json | 4 +- cmd/root.go | 8 +- go.mod | 4 +- go.sum | 8 +- pkg/askgit/askgit.go | 42 +-- pkg/askgit/askgit_test.go | 2 +- pkg/ghqlite/ghqlite_test.go | 26 +- pkg/ghqlite/github_iterator.go | 200 +++++++++++++ pkg/ghqlite/pull_request_iterator.go | 59 ++++ pkg/ghqlite/pull_request_iterator_test.go | 36 +++ pkg/ghqlite/pull_requests_vtab.go | 348 ++++++++++++++++++++++ pkg/ghqlite/pull_requests_vtab_test.go | 22 ++ pkg/ghqlite/repo_iterator.go | 209 ++----------- pkg/ghqlite/repo_iterator_test.go | 13 +- pkg/ghqlite/repos_vtab.go | 124 +++++--- pkg/ghqlite/repos_vtab_test.go | 4 +- pkg/gitlog/parse_test.go | 2 +- pkg/gitqlite/git_branches.go | 2 +- pkg/gitqlite/git_branches_test.go | 2 +- pkg/gitqlite/git_file_iter.go | 2 +- pkg/gitqlite/git_files.go | 2 +- pkg/gitqlite/git_files_test.go | 2 +- pkg/gitqlite/git_log.go | 2 +- pkg/gitqlite/git_log_cli_test.go | 2 +- pkg/gitqlite/git_log_test.go | 2 +- pkg/gitqlite/git_stats.go | 2 +- pkg/gitqlite/git_stats_iter.go | 2 +- pkg/gitqlite/git_tags.go | 2 +- pkg/gitqlite/gitqlite_test.go | 2 +- scripts/install_libgit2.sh | 2 +- 32 files changed, 854 insertions(+), 286 deletions(-) create mode 100644 pkg/ghqlite/github_iterator.go create mode 100644 pkg/ghqlite/pull_request_iterator.go create mode 100644 pkg/ghqlite/pull_request_iterator_test.go create mode 100644 pkg/ghqlite/pull_requests_vtab.go create mode 100644 pkg/ghqlite/pull_requests_vtab_test.go diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a6577a4e..3d4df145 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -36,7 +36,7 @@ jobs: run: | git clone https://github.com/libgit2/libgit2.git ~/libgit2 cd ~/libgit2 - git checkout v1.0.1 + git checkout v1.1.0 mkdir build && cd build sudo cmake .. -DBUILD_CLAR=0 sudo cmake --build . 
--target install diff --git a/.gitignore b/.gitignore index 2d830686..2f22e4d9 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ coverage.out +.env diff --git a/.vscode/settings.json b/.vscode/settings.json index 0cf54c28..c930e0a0 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -10,5 +10,7 @@ ], "go.toolsEnvVars": { "GOFLAGS": "-tags='sqlite_vtable'" - } + }, + "go.testTimeout": "120s", + "go.testEnvFile": "${workspaceFolder}/.env" } diff --git a/cmd/root.go b/cmd/root.go index f279c1ca..928cc4ac 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -10,7 +10,7 @@ import ( "github.com/augmentable-dev/askgit/pkg/askgit" "github.com/augmentable-dev/askgit/pkg/tui" "github.com/gitsight/go-vcsurl" - git "github.com/libgit2/git2go/v30" + git "github.com/libgit2/git2go/v31" "github.com/spf13/cobra" ) @@ -21,8 +21,6 @@ var ( useGitCLI bool cui bool presetQuery string - githubOrg string - githubUser string ) func init() { @@ -31,8 +29,6 @@ func init() { rootCmd.PersistentFlags().BoolVar(&useGitCLI, "use-git-cli", false, "whether to use the locally installed git command (if it's available). 
Defaults to false.") rootCmd.PersistentFlags().BoolVarP(&cui, "interactive", "i", false, "whether to run in interactive mode, which displays a terminal UI") rootCmd.PersistentFlags().StringVar(&presetQuery, "preset", "", "used to pick a preset query") - rootCmd.PersistentFlags().StringVar(&githubOrg, "github-org", "", "used to specify a GitHub *org* to query against when scanning repos") - rootCmd.PersistentFlags().StringVar(&githubUser, "github-user", "", "used to specify a GitHub *user* to query against when scanning repos") } func handleError(err error) { @@ -110,8 +106,6 @@ var rootCmd = &cobra.Command{ ag, err := askgit.New(dir, &askgit.Options{ UseGitCLI: useGitCLI, - GitHubOrg: githubOrg, - GitHubUser: githubUser, GitHubToken: os.Getenv("GITHUB_TOKEN"), }) handleError(err) diff --git a/go.mod b/go.mod index e87c4da4..5e26f66c 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( github.com/google/go-querystring v1.0.0 // indirect github.com/jroimartin/gocui v0.4.0 github.com/kr/text v0.2.0 // indirect - github.com/libgit2/git2go/v30 v30.2.2 + github.com/libgit2/git2go/v31 v31.3.4 github.com/mattn/go-runewidth v0.0.9 // indirect github.com/mattn/go-sqlite3 v1.14.4 github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e // indirect @@ -23,3 +23,5 @@ require ( gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f // indirect gopkg.in/yaml.v2 v2.3.0 // indirect ) + +replace github.com/mattn/go-sqlite3 v1.14.4 => github.com/patrickdevivo/go-sqlite3 v1.14.6-0.20201211024840-146d4a910383 diff --git a/go.sum b/go.sum index d4f92215..13ad9405 100644 --- a/go.sum +++ b/go.sum @@ -122,8 +122,8 @@ github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/libgit2/git2go/v30 v30.2.2 
h1:6TzX6xDpLH24gexY6dcAt+56mFbg8gs9p2C8ZThKNGc= -github.com/libgit2/git2go/v30 v30.2.2/go.mod h1:bEqWPWaJjDpnkerA2FlyUdsuhc5/3UPBjYJ6SV0X3gY= +github.com/libgit2/git2go/v31 v31.3.4 h1:uwm+k+cgxIpyJtHxafYxdIDaO3EAdZJfGPO6pxSxA+c= +github.com/libgit2/git2go/v31 v31.3.4/go.mod h1:mnc0hPGPs0nDi9INrurTpioeRzje9DvSXqON/+JEhwY= github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= @@ -131,8 +131,6 @@ github.com/mattn/go-runewidth v0.0.7 h1:Ei8KR0497xHyKJPAv59M1dkC+rOZCMBJ+t3fZ+tw github.com/mattn/go-runewidth v0.0.7/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0= github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= -github.com/mattn/go-sqlite3 v1.14.4 h1:4rQjbDxdu9fSgI/r3KN72G3c2goxknAqHHgPWWs8UlI= -github.com/mattn/go-sqlite3 v1.14.4/go.mod h1:WVKg1VTActs4Qso6iwGbiFih2UIHo0ENGwNd0Lj+XmI= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= @@ -156,6 +154,8 @@ github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn github.com/olekukonko/tablewriter v0.0.4 h1:vHD/YYe1Wolo78koG299f7V/VAS08c6IpCLn+Ejf/w8= github.com/olekukonko/tablewriter v0.0.4/go.mod h1:zq6QwlOf5SlnkVbMSr5EoBv3636FWnp+qbPhuoO21uA= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= +github.com/patrickdevivo/go-sqlite3 v1.14.6-0.20201211024840-146d4a910383 h1:yIH5Hs1SaI3AbLVf5lmUHLBDcMUjTxq2MlnnWfr7SuI= +github.com/patrickdevivo/go-sqlite3 
v1.14.6-0.20201211024840-146d4a910383/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= github.com/pelletier/go-toml v1.2.0 h1:T5zMGML61Wp+FlcbWjRDT7yAxhJNAiPPLOFECq181zc= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= diff --git a/pkg/askgit/askgit.go b/pkg/askgit/askgit.go index 8d7d7830..c00f22e6 100644 --- a/pkg/askgit/askgit.go +++ b/pkg/askgit/askgit.go @@ -4,16 +4,19 @@ import ( "crypto/md5" "database/sql" "fmt" + "os" "os/exec" "os/user" "path" "strings" + "time" "github.com/augmentable-dev/askgit/pkg/ghqlite" "github.com/augmentable-dev/askgit/pkg/gitqlite" "github.com/gitsight/go-vcsurl" - git "github.com/libgit2/git2go/v30" + git "github.com/libgit2/git2go/v31" "github.com/mattn/go-sqlite3" + "golang.org/x/time/rate" ) func init() { @@ -49,12 +52,29 @@ func init() { return err } - err = conn.CreateModule("github_org_repos", ghqlite.NewReposModule(ghqlite.OwnerTypeOrganization)) + githubToken := os.Getenv("GITHUB_TOKEN") + rateLimiter := rate.NewLimiter(rate.Every(2*time.Second), 1) + + err = conn.CreateModule("github_org_repos", ghqlite.NewReposModule(ghqlite.OwnerTypeOrganization, ghqlite.ReposModuleOptions{ + Token: githubToken, + RateLimiter: rateLimiter, + })) + if err != nil { + return err + } + + err = conn.CreateModule("github_user_repos", ghqlite.NewReposModule(ghqlite.OwnerTypeUser, ghqlite.ReposModuleOptions{ + Token: githubToken, + RateLimiter: rateLimiter, + })) if err != nil { return err } - err = conn.CreateModule("github_user_repos", ghqlite.NewReposModule(ghqlite.OwnerTypeUser)) + err = conn.CreateModule("github_pull_requests", ghqlite.NewPullRequestsModule(ghqlite.PullRequestsModuleOptions{ + Token: githubToken, + RateLimiter: rateLimiter, + })) if err != nil { return err } @@ -78,8 +98,6 @@ type AskGit struct { type Options struct { UseGitCLI bool GitHubToken string - GitHubOrg string - GitHubUser string } 
// New creates an instance of AskGit @@ -154,20 +172,6 @@ func (a *AskGit) ensureTables(options *Options) error { return err } - if a.options.GitHubOrg != "" { - _, err = a.db.Exec(fmt.Sprintf("CREATE VIRTUAL TABLE IF NOT EXISTS repos USING github_org_repos(%s, '%s');", a.options.GitHubOrg, a.options.GitHubToken)) - if err != nil { - return err - } - } - - if a.options.GitHubUser != "" { - _, err = a.db.Exec(fmt.Sprintf("CREATE VIRTUAL TABLE IF NOT EXISTS repos USING github_user_repos(%s, '%s');", a.options.GitHubUser, a.options.GitHubToken)) - if err != nil { - return err - } - } - return nil } diff --git a/pkg/askgit/askgit_test.go b/pkg/askgit/askgit_test.go index e70660f8..2fdc9581 100644 --- a/pkg/askgit/askgit_test.go +++ b/pkg/askgit/askgit_test.go @@ -7,7 +7,7 @@ import ( "os" "testing" - git "github.com/libgit2/git2go/v30" + git "github.com/libgit2/git2go/v31" ) var ( diff --git a/pkg/ghqlite/ghqlite_test.go b/pkg/ghqlite/ghqlite_test.go index 1c62a2d1..03b98c06 100644 --- a/pkg/ghqlite/ghqlite_test.go +++ b/pkg/ghqlite/ghqlite_test.go @@ -2,7 +2,6 @@ package ghqlite import ( "database/sql" - "fmt" "os" "testing" @@ -16,12 +15,23 @@ var ( func init() { sql.Register("ghqlite", &sqlite3.SQLiteDriver{ ConnectHook: func(conn *sqlite3.SQLiteConn) error { - err := conn.CreateModule("github_org_repos", NewReposModule(OwnerTypeOrganization)) + err := conn.CreateModule("github_org_repos", NewReposModule(OwnerTypeOrganization, ReposModuleOptions{ + Token: os.Getenv("GITHUB_TOKEN"), + })) if err != nil { return err } - err = conn.CreateModule("github_user_repos", NewReposModule(OwnerTypeUser)) + err = conn.CreateModule("github_user_repos", NewReposModule(OwnerTypeUser, ReposModuleOptions{ + Token: os.Getenv("GITHUB_TOKEN"), + })) + if err != nil { + return err + } + + err = conn.CreateModule("github_pull_requests", NewPullRequestsModule(PullRequestsModuleOptions{ + Token: os.Getenv("GITHUB_TOKEN"), + })) if err != nil { return err } @@ -50,16 +60,6 @@ func 
initFixtureDB() error { return err } - _, err = db.Exec(fmt.Sprintf("CREATE VIRTUAL TABLE IF NOT EXISTS org_repos USING github_org_repos(%s, '%s');", "augmentable-dev", os.Getenv("GITHUB_TOKEN"))) - if err != nil { - return err - } - - _, err = db.Exec(fmt.Sprintf("CREATE VIRTUAL TABLE IF NOT EXISTS user_repos USING github_user_repos(%s, '%s');", "patrickdevivo", os.Getenv("GITHUB_TOKEN"))) - if err != nil { - return err - } - DB = db return nil } diff --git a/pkg/ghqlite/github_iterator.go b/pkg/ghqlite/github_iterator.go new file mode 100644 index 00000000..43228f07 --- /dev/null +++ b/pkg/ghqlite/github_iterator.go @@ -0,0 +1,200 @@ +package ghqlite + +import ( + "context" + "net/http" + "time" + + "github.com/google/go-github/github" + "golang.org/x/oauth2" + "golang.org/x/sync/errgroup" + "golang.org/x/time/rate" +) + +// GitHubIterator iterates over resources from the GitHub API +type GitHubIterator struct { + options GitHubIteratorOptions + currentPages []*githubIteratorPage + totalPages *int + pageIndex int + itemIndex int + fetchPage func(githubIter *GitHubIterator, page int) ([]interface{}, *github.Response, error) +} + +type githubIteratorPage struct { + items []interface{} + res *github.Response +} + +type GitHubIteratorOptions struct { + Client *github.Client // GitHub API client to use when making requests + Token string + PerPage int // number of items per page, GitHub API caps it at 100 + PreloadPages int // number of pages to "preload" - i.e. 
download concurrently + RateLimiter *rate.Limiter // rate limiter to use (tune to avoid hitting the API rate limits) +} + +// we define a custom http.Transport here that removes the Accept header +// see this issue for why it needs to be done this way: https://github.com/google/go-github/issues/999 +// the header is removed as the defaults used by go-github sometimes cause 502s from the GitHub API +type noAcceptTransport struct { + originalTransport http.RoundTripper +} + +func (t *noAcceptTransport) RoundTrip(r *http.Request) (*http.Response, error) { + r.Header.Del("Accept") + return t.originalTransport.RoundTrip(r) +} + +// NewGitHubIterator creates a *GitHubIterator +// oauth token and options. If the token is an empty string, no authentication is used +// note that unauthenticated requests are subject to a more stringent rate limit from the API +func NewGitHubIterator(nextPageFunc func(*GitHubIterator, int) ([]interface{}, *github.Response, error), options GitHubIteratorOptions) *GitHubIterator { + if options.Client == nil { + if options.Token != "" { // if token is specified setup an oauth http client + ts := oauth2.StaticTokenSource( + &oauth2.Token{AccessToken: options.Token}, + ) + tc := oauth2.NewClient(context.Background(), ts) + + tc.Transport = &noAcceptTransport{tc.Transport} + options.Client = github.NewClient(tc) + } else { + options.Client = github.NewClient(nil) + } + } + if options.PreloadPages <= 0 { + // we want to make sure this value is always at least 1 - it's the number of pages + // the iterator will fetch concurrently + options.PreloadPages = 1 + } + if options.RateLimiter == nil { + // if the rate limiter is not provided, supply a default one + // https://docs.github.com/en/free-pro-team@latest/developers/apps/rate-limits-for-github-apps + options.RateLimiter = rate.NewLimiter(rate.Every(10*time.Second), 8) + } + return &GitHubIterator{options, nil, nil, 0, 0, nextPageFunc} +} + +// fetchPages retries a *set* of pages given a nextPage 
+// if X is the nextPage and N is the preload pages value +// this will call fetchPage N times retrieving the X+N page +func (iter *GitHubIterator) fetchPages(nextPage int) error { + + // retrieve the N pages concurrently + g := new(errgroup.Group) + for p := 0; p < iter.options.PreloadPages; p++ { + + // if we already know the total number of expected pages, and we're requesting a page outside of that + // break the loop, since we've reached the end + // if a current page is nil, it indicates we're over the last page + if iter.totalPages != nil && nextPage+p > *iter.totalPages { + iter.currentPages[p] = nil + break + } + + func(p int) { + g.Go(func() error { + // apply the rate limiter here + err := iter.options.RateLimiter.Wait(context.Background()) + if err != nil { + return err + } + + // fetch the page + items, res, err := iter.fetchPage(iter, nextPage+p) + if err != nil { + return err + } + + // TODO remove this commented line at some point, it can be useful for debugging rate limit issues + // fmt.Println(res.Request.URL, res.StatusCode, res.Status, res.Header.Get("Retry-After"), res.Rate.Limit, res.Rate.Remaining, res.Rate.Reset.Format(time.RFC3339)) + + // if there are items returned + // if we've preloaded pages beyond the end of list, responses won't have items + if len(items) > 0 { + // store the new page we just retrieved + // in currentPages in the right place + newPage := githubIteratorPage{items, res} + iter.currentPages[p] = &newPage + } + + // if the response tells us what the last page is, set it + // this is used above to check whether additional pages should be fetched + if res.LastPage != 0 { + iter.totalPages = &res.LastPage + } + + return nil + }) + }(p) + } + + return g.Wait() +} + +// Next yields the next item in the iterator +// it should return nil, nil if the iteration is complete and there are no more items to retrieve +func (iter *GitHubIterator) Next() (interface{}, error) { + // if we are at the very beginning of the iteration, 
there will be no (nil) currentPages + if iter.currentPages == nil { + // initialize the currentPages the size of the number of pages to preload + iter.currentPages = make([]*githubIteratorPage, iter.options.PreloadPages) + // fetch the first pages (starting at 1, but fetching N pages where N = number to preload) + err := iter.fetchPages(1) + if err != nil { + return nil, err + } + } + + currentPage := iter.currentPages[iter.pageIndex] + + // if we've reached a nil page + // which is possible, as part of the batch may have exceeded the total number of pages + // we're at the end of iteration + if currentPage == nil { + return nil, nil + } + + // if the itemIndex has exceeded the number of items held in the current page by 1 + // increment to the next page and reset the item index + if iter.itemIndex > len(currentPage.items)-1 { + iter.pageIndex++ + if iter.pageIndex < len(iter.currentPages) { + currentPage = iter.currentPages[iter.pageIndex] + } + iter.itemIndex = 0 + } + + // if we've gone over the last page, however + if iter.pageIndex > len(iter.currentPages)-1 { + // retrieve the last page we were on (but exhausted already) + lastPage := iter.currentPages[iter.pageIndex-1] + // if the API response for this previous page indicates there's no next page + // we're at the end of the iteration, return nil + next := lastPage.res.NextPage + if next == 0 { + return nil, nil + } + + // otherwise, reset the page index and fetch the next batch of pages + iter.pageIndex = 0 + iter.itemIndex = 0 + err := iter.fetchPages(next) + if err != nil { + return nil, err + } + // reset the current page + currentPage = iter.currentPages[iter.pageIndex] + } + + if currentPage == nil { + return nil, nil + } + + // finally, pull out the current item the indices point to to be returned + currentItem := currentPage.items[iter.itemIndex] + iter.itemIndex++ + + return currentItem, nil +} diff --git a/pkg/ghqlite/pull_request_iterator.go b/pkg/ghqlite/pull_request_iterator.go new file mode 
100644 index 00000000..19f4ed1e --- /dev/null +++ b/pkg/ghqlite/pull_request_iterator.go @@ -0,0 +1,59 @@ +package ghqlite + +import ( + "context" + + "github.com/google/go-github/github" +) + +// RepoPullRequestIterator iterates over GitHub pull requests belonging to a single repository +type RepoPullRequestIterator struct { + options RepoPullRequestIteratorOptions + githubIter *GitHubIterator + repoOwner string + repoName string +} + +type RepoPullRequestIteratorOptions struct { + GitHubIteratorOptions + github.PullRequestListOptions +} + +func NewRepoPullRequestIterator(repoOwner, repoName string, options RepoPullRequestIteratorOptions) *RepoPullRequestIterator { + prIter := &RepoPullRequestIterator{options, nil, repoOwner, repoName} + githubIter := NewGitHubIterator(prIter.fetchRepoPullRequestsPage, options.GitHubIteratorOptions) + prIter.githubIter = githubIter + + return prIter +} + +func (prIter *RepoPullRequestIterator) fetchRepoPullRequestsPage(githubIter *GitHubIterator, p int) ([]interface{}, *github.Response, error) { + options := prIter.options + options.PullRequestListOptions.Page = p + + // use the user provided per page value, if it's greater than 0 + // otherwise don't set it and use the GitHub API default + if options.PerPage > 0 { + options.PullRequestListOptions.PerPage = githubIter.options.PerPage + } + + prs, res, err := githubIter.options.Client.PullRequests.List(context.Background(), prIter.repoOwner, prIter.repoName, &options.PullRequestListOptions) + items := make([]interface{}, len(prs)) + for i, r := range prs { + items[i] = r + } + return items, res, err +} + +func (prIter *RepoPullRequestIterator) Next() (*github.PullRequest, error) { + pr, err := prIter.githubIter.Next() + if err != nil { + return nil, err + } + + if pr == nil { + return nil, nil + } + + return pr.(*github.PullRequest), nil +} diff --git a/pkg/ghqlite/pull_request_iterator_test.go b/pkg/ghqlite/pull_request_iterator_test.go new file mode 100644 index 
00000000..6c2ad43f --- /dev/null +++ b/pkg/ghqlite/pull_request_iterator_test.go @@ -0,0 +1,36 @@ +package ghqlite + +import ( + "os" + "testing" + + "github.com/google/go-github/github" +) + +func TestRepoPullRequestIterator(t *testing.T) { + iter := NewRepoPullRequestIterator("augmentable-dev", "askgit", RepoPullRequestIteratorOptions{ + GitHubIteratorOptions: GitHubIteratorOptions{Token: os.Getenv("GITHUB_TOKEN"), PerPage: 100, PreloadPages: 5}, + PullRequestListOptions: github.PullRequestListOptions{ + State: "all", + Sort: "created", + Direction: "desc", + }, + }) + + atLeastAsManyPRs := 10 + count := 0 + for { + pr, err := iter.Next() + if err != nil { + t.Fatal(err) + } + if pr == nil { + break + } + count++ + } + + if count < atLeastAsManyPRs { + t.Fatalf("expected at least %d PRs, got %d", atLeastAsManyPRs, count) + } +} diff --git a/pkg/ghqlite/pull_requests_vtab.go b/pkg/ghqlite/pull_requests_vtab.go new file mode 100644 index 00000000..4b5932ba --- /dev/null +++ b/pkg/ghqlite/pull_requests_vtab.go @@ -0,0 +1,348 @@ +package ghqlite + +import ( + "encoding/json" + "errors" + "fmt" + "math" + "strings" + "time" + + "github.com/google/go-github/github" + "github.com/mattn/go-sqlite3" + "golang.org/x/time/rate" +) + +type PullRequestsModule struct { + options PullRequestsModuleOptions +} + +type PullRequestsModuleOptions struct { + Token string + RateLimiter *rate.Limiter +} + +func NewPullRequestsModule(options PullRequestsModuleOptions) *PullRequestsModule { + if options.RateLimiter == nil { + options.RateLimiter = rate.NewLimiter(rate.Every(time.Second), 2) + } + + return &PullRequestsModule{options} +} + +func (m *PullRequestsModule) EponymousOnlyModule() {} + +func (m *PullRequestsModule) Create(c *sqlite3.SQLiteConn, args []string) (sqlite3.VTab, error) { + err := c.DeclareVTab(fmt.Sprintf(` + CREATE TABLE %s ( + repo_owner HIDDEN, + repo_name HIDDEN, + id INT, + node_id TEXT, + number INT, + state TEXT, + locked BOOL, + title TEXT, + user_login TEXT, 
+ body TEXT, + labels TEXT, + active_lock_reason TEXT, + created_at DATETIME, + updated_at DATETIME, + closed_at DATETIME, + merged_at DATETIME, + merge_commit_sha TEXT, + assignee_login TEXT, + assignees TEXT, + requested_reviewer_logins TEXT, + head_label TEXT, + head_ref TEXT, + head_sha TEXT, + head_repo_owner TEXT, + head_repo_name, + base_label TEXT, + base_ref TEXT, + base_sha TEXT, + base_repo_owner TEXT, + base_repo_name TEXT, + author_association TEXT, + merged BOOL, + mergeable BOOL, + mergeable_state BOOL, + merged_by_login TEXT, + comments INT, + maintainer_can_modify BOOL, + commits INT, + additions INT, + deletions INT, + changed_files INT + )`, args[0])) + if err != nil { + return nil, err + } + + return &pullRequestsTable{m}, nil +} + +func (m *PullRequestsModule) Connect(c *sqlite3.SQLiteConn, args []string) (sqlite3.VTab, error) { + return m.Create(c, args) +} + +func (m *PullRequestsModule) DestroyModule() {} + +type pullRequestsTable struct { + module *PullRequestsModule +} + +func (v *pullRequestsTable) Open() (sqlite3.VTabCursor, error) { + return &pullRequestsCursor{v, "", "", nil, nil, false}, nil +} + +func (v *pullRequestsTable) Disconnect() error { return nil } +func (v *pullRequestsTable) Destroy() error { return nil } + +type pullRequestsCursor struct { + table *pullRequestsTable + repoOwner string + repoName string + iter *RepoPullRequestIterator + currentPR *github.PullRequest + eof bool +} + +func (vc *pullRequestsCursor) Column(c *sqlite3.SQLiteContext, col int) error { + pr := vc.currentPR + switch col { + case 0: + c.ResultText(vc.repoOwner) + case 1: + c.ResultText(vc.repoName) + case 2: + c.ResultInt64(pr.GetID()) + case 3: + c.ResultText(pr.GetNodeID()) + case 4: + c.ResultInt(pr.GetNumber()) + case 5: + c.ResultText(pr.GetState()) + case 6: + c.ResultBool(pr.GetActiveLockReason() != "") + case 7: + c.ResultText(pr.GetTitle()) + case 8: + c.ResultText(pr.GetUser().GetLogin()) + case 9: + c.ResultText(pr.GetBody()) + case 10: + 
str, err := json.Marshal(pr.Labels) + if err != nil { + return err + } + c.ResultText(string(str)) + case 11: + c.ResultText(pr.GetActiveLockReason()) + case 12: + c.ResultText(pr.GetCreatedAt().Format(time.RFC3339Nano)) + case 13: + c.ResultText(pr.GetUpdatedAt().Format(time.RFC3339Nano)) + case 14: + c.ResultText(pr.GetClosedAt().Format(time.RFC3339Nano)) + case 15: + c.ResultText(pr.GetMergedAt().Format(time.RFC3339Nano)) + case 16: + c.ResultText(pr.GetMergeCommitSHA()) + case 17: + c.ResultText(pr.GetAssignee().GetLogin()) + case 18: + str, err := json.Marshal(pr.Assignees) + if err != nil { + return err + } + c.ResultText(string(str)) + case 19: + str, err := json.Marshal(pr.RequestedReviewers) + if err != nil { + return err + } + c.ResultText(string(str)) + case 20: + c.ResultText(pr.GetHead().GetLabel()) + case 21: + c.ResultText(pr.GetHead().GetRef()) + case 22: + c.ResultText(pr.GetHead().GetSHA()) + case 23: + c.ResultText(pr.Head.GetRepo().GetOwner().GetLogin()) + case 24: + c.ResultText(pr.Head.GetRepo().GetName()) + case 25: + c.ResultText(pr.GetBase().GetSHA()) + case 26: + c.ResultText(pr.GetBase().GetRef()) + case 27: + c.ResultText(pr.GetBase().GetSHA()) + case 28: + c.ResultText(pr.Base.GetRepo().GetOwner().GetLogin()) + case 29: + c.ResultText(pr.Base.GetRepo().GetName()) + case 30: + c.ResultText(pr.GetAuthorAssociation()) + case 31: + c.ResultBool(pr.GetMerged()) + case 32: + c.ResultBool(pr.GetMergeable()) + case 33: + c.ResultText(pr.GetMergeableState()) + case 34: + c.ResultText(pr.GetMergedBy().GetLogin()) + case 35: + c.ResultInt(pr.GetComments()) + case 36: + c.ResultBool(pr.GetMaintainerCanModify()) + case 37: + c.ResultInt(pr.GetCommits()) + case 38: + c.ResultInt(pr.GetAdditions()) + case 39: + c.ResultInt(pr.GetDeletions()) + case 40: + c.ResultInt(pr.GetChangedFiles()) + } + + return nil +} + +func (v *pullRequestsTable) BestIndex(constraints []sqlite3.InfoConstraint, ob []sqlite3.InfoOrderBy) (*sqlite3.IndexResult, error) { + used 
:= make([]bool, len(constraints)) + var repoOwnerCstUsed, repoNameCstUsed bool + idxNameVals := make([]string, 0) + for c, cst := range constraints { + switch cst.Column { + case 0: // repo_owner + if cst.Op != sqlite3.OpEQ { // if there's no equality constraint, fail + return nil, sqlite3.ErrConstraint + } + // if the constraint is usable, use it, otherwise fail + if used[c] = cst.Usable; !used[c] { + return nil, sqlite3.ErrConstraint + } + repoOwnerCstUsed = true + idxNameVals = append(idxNameVals, "repo_owner") + case 1: // repo_name + if cst.Op != sqlite3.OpEQ { // if there's no equality constraint, fail + return nil, sqlite3.ErrConstraint + } + // if the constraint is usable, use it, otherwise fail + if used[c] = cst.Usable; !used[c] { + return nil, sqlite3.ErrConstraint + } + repoNameCstUsed = true + idxNameVals = append(idxNameVals, "repo_name") + case 5: + if cst.Usable && cst.Op == sqlite3.OpEQ { + used[c] = true + } + idxNameVals = append(idxNameVals, "state") + } + } + + if !repoOwnerCstUsed { + return nil, errors.New("must supply a repo owner") + } + + if !repoNameCstUsed { + return nil, errors.New("must supply a repo name") + } + + var idxNum int + var alreadyOrdered bool + if len(ob) == 1 { + switch ob[0].Column { + case 12: // created_at + alreadyOrdered = true + if ob[0].Desc { + idxNum = -ob[0].Column + } else { + idxNum = ob[0].Column + } + } + + } + + return &sqlite3.IndexResult{ + IdxNum: idxNum, + IdxStr: strings.Join(idxNameVals, ","), + Used: used, + AlreadyOrdered: alreadyOrdered, + }, nil +} + +func (vc *pullRequestsCursor) Filter(idxNum int, idxStr string, vals []interface{}) error { + state := "all" + for c, cstVal := range strings.Split(idxStr, ",") { + switch cstVal { + case "repo_owner": + vc.repoOwner = vals[c].(string) + case "repo_name": + vc.repoName = vals[c].(string) + case "state": + state = vals[c].(string) + } + } + + orderBy := "created" + switch math.Abs(float64(idxNum)) { + case 12: + orderBy = "created" + case 13: + 
orderBy = "updated" + } + + direction := "asc" + if idxNum <= 0 { + direction = "desc" + } else { + direction = "asc" + } + + iter := NewRepoPullRequestIterator(vc.repoOwner, vc.repoName, RepoPullRequestIteratorOptions{ + GitHubIteratorOptions: GitHubIteratorOptions{ + Token: vc.table.module.options.Token, + PerPage: 100, + PreloadPages: 5, + RateLimiter: vc.table.module.options.RateLimiter, + }, + PullRequestListOptions: github.PullRequestListOptions{ + State: state, + Sort: orderBy, + Direction: direction, + }, + }) + vc.iter = iter + return vc.Next() +} + +func (vc *pullRequestsCursor) Next() error { + nextPR, err := vc.iter.Next() + if err != nil { + return err + } + if nextPR == nil { + vc.eof = true + return nil + } + vc.currentPR = nextPR + return nil +} + +func (vc *pullRequestsCursor) EOF() bool { + return vc.eof +} + +func (vc *pullRequestsCursor) Rowid() (int64, error) { + return vc.currentPR.GetID(), nil +} + +func (vc *pullRequestsCursor) Close() error { + return nil +} diff --git a/pkg/ghqlite/pull_requests_vtab_test.go b/pkg/ghqlite/pull_requests_vtab_test.go new file mode 100644 index 00000000..ec17fb70 --- /dev/null +++ b/pkg/ghqlite/pull_requests_vtab_test.go @@ -0,0 +1,22 @@ +package ghqlite + +import ( + "testing" +) + +func TestPullRequestsTable(t *testing.T) { + rows, err := DB.Query("SELECT * FROM github_pull_requests('augmentable-dev', 'askgit') WHERE state = 'closed' LIMIT 5") + if err != nil { + t.Fatal(err) + } + + _, contents, err := GetRowContents(rows) + if err != nil { + t.Fatal(err) + } + + if len(contents) != 5 { + t.Fatalf("expected: 5 rows, got: %d rows", len(contents)) + } + +} diff --git a/pkg/ghqlite/repo_iterator.go b/pkg/ghqlite/repo_iterator.go index 255e74be..44526285 100644 --- a/pkg/ghqlite/repo_iterator.go +++ b/pkg/ghqlite/repo_iterator.go @@ -2,29 +2,15 @@ package ghqlite import ( "context" - "net/http" - "time" "github.com/google/go-github/github" - "golang.org/x/oauth2" - "golang.org/x/sync/errgroup" - 
"golang.org/x/time/rate" ) // RepoIterator iterates over GitHub repositories belonging to a single owner type RepoIterator struct { - options *RepoIteratorOptions - owner string - ownerType OwnerType - currentPages []*page - totalPages *int - pageIndex int - repoIndex int -} - -type page struct { - repos []*github.Repository - res *github.Response + githubIter *GitHubIterator + owner string + ownerType OwnerType } type OwnerType string @@ -36,194 +22,65 @@ const ( OwnerTypeUser OwnerType = "User" ) -// RepoIteratorOptions determines how the iterator should behave -type RepoIteratorOptions struct { - Client *github.Client // GitHub API client to use when making requests - PerPage int // number of repos per page, GitHub API caps it at 100 - PreloadPages int // number of pages to "preload" - i.e. download concurrently - RateLimiter *rate.Limiter // rate limiter to use (tune to avoid hitting the API rate limits) -} - -// we define a custom http.Transport here that removes the Accept header -// see this issue for why it needs to be done this way: https://github.com/google/go-github/issues/999 -// the header is removed as the defaults used by go-github sometimes cause 502s from the GitHub API -type noAcceptTransport struct { - originalTransport http.RoundTripper -} - -func (t *noAcceptTransport) RoundTrip(r *http.Request) (*http.Response, error) { - r.Header.Del("Accept") - return t.originalTransport.RoundTrip(r) -} - // NewRepoIterator creates a *RepoIterator from an owner (GitHub organization or user) // oauth token and options. 
If the token is an empty string, no authentication is used // note that unauthenticated requests are subject to a more stringent rate limit from the API -func NewRepoIterator(owner string, ownerType OwnerType, token string, options *RepoIteratorOptions) *RepoIterator { - if options.Client == nil { - if token != "" { // if token is specified setup an oauth http client - ts := oauth2.StaticTokenSource( - &oauth2.Token{AccessToken: token}, - ) - tc := oauth2.NewClient(context.Background(), ts) - - tc.Transport = &noAcceptTransport{tc.Transport} - options.Client = github.NewClient(tc) - } else { - options.Client = github.NewClient(nil) - } +func NewRepoIterator(owner string, ownerType OwnerType, options GitHubIteratorOptions) *RepoIterator { + repoIter := &RepoIterator{ + owner: owner, + ownerType: ownerType, } - if options.PreloadPages <= 0 { - // we want to make sure this value is always at least 1 - it's the number of pages - // the iterator will fetch concurrently - options.PreloadPages = 1 - } - if options.RateLimiter == nil { - // if the rate limiter is not provided, supply a default one - // https://docs.github.com/en/free-pro-team@latest/developers/apps/rate-limits-for-github-apps - options.RateLimiter = rate.NewLimiter(rate.Every(10*time.Second), 15) - } - return &RepoIterator{options, owner, ownerType, nil, nil, 0, 0} + githubIter := NewGitHubIterator(repoIter.fetchRepoPage, options) + repoIter.githubIter = githubIter + + return repoIter } -// fetchPage retrieves a single page of repos -func (iter *RepoIterator) fetchPage(p int) ([]*github.Repository, *github.Response, error) { +func (repoIter *RepoIterator) fetchRepoPage(githubIter *GitHubIterator, p int) ([]interface{}, *github.Response, error) { listOpt := github.ListOptions{Page: p} // use the user provided per page value, if it's greater than 0 // otherwise don't set it and use the GitHub API default - if iter.options.PerPage > 0 { - listOpt.PerPage = iter.options.PerPage + if githubIter.options.PerPage 
> 0 { + listOpt.PerPage = githubIter.options.PerPage } - switch iter.ownerType { + switch repoIter.ownerType { case OwnerTypeOrganization: opt := &github.RepositoryListByOrgOptions{ ListOptions: listOpt, } - return iter.options.Client.Repositories.ListByOrg(context.Background(), iter.owner, opt) + repos, res, err := githubIter.options.Client.Repositories.ListByOrg(context.Background(), repoIter.owner, opt) + items := make([]interface{}, len(repos)) + for i, r := range repos { + items[i] = r + } + return items, res, err case OwnerTypeUser: opt := &github.RepositoryListOptions{ ListOptions: listOpt, } - return iter.options.Client.Repositories.List(context.Background(), iter.owner, opt) + repos, res, err := githubIter.options.Client.Repositories.List(context.Background(), repoIter.owner, opt) + items := make([]interface{}, len(repos)) + for i, r := range repos { + items[i] = r + } + return items, res, err } // should never reach this point return nil, nil, nil } -// fetchPages retries a *set* of pages given a nextPage -// if X is the nextPage and N is the preload pages value -// this will call fetchPage N times retrieving the X+N page -func (iter *RepoIterator) fetchPages(nextPage int) error { - - // retrieve the N pages concurrently - g := new(errgroup.Group) - for p := 0; p < iter.options.PreloadPages; p++ { - - // if we already know the total number of expected pages, and we're requesting a page outside of that - // break the loop, since we've reached the end - // if a current page is nil, it indicates we're over the last page - if iter.totalPages != nil && nextPage+p > *iter.totalPages { - iter.currentPages[p] = nil - break - } - - func(p int) { - g.Go(func() error { - // apply the rate limiter here - err := iter.options.RateLimiter.Wait(context.Background()) - if err != nil { - return err - } - - // fetch the page - repos, res, err := iter.fetchPage(nextPage + p) - if err != nil { - return err - } - - // TODO remove this commented line at some point, it can be 
useful for debugging rate limit issues - // fmt.Println(res.Rate.Limit, res.Rate.Remaining, res.Rate.Reset.Format(time.RFC3339)) - - // if there are repos returned - // if we've preloaded pages beyond the end of list, responses won't have repos - if len(repos) > 0 { - // store the new page we just retrieved - // in currentPages in the right place - newPage := page{repos, res} - iter.currentPages[p] = &newPage - } - - // if the response tells us what the last page is, set it - // this is used above to check whether additional pages should be fetched - if res.LastPage != 0 { - iter.totalPages = &res.LastPage - } - - return nil - }) - }(p) - } - - return g.Wait() -} - -// Next yields the next repository in the iterator -// it should return nil, nil if the iteration is complete and there are no more repositories to retrieve -func (iter *RepoIterator) Next() (*github.Repository, error) { - - // if we are at the very beginning of the iteration, there will be no (nil) currentPages - if iter.currentPages == nil { - // initialize the currentPages the size of the number of pages to preload - iter.currentPages = make([]*page, iter.options.PreloadPages) - // fetch the first pages (starting at 1, but fetching N pages where N = number to preload) - err := iter.fetchPages(1) - if err != nil { - return nil, err - } +func (repoIter *RepoIterator) Next() (*github.Repository, error) { + repo, err := repoIter.githubIter.Next() + if err != nil { + return nil, err } - // if the repoIndex has exceeded the number of repos held in the current page by 1 - // increment to the next page and reset the repo index - if iter.repoIndex == len(iter.currentPages[iter.pageIndex].repos) { - iter.pageIndex++ - iter.repoIndex = 0 - } - - // if we've gone over the last page, however - if iter.pageIndex == len(iter.currentPages) { - // retrieve the last page we were on (but exhausted already) - lastPage := iter.currentPages[iter.pageIndex-1] - // if the API response for this previous page indicates 
there's no next page - // we're at the end of the iteration, return nil - next := lastPage.res.NextPage - if next == 0 { - return nil, nil - } - - // otherwise, reset the page index and fetch the next batch of pages - iter.pageIndex = 0 - err := iter.fetchPages(next) - if err != nil { - return nil, err - } - } - - // if we've reached a nil page - // which is possible, as part of the batch may have exceeded the total number of pages - // we're at the end of iteration - if iter.currentPages[iter.pageIndex] == nil { + if repo == nil { return nil, nil } - currentPage := iter.currentPages[iter.pageIndex] - // fmt.Println(len(currentPage.repos)) - - // finally, pull out the current repo the indices point to to be returned - currentRepo := currentPage.repos[iter.repoIndex] - iter.repoIndex++ - - return currentRepo, nil + return repo.(*github.Repository), nil } diff --git a/pkg/ghqlite/repo_iterator_test.go b/pkg/ghqlite/repo_iterator_test.go index 5852c85e..cef1a409 100644 --- a/pkg/ghqlite/repo_iterator_test.go +++ b/pkg/ghqlite/repo_iterator_test.go @@ -2,19 +2,28 @@ package ghqlite import ( "fmt" + "os" "testing" + "time" + + "golang.org/x/time/rate" ) func TestRepoIterator(t *testing.T) { - testCases := []*RepoIteratorOptions{ + testCases := []GitHubIteratorOptions{ {PerPage: 1, PreloadPages: 1}, {PerPage: 5, PreloadPages: 2}, {PerPage: 100, PreloadPages: 2}, } + for _, options := range testCases { + options.Token = os.Getenv("GITHUB_TOKEN") + options.RateLimiter = rate.NewLimiter(rate.Every(2*time.Second), 1) + } + minRepos := 10 for i, options := range testCases { - iter := NewRepoIterator("augmentable-dev", OwnerTypeOrganization, "", options) + iter := NewRepoIterator("augmentable-dev", OwnerTypeOrganization, options) t.Run(fmt.Sprintf("Case#%d", i), func(t *testing.T) { for k := 0; k < minRepos; k++ { diff --git a/pkg/ghqlite/repos_vtab.go b/pkg/ghqlite/repos_vtab.go index ae0562bd..2911a969 100644 --- a/pkg/ghqlite/repos_vtab.go +++ b/pkg/ghqlite/repos_vtab.go @@ 
-2,6 +2,7 @@ package ghqlite import ( "encoding/json" + "errors" "fmt" "time" @@ -12,15 +13,29 @@ import ( type ReposModule struct { ownerType OwnerType + options ReposModuleOptions } -func NewReposModule(ownerType OwnerType) *ReposModule { - return &ReposModule{ownerType} +func NewReposModule(ownerType OwnerType, options ReposModuleOptions) *ReposModule { + if options.RateLimiter == nil { + if options.RateLimiter == nil { + options.RateLimiter = rate.NewLimiter(rate.Every(time.Second), 2) + } + } + return &ReposModule{ownerType, options} } +type ReposModuleOptions struct { + Token string + RateLimiter *rate.Limiter +} + +func (m *ReposModule) EponymousOnlyModule() {} + func (m *ReposModule) Create(c *sqlite3.SQLiteConn, args []string) (sqlite3.VTab, error) { err := c.DeclareVTab(fmt.Sprintf(` CREATE TABLE %s ( + repo_owner HIDDEN, id INT, node_id TEXT, name TEXT, @@ -52,8 +67,7 @@ func (m *ReposModule) Create(c *sqlite3.SQLiteConn, args []string) (sqlite3.VTab if err != nil { return nil, err } - - return &reposTable{args[3], m.ownerType, args[4][1 : len(args[4])-1]}, nil + return &reposTable{m}, nil } func (m *ReposModule) Connect(c *sqlite3.SQLiteConn, args []string) (sqlite3.VTab, error) { @@ -63,19 +77,13 @@ func (m *ReposModule) Connect(c *sqlite3.SQLiteConn, args []string) (sqlite3.VTa func (m *ReposModule) DestroyModule() {} type reposTable struct { - owner string - ownerType OwnerType - token string + module *ReposModule } func (v *reposTable) Open() (sqlite3.VTabCursor, error) { return &reposCursor{v, nil, nil, false}, nil } -func (v *reposTable) BestIndex(cst []sqlite3.InfoConstraint, ob []sqlite3.InfoOrderBy) (*sqlite3.IndexResult, error) { - return &sqlite3.IndexResult{}, nil -} - func (v *reposTable) Disconnect() error { return nil } func (v *reposTable) Destroy() error { return nil } @@ -90,62 +98,64 @@ func (vc *reposCursor) Column(c *sqlite3.SQLiteContext, col int) error { repo := vc.currentRepo switch col { case 0: - c.ResultInt64(repo.GetID()) + 
c.ResultText(repo.GetOwner().GetName()) case 1: - c.ResultText(repo.GetNodeID()) + c.ResultInt64(repo.GetID()) case 2: - c.ResultText(repo.GetName()) + c.ResultText(repo.GetNodeID()) case 3: - c.ResultText(repo.GetFullName()) + c.ResultText(repo.GetName()) case 4: - c.ResultText(repo.GetOwner().GetLogin()) + c.ResultText(repo.GetFullName()) case 5: - c.ResultBool(repo.GetPrivate()) + c.ResultText(repo.GetOwner().GetLogin()) case 6: - c.ResultText(repo.GetDescription()) + c.ResultBool(repo.GetPrivate()) case 7: - c.ResultBool(repo.GetFork()) + c.ResultText(repo.GetDescription()) case 8: - c.ResultText(repo.GetHomepage()) + c.ResultBool(repo.GetFork()) case 9: - c.ResultText(repo.GetLanguage()) + c.ResultText(repo.GetHomepage()) case 10: - c.ResultInt(repo.GetForksCount()) + c.ResultText(repo.GetLanguage()) case 11: - c.ResultInt(repo.GetStargazersCount()) + c.ResultInt(repo.GetForksCount()) case 12: - c.ResultInt(repo.GetWatchersCount()) + c.ResultInt(repo.GetStargazersCount()) case 13: - c.ResultInt(repo.GetSize()) + c.ResultInt(repo.GetWatchersCount()) case 14: - c.ResultText(repo.GetDefaultBranch()) + c.ResultInt(repo.GetSize()) case 15: - c.ResultInt(repo.GetOpenIssuesCount()) + c.ResultText(repo.GetDefaultBranch()) case 16: + c.ResultInt(repo.GetOpenIssuesCount()) + case 17: str, err := json.Marshal(repo.Topics) if err != nil { return err } c.ResultText(string(str)) - case 17: - c.ResultBool(repo.GetHasIssues()) case 18: - c.ResultBool(repo.GetHasProjects()) + c.ResultBool(repo.GetHasIssues()) case 19: - c.ResultBool(repo.GetHasWiki()) + c.ResultBool(repo.GetHasProjects()) case 20: - c.ResultBool(repo.GetHasPages()) + c.ResultBool(repo.GetHasWiki()) case 21: - c.ResultBool(repo.GetHasDownloads()) + c.ResultBool(repo.GetHasPages()) case 22: - c.ResultBool(repo.GetArchived()) + c.ResultBool(repo.GetHasDownloads()) case 23: - c.ResultText(repo.PushedAt.Format(time.RFC3339Nano)) + c.ResultBool(repo.GetArchived()) case 24: - 
c.ResultText(repo.CreatedAt.Format(time.RFC3339Nano)) + c.ResultText(repo.PushedAt.Format(time.RFC3339Nano)) case 25: - c.ResultText(repo.UpdatedAt.Format(time.RFC3339Nano)) + c.ResultText(repo.CreatedAt.Format(time.RFC3339Nano)) case 26: + c.ResultText(repo.UpdatedAt.Format(time.RFC3339Nano)) + case 27: str, err := json.Marshal(repo.GetPermissions()) if err != nil { return err @@ -155,17 +165,41 @@ func (vc *reposCursor) Column(c *sqlite3.SQLiteContext, col int) error { return nil } -func (vc *reposCursor) Filter(idxNum int, idxStr string, vals []interface{}) error { - var rateLimiter *rate.Limiter - if vc.table.token == "" { - rateLimiter = rate.NewLimiter(rate.Every(time.Minute), 30) - } else { - rateLimiter = rate.NewLimiter(rate.Every(time.Minute), 80) +func (v *reposTable) BestIndex(constraints []sqlite3.InfoConstraint, ob []sqlite3.InfoOrderBy) (*sqlite3.IndexResult, error) { + used := make([]bool, len(constraints)) + repoOwnerCstUsed := false + for c, cst := range constraints { + switch cst.Column { + case 0: // repo_owner + if cst.Op != sqlite3.OpEQ { // if there's no equality constraint, fail + return nil, sqlite3.ErrConstraint + } + // if the constraint is usable, use it, otherwise fail + if used[c] = cst.Usable; !used[c] { + return nil, sqlite3.ErrConstraint + } + repoOwnerCstUsed = true + } + } + + if !repoOwnerCstUsed { + return nil, errors.New("must supply a repo owner") } - iter := NewRepoIterator(vc.table.owner, vc.table.ownerType, vc.table.token, &RepoIteratorOptions{ + + return &sqlite3.IndexResult{ + IdxNum: 0, + IdxStr: "default", + Used: used, + }, nil +} + +func (vc *reposCursor) Filter(idxNum int, idxStr string, vals []interface{}) error { + owner := vals[0].(string) + iter := NewRepoIterator(owner, vc.table.module.ownerType, GitHubIteratorOptions{ + Token: vc.table.module.options.Token, PerPage: 100, PreloadPages: 10, - RateLimiter: rateLimiter, + RateLimiter: vc.table.module.options.RateLimiter, }) vc.iter = iter return vc.Next() diff 
--git a/pkg/ghqlite/repos_vtab_test.go b/pkg/ghqlite/repos_vtab_test.go index e39757ba..a3734a3c 100644 --- a/pkg/ghqlite/repos_vtab_test.go +++ b/pkg/ghqlite/repos_vtab_test.go @@ -5,7 +5,7 @@ import ( ) func TestOrgReposTable(t *testing.T) { - rows, err := DB.Query("SELECT * FROM org_repos LIMIT 5") + rows, err := DB.Query("SELECT * FROM github_org_repos('augmentable-dev') LIMIT 5") if err != nil { t.Fatal(err) } @@ -22,7 +22,7 @@ func TestOrgReposTable(t *testing.T) { } func TestUserReposTable(t *testing.T) { - rows, err := DB.Query("SELECT * FROM user_repos LIMIT 5") + rows, err := DB.Query("SELECT * FROM github_user_repos('patrickdevivo') LIMIT 5") if err != nil { t.Fatal(err) } diff --git a/pkg/gitlog/parse_test.go b/pkg/gitlog/parse_test.go index 0d5f6f37..cd37d97e 100644 --- a/pkg/gitlog/parse_test.go +++ b/pkg/gitlog/parse_test.go @@ -10,7 +10,7 @@ import ( "testing" "github.com/gitsight/go-vcsurl" - git "github.com/libgit2/git2go/v30" + git "github.com/libgit2/git2go/v31" ) var ( diff --git a/pkg/gitqlite/git_branches.go b/pkg/gitqlite/git_branches.go index 940cb886..2c4072d9 100644 --- a/pkg/gitqlite/git_branches.go +++ b/pkg/gitqlite/git_branches.go @@ -3,7 +3,7 @@ package gitqlite import ( "fmt" - git "github.com/libgit2/git2go/v30" + git "github.com/libgit2/git2go/v31" "github.com/mattn/go-sqlite3" ) diff --git a/pkg/gitqlite/git_branches_test.go b/pkg/gitqlite/git_branches_test.go index 3af5d11e..56d368d1 100644 --- a/pkg/gitqlite/git_branches_test.go +++ b/pkg/gitqlite/git_branches_test.go @@ -3,7 +3,7 @@ package gitqlite import ( "testing" - git "github.com/libgit2/git2go/v30" + git "github.com/libgit2/git2go/v31" ) func TestBranches(t *testing.T) { diff --git a/pkg/gitqlite/git_file_iter.go b/pkg/gitqlite/git_file_iter.go index bfb3ba1a..e9ea85dd 100644 --- a/pkg/gitqlite/git_file_iter.go +++ b/pkg/gitqlite/git_file_iter.go @@ -3,7 +3,7 @@ package gitqlite import ( "io" - git "github.com/libgit2/git2go/v30" + git "github.com/libgit2/git2go/v31" ) 
type treeEntryWithPath struct { diff --git a/pkg/gitqlite/git_files.go b/pkg/gitqlite/git_files.go index 4151868d..f6ff8516 100644 --- a/pkg/gitqlite/git_files.go +++ b/pkg/gitqlite/git_files.go @@ -5,7 +5,7 @@ import ( "io" "path" - git "github.com/libgit2/git2go/v30" + git "github.com/libgit2/git2go/v31" "github.com/mattn/go-sqlite3" ) diff --git a/pkg/gitqlite/git_files_test.go b/pkg/gitqlite/git_files_test.go index 724fc32b..cec48817 100644 --- a/pkg/gitqlite/git_files_test.go +++ b/pkg/gitqlite/git_files_test.go @@ -6,7 +6,7 @@ import ( "strconv" "testing" - git "github.com/libgit2/git2go/v30" + git "github.com/libgit2/git2go/v31" ) func TestFileCounts(t *testing.T) { diff --git a/pkg/gitqlite/git_log.go b/pkg/gitqlite/git_log.go index 14e4c0a1..34242071 100644 --- a/pkg/gitqlite/git_log.go +++ b/pkg/gitqlite/git_log.go @@ -4,7 +4,7 @@ import ( "fmt" "time" - git "github.com/libgit2/git2go/v30" + git "github.com/libgit2/git2go/v31" "github.com/mattn/go-sqlite3" ) diff --git a/pkg/gitqlite/git_log_cli_test.go b/pkg/gitqlite/git_log_cli_test.go index 017b68b4..dfbb96f7 100644 --- a/pkg/gitqlite/git_log_cli_test.go +++ b/pkg/gitqlite/git_log_cli_test.go @@ -3,7 +3,7 @@ package gitqlite import ( "testing" - git "github.com/libgit2/git2go/v30" + git "github.com/libgit2/git2go/v31" ) func TestCommitCounts(t *testing.T) { diff --git a/pkg/gitqlite/git_log_test.go b/pkg/gitqlite/git_log_test.go index d3d15b94..6b67a093 100644 --- a/pkg/gitqlite/git_log_test.go +++ b/pkg/gitqlite/git_log_test.go @@ -4,7 +4,7 @@ import ( "fmt" "testing" - git "github.com/libgit2/git2go/v30" + git "github.com/libgit2/git2go/v31" ) func TestCommits(t *testing.T) { diff --git a/pkg/gitqlite/git_stats.go b/pkg/gitqlite/git_stats.go index 0432e0ab..8685a047 100644 --- a/pkg/gitqlite/git_stats.go +++ b/pkg/gitqlite/git_stats.go @@ -4,7 +4,7 @@ import ( "fmt" "io" - git "github.com/libgit2/git2go/v30" + git "github.com/libgit2/git2go/v31" "github.com/mattn/go-sqlite3" ) diff --git 
a/pkg/gitqlite/git_stats_iter.go b/pkg/gitqlite/git_stats_iter.go index 90297ff4..eb9c04cc 100644 --- a/pkg/gitqlite/git_stats_iter.go +++ b/pkg/gitqlite/git_stats_iter.go @@ -3,7 +3,7 @@ package gitqlite import ( "io" - git "github.com/libgit2/git2go/v30" + git "github.com/libgit2/git2go/v31" ) type commitStat struct { diff --git a/pkg/gitqlite/git_tags.go b/pkg/gitqlite/git_tags.go index 733166ee..b792f294 100644 --- a/pkg/gitqlite/git_tags.go +++ b/pkg/gitqlite/git_tags.go @@ -3,7 +3,7 @@ package gitqlite import ( "fmt" - git "github.com/libgit2/git2go/v30" + git "github.com/libgit2/git2go/v31" "github.com/mattn/go-sqlite3" ) diff --git a/pkg/gitqlite/gitqlite_test.go b/pkg/gitqlite/gitqlite_test.go index ebafd6eb..8c462aef 100644 --- a/pkg/gitqlite/gitqlite_test.go +++ b/pkg/gitqlite/gitqlite_test.go @@ -7,7 +7,7 @@ import ( "os" "testing" - git "github.com/libgit2/git2go/v30" + git "github.com/libgit2/git2go/v31" "github.com/mattn/go-sqlite3" ) diff --git a/scripts/install_libgit2.sh b/scripts/install_libgit2.sh index e5c92242..c0422f20 100755 --- a/scripts/install_libgit2.sh +++ b/scripts/install_libgit2.sh @@ -2,7 +2,7 @@ git clone https://github.com/libgit2/libgit2.git ~/libgit2 cd ~/libgit2 -git checkout v1.0.1 +git checkout v1.1.0 mkdir build && cd build cmake .. -DBUILD_CLAR=0 -DBUILD_SHARED_LIBS=0 cmake --build . 
--target install From f52442cc10103e41ce7d77942ff2db26d15568c7 Mon Sep 17 00:00:00 2001 From: Patrick DeVivo Date: Sat, 12 Dec 2020 19:12:22 -0500 Subject: [PATCH 16/20] fix lint issue --- pkg/ghqlite/pull_requests_vtab.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/ghqlite/pull_requests_vtab.go b/pkg/ghqlite/pull_requests_vtab.go index 4b5932ba..d710d04d 100644 --- a/pkg/ghqlite/pull_requests_vtab.go +++ b/pkg/ghqlite/pull_requests_vtab.go @@ -298,7 +298,7 @@ func (vc *pullRequestsCursor) Filter(idxNum int, idxStr string, vals []interface orderBy = "updated" } - direction := "asc" + var direction string if idxNum <= 0 { direction = "desc" } else { From c1ef58f94ddc6789f766bca2e6d32662e6c441f8 Mon Sep 17 00:00:00 2001 From: Patrick DeVivo Date: Sat, 12 Dec 2020 19:14:43 -0500 Subject: [PATCH 17/20] Update repos_vtab.go --- pkg/ghqlite/repos_vtab.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/ghqlite/repos_vtab.go b/pkg/ghqlite/repos_vtab.go index 2911a969..30ef37e1 100644 --- a/pkg/ghqlite/repos_vtab.go +++ b/pkg/ghqlite/repos_vtab.go @@ -198,7 +198,7 @@ func (vc *reposCursor) Filter(idxNum int, idxStr string, vals []interface{}) err iter := NewRepoIterator(owner, vc.table.module.ownerType, GitHubIteratorOptions{ Token: vc.table.module.options.Token, PerPage: 100, - PreloadPages: 10, + PreloadPages: 5, RateLimiter: vc.table.module.options.RateLimiter, }) vc.iter = iter From 0a4d283bf299ce73a6b265bf20b20bfbe274e0ef Mon Sep 17 00:00:00 2001 From: Patrick DeVivo Date: Sat, 12 Dec 2020 19:33:24 -0500 Subject: [PATCH 18/20] Update pull_requests_vtab.go --- pkg/ghqlite/pull_requests_vtab.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/ghqlite/pull_requests_vtab.go b/pkg/ghqlite/pull_requests_vtab.go index d710d04d..0ece843a 100644 --- a/pkg/ghqlite/pull_requests_vtab.go +++ b/pkg/ghqlite/pull_requests_vtab.go @@ -59,7 +59,7 @@ func (m *PullRequestsModule) Create(c *sqlite3.SQLiteConn, args 
[]string) (sqlit head_ref TEXT, head_sha TEXT, head_repo_owner TEXT, - head_repo_name, + head_repo_name TEXT, base_label TEXT, base_ref TEXT, base_sha TEXT, From 8d362596e662fe753b67a184b0483e7a9519e712 Mon Sep 17 00:00:00 2001 From: Patrick DeVivo Date: Sat, 12 Dec 2020 19:35:54 -0500 Subject: [PATCH 19/20] Update README.md --- README.md | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 60 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index de493c6e..78dedfda 100644 --- a/README.md +++ b/README.md @@ -182,12 +182,17 @@ The following tables make GitHub API requests to retrieve data during query exec As such, you should ensure the `GITHUB_TOKEN` environment variable is set so that API requests are authenticated. Unauthenticated API requests (no `GITHUB_TOKEN`) are subject to a stricter rate limit by GitHub, and may take longer to execute (query execution will try to respect the applicable rate limit). -##### `repos` +##### `github_org_repos` and github_user_repos -This table will only be available if either `--github-org` or `--github-user` are provided. -If both are provided, `--github-org` will be used. -Each specifies the org or user to query repositories from when scanning the `repos` table. 
-In other words, this table returns the repositories belonging to a GitHub organization or user, with the following schema: +These tables can be queried as table-valued functions expecting a single parameter, like so: + +```sql +-- return all repos from a github *org* +SELECT * FROM github_org_repos('augmentable-dev') + +-- return all repos from a github *user* +SELECT * FROM github_user_repos('patrickdevivo') +``` | Column | Type | |-------------------|----------| @@ -219,6 +224,56 @@ In other words, this table returns the repositories belonging to a GitHub organi | updated_at | DATETIME | | permissions | TEXT | +##### `github_pull_requests` + +This table expects 2 parameters, `github_pull_requests('augmentable-dev', 'askgit')`: + +```sql +SELECT count(*) FROM github_pull_requests('augmentable-dev', 'askgit') WHERE state = 'open' +``` + +| Column | Type | +|---------------------------|----------| +| id | INT | +| node_id | TEXT | +| number | INT | +| state | TEXT | +| locked | BOOL | +| title | TEXT | +| user_login | TEXT | +| body | TEXT | +| labels | TEXT | +| active_lock_reason | TEXT | +| created_at | DATETIME | +| updated_at | DATETIME | +| closed_at | DATETIME | +| merged_at | DATETIME | +| merge_commit_sha | TEXT | +| assignee_login | TEXT | +| assignees | TEXT | +| requested_reviewer_logins | TEXT | +| head_label | TEXT | +| head_ref | TEXT | +| head_sha | TEXT | +| head_repo_owner | TEXT | +| head_repo_name | TEXT | +| base_label | TEXT | +| base_ref | TEXT | +| base_sha | TEXT | +| base_repo_owner | TEXT | +| base_repo_name | TEXT | +| author_association | TEXT | +| merged | BOOL | +| mergeable | BOOL | +| mergeable_state | BOOL | +| merged_by_login | TEXT | +| comments | INT | +| maintainer_can_modify | BOOL | +| commits | INT | +| additions | INT | +| deletions | INT | +| changed_files | INT | + ### Example Queries This will return all commits in the history of the currently checked out branch/commit of the repo. 
From 5809ea8e314a76fab3e08e88f5e094d08a7aed04 Mon Sep 17 00:00:00 2001 From: Patrick DeVivo Date: Sat, 12 Dec 2020 19:39:30 -0500 Subject: [PATCH 20/20] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 78dedfda..7d18cba7 100644 --- a/README.md +++ b/README.md @@ -182,7 +182,7 @@ The following tables make GitHub API requests to retrieve data during query exec As such, you should ensure the `GITHUB_TOKEN` environment variable is set so that API requests are authenticated. Unauthenticated API requests (no `GITHUB_TOKEN`) are subject to a stricter rate limit by GitHub, and may take longer to execute (query execution will try to respect the applicable rate limit). -##### `github_org_repos` and github_user_repos +##### `github_org_repos` and `github_user_repos` These tables can be queried as table-valued functions expecting a single parameter, like so: