From 735861811a4cd943f2c97401a6229ffe5fdee6f7 Mon Sep 17 00:00:00 2001 From: Richard Gomez Date: Wed, 1 Jan 2025 16:07:39 -0500 Subject: [PATCH] feat(gitlab): improve detector logic --- pkg/detectors/gitlab/v1/gitlab.go | 104 ++++++++++++++++-------------- 1 file changed, 55 insertions(+), 49 deletions(-) diff --git a/pkg/detectors/gitlab/v1/gitlab.go b/pkg/detectors/gitlab/v1/gitlab.go index d8a9af03a51d..6b13d947ec20 100644 --- a/pkg/detectors/gitlab/v1/gitlab.go +++ b/pkg/detectors/gitlab/v1/gitlab.go @@ -22,21 +22,24 @@ type Scanner struct { // Ensure the Scanner satisfies the interfaces at compile time. var ( - _ detectors.Detector = (*Scanner)(nil) - _ detectors.EndpointCustomizer = (*Scanner)(nil) - _ detectors.Versioner = (*Scanner)(nil) - _ detectors.CloudProvider = (*Scanner)(nil) + _ interface { + detectors.Detector + detectors.Versioner + detectors.CloudProvider + detectors.EndpointCustomizer + } = (*Scanner)(nil) ) -func (Scanner) Version() int { return 1 } -func (Scanner) CloudEndpoint() string { return "https://gitlab.com" } +func (s Scanner) Type() detectorspb.DetectorType { + return detectorspb.DetectorType_Gitlab +} -var ( - defaultClient = common.SaneHttpClient() - keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"gitlab"}) + `\b([a-zA-Z0-9\-=_]{20,22})\b`) +func (s Scanner) Description() string { + return "GitLab is a web-based DevOps lifecycle tool that provides a Git repository manager providing wiki, issue-tracking, and CI/CD pipeline features. GitLab API tokens can be used to access and modify repository data and other resources." +} - BlockedUserMessage = "403 Forbidden - Your account has been blocked" -) +func (Scanner) Version() int { return 1 } +func (Scanner) CloudEndpoint() string { return "https://gitlab.com" } // Keywords are used for efficiently pre-filtering chunks. // Use identifiers in the secret preferably, or the provider name. @@ -44,58 +47,70 @@ func (s Scanner) Keywords() []string { return []string{"gitlab"} } +var ( + keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"gitlab"}) + `\b([a-zA-Z0-9\-=_]{20,22})\b`) + BlockedUserMessage = "403 Forbidden - Your account has been blocked" +) + // FromData will find and optionally verify Gitlab secrets in a given set of bytes. func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { dataStr := string(data) - matches := keyPat.FindAllStringSubmatch(dataStr, -1) - for _, match := range matches { + // Deduplicate matches. + uniqueMatches := make(map[string]struct{}) + for _, match := range keyPat.FindAllStringSubmatch(dataStr, -1) { // ignore v2 detectors which have a prefix of `glpat-` if strings.Contains(match[0], "glpat-") { continue } - resMatch := strings.TrimSpace(match[1]) - s1 := detectors.Result{ - DetectorType: detectorspb.DetectorType_Gitlab, - Raw: []byte(resMatch), - ExtraData: map[string]string{}, + m := match[1] + if detectors.StringShannonEntropy(m) < 3.75 { + continue } - s1.ExtraData = map[string]string{ - "rotation_guide": "https://howtorotate.com/docs/tutorials/gitlab/", - "version": fmt.Sprintf("%d", s.Version()), + uniqueMatches[m] = struct{}{} + } + + // Process matches. + for token := range uniqueMatches { + r := detectors.Result{ + DetectorType: s.Type(), + Raw: []byte(token), + ExtraData: map[string]string{ + "rotation_guide": "https://howtorotate.com/docs/tutorials/gitlab/", + "version": fmt.Sprintf("%d", s.Version()), + }, } if verify { - isVerified, extraData, analysisInfo, verificationErr := s.verifyGitlab(ctx, resMatch) - s1.Verified = isVerified + if s.client == nil { + s.client = common.SaneHttpClient() + } + + isVerified, extraData, analysisInfo, verificationErr := s.verifyGitlab(ctx, s.client, token) + r.Verified = isVerified for key, value := range extraData { - s1.ExtraData[key] = value + r.ExtraData[key] = value } - s1.SetVerificationError(verificationErr, resMatch) - s1.AnalysisInfo = analysisInfo + r.SetVerificationError(verificationErr, token) + r.AnalysisInfo = analysisInfo } - results = append(results, s1) + results = append(results, r) } return results, nil } -func (s Scanner) verifyGitlab(ctx context.Context, resMatch string) (bool, map[string]string, map[string]string, error) { +func (s Scanner) verifyGitlab(ctx context.Context, client *http.Client, resMatch string) (bool, map[string]string, map[string]string, error) { // there are 4 read 'scopes' for a gitlab token: api, read_user, read_repo, and read_registry // they all grant access to different parts of the API. I couldn't find an endpoint that every // one of these scopes has access to, so we just check an example endpoint for each scope. If any // of them contain data, we know we have a valid key, but if they all fail, we don't - - client := s.client - if client == nil { - client = defaultClient - } for _, baseURL := range s.Endpoints() { // test `read_user` scope - req, err := http.NewRequestWithContext(ctx, "GET", baseURL+"/api/v4/user", nil) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, baseURL+"/api/v4/user", nil) if err != nil { continue } @@ -105,8 +120,10 @@ func (s Scanner) verifyGitlab(ctx context.Context, resMatch string) (bool, map[s if err != nil { return false, nil, nil, err } - - defer res.Body.Close() + defer func() { + _, _ = io.Copy(io.Discard, res.Body) + _ = res.Body.Close() + }() bodyBytes, err := io.ReadAll(res.Body) if err != nil { @@ -118,11 +135,9 @@ func (s Scanner) verifyGitlab(ctx context.Context, resMatch string) (bool, map[s "host": baseURL, } - // 200 means good key and has `read_user` scope - // 403 means good key but not the right scope - // 401 is bad key switch res.StatusCode { case http.StatusOK: + // 200 means good key and has `read_user` scope return json.Valid(bodyBytes), nil, analysisInfo, nil case http.StatusForbidden: // check if the user account is blocked or not @@ -139,18 +154,9 @@ func (s Scanner) verifyGitlab(ctx context.Context, resMatch string) (bool, map[s // Nothing to do; zero values are the ones we want return false, nil, nil, nil default: - return false, nil, nil, fmt.Errorf("unexpected HTTP response status %d", res.StatusCode) + return false, nil, nil, fmt.Errorf("unexpected HTTP response status %d, body = %q", res.StatusCode, string(bodyBytes)) } - } return false, nil, nil, nil } - -func (s Scanner) Type() detectorspb.DetectorType { - return detectorspb.DetectorType_Gitlab -} - -func (s Scanner) Description() string { - return "GitLab is a web-based DevOps lifecycle tool that provides a Git repository manager providing wiki, issue-tracking, and CI/CD pipeline features. GitLab API tokens can be used to access and modify repository data and other resources." -}