Skip to content

Commit

Permalink
feat(gitlab): improve detector logic
Browse files Browse the repository at this point in the history
  • Loading branch information
rgmz committed Feb 2, 2025
1 parent a407ee1 commit 7358618
Showing 1 changed file with 55 additions and 49 deletions.
104 changes: 55 additions & 49 deletions pkg/detectors/gitlab/v1/gitlab.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,80 +22,95 @@ type Scanner struct {

// Ensure the Scanner satisfies the interfaces at compile time.
var (
_ detectors.Detector = (*Scanner)(nil)
_ detectors.EndpointCustomizer = (*Scanner)(nil)
_ detectors.Versioner = (*Scanner)(nil)
_ detectors.CloudProvider = (*Scanner)(nil)
_ interface {
detectors.Detector
detectors.Versioner
detectors.CloudProvider
detectors.EndpointCustomizer
} = (*Scanner)(nil)
)

// Version returns the version number of this detector, which is always 1.
func (Scanner) Version() int { return 1 }
// CloudEndpoint returns the fixed endpoint URL "https://gitlab.com".
func (Scanner) CloudEndpoint() string { return "https://gitlab.com" }
// Type returns the detector type of this scanner, detectorspb.DetectorType_Gitlab.
func (s Scanner) Type() detectorspb.DetectorType {
return detectorspb.DetectorType_Gitlab
}

var (
defaultClient = common.SaneHttpClient()
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"gitlab"}) + `\b([a-zA-Z0-9\-=_]{20,22})\b`)
// Description returns a fixed, human-readable summary of what GitLab is and
// what GitLab API tokens can be used for.
func (s Scanner) Description() string {
return "GitLab is a web-based DevOps lifecycle tool that provides a Git repository manager providing wiki, issue-tracking, and CI/CD pipeline features. GitLab API tokens can be used to access and modify repository data and other resources."
}

BlockedUserMessage = "403 Forbidden - Your account has been blocked"
)
// Version returns the version number of this detector, which is always 1.
func (Scanner) Version() int { return 1 }
// CloudEndpoint returns the fixed endpoint URL "https://gitlab.com".
func (Scanner) CloudEndpoint() string { return "https://gitlab.com" }

// Keywords returns the keywords used for efficiently pre-filtering chunks.
// Use identifiers in the secret preferably, or the provider name.
// This detector returns the single keyword "gitlab".
func (s Scanner) Keywords() []string {
return []string{"gitlab"}
}

var (
// keyPat captures, as its first submatch, a 20-22 character token made of
// ASCII letters, digits, '-', '=' and '_' that sits between word boundaries.
// The token must follow whatever pattern detectors.PrefixRegex builds for
// the "gitlab" keyword.
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"gitlab"}) + `\b([a-zA-Z0-9\-=_]{20,22})\b`)
// BlockedUserMessage is the message text for a blocked account.
// NOTE(review): presumably compared against the 403 response body during
// verification — confirm against verifyGitlab.
BlockedUserMessage = "403 Forbidden - Your account has been blocked"
)

// FromData will find and optionally verify GitLab secrets in a given set of bytes.
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
dataStr := string(data)

matches := keyPat.FindAllStringSubmatch(dataStr, -1)
for _, match := range matches {
// Deduplicate matches.
uniqueMatches := make(map[string]struct{})
for _, match := range keyPat.FindAllStringSubmatch(dataStr, -1) {
// Ignore tokens with the `glpat-` prefix; those belong to the v2 detector.
if strings.Contains(match[0], "glpat-") {
continue
}
resMatch := strings.TrimSpace(match[1])

s1 := detectors.Result{
DetectorType: detectorspb.DetectorType_Gitlab,
Raw: []byte(resMatch),
ExtraData: map[string]string{},
m := match[1]
if detectors.StringShannonEntropy(m) < 3.75 {
continue
}
s1.ExtraData = map[string]string{
"rotation_guide": "https://howtorotate.com/docs/tutorials/gitlab/",
"version": fmt.Sprintf("%d", s.Version()),
uniqueMatches[m] = struct{}{}
}

// Process matches.
for token := range uniqueMatches {
r := detectors.Result{
DetectorType: s.Type(),
Raw: []byte(token),
ExtraData: map[string]string{
"rotation_guide": "https://howtorotate.com/docs/tutorials/gitlab/",
"version": fmt.Sprintf("%d", s.Version()),
},
}

if verify {
isVerified, extraData, analysisInfo, verificationErr := s.verifyGitlab(ctx, resMatch)
s1.Verified = isVerified
if s.client == nil {
s.client = common.SaneHttpClient()
}

isVerified, extraData, analysisInfo, verificationErr := s.verifyGitlab(ctx, s.client, token)
r.Verified = isVerified
for key, value := range extraData {
s1.ExtraData[key] = value
r.ExtraData[key] = value
}

s1.SetVerificationError(verificationErr, resMatch)
s1.AnalysisInfo = analysisInfo
r.SetVerificationError(verificationErr, token)
r.AnalysisInfo = analysisInfo
}

results = append(results, s1)
results = append(results, r)
}

return results, nil
}

func (s Scanner) verifyGitlab(ctx context.Context, resMatch string) (bool, map[string]string, map[string]string, error) {
func (s Scanner) verifyGitlab(ctx context.Context, client *http.Client, resMatch string) (bool, map[string]string, map[string]string, error) {
// There are four read scopes for a GitLab token: api, read_user, read_repository, and read_registry.
// Each grants access to a different part of the API. I couldn't find an endpoint that every
// one of these scopes can access, so we just check an example endpoint for each scope. If any
// of them returns data, we know we have a valid key; if they all fail, we don't.

client := s.client
if client == nil {
client = defaultClient
}
for _, baseURL := range s.Endpoints() {
// test `read_user` scope
req, err := http.NewRequestWithContext(ctx, "GET", baseURL+"/api/v4/user", nil)
req, err := http.NewRequestWithContext(ctx, http.MethodGet, baseURL+"/api/v4/user", nil)
if err != nil {
continue
}
Expand All @@ -105,8 +120,10 @@ func (s Scanner) verifyGitlab(ctx context.Context, resMatch string) (bool, map[s
if err != nil {
return false, nil, nil, err
}

defer res.Body.Close()
defer func() {
_, _ = io.Copy(io.Discard, res.Body)
_ = res.Body.Close()
}()

bodyBytes, err := io.ReadAll(res.Body)
if err != nil {
Expand All @@ -118,11 +135,9 @@ func (s Scanner) verifyGitlab(ctx context.Context, resMatch string) (bool, map[s
"host": baseURL,
}

// 200 means good key and has `read_user` scope
// 403 means good key but not the right scope
// 401 is bad key
switch res.StatusCode {
case http.StatusOK:
// 200 means good key and has `read_user` scope
return json.Valid(bodyBytes), nil, analysisInfo, nil
case http.StatusForbidden:
// check if the user account is blocked or not
Expand All @@ -139,18 +154,9 @@ func (s Scanner) verifyGitlab(ctx context.Context, resMatch string) (bool, map[s
// Nothing to do; zero values are the ones we want
return false, nil, nil, nil
default:
return false, nil, nil, fmt.Errorf("unexpected HTTP response status %d", res.StatusCode)
return false, nil, nil, fmt.Errorf("unexpected HTTP response status %d, body = %q", res.StatusCode, string(bodyBytes))
}

}

return false, nil, nil, nil
}

// Type returns the detector type of this scanner, detectorspb.DetectorType_Gitlab.
func (s Scanner) Type() detectorspb.DetectorType {
return detectorspb.DetectorType_Gitlab
}

// Description returns a fixed, human-readable summary of what GitLab is and
// what GitLab API tokens can be used for.
func (s Scanner) Description() string {
return "GitLab is a web-based DevOps lifecycle tool that provides a Git repository manager providing wiki, issue-tracking, and CI/CD pipeline features. GitLab API tokens can be used to access and modify repository data and other resources."
}

0 comments on commit 7358618

Please sign in to comment.