diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cce8f97..b916273 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,6 +9,7 @@ permissions: jobs: run-test: + timeout-minutes: 15 runs-on: ubuntu-latest steps: - name: Checkout Source @@ -30,6 +31,7 @@ jobs: build-container: runs-on: ubuntu-latest + timeout-minutes: 15 steps: - name: Checkout Source uses: actions/checkout@v3 diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index c77739b..1341ade 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -24,6 +24,7 @@ permissions: jobs: analyze: if: "!contains(github.event.commits[0].message, '[noci]')" + timeout-minutes: 30 name: Analyze runs-on: ubuntu-latest permissions: diff --git a/.github/workflows/container.yml b/.github/workflows/container.yml index 47526a3..b8097be 100644 --- a/.github/workflows/container.yml +++ b/.github/workflows/container.yml @@ -19,6 +19,7 @@ env: jobs: build: if: "!contains(github.event.commits[0].message, '[noci]')" + timeout-minutes: 30 runs-on: ubuntu-latest permissions: contents: read diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml index b0dedc4..a2d3af6 100644 --- a/.github/workflows/dependency-review.yml +++ b/.github/workflows/dependency-review.yml @@ -13,6 +13,7 @@ permissions: jobs: dependency-review: runs-on: ubuntu-latest + timeout-minutes: 15 steps: - name: 'Checkout Repository' uses: actions/checkout@v3 diff --git a/.github/workflows/golangci-lint.yml b/.github/workflows/golangci-lint.yml index cd590c6..3afb839 100644 --- a/.github/workflows/golangci-lint.yml +++ b/.github/workflows/golangci-lint.yml @@ -10,6 +10,7 @@ jobs: golangci: name: lint runs-on: ubuntu-latest + timeout-minutes: 30 steps: - uses: actions/setup-go@v4 with: diff --git a/.github/workflows/goreleaser.yml b/.github/workflows/goreleaser.yml index bcc1236..5432edf 100644 --- a/.github/workflows/goreleaser.yml +++ 
b/.github/workflows/goreleaser.yml @@ -16,6 +16,7 @@ env: jobs: goreleaser: + timeout-minutes: 60 outputs: hashes: ${{ steps.hash.outputs.hashes }} permissions: @@ -90,6 +91,7 @@ jobs: checksum_file=$(echo "$ARTIFACTS" | jq -r '.[] | select (.type=="Checksum") | .path') echo "hashes=$(cat $checksum_file | base64 -w0)" >> "$GITHUB_OUTPUT" provenance: + timeout-minutes: 30 needs: [goreleaser] permissions: actions: read # To read the workflow path. diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index a013ded..23f018f 100644 --- a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -20,6 +20,7 @@ permissions: read-all jobs: analysis: name: Scorecard analysis + timeout-minutes: 30 runs-on: ubuntu-latest permissions: security-events: write diff --git a/.github/workflows/secret_scan.yml b/.github/workflows/secret_scan.yml index 13b1993..37b159b 100644 --- a/.github/workflows/secret_scan.yml +++ b/.github/workflows/secret_scan.yml @@ -10,6 +10,7 @@ permissions: jobs: trufflehog: runs-on: ubuntu-latest + timeout-minutes: 30 steps: - name: Checkout Source uses: actions/checkout@v2 diff --git a/README.md b/README.md index 7e7368b..dc7addb 100644 --- a/README.md +++ b/README.md @@ -112,6 +112,17 @@ vet scan --github https://github.com/safedep/vet **Note:** You may need to enable [Dependency Graph](https://docs.github.com/en/code-security/supply-chain-security/understanding-your-software-supply-chain/about-the-dependency-graph) at repository or organization level for Github repository scanning to work. +#### Scanning Github Organization + +> You must setup the required access for scanning private repositories +> before scanning organizations + +```bash +vet scan --github-org https://github.com/safedep +``` + +> **Note:** `vet` will block and wait if it encounters Github secondary rate limit. 
+ #### Scanning Package URL - To scan a [purl](https://github.com/package-url/purl-spec) @@ -195,6 +206,10 @@ make quick-vet go test -v ./... ``` +## Star History + +[![Star History Chart](https://api.star-history.com/svg?repos=safedep/vet&type=Date)](https://star-history.com/#safedep/vet&Date) + ## 🔖 References - [https://github.com/google/osv-scanner](https://github.com/google/osv-scanner) diff --git a/connect.go b/connect.go index 5843738..62ae6e1 100644 --- a/connect.go +++ b/connect.go @@ -51,7 +51,7 @@ func connectGithubCommand() *cobra.Command { ui.PrintSuccess("Github Access Token configured and saved at '%s' for your convenience.", connect.GetConfigFileHint()) ui.PrintSuccess("You can use vet to scan your github repositories") ui.PrintSuccess("Run the command to scan your github repository") - ui.PrintSuccess("\tvet scan https://github.com//") + ui.PrintSuccess("\tvet scan --github https://github.com//") os.Exit(1) return nil @@ -80,7 +80,7 @@ func getAccessTokenFromUser() (string, error) { } password := &survey.Password{ - Message: "Paste your access token: ", + Message: "Provide your access token: ", } var accessToken string diff --git a/internal/connect/github.go b/internal/connect/github.go index 5e9c00e..555acf6 100644 --- a/internal/connect/github.go +++ b/internal/connect/github.go @@ -2,7 +2,9 @@ package connect import ( "context" + "net/http" "os" + "strconv" "strings" "github.com/gofri/go-github-ratelimit/github_ratelimit" @@ -44,8 +46,13 @@ func GetGithubClient() (*github.Client, error) { } if utils.IsEmptyString(github_token) { + rateLimitedClient, err := githubRateLimitedClient(http.DefaultTransport) + if err != nil { + return nil, err + } + logger.Debugf("Creating a Github client without credential") - return github.NewClient(nil), nil + return github.NewClient(rateLimitedClient), nil } tokenSource := oauth2.StaticTokenSource(&oauth2.Token{ @@ -53,11 +60,43 @@ func GetGithubClient() (*github.Client, error) { }) baseClient := 
oauth2.NewClient(context.Background(), tokenSource) - rateLimitedClient, err := github_ratelimit.NewRateLimitWaiterClient(baseClient.Transport) + rateLimitedClient, err := githubRateLimitedClient(baseClient.Transport) if err != nil { return nil, err } - logger.Debugf("Created a new Github client with rate limit waiter") + logger.Debugf("Created a new Github client with credential") return github.NewClient(rateLimitedClient), nil } + +// This is currently effective only for Github secondary rate limits +// https://docs.github.com/en/rest/overview/rate-limits-for-the-rest-api +func githubRateLimitedClient(transport http.RoundTripper) (*http.Client, error) { + var options []github_ratelimit.Option + + if !githubClientRateLimitBlockDisabled() { + logger.Debugf("Adding Github rate limit callbacks to client") + + options = append(options, github_ratelimit.WithLimitDetectedCallback(func(cc *github_ratelimit.CallbackContext) { + logger.Infof("Github rate limit detected, sleep until: %s", cc.SleepUntil) + })) + } + + rateLimitedClient, err := github_ratelimit.NewRateLimitWaiterClient(transport, options...) + if err != nil { + return nil, err + } + + return rateLimitedClient, err +} + +// We implement this as an internal feature i.e. 
// githubClientRateLimitBlockDisabled reports whether the user opted out of
// Github rate limit blocking through the VET_GITHUB_DISABLE_RATE_LIMIT_BLOCKING
// environment variable.
//
// The value is interpreted with strconv.ParseBool. An unset variable, or a
// value ParseBool rejects, counts as "not disabled".
//
// This is intentionally an internal switch without a config or UI option:
// blocking is meant to be the default behaviour unless the user explicitly
// disables it.
func githubClientRateLimitBlockDisabled() bool {
	disabled, parseErr := strconv.ParseBool(os.Getenv("VET_GITHUB_DISABLE_RATE_LIMIT_BLOCKING"))
	return parseErr == nil && disabled
}
Match and capture the name pattern := regexp.MustCompile(`^((.+):)?((.+)/)?(.*)$`) matches := pattern.FindStringSubmatch(input) - version := matches[5] - - if matches[5] == "" { - version = "0.0.0" - } if len(matches) != 6 { return "", "", "", false } + version := matches[5] + if version == "" { + version = "0.0.0" + } + return matches[2], matches[4], version, true } diff --git a/pkg/readers/github_org_reader.go b/pkg/readers/github_org_reader.go new file mode 100644 index 0000000..d58d6cc --- /dev/null +++ b/pkg/readers/github_org_reader.go @@ -0,0 +1,158 @@ +package readers + +import ( + "context" + "errors" + "net/url" + "strings" + + "github.com/google/go-github/v54/github" + "github.com/safedep/vet/pkg/common/logger" + "github.com/safedep/vet/pkg/models" +) + +const ( + githubOrgReaderPerPageSize = 100 +) + +type GithubOrgReaderConfig struct { + OrganizationURL string + IncludeArchived bool + MaxRepositories int +} + +type githubOrgReader struct { + client *github.Client + config *GithubOrgReaderConfig + scannedRepoCounter int +} + +// NewGithubOrgReader creates a [PackageManifestReader] which enumerates +// a Github org, identifying repositories and scanning them using [githubReader] +func NewGithubOrgReader(client *github.Client, + config *GithubOrgReaderConfig) (PackageManifestReader, error) { + return &githubOrgReader{ + client: client, + config: config, + scannedRepoCounter: 0, + }, nil +} + +func (p *githubOrgReader) Name() string { + return "Github Organization Package Manifest Reader" +} + +func (p *githubOrgReader) EnumManifests(handler func(*models.PackageManifest, + PackageReader) error) error { + ctx := context.Background() + + gitOrg, err := githubOrgFromURL(p.config.OrganizationURL) + if err != nil { + return err + } + + listOptions := &github.ListOptions{ + Page: 0, + PerPage: githubOrgReaderPerPageSize, + } + + for { + if err := ctx.Err(); err != nil { + logger.Errorf("Context error: %v", err) + break + } + + if p.isRepoLimitReached() { + 
logger.Infof("Stopping repository enumeration due to max %d limit reached", + p.config.MaxRepositories) + break + } + + repositories, resp, err := p.client.Repositories.ListByOrg(ctx, gitOrg, + &github.RepositoryListByOrgOptions{ + ListOptions: *listOptions, + }) + + if err != nil { + logger.Errorf("Failed to list Github org: %v", err) + break + } + + logger.Infof("Enumerated %d repositories with page: %d and next page: %d", + len(repositories), listOptions.Page, resp.NextPage) + + err = p.handleRepositoryBatch(repositories, handler) + if err != nil { + logger.Errorf("Failed to handle repository batch: %v", err) + break + } + + if resp.NextPage == 0 { + break + } + + listOptions.Page = resp.NextPage + } + + return nil +} + +func (p *githubOrgReader) isRepoLimitReached() bool { + return (p.config.MaxRepositories != 0) && + (p.scannedRepoCounter >= p.config.MaxRepositories) +} + +// withIncrementedRepoCount executes fn while incrementing the repository +// count. It returns a boolean indicating if repo count is reached +func (p *githubOrgReader) withIncrementedRepoCount(fn func()) bool { + fn() + p.scannedRepoCounter = p.scannedRepoCounter + 1 + + return p.isRepoLimitReached() +} + +func (p *githubOrgReader) handleRepositoryBatch(repositories []*github.Repository, + handler PackageManifestHandlerFn) error { + + var repoUrls []string + for _, repo := range repositories { + breach := p.withIncrementedRepoCount(func() { + repoUrls = append(repoUrls, repo.GetCloneURL()) + }) + + if breach { + break + } + } + + if len(repoUrls) == 0 { + return nil + } + + githubReader, err := NewGithubReader(p.client, repoUrls, "") + if err != nil { + return err + } + + return githubReader.EnumManifests(handler) +} + +// Making this exposed so that we can test this independently +func githubOrgFromURL(githubUrl string) (string, error) { + u, err := url.Parse(githubUrl) + if err != nil { + return "", err + } + + // Handling special case which is acceptable to url.Parse + if u.Scheme == "" 
{ + return "", errors.New("rejecting URL without a scheme") + } + + parts := strings.Split(u.Path, "/") + if len(parts) < 2 || parts[1] == "" { + return "", errors.New("rejecting URL without an org") + } + + return parts[1], nil +} diff --git a/pkg/readers/github_org_reader_test.go b/pkg/readers/github_org_reader_test.go new file mode 100644 index 0000000..44552be --- /dev/null +++ b/pkg/readers/github_org_reader_test.go @@ -0,0 +1,61 @@ +package readers + +import ( + "errors" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestOrgFromURL(t *testing.T) { + cases := []struct { + name string + url string + org string + err error + }{ + { + "URL is invalid", + "aaaa", + "", + errors.New("rejecting URL without a scheme"), + }, + { + "URL does not have org", + "https://github.com/", + "", + errors.New("rejecting URL without an org"), + }, + { + "URL does not have org slash", + "https://github.com", + "", + errors.New("rejecting URL without an org"), + }, + + { + "URL has org", + "https://github.com/org1", + "org1", + nil, + }, + { + "URL has org++", + "https://github.com/org1/repo.git?x=1", + "org1", + nil, + }, + } + + for _, test := range cases { + t.Run(test.name, func(t *testing.T) { + org, err := githubOrgFromURL(test.url) + + if test.err != nil { + assert.ErrorContains(t, err, test.err.Error()) + } else { + assert.Equal(t, test.org, org) + } + }) + } +} diff --git a/pkg/readers/github_reader.go b/pkg/readers/github_reader.go index 8def506..12223d3 100644 --- a/pkg/readers/github_reader.go +++ b/pkg/readers/github_reader.go @@ -51,6 +51,8 @@ func (p *githubReader) EnumManifests(handler func(*models.PackageManifest, // github urls, which we may while scanning an entire org, we want to make // as much progress as possible while logging errors for _, github_url := range p.github_urls { + logger.Debugf("Processing Github URL: %s", github_url) + gitURL, err := giturl.NewGitURL(github_url) if err != nil { logger.Errorf("Failed to parse Github URL: %s due 
to %v", github_url, err) diff --git a/pkg/readers/reader.go b/pkg/readers/reader.go index 7045ed8..e5c8a01 100644 --- a/pkg/readers/reader.go +++ b/pkg/readers/reader.go @@ -7,6 +7,8 @@ package readers import "github.com/safedep/vet/pkg/models" +type PackageManifestHandlerFn func(*models.PackageManifest, PackageReader) error + // Contract for implementing package manifest readers such as lockfile parser, // SBOM parser etc. Reader should stop enumeration and return error if handler // returns an error diff --git a/pkg/scanner/callbacks.go b/pkg/scanner/callbacks.go index ac71878..2e973d5 100644 --- a/pkg/scanner/callbacks.go +++ b/pkg/scanner/callbacks.go @@ -13,20 +13,34 @@ type ScannerCallbackErrArgFn func(error) type ScannerCallbackNoArgFn func() type ScannerCallbacks struct { - OnStart ScannerCallbackOnManifestsFn - OnStartManifest ScannerCallbackOnManifestFn - OnStartPackage ScannerCallbackOnPackageFn - OnAddTransitivePackage ScannerCallbackOnPackageFn - OnDonePackage ScannerCallbackOnPackageFn - OnDoneManifest ScannerCallbackOnManifestFn - BeforeFinish ScannerCallbackNoArgFn - OnStop ScannerCallbackErrArgFn + OnStartEnumerateManifest ScannerCallbackNoArgFn // Manifest enumeration is starting + OnEnumerateManifest ScannerCallbackOnManifestFn // A manifest is read by reader + OnStart ScannerCallbackOnManifestsFn // Manifest scan phase is starting + OnStartManifest ScannerCallbackOnManifestFn // A manifest is starting to be scanned + OnStartPackage ScannerCallbackOnPackageFn // A package analysis is starting + OnAddTransitivePackage ScannerCallbackOnPackageFn // A transitive dependency is discovered + OnDonePackage ScannerCallbackOnPackageFn // A package analysis is finished + OnDoneManifest ScannerCallbackOnManifestFn // A manifest analysis is finished + BeforeFinish ScannerCallbackNoArgFn // Scan is about to finish + OnStop ScannerCallbackErrArgFn // Scan is finished } func (s *packageManifestScanner) WithCallbacks(callbacks ScannerCallbacks) { s.callbacks = 
callbacks } +func (s *packageManifestScanner) dispatchStartManifestEnumeration() { + if s.callbacks.OnStartEnumerateManifest != nil { + s.callbacks.OnStartEnumerateManifest() + } +} + +func (s *packageManifestScanner) dispatchOnManifestEnumeration(manifest *models.PackageManifest) { + if s.callbacks.OnEnumerateManifest != nil { + s.callbacks.OnEnumerateManifest(manifest) + } +} + func (s *packageManifestScanner) dispatchOnStart(manifests []*models.PackageManifest) { if s.callbacks.OnStart != nil { s.callbacks.OnStart(manifests) diff --git a/pkg/scanner/scanner.go b/pkg/scanner/scanner.go index d9843f6..5e5b30a 100644 --- a/pkg/scanner/scanner.go +++ b/pkg/scanner/scanner.go @@ -48,10 +48,14 @@ func (s *packageManifestScanner) Start() error { // of progress update depends on it var manifests []*models.PackageManifest + s.dispatchStartManifestEnumeration() for _, reader := range s.readers { err := reader.EnumManifests(func(manifest *models.PackageManifest, _ readers.PackageReader) error { + + s.dispatchOnManifestEnumeration(manifest) manifests = append(manifests, manifest) + return nil }) diff --git a/samples/filter-suites/fs-generic.yml b/samples/filter-suites/fs-generic.yml index 2abeed0..507dcc4 100644 --- a/samples/filter-suites/fs-generic.yml +++ b/samples/filter-suites/fs-generic.yml @@ -23,7 +23,8 @@ filters: licenses.exists(p, p == "GPL-2.0") || licenses.exists(p, p == "GPL-2.0-only") || licenses.exists(p, p == "GPL-3.0") || - licenses.exists(p, p == "GPL-3.0-only") + licenses.exists(p, p == "GPL-3.0-only") || + licenses.exists(p, p == "BSD-3-Clause OR GPL-2.0") - name: ossf-unmaintained check_type: CheckTypeMaintenance summary: Component appears to be unmaintained diff --git a/scan.go b/scan.go index 8f34855..e5d443e 100644 --- a/scan.go +++ b/scan.go @@ -4,6 +4,7 @@ import ( "fmt" "os" + "github.com/google/go-github/v54/github" "github.com/safedep/dry/utils" "github.com/safedep/vet/internal/auth" "github.com/safedep/vet/internal/connect" @@ -24,6 +25,8 @@ 
var ( baseDirectory string purlSpec string githubRepoUrls []string + githubOrgUrl string + githubOrgMaxRepositories int scanExclude []string transitiveAnalysis bool transitiveDepth int @@ -73,6 +76,10 @@ func newScanCommand() *cobra.Command { "PURL to scan") cmd.Flags().StringArrayVarP(&githubRepoUrls, "github", "", []string{}, "Github repository URL (Example: https://github.com/{org}/{repo})") + cmd.Flags().StringVarP(&githubOrgUrl, "github-org", "", "", + "Github organization URL (Example: https://github.com/safedep)") + cmd.Flags().IntVarP(&githubOrgMaxRepositories, "github-org-max-repo", "", 1000, + "Maximum number of repositories to process for the Github Org") cmd.Flags().StringVarP(&lockfileAs, "lockfile-as", "", "", "Parser to use for the lockfile (vet scan parsers to list)") cmd.Flags().BoolVarP(&transitiveAnalysis, "transitive", "", false, @@ -155,6 +162,15 @@ func internalStartScan() error { var reader readers.PackageManifestReader var err error + githubClientBuilder := func() *github.Client { + githubClient, err := connect.GetGithubClient() + if err != nil { + logger.Fatalf("Failed to build Github client: %v", err) + } + + return githubClient + } + // We can easily support both directory and lockfile reader. But current UX // contract is to support one of them at a time. 
Lets not break the contract // for now and figure out UX improvement later @@ -162,13 +178,19 @@ func internalStartScan() error { // nolint:ineffassign,staticcheck reader, err = readers.NewLockfileReader(lockfiles, lockfileAs) } else if len(githubRepoUrls) > 0 { - githubClient, err := connect.GetGithubClient() - if err != nil { - logger.Fatalf("Failed to build Github client: %v", err) - } + githubClient := githubClientBuilder() // nolint:ineffassign,staticcheck reader, err = readers.NewGithubReader(githubClient, githubRepoUrls, lockfileAs) + } else if len(githubOrgUrl) > 0 { + githubClient := githubClientBuilder() + + // nolint:ineffassign,staticcheck + reader, err = readers.NewGithubOrgReader(githubClient, &readers.GithubOrgReaderConfig{ + OrganizationURL: githubOrgUrl, + IncludeArchived: false, + MaxRepositories: githubOrgMaxRepositories, + }) } else if len(purlSpec) > 0 { // nolint:ineffassign,staticcheck reader, err = readers.NewPurlReader(purlSpec) @@ -299,6 +321,13 @@ func internalStartScan() error { var packageTracker any pmScanner.WithCallbacks(scanner.ScannerCallbacks{ + OnStartEnumerateManifest: func() { + ui.PrintMsg("Starting to enumerate manifests") + }, + OnEnumerateManifest: func(manifest *models.PackageManifest) { + ui.PrintSuccess("Discovered a manifest at %s with %d packages", + manifest.GetDisplayPath(), len(manifest.Packages)) + }, OnStart: func(manifests []*models.PackageManifest) { if !silentScan { ui.StartProgressWriter() diff --git a/test/e2e_github_org_reader_test.go b/test/e2e_github_org_reader_test.go new file mode 100644 index 0000000..4164175 --- /dev/null +++ b/test/e2e_github_org_reader_test.go @@ -0,0 +1,43 @@ +package test + +import ( + "os" + "testing" + + "github.com/safedep/vet/internal/connect" + "github.com/safedep/vet/pkg/models" + "github.com/safedep/vet/pkg/readers" + "github.com/stretchr/testify/assert" +) + +func TestGithubOrgReaderWithSafeDepOrg(t *testing.T) { + verifyE2E(t) + + t.Run("Test Reader using SafeDep Github 
Org without auth", func(t *testing.T) { + githubToken := os.Getenv("GITHUB_TOKEN") + + t.Cleanup(func() { + os.Setenv("GITHUB_TOKEN", githubToken) + }) + + os.Setenv("GITHUB_TOKEN", "") + githubClient, err := connect.GetGithubClient() + assert.Nil(t, err) + + githubOrgReader, err := readers.NewGithubOrgReader(githubClient, &readers.GithubOrgReaderConfig{ + OrganizationURL: "https://github.com/safedep", + MaxRepositories: 5, + }) + + assert.Nil(t, err) + + var manifests []*models.PackageManifest + err = githubOrgReader.EnumManifests(func(pm *models.PackageManifest, pr readers.PackageReader) error { + manifests = append(manifests, pm) + return nil + }) + + assert.Nil(t, err) + assert.Greater(t, len(manifests), 0) + }) +}