Merge pull request #150 from safedep/feat/140-github-org-reader

feat: Add Support for Github Organization Scanning
This commit is contained in:
Abhisek Datta 2023-11-15 23:05:36 +05:30 committed by GitHub
commit 358e2148d2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
22 changed files with 402 additions and 24 deletions

View File

@ -9,6 +9,7 @@ permissions:
jobs:
run-test:
timeout-minutes: 15
runs-on: ubuntu-latest
steps:
- name: Checkout Source
@ -30,6 +31,7 @@ jobs:
build-container:
runs-on: ubuntu-latest
timeout-minutes: 15
steps:
- name: Checkout Source
uses: actions/checkout@v3

View File

@ -24,6 +24,7 @@ permissions:
jobs:
analyze:
if: "!contains(github.event.commits[0].message, '[noci]')"
timeout-minutes: 30
name: Analyze
runs-on: ubuntu-latest
permissions:

View File

@ -19,6 +19,7 @@ env:
jobs:
build:
if: "!contains(github.event.commits[0].message, '[noci]')"
timeout-minutes: 30
runs-on: ubuntu-latest
permissions:
contents: read

View File

@ -13,6 +13,7 @@ permissions:
jobs:
dependency-review:
runs-on: ubuntu-latest
timeout-minutes: 15
steps:
- name: 'Checkout Repository'
uses: actions/checkout@v3

View File

@ -10,6 +10,7 @@ jobs:
golangci:
name: lint
runs-on: ubuntu-latest
timeout-minutes: 30
steps:
- uses: actions/setup-go@v4
with:

View File

@ -16,6 +16,7 @@ env:
jobs:
goreleaser:
timeout-minutes: 60
outputs:
hashes: ${{ steps.hash.outputs.hashes }}
permissions:
@ -90,6 +91,7 @@ jobs:
checksum_file=$(echo "$ARTIFACTS" | jq -r '.[] | select (.type=="Checksum") | .path')
echo "hashes=$(cat $checksum_file | base64 -w0)" >> "$GITHUB_OUTPUT"
provenance:
timeout-minutes: 30
needs: [goreleaser]
permissions:
actions: read # To read the workflow path.

View File

@ -20,6 +20,7 @@ permissions: read-all
jobs:
analysis:
name: Scorecard analysis
timeout-minutes: 30
runs-on: ubuntu-latest
permissions:
security-events: write

View File

@ -10,6 +10,7 @@ permissions:
jobs:
trufflehog:
runs-on: ubuntu-latest
timeout-minutes: 30
steps:
- name: Checkout Source
uses: actions/checkout@v2

View File

@ -112,6 +112,17 @@ vet scan --github https://github.com/safedep/vet
**Note:** You may need to enable [Dependency Graph](https://docs.github.com/en/code-security/supply-chain-security/understanding-your-software-supply-chain/about-the-dependency-graph) at repository or organization level for Github repository scanning to work.
#### Scanning Github Organization
> You must set up the required access for scanning private repositories
> before scanning organizations
```bash
vet scan --github-org https://github.com/safedep
```
> **Note:** `vet` will block and wait if it encounters a Github secondary rate limit.
#### Scanning Package URL
- To scan a [purl](https://github.com/package-url/purl-spec)
@ -195,6 +206,10 @@ make quick-vet
go test -v ./...
```
## Star History
[![Star History Chart](https://api.star-history.com/svg?repos=safedep/vet&type=Date)](https://star-history.com/#safedep/vet&Date)
## 🔖 References
- [https://github.com/google/osv-scanner](https://github.com/google/osv-scanner)

View File

@ -51,7 +51,7 @@ func connectGithubCommand() *cobra.Command {
ui.PrintSuccess("Github Access Token configured and saved at '%s' for your convenience.", connect.GetConfigFileHint())
ui.PrintSuccess("You can use vet to scan your github repositories")
ui.PrintSuccess("Run the command to scan your github repository")
ui.PrintSuccess("\tvet scan https://github.com/<Org|User>/<Repo>")
ui.PrintSuccess("\tvet scan --github https://github.com/<Org|User>/<Repo>")
os.Exit(1)
return nil
@ -80,7 +80,7 @@ func getAccessTokenFromUser() (string, error) {
}
password := &survey.Password{
Message: "Paste your access token: ",
Message: "Provide your access token: ",
}
var accessToken string

View File

@ -2,7 +2,9 @@ package connect
import (
"context"
"net/http"
"os"
"strconv"
"strings"
"github.com/gofri/go-github-ratelimit/github_ratelimit"
@ -44,8 +46,13 @@ func GetGithubClient() (*github.Client, error) {
}
if utils.IsEmptyString(github_token) {
rateLimitedClient, err := githubRateLimitedClient(http.DefaultTransport)
if err != nil {
return nil, err
}
logger.Debugf("Creating a Github client without credential")
return github.NewClient(nil), nil
return github.NewClient(rateLimitedClient), nil
}
tokenSource := oauth2.StaticTokenSource(&oauth2.Token{
@ -53,11 +60,43 @@ func GetGithubClient() (*github.Client, error) {
})
baseClient := oauth2.NewClient(context.Background(), tokenSource)
rateLimitedClient, err := github_ratelimit.NewRateLimitWaiterClient(baseClient.Transport)
rateLimitedClient, err := githubRateLimitedClient(baseClient.Transport)
if err != nil {
return nil, err
}
logger.Debugf("Created a new Github client with rate limit waiter")
logger.Debugf("Created a new Github client with credential")
return github.NewClient(rateLimitedClient), nil
}
// githubRateLimitedClient builds an HTTP client on top of transport using the
// go-github-ratelimit waiter. This is currently effective only for Github
// secondary rate limits:
// https://docs.github.com/en/rest/overview/rate-limits-for-the-rest-api
//
// Unless githubClientRateLimitBlockDisabled reports true, a callback is
// registered that logs whenever a rate limit is detected.
//
// NOTE(review): when blocking is reported as disabled only the logging
// callback is skipped; the waiter client is still created. Confirm that this
// is the intended meaning of "disabled".
func githubRateLimitedClient(transport http.RoundTripper) (*http.Client, error) {
	var opts []github_ratelimit.Option

	if blockDisabled := githubClientRateLimitBlockDisabled(); !blockDisabled {
		logger.Debugf("Adding Github rate limit callbacks to client")

		onLimitDetected := func(cc *github_ratelimit.CallbackContext) {
			logger.Infof("Github rate limit detected, sleep until: %s", cc.SleepUntil)
		}

		opts = append(opts, github_ratelimit.WithLimitDetectedCallback(onLimitDetected))
	}

	client, err := github_ratelimit.NewRateLimitWaiterClient(transport, opts...)
	if err != nil {
		return nil, err
	}

	return client, nil
}
// githubClientRateLimitBlockDisabled reports whether the environment variable
// VET_GITHUB_DISABLE_RATE_LIMIT_BLOCKING holds a value that strconv.ParseBool
// reads as true. An unset, empty or unparsable value yields false.
//
// This is deliberately an internal switch without a config or UI option: the
// default behaviour should *always* apply unless the user explicitly opts out.
func githubClientRateLimitBlockDisabled() bool {
	raw := os.Getenv("VET_GITHUB_DISABLE_RATE_LIMIT_BLOCKING")

	if disabled, err := strconv.ParseBool(raw); err == nil {
		return disabled
	}

	return false
}

View File

@ -53,7 +53,7 @@ func main() {
cmd.PersistentFlags().BoolVarP(&verbose, "verbose", "v", false, "Show verbose logs")
cmd.PersistentFlags().BoolVarP(&debug, "debug", "d", false, "Show debug logs")
cmd.PersistentFlags().BoolVarP(&noBanner, "no-banner", "", false, "Do not display the vet banner")
cmd.PersistentFlags().StringVarP(&logFile, "log", "l", "", "Write command logs to file")
cmd.PersistentFlags().StringVarP(&logFile, "log", "l", "", "Write command logs to file, use - as for stdout")
cmd.PersistentFlags().StringVarP(&globalExceptionsFile, "exceptions", "e", "", "Load exceptions from file")
cmd.AddCommand(newAuthCommand())

View File

@ -151,16 +151,16 @@ func attempParsePackageName(input string) (string, string, string, bool) {
// 3. (.*?) Match and capture the name
pattern := regexp.MustCompile(`^((.+):)?((.+)/)?(.*)$`)
matches := pattern.FindStringSubmatch(input)
version := matches[5]
if matches[5] == "" {
version = "0.0.0"
}
if len(matches) != 6 {
return "", "", "", false
}
version := matches[5]
if version == "" {
version = "0.0.0"
}
return matches[2], matches[4], version, true
}

View File

@ -0,0 +1,158 @@
package readers
import (
"context"
"errors"
"net/url"
"strings"
"github.com/google/go-github/v54/github"
"github.com/safedep/vet/pkg/common/logger"
"github.com/safedep/vet/pkg/models"
)
const (
	// githubOrgReaderPerPageSize is the page size requested when listing the
	// repositories of an organization.
	githubOrgReaderPerPageSize = 100
)
// GithubOrgReaderConfig carries the settings for a Github organization reader.
type GithubOrgReaderConfig struct {
	// OrganizationURL is the URL from which the organization name is extracted
	OrganizationURL string

	// IncludeArchived is intended to control whether archived repositories
	// are scanned
	IncludeArchived bool

	// MaxRepositories caps the number of repositories that are processed;
	// a value of 0 disables the cap
	MaxRepositories int
}
// githubOrgReader enumerates the repositories of a Github organization and
// reads package manifests from them.
type githubOrgReader struct {
	// client is the Github API client used to list repositories
	client *github.Client

	// config holds the organization URL and enumeration limits
	config *GithubOrgReaderConfig

	// scannedRepoCounter is the number of repositories counted so far; it is
	// compared against config.MaxRepositories
	scannedRepoCounter int
}
// NewGithubOrgReader returns a [PackageManifestReader] that walks the
// repositories of a Github organization and scans each of them using
// [githubReader]. The repository counter starts at zero and the returned
// error is always nil.
func NewGithubOrgReader(client *github.Client,
	config *GithubOrgReaderConfig) (PackageManifestReader, error) {
	reader := &githubOrgReader{
		client: client,
		config: config,
	}

	return reader, nil
}
// Name returns the human readable name of this reader.
func (p *githubOrgReader) Name() string {
	const readerName = "Github Organization Package Manifest Reader"
	return readerName
}
// EnumManifests lists the repositories of the configured Github organization
// page by page and hands every page to handleRepositoryBatch together with
// handler.
//
// Only a failure to extract the organization from the configured URL is
// returned to the caller. A listing failure or a batch failure is logged and
// ends the enumeration, but nil is still returned. Enumeration also ends when
// the repository limit is reached or when there is no next page.
func (p *githubOrgReader) EnumManifests(handler func(*models.PackageManifest,
	PackageReader) error) error {
	orgName, err := githubOrgFromURL(p.config.OrganizationURL)
	if err != nil {
		return err
	}

	ctx := context.Background()
	page := 0

	for {
		if ctxErr := ctx.Err(); ctxErr != nil {
			logger.Errorf("Context error: %v", ctxErr)
			return nil
		}

		if p.isRepoLimitReached() {
			logger.Infof("Stopping repository enumeration due to max %d limit reached",
				p.config.MaxRepositories)
			return nil
		}

		query := &github.RepositoryListByOrgOptions{
			ListOptions: github.ListOptions{
				Page:    page,
				PerPage: githubOrgReaderPerPageSize,
			},
		}

		repos, resp, listErr := p.client.Repositories.ListByOrg(ctx, orgName, query)
		if listErr != nil {
			logger.Errorf("Failed to list Github org: %v", listErr)
			return nil
		}

		logger.Infof("Enumerated %d repositories with page: %d and next page: %d",
			len(repos), page, resp.NextPage)

		if batchErr := p.handleRepositoryBatch(repos, handler); batchErr != nil {
			logger.Errorf("Failed to handle repository batch: %v", batchErr)
			return nil
		}

		// A next page of 0 marks the last page
		if resp.NextPage == 0 {
			return nil
		}

		page = resp.NextPage
	}
}
// isRepoLimitReached reports whether the repository counter has reached the
// configured maximum. A MaxRepositories of 0 means there is no limit.
func (p *githubOrgReader) isRepoLimitReached() bool {
	limit := p.config.MaxRepositories
	if limit == 0 {
		return false
	}

	return p.scannedRepoCounter >= limit
}
// withIncrementedRepoCount runs fn, then bumps the repository counter by one.
// The returned boolean tells the caller whether the repository limit has been
// reached after the increment.
func (p *githubOrgReader) withIncrementedRepoCount(fn func()) bool {
	fn()
	p.scannedRepoCounter++

	limitReached := p.isRepoLimitReached()
	return limitReached
}
// handleRepositoryBatch collects the clone URLs of the given repositories,
// stopping as soon as the repository limit is reached, and delegates manifest
// enumeration for the collected URLs to a githubReader.
//
// Archived repositories are skipped unless config.IncludeArchived is set;
// skipped repositories do not count towards the repository limit. When no
// repository remains, nil is returned without creating a reader. Otherwise the
// error of the underlying reader, if any, is returned.
func (p *githubOrgReader) handleRepositoryBatch(repositories []*github.Repository,
	handler PackageManifestHandlerFn) error {
	var repoUrls []string

	for _, repo := range repositories {
		// Honour the IncludeArchived setting, which was previously ignored
		if repo.GetArchived() && !p.config.IncludeArchived {
			logger.Debugf("Skipping archived repository: %s", repo.GetFullName())
			continue
		}

		breach := p.withIncrementedRepoCount(func() {
			repoUrls = append(repoUrls, repo.GetCloneURL())
		})

		if breach {
			break
		}
	}

	if len(repoUrls) == 0 {
		return nil
	}

	githubReader, err := NewGithubReader(p.client, repoUrls, "")
	if err != nil {
		return err
	}

	return githubReader.EnumManifests(handler)
}
// githubOrgFromURL extracts the organization name, i.e. the first path
// segment, from a Github URL such as https://github.com/org or
// https://github.com/org/repo.git. It is kept as a standalone function so
// that it can be tested independently.
//
// An error is returned when the URL cannot be parsed, has no scheme, or has
// no organization segment.
func githubOrgFromURL(githubUrl string) (string, error) {
	parsed, err := url.Parse(githubUrl)
	if err != nil {
		return "", err
	}

	// url.Parse happily accepts input without a scheme, reject it explicitly
	if parsed.Scheme == "" {
		return "", errors.New("rejecting URL without a scheme")
	}

	// The path has the form "/<org>[/...]": drop everything up to the first
	// slash, then keep whatever precedes the next one
	_, afterSlash, hasSlash := strings.Cut(parsed.Path, "/")
	org, _, _ := strings.Cut(afterSlash, "/")

	if !hasSlash || org == "" {
		return "", errors.New("rejecting URL without an org")
	}

	return org, nil
}

View File

@ -0,0 +1,61 @@
package readers
import (
"errors"
"testing"
"github.com/stretchr/testify/assert"
)
// TestOrgFromURL checks that githubOrgFromURL extracts the organization from
// well formed URLs and rejects URLs without a scheme or without an org.
func TestOrgFromURL(t *testing.T) {
	cases := []struct {
		name string
		url  string
		org  string
		err  error
	}{
		{
			"URL is invalid",
			"aaaa",
			"",
			errors.New("rejecting URL without a scheme"),
		},
		{
			"URL does not have org",
			"https://github.com/",
			"",
			errors.New("rejecting URL without an org"),
		},
		{
			"URL does not have org slash",
			"https://github.com",
			"",
			errors.New("rejecting URL without an org"),
		},
		{
			"URL has org",
			"https://github.com/org1",
			"org1",
			nil,
		},
		{
			"URL has org++",
			"https://github.com/org1/repo.git?x=1",
			"org1",
			nil,
		},
	}

	for _, test := range cases {
		t.Run(test.name, func(t *testing.T) {
			org, err := githubOrgFromURL(test.url)

			if test.err != nil {
				assert.ErrorContains(t, err, test.err.Error())
			} else {
				// A success case must not pass while an error is returned
				assert.NoError(t, err)
			}

			// Checked in both branches: error cases expect an empty org
			assert.Equal(t, test.org, org)
		})
	}
}

View File

@ -51,6 +51,8 @@ func (p *githubReader) EnumManifests(handler func(*models.PackageManifest,
// github urls, which we may while scanning an entire org, we want to make
// as much progress as possible while logging errors
for _, github_url := range p.github_urls {
logger.Debugf("Processing Github URL: %s", github_url)
gitURL, err := giturl.NewGitURL(github_url)
if err != nil {
logger.Errorf("Failed to parse Github URL: %s due to %v", github_url, err)

View File

@ -7,6 +7,8 @@ package readers
import "github.com/safedep/vet/pkg/models"
type PackageManifestHandlerFn func(*models.PackageManifest, PackageReader) error
// Contract for implementing package manifest readers such as lockfile parser,
// SBOM parser etc. Reader should stop enumeration and return error if handler
// returns an error

View File

@ -13,20 +13,34 @@ type ScannerCallbackErrArgFn func(error)
type ScannerCallbackNoArgFn func()
type ScannerCallbacks struct {
OnStart ScannerCallbackOnManifestsFn
OnStartManifest ScannerCallbackOnManifestFn
OnStartPackage ScannerCallbackOnPackageFn
OnAddTransitivePackage ScannerCallbackOnPackageFn
OnDonePackage ScannerCallbackOnPackageFn
OnDoneManifest ScannerCallbackOnManifestFn
BeforeFinish ScannerCallbackNoArgFn
OnStop ScannerCallbackErrArgFn
OnStartEnumerateManifest ScannerCallbackNoArgFn // Manifest enumeration is starting
OnEnumerateManifest ScannerCallbackOnManifestFn // A manifest is read by reader
OnStart ScannerCallbackOnManifestsFn // Manifest scan phase is starting
OnStartManifest ScannerCallbackOnManifestFn // A manifest is starting to be scanned
OnStartPackage ScannerCallbackOnPackageFn // A package analysis is starting
OnAddTransitivePackage ScannerCallbackOnPackageFn // A transitive dependency is discovered
OnDonePackage ScannerCallbackOnPackageFn // A package analysis is finished
OnDoneManifest ScannerCallbackOnManifestFn // A manifest analysis is finished
BeforeFinish ScannerCallbackNoArgFn // Scan is about to finish
OnStop ScannerCallbackErrArgFn // Scan is finished
}
// WithCallbacks replaces the scanner's callbacks with the given set. Callbacks
// left nil in the set are skipped by the dispatch helpers.
func (s *packageManifestScanner) WithCallbacks(callbacks ScannerCallbacks) {
	s.callbacks = callbacks
}
// dispatchStartManifestEnumeration invokes the OnStartEnumerateManifest
// callback when one is registered.
func (s *packageManifestScanner) dispatchStartManifestEnumeration() {
	if s.callbacks.OnStartEnumerateManifest == nil {
		return
	}

	s.callbacks.OnStartEnumerateManifest()
}
// dispatchOnManifestEnumeration passes manifest to the OnEnumerateManifest
// callback when one is registered.
func (s *packageManifestScanner) dispatchOnManifestEnumeration(manifest *models.PackageManifest) {
	if s.callbacks.OnEnumerateManifest == nil {
		return
	}

	s.callbacks.OnEnumerateManifest(manifest)
}
func (s *packageManifestScanner) dispatchOnStart(manifests []*models.PackageManifest) {
if s.callbacks.OnStart != nil {
s.callbacks.OnStart(manifests)

View File

@ -48,10 +48,14 @@ func (s *packageManifestScanner) Start() error {
// of progress update depends on it
var manifests []*models.PackageManifest
s.dispatchStartManifestEnumeration()
for _, reader := range s.readers {
err := reader.EnumManifests(func(manifest *models.PackageManifest,
_ readers.PackageReader) error {
s.dispatchOnManifestEnumeration(manifest)
manifests = append(manifests, manifest)
return nil
})

View File

@ -23,7 +23,8 @@ filters:
licenses.exists(p, p == "GPL-2.0") ||
licenses.exists(p, p == "GPL-2.0-only") ||
licenses.exists(p, p == "GPL-3.0") ||
licenses.exists(p, p == "GPL-3.0-only")
licenses.exists(p, p == "GPL-3.0-only") ||
licenses.exists(p, p == "BSD-3-Clause OR GPL-2.0")
- name: ossf-unmaintained
check_type: CheckTypeMaintenance
summary: Component appears to be unmaintained

37
scan.go
View File

@ -4,6 +4,7 @@ import (
"fmt"
"os"
"github.com/google/go-github/v54/github"
"github.com/safedep/dry/utils"
"github.com/safedep/vet/internal/auth"
"github.com/safedep/vet/internal/connect"
@ -24,6 +25,8 @@ var (
baseDirectory string
purlSpec string
githubRepoUrls []string
githubOrgUrl string
githubOrgMaxRepositories int
scanExclude []string
transitiveAnalysis bool
transitiveDepth int
@ -73,6 +76,10 @@ func newScanCommand() *cobra.Command {
"PURL to scan")
cmd.Flags().StringArrayVarP(&githubRepoUrls, "github", "", []string{},
"Github repository URL (Example: https://github.com/{org}/{repo})")
cmd.Flags().StringVarP(&githubOrgUrl, "github-org", "", "",
"Github organization URL (Example: https://github.com/safedep)")
cmd.Flags().IntVarP(&githubOrgMaxRepositories, "github-org-max-repo", "", 1000,
"Maximum number of repositories to process for the Github Org")
cmd.Flags().StringVarP(&lockfileAs, "lockfile-as", "", "",
"Parser to use for the lockfile (vet scan parsers to list)")
cmd.Flags().BoolVarP(&transitiveAnalysis, "transitive", "", false,
@ -155,6 +162,15 @@ func internalStartScan() error {
var reader readers.PackageManifestReader
var err error
githubClientBuilder := func() *github.Client {
githubClient, err := connect.GetGithubClient()
if err != nil {
logger.Fatalf("Failed to build Github client: %v", err)
}
return githubClient
}
// We can easily support both directory and lockfile reader. But current UX
// contract is to support one of them at a time. Lets not break the contract
// for now and figure out UX improvement later
@ -162,13 +178,19 @@ func internalStartScan() error {
// nolint:ineffassign,staticcheck
reader, err = readers.NewLockfileReader(lockfiles, lockfileAs)
} else if len(githubRepoUrls) > 0 {
githubClient, err := connect.GetGithubClient()
if err != nil {
logger.Fatalf("Failed to build Github client: %v", err)
}
githubClient := githubClientBuilder()
// nolint:ineffassign,staticcheck
reader, err = readers.NewGithubReader(githubClient, githubRepoUrls, lockfileAs)
} else if len(githubOrgUrl) > 0 {
githubClient := githubClientBuilder()
// nolint:ineffassign,staticcheck
reader, err = readers.NewGithubOrgReader(githubClient, &readers.GithubOrgReaderConfig{
OrganizationURL: githubOrgUrl,
IncludeArchived: false,
MaxRepositories: githubOrgMaxRepositories,
})
} else if len(purlSpec) > 0 {
// nolint:ineffassign,staticcheck
reader, err = readers.NewPurlReader(purlSpec)
@ -299,6 +321,13 @@ func internalStartScan() error {
var packageTracker any
pmScanner.WithCallbacks(scanner.ScannerCallbacks{
OnStartEnumerateManifest: func() {
ui.PrintMsg("Starting to enumerate manifests")
},
OnEnumerateManifest: func(manifest *models.PackageManifest) {
ui.PrintSuccess("Discovered a manifest at %s with %d packages",
manifest.GetDisplayPath(), len(manifest.Packages))
},
OnStart: func(manifests []*models.PackageManifest) {
if !silentScan {
ui.StartProgressWriter()

View File

@ -0,0 +1,43 @@
package test
import (
"os"
"testing"
"github.com/safedep/vet/internal/connect"
"github.com/safedep/vet/pkg/models"
"github.com/safedep/vet/pkg/readers"
"github.com/stretchr/testify/assert"
)
// TestGithubOrgReaderWithSafeDepOrg is an end-to-end test that enumerates
// manifests from the SafeDep Github organization with an empty GITHUB_TOKEN,
// limited to 5 repositories, and expects at least one manifest.
func TestGithubOrgReaderWithSafeDepOrg(t *testing.T) {
	verifyE2E(t)

	t.Run("Test Reader using SafeDep Github Org without auth", func(t *testing.T) {
		// Blank the token for the duration of the test and put the previous
		// value back afterwards
		savedToken := os.Getenv("GITHUB_TOKEN")
		os.Setenv("GITHUB_TOKEN", "")
		t.Cleanup(func() {
			os.Setenv("GITHUB_TOKEN", savedToken)
		})

		client, err := connect.GetGithubClient()
		assert.Nil(t, err)

		config := readers.GithubOrgReaderConfig{
			OrganizationURL: "https://github.com/safedep",
			MaxRepositories: 5,
		}

		orgReader, err := readers.NewGithubOrgReader(client, &config)
		assert.Nil(t, err)

		var manifests []*models.PackageManifest
		collect := func(pm *models.PackageManifest, pr readers.PackageReader) error {
			manifests = append(manifests, pm)
			return nil
		}

		err = orgReader.EnumManifests(collect)

		assert.Nil(t, err)
		assert.Greater(t, len(manifests), 0)
	})
}