feat: Add UI integration for Github Org reader component

This commit is contained in:
abhisek 2023-11-12 10:07:51 +05:30
parent 535ee17642
commit b945fe36ba
No known key found for this signature in database
GPG Key ID: CB92A4990C02A88F
7 changed files with 267 additions and 5 deletions

View File

@ -0,0 +1,134 @@
package readers
import (
"context"
"errors"
"net/url"
"strings"
"github.com/google/go-github/v54/github"
"github.com/safedep/vet/pkg/common/logger"
"github.com/safedep/vet/pkg/models"
)
// githubOrgReaderPerPageSize is the page size requested from the Github API
// when listing the repositories of an organization.
const githubOrgReaderPerPageSize = 100
// GithubOrgReaderConfig carries the settings consumed by the Github
// organization reader created through NewGithubOrgReader.
type GithubOrgReaderConfig struct {
// OrganizationURL is the URL of the organization to enumerate. It must carry
// a scheme and the organization name as its first path segment, for example
// https://github.com/safedep.
OrganizationURL string
// IncludeArchived is presumably meant to control whether archived
// repositories are scanned.
// NOTE(review): confirm that the reader actually honours this flag.
IncludeArchived bool
// MaxRepositories is the upper bound used to stop repository enumeration.
// A value of 0 disables the limit.
MaxRepositories int
}
// githubOrgReader is the reader returned by NewGithubOrgReader. It keeps the
// Github API client used for listing repositories and the configuration that
// names the organization to enumerate.
type githubOrgReader struct {
client *github.Client
config *GithubOrgReaderConfig
}
// NewGithubOrgReader creates a [PackageManifestReader] which enumerates
// a Github org, identifying repositories and scanning them using [githubReader].
//
// Both client and config are required. A nil value for either is rejected
// here with an error instead of surfacing later as a nil pointer dereference
// while the organization is being enumerated.
func NewGithubOrgReader(client *github.Client,
	config *GithubOrgReaderConfig) (PackageManifestReader, error) {
	if client == nil {
		return nil, errors.New("github client is required")
	}

	if config == nil {
		return nil, errors.New("github org reader config is required")
	}

	return &githubOrgReader{
		client: client,
		config: config,
	}, nil
}
// Name returns the human readable name of this reader.
func (p *githubOrgReader) Name() string {
	const readerName = "Github Organization Package Manifest Reader"

	return readerName
}
// EnumManifests walks the repositories of the configured Github organization
// one page at a time and forwards the selected repositories of each page to
// handleRepositoryBatch together with handler.
//
// Archived repositories are skipped unless config.IncludeArchived is set.
// When config.MaxRepositories is non-zero, no more than that many
// repositories are handed over for scanning, even if a single page returned
// by the API contains more.
//
// An error is returned only when the organization URL cannot be parsed.
// Failures while listing or processing repositories are logged and stop the
// enumeration, but nil is still returned so that whatever was scanned up to
// that point remains usable.
func (p *githubOrgReader) EnumManifests(handler func(*models.PackageManifest,
	PackageReader) error) error {
	ctx := context.Background()

	gitOrg, err := githubOrgFromURL(p.config.OrganizationURL)
	if err != nil {
		return err
	}

	listOptions := &github.ListOptions{
		Page:    0,
		PerPage: githubOrgReaderPerPageSize,
	}

	enumeratedRepositories := 0

	for {
		if err := ctx.Err(); err != nil {
			logger.Errorf("Context error: %v", err)
			break
		}

		// The counter only holds repositories that were actually handed over
		// for scanning, so reaching the limit is the stop condition.
		if (p.config.MaxRepositories != 0) && (enumeratedRepositories >= p.config.MaxRepositories) {
			logger.Infof("Stopping repository enumeration due to max %d limit reached",
				p.config.MaxRepositories)
			break
		}

		repositories, resp, err := p.client.Repositories.ListByOrg(ctx, gitOrg,
			&github.RepositoryListByOrgOptions{
				ListOptions: *listOptions,
			})
		if err != nil {
			logger.Errorf("Failed to list Github org: %v", err)
			break
		}

		logger.Infof("Enumerated %d repositories with page: %d and next page: %d",
			len(repositories), listOptions.Page, resp.NextPage)

		// A remaining budget of 0 means "no cap". With a limit configured the
		// check above guarantees the budget is positive at this point.
		remaining := 0
		if p.config.MaxRepositories != 0 {
			remaining = p.config.MaxRepositories - enumeratedRepositories
		}

		batch := selectGithubOrgRepositories(repositories,
			p.config.IncludeArchived, remaining)

		// A page may end up empty once archived repositories are dropped;
		// there is nothing to scan then, but later pages may still matter.
		if len(batch) > 0 {
			err = p.handleRepositoryBatch(batch, handler)
			if err != nil {
				logger.Errorf("Failed to handle repository batch: %v", err)
				break
			}
		}

		enumeratedRepositories += len(batch)

		if resp.NextPage == 0 {
			break
		}

		listOptions.Page = resp.NextPage
	}

	return nil
}

// selectGithubOrgRepositories returns the repositories of a page that should
// be scanned: archived ones are dropped unless includeArchived is true, and
// at most limit entries are kept (a limit of 0 or less means no cap).
func selectGithubOrgRepositories(repositories []*github.Repository,
	includeArchived bool, limit int) []*github.Repository {
	selected := make([]*github.Repository, 0, len(repositories))

	for _, repo := range repositories {
		if limit > 0 && len(selected) >= limit {
			break
		}

		if !includeArchived && repo.GetArchived() {
			continue
		}

		selected = append(selected, repo)
	}

	return selected
}
// handleRepositoryBatch gathers the clone URL of every repository in the
// batch, builds a reader for those URLs through NewGithubReader and passes
// handler on to that reader's EnumManifests. Any error from building the
// reader or from the enumeration is returned to the caller.
func (p *githubOrgReader) handleRepositoryBatch(repositories []*github.Repository,
	handler PackageManifestHandlerFn) error {
	var cloneURLs []string
	for i := range repositories {
		cloneURLs = append(cloneURLs, repositories[i].GetCloneURL())
	}

	batchReader, buildErr := NewGithubReader(p.client, cloneURLs, "")
	if buildErr != nil {
		return buildErr
	}

	return batchReader.EnumManifests(handler)
}
// githubOrgFromURL extracts the organization name, i.e. the first path
// segment, from a URL such as https://github.com/org or
// https://github.com/org/repo. It fails when the URL cannot be parsed, has no
// scheme, or has an empty first path segment. It is kept as a standalone
// function so that it can be unit tested on its own.
func githubOrgFromURL(githubUrl string) (string, error) {
	parsed, parseErr := url.Parse(githubUrl)
	switch {
	case parseErr != nil:
		return "", parseErr
	case parsed.Scheme == "":
		// url.Parse accepts scheme-less input, which is not usable here
		return "", errors.New("rejecting URL without a scheme")
	}

	// A path shaped like "/org/..." splits into "", "org", ...; whatever
	// follows the second separator is irrelevant, so splitting stops there.
	segments := strings.SplitN(parsed.Path, "/", 3)
	if len(segments) >= 2 && segments[1] != "" {
		return segments[1], nil
	}

	return "", errors.New("rejecting URL without an org")
}

View File

@ -0,0 +1,61 @@
package readers
import (
"errors"
"testing"
"github.com/stretchr/testify/assert"
)
// TestOrgFromURL runs githubOrgFromURL against a table of URLs. Cases with a
// non-nil err expect a failure whose message contains err's text and an empty
// org; all other cases expect success and the listed org.
func TestOrgFromURL(t *testing.T) {
	cases := []struct {
		name string
		url  string
		org  string
		err  error
	}{
		{
			"URL is invalid",
			"aaaa",
			"",
			errors.New("rejecting URL without a scheme"),
		},
		{
			"URL can not be parsed",
			"://github.com/org1",
			"",
			errors.New("missing protocol scheme"),
		},
		{
			"URL does not have org",
			"https://github.com/",
			"",
			errors.New("rejecting URL without an org"),
		},
		{
			"URL does not have org slash",
			"https://github.com",
			"",
			errors.New("rejecting URL without an org"),
		},
		{
			"URL has org",
			"https://github.com/org1",
			"org1",
			nil,
		},
		{
			"URL has org with trailing slash",
			"https://github.com/org1/",
			"org1",
			nil,
		},
		{
			"URL has org++",
			"https://github.com/org1/repo.git?x=1",
			"org1",
			nil,
		},
	}

	for _, test := range cases {
		t.Run(test.name, func(t *testing.T) {
			org, err := githubOrgFromURL(test.url)
			if test.err != nil {
				assert.ErrorContains(t, err, test.err.Error())
				// A failed extraction must not leak a partial org name
				assert.Empty(t, org)
				return
			}

			// Without this check a regression that returns both an org and
			// an error would go unnoticed
			assert.NoError(t, err)
			assert.Equal(t, test.org, org)
		})
	}
}

View File

@ -51,6 +51,8 @@ func (p *githubReader) EnumManifests(handler func(*models.PackageManifest,
// github urls, which we may while scanning an entire org, we want to make
// as much progress as possible while logging errors
for _, github_url := range p.github_urls {
logger.Debugf("Processing Github URL: %s", github_url)
gitURL, err := giturl.NewGitURL(github_url)
if err != nil {
logger.Errorf("Failed to parse Github URL: %s due to %v", github_url, err)

View File

@ -7,6 +7,8 @@ package readers
import "github.com/safedep/vet/pkg/models"
// PackageManifestHandlerFn is the callback signature that receives a package
// manifest together with a PackageReader and reports failure through its
// error result.
type PackageManifestHandlerFn func(*models.PackageManifest, PackageReader) error
// Contract for implementing package manifest readers such as lockfile parser,
// SBOM parser etc. Reader should stop enumeration and return error if handler
// returns an error

View File

@ -23,7 +23,8 @@ filters:
licenses.exists(p, p == "GPL-2.0") ||
licenses.exists(p, p == "GPL-2.0-only") ||
licenses.exists(p, p == "GPL-3.0") ||
licenses.exists(p, p == "GPL-3.0-only")
licenses.exists(p, p == "GPL-3.0-only") ||
licenses.exists(p, p == "BSD-3-Clause OR GPL-2.0")
- name: ossf-unmaintained
check_type: CheckTypeMaintenance
summary: Component appears to be unmaintained

30
scan.go
View File

@ -4,6 +4,7 @@ import (
"fmt"
"os"
"github.com/google/go-github/v54/github"
"github.com/safedep/dry/utils"
"github.com/safedep/vet/internal/auth"
"github.com/safedep/vet/internal/connect"
@ -24,6 +25,8 @@ var (
baseDirectory string
purlSpec string
githubRepoUrls []string
githubOrgUrl string
githubOrgMaxRepositories int
scanExclude []string
transitiveAnalysis bool
transitiveDepth int
@ -73,6 +76,10 @@ func newScanCommand() *cobra.Command {
"PURL to scan")
cmd.Flags().StringArrayVarP(&githubRepoUrls, "github", "", []string{},
"Github repository URL (Example: https://github.com/{org}/{repo})")
cmd.Flags().StringVarP(&githubOrgUrl, "github-org", "", "",
"Github organization URL (Example: https://github.com/safedep)")
cmd.Flags().IntVarP(&githubOrgMaxRepositories, "github-org-max-repo", "", 1000,
"Maximum number of repositories to process for the Github Org")
cmd.Flags().StringVarP(&lockfileAs, "lockfile-as", "", "",
"Parser to use for the lockfile (vet scan parsers to list)")
cmd.Flags().BoolVarP(&transitiveAnalysis, "transitive", "", false,
@ -155,6 +162,15 @@ func internalStartScan() error {
var reader readers.PackageManifestReader
var err error
githubClientBuilder := func() *github.Client {
githubClient, err := connect.GetGithubClient()
if err != nil {
logger.Fatalf("Failed to build Github client: %v", err)
}
return githubClient
}
// We can easily support both directory and lockfile reader. But current UX
// contract is to support one of them at a time. Lets not break the contract
// for now and figure out UX improvement later
@ -162,13 +178,19 @@ func internalStartScan() error {
// nolint:ineffassign,staticcheck
reader, err = readers.NewLockfileReader(lockfiles, lockfileAs)
} else if len(githubRepoUrls) > 0 {
githubClient, err := connect.GetGithubClient()
if err != nil {
logger.Fatalf("Failed to build Github client: %v", err)
}
githubClient := githubClientBuilder()
// nolint:ineffassign,staticcheck
reader, err = readers.NewGithubReader(githubClient, githubRepoUrls, lockfileAs)
} else if len(githubOrgUrl) > 0 {
githubClient := githubClientBuilder()
// nolint:ineffassign,staticcheck
reader, err = readers.NewGithubOrgReader(githubClient, &readers.GithubOrgReaderConfig{
OrganizationURL: githubOrgUrl,
IncludeArchived: false,
MaxRepositories: githubOrgMaxRepositories,
})
} else if len(purlSpec) > 0 {
// nolint:ineffassign,staticcheck
reader, err = readers.NewPurlReader(purlSpec)

View File

@ -0,0 +1,40 @@
package test
import (
"os"
"testing"
"github.com/safedep/vet/internal/connect"
"github.com/safedep/vet/pkg/models"
"github.com/safedep/vet/pkg/readers"
"github.com/stretchr/testify/assert"
)
// TestGithubOrgReaderWithSafeDepOrg enumerates the SafeDep Github
// organization through the org reader with GITHUB_TOKEN blanked out, and
// expects the reader to be built and the enumeration to finish without error.
func TestGithubOrgReaderWithSafeDepOrg(t *testing.T) {
	verifyE2E(t)

	t.Run("Test Reader using SafeDep Github Org without auth", func(t *testing.T) {
		// Remember whether the variable existed at all, so that cleanup can
		// put the environment back exactly: an originally unset variable is
		// unset again rather than left behind as an empty one.
		githubToken, hadGithubToken := os.LookupEnv("GITHUB_TOKEN")
		t.Cleanup(func() {
			var restoreErr error
			if hadGithubToken {
				restoreErr = os.Setenv("GITHUB_TOKEN", githubToken)
			} else {
				restoreErr = os.Unsetenv("GITHUB_TOKEN")
			}

			if restoreErr != nil {
				t.Errorf("failed to restore GITHUB_TOKEN: %v", restoreErr)
			}
		})

		if err := os.Setenv("GITHUB_TOKEN", ""); err != nil {
			t.Fatalf("failed to clear GITHUB_TOKEN: %v", err)
		}

		// Each step depends on the previous one, so stop at the first
		// failure instead of continuing with a nil client or reader.
		githubClient, err := connect.GetGithubClient()
		if !assert.NoError(t, err) {
			return
		}

		githubOrgReader, err := readers.NewGithubOrgReader(githubClient, &readers.GithubOrgReaderConfig{
			OrganizationURL: "https://github.com/safedep",
			MaxRepositories: 1,
		})
		if !assert.NoError(t, err) {
			return
		}

		err = githubOrgReader.EnumManifests(func(pm *models.PackageManifest, pr readers.PackageReader) error {
			return nil
		})
		assert.NoError(t, err)
	})
}