feat: Exclude Fork and Archive during GitHub Org Scan (#650)

* Only scan private repos under org based scan

* Only scan private repos under org based scan

* fix: Style and formatting issues

* Update scan.go

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Signed-off-by: Abhisek Datta <abhisek.datta@gmail.com>

* fix: Add tests for github org reader filters

---------

Signed-off-by: Abhisek Datta <abhisek.datta@gmail.com>
Co-authored-by: infosecwonderland <monika.talekar@ascenda.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Abhisek Datta 2025-12-09 22:14:49 +05:30 committed by GitHub
parent 33c4ca5059
commit 6acf08aec0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 266 additions and 39 deletions

View File

@ -23,6 +23,8 @@ type GithubOrgReaderConfig struct {
MaxRepositories int
SkipDependencyGraphAPI bool
ExcludeRepos []string
PrivateOnly bool
IncludeForks bool
}
type githubOrgReader struct {
@ -131,9 +133,9 @@ func (p *githubOrgReader) handleRepositoryBatch(repositories []*github.Repositor
var repoUrls []string
for _, repo := range repositories {
fullName := repo.GetFullName()
if githubIsExcludedRepo(fullName, p.config.ExcludeRepos) {
logger.Infof("Skipping excluded repo: %s", fullName)
excluded, reason := githubIsExcludedRepo(repo, p.config)
if excluded {
logger.Infof("Skipping github org repo %s: %s", repo.GetFullName(), reason)
continue
}
@ -162,8 +164,8 @@ func (p *githubOrgReader) handleRepositoryBatch(repositories []*github.Repositor
}
// Kept as a standalone (package-private) function so that it can be tested independently
func githubOrgFromURL(githubUrl string) (string, error) {
u, err := url.Parse(githubUrl)
func githubOrgFromURL(githubURL string) (string, error) {
u, err := url.Parse(githubURL)
if err != nil {
return "", err
}
@ -181,19 +183,32 @@ func githubOrgFromURL(githubUrl string) (string, error) {
return parts[1], nil
}
// To exclude specific repo using github org scanner
func githubIsExcludedRepo(repoName string, excludedRepositories []string) bool {
if len(excludedRepositories) == 0 {
return false
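// githubIsExcludedRepo reports whether a repository should be skipped during
// an org scan. Checks run in this order: an exact match against
// config.ExcludeRepos (surrounding whitespace ignored), archived repos unless
// config.IncludeArchived is set, forks unless config.IncludeForks is set, and
// non-private repos when config.PrivateOnly is set. It returns the exclusion
// decision together with the reason of the first check that matched; the
// reason is empty when the repo is not excluded.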
func githubIsExcludedRepo(repo *github.Repository, config *GithubOrgReaderConfig) (bool, string) {
fullName := repo.GetFullName()
if len(config.ExcludeRepos) > 0 {
logger.Debugf("Checking if repo %s is in exclusion list", fullName)
for _, ex := range config.ExcludeRepos {
if strings.TrimSpace(fullName) == strings.TrimSpace(ex) {
return true, "explicitly excluded"
}
logger.Debugf("Checking if repo %s is excluded", repoName)
for _, ex := range excludedRepositories {
if strings.TrimSpace(repoName) == strings.TrimSpace(ex) {
return true
}
}
return false
if !config.IncludeArchived && repo.GetArchived() {
return true, "archived"
}
if !config.IncludeForks && repo.GetFork() {
return true, "forked"
}
if config.PrivateOnly && !repo.GetPrivate() {
return true, "not private"
}
return false, ""
}

View File

@ -4,6 +4,8 @@ import (
"errors"
"testing"
"github.com/google/go-github/v70/github"
"github.com/safedep/dry/utils"
"github.com/stretchr/testify/assert"
)
@ -82,40 +84,239 @@ func TestGithubOrgReader(t *testing.T) {
func TestGithubIsExcludedRepo(t *testing.T) {
cases := []struct {
name string
fullName string
excluded []string
expectedVal bool
repo *github.Repository
config *GithubOrgReaderConfig
expectedExcl bool
expectedReason string
}{
{
name: "no excluded repo configured",
fullName: "x/y",
excluded: []string{},
expectedVal: false,
name: "no exclusion - default config",
repo: &github.Repository{
FullName: utils.PtrTo("org/repo1"),
Archived: utils.PtrTo(false),
Fork: utils.PtrTo(false),
Private: utils.PtrTo(false),
},
config: &GithubOrgReaderConfig{
ExcludeRepos: []string{},
IncludeArchived: true,
IncludeForks: true,
PrivateOnly: false,
},
expectedExcl: false,
expectedReason: "",
},
{
name: "match excluded",
fullName: "x/y",
excluded: []string{"y/z", "x/y"},
expectedVal: true,
name: "explicitly excluded - exact match",
repo: &github.Repository{
FullName: utils.PtrTo("org/repo1"),
Archived: utils.PtrTo(false),
Fork: utils.PtrTo(false),
Private: utils.PtrTo(false),
},
config: &GithubOrgReaderConfig{
ExcludeRepos: []string{"org/repo1"},
IncludeArchived: true,
IncludeForks: true,
PrivateOnly: false,
},
expectedExcl: true,
expectedReason: "explicitly excluded",
},
{
name: "match with ignore whitespace",
fullName: "x/y",
excluded: []string{" x/y ", "b", "c"},
expectedVal: true,
name: "explicitly excluded - with whitespace",
repo: &github.Repository{
FullName: utils.PtrTo("org/repo1"),
Archived: utils.PtrTo(false),
Fork: utils.PtrTo(false),
Private: utils.PtrTo(false),
},
config: &GithubOrgReaderConfig{
ExcludeRepos: []string{" org/repo1 ", "org/other"},
IncludeArchived: true,
IncludeForks: true,
PrivateOnly: false,
},
expectedExcl: true,
expectedReason: "explicitly excluded",
},
{
name: "no match",
fullName: "x/u",
excluded: []string{"x/y"},
expectedVal: false,
name: "not in exclusion list",
repo: &github.Repository{
FullName: utils.PtrTo("org/repo1"),
Archived: utils.PtrTo(false),
Fork: utils.PtrTo(false),
Private: utils.PtrTo(false),
},
config: &GithubOrgReaderConfig{
ExcludeRepos: []string{"org/repo2", "org/repo3"},
IncludeArchived: true,
IncludeForks: true,
PrivateOnly: false,
},
expectedExcl: false,
expectedReason: "",
},
{
name: "archived repo excluded when IncludeArchived is false",
repo: &github.Repository{
FullName: utils.PtrTo("org/archived-repo"),
Archived: utils.PtrTo(true),
Fork: utils.PtrTo(false),
Private: utils.PtrTo(false),
},
config: &GithubOrgReaderConfig{
ExcludeRepos: []string{},
IncludeArchived: false,
IncludeForks: true,
PrivateOnly: false,
},
expectedExcl: true,
expectedReason: "archived",
},
{
name: "archived repo included when IncludeArchived is true",
repo: &github.Repository{
FullName: utils.PtrTo("org/archived-repo"),
Archived: utils.PtrTo(true),
Fork: utils.PtrTo(false),
Private: utils.PtrTo(false),
},
config: &GithubOrgReaderConfig{
ExcludeRepos: []string{},
IncludeArchived: true,
IncludeForks: true,
PrivateOnly: false,
},
expectedExcl: false,
expectedReason: "",
},
{
name: "forked repo excluded when IncludeForks is false",
repo: &github.Repository{
FullName: utils.PtrTo("org/forked-repo"),
Archived: utils.PtrTo(false),
Fork: utils.PtrTo(true),
Private: utils.PtrTo(false),
},
config: &GithubOrgReaderConfig{
ExcludeRepos: []string{},
IncludeArchived: true,
IncludeForks: false,
PrivateOnly: false,
},
expectedExcl: true,
expectedReason: "forked",
},
{
name: "forked repo included when IncludeForks is true",
repo: &github.Repository{
FullName: utils.PtrTo("org/forked-repo"),
Archived: utils.PtrTo(false),
Fork: utils.PtrTo(true),
Private: utils.PtrTo(false),
},
config: &GithubOrgReaderConfig{
ExcludeRepos: []string{},
IncludeArchived: true,
IncludeForks: true,
PrivateOnly: false,
},
expectedExcl: false,
expectedReason: "",
},
{
name: "public repo excluded when PrivateOnly is true",
repo: &github.Repository{
FullName: utils.PtrTo("org/public-repo"),
Archived: utils.PtrTo(false),
Fork: utils.PtrTo(false),
Private: utils.PtrTo(false),
},
config: &GithubOrgReaderConfig{
ExcludeRepos: []string{},
IncludeArchived: true,
IncludeForks: true,
PrivateOnly: true,
},
expectedExcl: true,
expectedReason: "not private",
},
{
name: "private repo included when PrivateOnly is true",
repo: &github.Repository{
FullName: utils.PtrTo("org/private-repo"),
Archived: utils.PtrTo(false),
Fork: utils.PtrTo(false),
Private: utils.PtrTo(true),
},
config: &GithubOrgReaderConfig{
ExcludeRepos: []string{},
IncludeArchived: true,
IncludeForks: true,
PrivateOnly: true,
},
expectedExcl: false,
expectedReason: "",
},
{
name: "public repo included when PrivateOnly is false",
repo: &github.Repository{
FullName: utils.PtrTo("org/public-repo"),
Archived: utils.PtrTo(false),
Fork: utils.PtrTo(false),
Private: utils.PtrTo(false),
},
config: &GithubOrgReaderConfig{
ExcludeRepos: []string{},
IncludeArchived: true,
IncludeForks: true,
PrivateOnly: false,
},
expectedExcl: false,
expectedReason: "",
},
{
name: "multiple exclusion criteria - archived and forked",
repo: &github.Repository{
FullName: utils.PtrTo("org/archived-fork"),
Archived: utils.PtrTo(true),
Fork: utils.PtrTo(true),
Private: utils.PtrTo(false),
},
config: &GithubOrgReaderConfig{
ExcludeRepos: []string{},
IncludeArchived: false,
IncludeForks: false,
PrivateOnly: false,
},
expectedExcl: true,
expectedReason: "archived", // First check that matches
},
{
name: "explicit exclusion takes precedence",
repo: &github.Repository{
FullName: utils.PtrTo("org/excluded-repo"),
Archived: utils.PtrTo(false),
Fork: utils.PtrTo(false),
Private: utils.PtrTo(true),
},
config: &GithubOrgReaderConfig{
ExcludeRepos: []string{"org/excluded-repo"},
IncludeArchived: true,
IncludeForks: true,
PrivateOnly: false,
},
expectedExcl: true,
expectedReason: "explicitly excluded",
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
ret := githubIsExcludedRepo(tc.fullName, tc.excluded)
assert.Equal(t, tc.expectedVal, ret)
excluded, reason := githubIsExcludedRepo(tc.repo, tc.config)
assert.Equal(t, tc.expectedExcl, excluded, "Expected exclusion status mismatch")
assert.Equal(t, tc.expectedReason, reason, "Expected exclusion reason mismatch")
})
}
}

13
scan.go
View File

@ -46,6 +46,9 @@ var (
githubOrgMaxRepositories int
githubOrgExcludedRepos []string
githubSkipDependencyGraphAPI bool
githubOrgIncludeArchived bool
githubOrgIncludeForks bool
githubOrgPrivateOnly bool
scanExclude []string
transitiveAnalysis bool
transitiveDepth int
@ -150,6 +153,12 @@ func newScanCommand() *cobra.Command {
"Maximum number of repositories to process for the Github Org")
cmd.Flags().StringArrayVarP(&githubOrgExcludedRepos, "github-org-exclude-repos", "", []string{},
"Comma-separated list of GitHub repos to exclude during org scan (format: org/repo1,org/repo2)")
cmd.Flags().BoolVarP(&githubOrgIncludeArchived, "github-org-include-archived", "", false,
"Include archived repositories when scanning a GitHub organization")
cmd.Flags().BoolVarP(&githubOrgIncludeForks, "github-org-include-forks", "", false,
"Include forked repositories when scanning a GitHub organization")
cmd.Flags().BoolVarP(&githubOrgPrivateOnly, "github-org-private-only", "", false,
"Only scan private repositories in the GitHub organization")
cmd.Flags().BoolVarP(&githubSkipDependencyGraphAPI, "skip-github-dependency-graph-api", "", false,
"Do not use GitHub Dependency Graph API to fetch dependencies")
cmd.Flags().StringVarP(&lockfileAs, "lockfile-as", "", "",
@ -415,10 +424,12 @@ func internalStartScan() error {
// nolint:ineffassign,staticcheck
reader, err = readers.NewGithubOrgReader(githubClient, &readers.GithubOrgReaderConfig{
OrganizationURL: githubOrgUrl,
IncludeArchived: false,
IncludeArchived: githubOrgIncludeArchived,
MaxRepositories: githubOrgMaxRepositories,
SkipDependencyGraphAPI: githubSkipDependencyGraphAPI,
ExcludeRepos: githubOrgExcludedRepos,
IncludeForks: githubOrgIncludeForks,
PrivateOnly: githubOrgPrivateOnly,
})
} else if len(purlSpec) > 0 {
analytics.TrackCommandScanPurlScan()