Support exclusion patterns for lockfiles flag (#543)

* introduce config for lockfile reader

* add exclusion support

* add test cases for exclusion patterns

* refactor: introduce common exclusion matcher and update lockfile reader to use it

* chore: rm print statements

* refactor: use better naming for tests

* use doublestar lib for supporting dir reader exclusion patterns

* fix: path handling in exclusion matcher to support relative & absolute paths
This commit is contained in:
Sahil Bansal 2025-07-22 08:37:41 +05:30 committed by GitHub
parent 3d6d8ed036
commit 150cad94a6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 238 additions and 61 deletions

2
go.mod
View File

@ -143,7 +143,7 @@ require (
github.com/bkielbasa/cyclop v1.2.3 // indirect
github.com/blizzy78/varnamelen v0.8.0 // indirect
github.com/bmatcuk/doublestar v1.3.4 // indirect
github.com/bmatcuk/doublestar/v4 v4.8.1 // indirect
github.com/bmatcuk/doublestar/v4 v4.9.0 // indirect
github.com/boltdb/bolt v1.3.1 // indirect
github.com/bombsimon/wsl/v4 v4.5.0 // indirect
github.com/breml/bidichk v0.3.2 // indirect

2
go.sum
View File

@ -277,6 +277,8 @@ github.com/bmatcuk/doublestar v1.3.4 h1:gPypJ5xD31uhX6Tf54sDPUOBXTqKH4c9aPY66CyQ
github.com/bmatcuk/doublestar v1.3.4/go.mod h1:wiQtGV+rzVYxB7WIlirSN++5HPtPlXEo9MEoZQC/PmE=
github.com/bmatcuk/doublestar/v4 v4.8.1 h1:54Bopc5c2cAvhLRAzqOGCYHYyhcDHsFF4wWIR5wKP38=
github.com/bmatcuk/doublestar/v4 v4.8.1/go.mod h1:xBQ8jztBU6kakFMg+8WGxn0c6z1fTSPVIjEY1Wr7jzc=
github.com/bmatcuk/doublestar/v4 v4.9.0 h1:DBvuZxjdKkRP/dr4GVV4w2fnmrk5Hxc90T51LZjv0JA=
github.com/bmatcuk/doublestar/v4 v4.9.0/go.mod h1:xBQ8jztBU6kakFMg+8WGxn0c6z1fTSPVIjEY1Wr7jzc=
github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4=
github.com/boltdb/bolt v1.3.1 h1:JQmyP4ZBrce+ZQu0dY660FMfatumYDLun9hBCUVIkF4=
github.com/boltdb/bolt v1.3.1/go.mod h1:clJnj/oiGkjum5o1McbSZDSLxVThjynRyGBgiAx27Ps=

View File

@ -1,3 +1,51 @@
package readers
import (
"path/filepath"
"github.com/bmatcuk/doublestar/v4"
"github.com/safedep/vet/pkg/common/logger"
)
const defaultApplicationName = "vet-scanned-project"
// exclusionMatcher decides whether a path should be ignored, based on a list
// of glob patterns.
type exclusionMatcher struct {
	// Exclusions are the glob patterns a path is tested against.
	Exclusions []string
}

// newPathExclusionMatcher builds an exclusionMatcher around the supplied glob
// patterns. The slice is stored as given; it is not copied.
func newPathExclusionMatcher(exclusions []string) *exclusionMatcher {
	matcher := new(exclusionMatcher)
	matcher.Exclusions = exclusions

	return matcher
}
// Match reports whether term (a file or directory path) is matched by at
// least one of the configured exclusion glob patterns.
//
// Every pattern is first tried against term as given. When exactly one of the
// two is an absolute path, the relative one is resolved against the current
// working directory and the match is retried, so relative and absolute forms
// of the same location can be mixed freely.
//
// A malformed pattern never matches; it is reported with a warning and the
// remaining patterns are still evaluated.
func (ex *exclusionMatcher) Match(term string) bool {
	for _, exclusionPattern := range ex.Exclusions {
		matched, err := matchExclusionPattern(exclusionPattern, term)
		if err != nil {
			// Surface the broken pattern instead of silently ignoring it,
			// otherwise the user has no hint why the exclusion is not applied.
			logger.Warnf("Invalid exclusion pattern: %s: %v", exclusionPattern, err)
			continue
		}

		if matched {
			return true
		}

		logger.Debugf("No match for pattern '%s' against '%s'", exclusionPattern, term)
	}

	return false
}

// matchExclusionPattern matches a single glob pattern against term, retrying
// with an absolute form when only one of them is absolute. The returned error
// is the pattern error reported by doublestar, if any.
func matchExclusionPattern(pattern, term string) (bool, error) {
	// doublestar.Match always treats '/' as the path separator, while paths
	// produced by the OS (and by filepath.Abs) use the platform separator.
	// Normalizing both sides keeps matching working on Windows as well. On
	// platforms where the separator is already '/', ToSlash is a no-op.
	slashPattern := filepath.ToSlash(pattern)
	slashTerm := filepath.ToSlash(term)

	// Try matching in the given form first.
	matched, err := doublestar.Match(slashPattern, slashTerm)
	if err != nil || matched {
		return matched, err
	}

	patternIsAbs := filepath.IsAbs(pattern)
	termIsAbs := filepath.IsAbs(term)

	switch {
	case patternIsAbs && !termIsAbs:
		// Relative term, absolute pattern: resolve the term.
		absTerm, absErr := filepath.Abs(term)
		if absErr != nil {
			// The working directory could not be determined, so there is
			// nothing to compare against: treat it as "no match".
			return false, nil
		}

		return doublestar.Match(slashPattern, filepath.ToSlash(absTerm))
	case termIsAbs && !patternIsAbs:
		// Absolute term, relative pattern: resolve the pattern.
		absPattern, absErr := filepath.Abs(pattern)
		if absErr != nil {
			// Same as above: unresolved pattern means "no match".
			return false, nil
		}

		return doublestar.Match(filepath.ToSlash(absPattern), slashTerm)
	}

	return false, nil
}

107
pkg/readers/common_test.go Normal file
View File

@ -0,0 +1,107 @@
package readers
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestExcludedPath runs the exclusion matcher against a table of path and
// glob pattern combinations and checks the exclusion decision for each one.
func TestExcludedPath(t *testing.T) {
	type scenario struct {
		title    string
		target   string
		globs    []string
		excluded bool
	}

	// The same three directory patterns are used by two scenarios; each gets
	// its own slice so the scenarios stay independent.
	commonDirs := func() []string {
		return []string{"vendor/*", "test/*", "node_modules/*"}
	}

	scenarios := []scenario{
		{"No exclusions", "package-lock.json", []string{}, false},
		{"Simple glob match", "vendor/package-lock.json", []string{"vendor/*"}, true},
		{"Multiple patterns with match", "test/yarn.lock", commonDirs(), true},
		{"Multiple patterns without match", "src/package-lock.json", commonDirs(), false},
		{"Invalid pattern character", "package-lock.json", []string{"["}, false},
		{"Subdirectory with match", "pkg/readers/fixtures/requirements.txt", []string{"pkg/readers/*/**"}, true},
		{"Subdirectory without match", "pkg/readers/fixtures/requirements.txt", []string{"pkg/readers/*"}, false},
		{"Single character wildcard", "test/a.json", []string{"test/?.json"}, true},
		{"Character class match", "test-123/package.json", []string{"test-[0-9]*/package.json"}, true},
		{"matches wildcard with missing characters in filename", "pom.xml", []string{"p*.xml"}, true},
		{"matches wildcard across nested subdirectories", "pkg/readers/fixtures/requirements.txt", []string{"pkg/readers/**/*.txt"}, true},
		{"should exclude deeply nested file with recursive glob", "dir1/subdirA/subdirB/requirements.txt", []string{"**/requirements.txt"}, true},
	}

	for _, sc := range scenarios {
		t.Run(sc.title, func(t *testing.T) {
			got := newPathExclusionMatcher(sc.globs).Match(sc.target)

			assert.Equal(t, sc.excluded, got,
				"Expected path.Match to return %v for path %s with patterns %v",
				sc.excluded, sc.target, sc.globs)
		})
	}
}

View File

@ -3,7 +3,6 @@ package readers
import (
"os"
"path/filepath"
"regexp"
"strings"
"github.com/safedep/vet/pkg/common/logger"
@ -15,7 +14,7 @@ type DirectoryReaderConfig struct {
// Path to enumerate
Path string
// Exclusions are regex patterns to ignore paths
// Exclusions are glob patterns to ignore paths
Exclusions []string
// Explicitly walk for the given manifest type. If this is empty
@ -31,7 +30,7 @@ type directoryReader struct {
// NewDirectoryReader creates a [PackageManifestReader] that can scan a directory
// for package manifests while honoring exclusion rules. This reader will log
// and ignore parser failure. But it will fail in case the manifest handler
// returns an error. Exclusion strings are treated as regex patterns and applied
// returns an error. Exclusion strings are treated as glob patterns and applied
// on the absolute file path discovered while walking the directory.
func NewDirectoryReader(config DirectoryReaderConfig) (PackageManifestReader, error) {
return &directoryReader{
@ -54,6 +53,8 @@ func (p *directoryReader) ApplicationName() (string, error) {
func (p *directoryReader) EnumManifests(handler func(*models.PackageManifest,
PackageReader) error,
) error {
exclusionMatcher := newPathExclusionMatcher(p.config.Exclusions)
err := filepath.WalkDir(p.config.Path, func(path string, info os.DirEntry, err error) error {
if err != nil {
return err
@ -69,7 +70,7 @@ func (p *directoryReader) EnumManifests(handler func(*models.PackageManifest,
return err
}
if p.excludedPath(path) {
if exclusionMatcher.Match(path) {
logger.Debugf("Ignoring excluded path: %s", path)
return filepath.SkipDir
}
@ -104,23 +105,6 @@ func (p *directoryReader) EnumManifests(handler func(*models.PackageManifest,
return err
}
// TODO: Build a precompiled cache of regex patterns
// excludedPath reports whether path matches any of the regular expressions
// listed in p.config.Exclusions. A pattern that is not a valid regular
// expression is logged as a warning and skipped.
func (p *directoryReader) excludedPath(path string) bool {
for _, pattern := range p.config.Exclusions {
// regexp.MatchString compiles the pattern on every call.
m, err := regexp.MatchString(pattern, path)
if err != nil {
logger.Warnf("Invalid regex pattern: %s: %v", pattern, err)
continue
}
// The first matching pattern is enough to exclude the path.
if m {
return true
}
}
return false
}
func (p *directoryReader) ignorableDirectory(name string) bool {
dirs := []string{
".git",

View File

@ -97,7 +97,7 @@ func TestDirectoryReaderEnumPackages(t *testing.T) {
{
"Directory enumeration with exclusion patterns",
"./fixtures/multi-with-invalid",
[]string{"requirements.txt"},
[]string{"**/requirements.txt"},
"multi-with-invalid",
1,
// for pom.xml we have 4 dependencies = 1 Direct + 3 Transitive Dependencies
@ -192,26 +192,14 @@ func TestDirectoryReaderExcludedPath(t *testing.T) {
"not.json",
},
{
"Regular Expression Match 1",
[]string{"^f[a-z]+.json$"},
"file.json",
"file.txt",
"match json in specific recursive subdirectory",
[]string{"docs/a/**/*.json"},
"docs/a/sample.json",
"docs/b/sample.json",
},
{
"Regular Expression Match 2",
[]string{"^f[a-z]+.json$"},
"file.json",
"afile.json",
},
{
"Regular Expression Match 3",
[]string{"^f[a-z]+.json$"},
"file.json",
"file.jsons",
},
{
"Subdirectory Match",
[]string{"docs\\/a\\/.*\\.json"},
"match full path json in specific recursive subdirectory",
[]string{"**/docs/a/**/*.json"},
"/a/b/docs/a/sample.json",
"/a/b/docs/b/sample.json",
},
@ -219,17 +207,13 @@ func TestDirectoryReaderExcludedPath(t *testing.T) {
for _, test := range cases {
t.Run(test.name, func(t *testing.T) {
r, err := NewDirectoryReader(DirectoryReaderConfig{
Path: "test-path",
Exclusions: test.patterns,
})
assert.Nil(t, err)
m := newPathExclusionMatcher(test.patterns)
var ret bool
ret = r.(*directoryReader).excludedPath(test.matchInput)
ret = m.Match(test.matchInput)
assert.True(t, ret)
ret = r.(*directoryReader).excludedPath(test.noMatchInput)
ret = m.Match(test.noMatchInput)
assert.False(t, ret)
})
}

View File

@ -1,6 +1,7 @@
package readers
import (
"github.com/safedep/vet/pkg/common/logger"
"github.com/safedep/vet/pkg/models"
"github.com/safedep/vet/pkg/parser"
)
@ -10,18 +11,24 @@ const (
)
type lockfileReader struct {
lockfiles []string
lockfileAs string
config LockfileReaderConfig
}
// LockfileReaderConfig is the configuration accepted by NewLockfileReader.
type LockfileReaderConfig struct {
// Lockfiles are the paths of the lockfiles to read
Lockfiles []string
// LockfileAs is the lockfile type passed to the parser when resolving
// each entry of Lockfiles
LockfileAs string
// Exclusions are glob patterns to ignore paths
Exclusions []string
}
// NewLockfileReader creates a [PackageManifestReader] that can be used to read
// one or more `lockfiles` interpreted as `lockfileAs`. When `lockfileAs` is empty
// the parser auto-detects the format based on file name. This reader fails and
// returns an error on first error encountered while parsing lockfiles
func NewLockfileReader(lockfiles []string, lockfileAs string) (PackageManifestReader, error) {
func NewLockfileReader(config LockfileReaderConfig) (PackageManifestReader, error) {
return &lockfileReader{
lockfiles: lockfiles,
lockfileAs: lockfileAs,
config: config,
}, nil
}
@ -40,8 +47,14 @@ func (p *lockfileReader) ApplicationName() (string, error) {
func (p *lockfileReader) EnumManifests(handler func(*models.PackageManifest,
PackageReader) error,
) error {
for _, lf := range p.lockfiles {
rf, rt, err := parser.ResolveParseTarget(lf, p.lockfileAs,
exclusionMatcher := newPathExclusionMatcher(p.config.Exclusions)
for _, lf := range p.config.Lockfiles {
if exclusionMatcher.Match(lf) {
logger.Debugf("Ignoring excluded path: %s", lf)
continue
}
rf, rt, err := parser.ResolveParseTarget(lf, p.config.LockfileAs,
[]parser.TargetScopeType{parser.TargetScopeAll})
if err != nil {
return err

View File

@ -15,6 +15,7 @@ func TestLockfileReaderEnumManifests(t *testing.T) {
// Input
lockfiles []string
lockfileAs string
exclusions []string
// Output
cbRet error
@ -28,6 +29,7 @@ func TestLockfileReaderEnumManifests(t *testing.T) {
"Single lockfile parse",
[]string{"./fixtures/java/gradle.lockfile"},
"", // Auto detect from name
[]string{},
nil,
nil,
1,
@ -40,6 +42,7 @@ func TestLockfileReaderEnumManifests(t *testing.T) {
"./fixtures/multi-with-invalid/requirements.txt",
},
"", // Auto detect from name
[]string{},
nil,
nil,
2,
@ -49,6 +52,7 @@ func TestLockfileReaderEnumManifests(t *testing.T) {
"Lockfile parse with non_standard name",
[]string{"./fixtures/custom-lockfiles/1-gradle.txt"},
"gradle.lockfile",
[]string{},
nil,
nil,
1,
@ -62,11 +66,25 @@ func TestLockfileReaderEnumManifests(t *testing.T) {
"./fixtures/java/gradle.lockfile",
},
"", // Auto detect from name
[]string{},
nil,
errors.New("invalid character"),
0,
[]int{13},
},
{
"Multiple lockfile parse with exclusion",
[]string{
"./fixtures/java/gradle.lockfile",
"./fixtures/multi-with-invalid/requirements.txt",
},
"", // Auto detect from name
[]string{"./fixtures/multi-with-invalid/requirements.txt"},
nil,
nil,
1,
[]int{3},
},
{
"Callback returns an error",
[]string{
@ -74,6 +92,7 @@ func TestLockfileReaderEnumManifests(t *testing.T) {
"./fixtures/java/gradle.lockfile",
},
"", // Auto detect from name
[]string{},
errors.New("callback error"),
errors.New("callback error"),
1,
@ -83,6 +102,7 @@ func TestLockfileReaderEnumManifests(t *testing.T) {
"Lockfile has non_standard name and no hint",
[]string{"./a.txt"},
"",
[]string{},
nil,
errors.New("no parser found"),
0,
@ -92,6 +112,7 @@ func TestLockfileReaderEnumManifests(t *testing.T) {
"Lockfile does not exists",
[]string{"./a.txt"},
"gradle.lockfile",
[]string{},
nil,
errors.New("no such file or directory"),
0,
@ -101,6 +122,7 @@ func TestLockfileReaderEnumManifests(t *testing.T) {
"Duplicate packages with extras (GitHub issue #343)",
[]string{"./fixtures/duplicate-packages/requirements.txt"},
"",
[]string{},
nil,
nil,
1,
@ -110,7 +132,11 @@ func TestLockfileReaderEnumManifests(t *testing.T) {
for _, test := range cases {
t.Run(test.name, func(t *testing.T) {
r, err := NewLockfileReader(test.lockfiles, test.lockfileAs)
r, err := NewLockfileReader(LockfileReaderConfig{
Lockfiles: test.lockfiles,
LockfileAs: test.lockfileAs,
Exclusions: test.exclusions,
})
assert.Nil(t, err)
manifestCount := 0
@ -142,7 +168,12 @@ func TestLockfileReaderEnumManifests(t *testing.T) {
func TestLockfileReaderDeduplication(t *testing.T) {
// Test specifically for GitHub issue #343 - duplicate packages with extras
t.Run("Deduplicates packages with extras syntax", func(t *testing.T) {
r, err := NewLockfileReader([]string{"./fixtures/duplicate-packages/requirements.txt"}, "")
r, err := NewLockfileReader(LockfileReaderConfig{
Lockfiles: []string{"./fixtures/duplicate-packages/requirements.txt"},
LockfileAs: "",
Exclusions: []string{},
})
assert.Nil(t, err)
var packages []*models.Package
@ -164,7 +195,7 @@ func TestLockfileReaderDeduplication(t *testing.T) {
assert.Contains(t, packageNames, "bleach")
assert.Equal(t, "3.1.2", packageNames["bleach"], "bleach should have explicit version 3.1.2")
// Verify requests has explicit version, not 0.0.0
// Verify requests has explicit version, not 0.0.0
assert.Contains(t, packageNames, "requests")
assert.Equal(t, "2.25.1", packageNames["requests"], "requests should have explicit version 2.25.1")

14
scan.go
View File

@ -124,7 +124,7 @@ func newScanCommand() *cobra.Command {
cmd.Flags().StringVarP(&baseDirectory, "directory", "D", wd,
"The directory to scan for package manifests")
cmd.Flags().StringArrayVarP(&scanExclude, "exclude", "", []string{},
"Name patterns to ignore while scanning a directory")
"Name patterns to ignore while scanning")
cmd.Flags().StringArrayVarP(&lockfiles, "lockfiles", "L", []string{},
"List of lockfiles to scan")
cmd.Flags().StringArrayVarP(&manifests, "manifests", "M", []string{},
@ -351,7 +351,11 @@ func internalStartScan() error {
analytics.TrackCommandScanPackageManifestScan()
// nolint:ineffassign,staticcheck
reader, err = readers.NewLockfileReader(lockfiles, manifestType)
reader, err = readers.NewLockfileReader(readers.LockfileReaderConfig{
Lockfiles: lockfiles,
LockfileAs: manifestType,
Exclusions: scanExclude,
})
} else if len(manifests) > 0 {
analytics.TrackCommandScanPackageManifestScan()
@ -361,7 +365,11 @@ func internalStartScan() error {
}
// nolint:ineffassign,staticcheck
reader, err = readers.NewLockfileReader(manifests, manifestType)
reader, err = readers.NewLockfileReader(readers.LockfileReaderConfig{
Lockfiles: manifests,
LockfileAs: manifestType,
Exclusions: scanExclude,
})
} else if len(githubRepoUrls) > 0 {
analytics.TrackCommandScanGitHubScan()