feat: Add support for malysis min confidence config (#429)

* feat: Add support for malysis min confidence config
* fix: Test case to use factory function
2025-12-10 00:22:08 -06:00 · 2025-03-26 14:07:40 +05:30 · 2025-03-26 14:07:40 +05:30 · f6258fdc86
commit f6258fdc86
parent 635baeb86e
3 changed files with 185 additions and 66 deletions
--- a/pkg/analyzer/malware.go
+++ b/pkg/analyzer/malware.go
@ -2,6 +2,7 @@ package analyzer

 import (
 	"fmt"
+	"strings"
 	"sync"

 	malysisv1 "buf.build/gen/go/safedep/api/protocolbuffers/go/safedep/messages/malysis/v1"
@ -19,6 +20,13 @@ type MalwareAnalyzerConfig struct {

 	// Fail fast on malware detection
 	FailFast bool
+
+	// Minimum confidence an automated analysis result needs to be treated as malware (with TrustAutomatedAnalysis)
+	// Should be HIGH, MEDIUM or LOW (case-insensitive); defaults to HIGH when empty
+	MinimumConfidence string
+
+	// Internally mapped confidence level as per protobuf spec
+	minimumConfidenceLevel malysisv1.Report_Evidence_Confidence
 }

 func DefaultMalwareAnalyzerConfig() MalwareAnalyzerConfig {
@ -36,6 +44,17 @@ type malwareAnalyzer struct {
 var _ Analyzer = (*malwareAnalyzer)(nil)

 func NewMalwareAnalyzer(config MalwareAnalyzerConfig) (*malwareAnalyzer, error) {
+	config.minimumConfidenceLevel = malysisv1.Report_Evidence_CONFIDENCE_HIGH
+	if config.MinimumConfidence != "" {
+		confidenceName := fmt.Sprintf("CONFIDENCE_%s", strings.ToUpper(config.MinimumConfidence))
+		conf, ok := malysisv1.Report_Evidence_Confidence_value[confidenceName]
+		if !ok {
+			return nil, fmt.Errorf("invalid minimum confidence level: %s", config.MinimumConfidence)
+		}
+
+		config.minimumConfidenceLevel = malysisv1.Report_Evidence_Confidence(conf)
+	}
+
 	return &malwareAnalyzer{
 		config: config,
 	}, nil
@ -46,7 +65,8 @@ func (a *malwareAnalyzer) Name() string {
 }

 func (a *malwareAnalyzer) Analyze(manifest *models.PackageManifest,
-	handler AnalyzerEventHandler) error {
+	handler AnalyzerEventHandler,
+) error {
 	return readers.NewManifestModelReader(manifest).EnumPackages(func(pkg *models.Package) error {
 		err := a.applyMalwareDecision(pkg)
 		if err != nil {
@ -84,7 +104,6 @@ func (a *malwareAnalyzer) Analyze(manifest *models.PackageManifest,
 				Tags:        []string{"malware-analysis"},
 			},
 		})
-
 		if err != nil {
 			logger.Errorf("MalwareAnalyzer: Failed to handle filter event for package %s/%s/%s: %v",
 				pkg.GetControlTowerSpecEcosystem(), pkg.GetName(), pkg.GetVersion(), err)
@ -138,16 +157,27 @@ func (a *malwareAnalyzer) applyMalwareDecision(pkg *models.Package) error {
 		return nil
 	}

-	if a.config.TrustAutomatedAnalysis &&
-		report.GetInference().GetConfidence() == malysisv1.Report_Evidence_CONFIDENCE_HIGH {
+	// By default we do not trust results without a verification record
+	// unless the config is set to trust automated analysis and the report meets the minimum confidence
+	if a.config.TrustAutomatedAnalysis && a.hasMinimumConfidence(report) {
 		logger.Warnf("MalwareAnalyzer: Package %s/%s/%s is classified as malware",
 			pkg.GetControlTowerSpecEcosystem(), pkg.GetName(), pkg.GetVersion())
 		ma.IsMalware = true
+		return nil
 	}

-	logger.Warnf("MalwareAnalyzer: Package %s/%s/%s is classified as malware with low confidence",
+	logger.Warnf("MalwareAnalyzer: Package %s/%s/%s is classified as suspicious due to low confidence",
 		pkg.GetControlTowerSpecEcosystem(), pkg.GetName(), pkg.GetVersion())
 	ma.IsSuspicious = true
-
 	return nil
 }
+
+func (a *malwareAnalyzer) hasMinimumConfidence(report *malysisv1.Report) bool {
+	confidence := report.GetInference().GetConfidence()
+	if confidence == malysisv1.Report_Evidence_CONFIDENCE_UNSPECIFIED {
+		return false
+	}
+
+	// Confidence is a protobuf enum whose integer values grow as confidence drops (HIGH < MEDIUM < LOW), so "at least the minimum" is <=
+	return confidence <= a.config.minimumConfidenceLevel
+}
--- a/pkg/analyzer/malware_test.go
+++ b/pkg/analyzer/malware_test.go
@ -8,6 +8,89 @@ import (
 	"github.com/stretchr/testify/assert"
 )

+func TestNewMalwareAnalyzerHasMinimumConfidence(t *testing.T) {
+	cases := []struct {
+		name              string
+		config            MalwareAnalyzerConfig
+		expectedConfLevel malysisv1.Report_Evidence_Confidence
+		reports           []struct {
+			confidence malysisv1.Report_Evidence_Confidence
+			expected   bool
+		}
+		wantError bool
+	}{
+		{
+			name:              "when minimum confidence is not set",
+			config:            MalwareAnalyzerConfig{},
+			expectedConfLevel: malysisv1.Report_Evidence_CONFIDENCE_HIGH,
+			reports: []struct {
+				confidence malysisv1.Report_Evidence_Confidence
+				expected   bool
+			}{
+				{malysisv1.Report_Evidence_CONFIDENCE_MEDIUM, false},
+				{malysisv1.Report_Evidence_CONFIDENCE_UNSPECIFIED, false},
+				{malysisv1.Report_Evidence_CONFIDENCE_LOW, false},
+				{malysisv1.Report_Evidence_CONFIDENCE_HIGH, true},
+			},
+		},
+		{
+			name:              "when minimum confidence is set to HIGH",
+			config:            MalwareAnalyzerConfig{MinimumConfidence: "HIGH"},
+			expectedConfLevel: malysisv1.Report_Evidence_CONFIDENCE_HIGH,
+			reports: []struct {
+				confidence malysisv1.Report_Evidence_Confidence
+				expected   bool
+			}{
+				{malysisv1.Report_Evidence_CONFIDENCE_UNSPECIFIED, false},
+				{malysisv1.Report_Evidence_CONFIDENCE_LOW, false},
+				{malysisv1.Report_Evidence_CONFIDENCE_MEDIUM, false},
+				{malysisv1.Report_Evidence_CONFIDENCE_HIGH, true},
+			},
+		},
+		{
+			name:              "when minimum confidence is set to MEDIUM",
+			config:            MalwareAnalyzerConfig{MinimumConfidence: "MEDIUM"},
+			expectedConfLevel: malysisv1.Report_Evidence_CONFIDENCE_MEDIUM,
+			reports: []struct {
+				confidence malysisv1.Report_Evidence_Confidence
+				expected   bool
+			}{
+				{malysisv1.Report_Evidence_CONFIDENCE_LOW, false},
+				{malysisv1.Report_Evidence_CONFIDENCE_UNSPECIFIED, false},
+				{malysisv1.Report_Evidence_CONFIDENCE_MEDIUM, true},
+				{malysisv1.Report_Evidence_CONFIDENCE_HIGH, true},
+			},
+		},
+		{
+			name:      "when invalid minimum confidence is set",
+			config:    MalwareAnalyzerConfig{MinimumConfidence: "INVALID"},
+			wantError: true,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			analyzer, err := NewMalwareAnalyzer(tc.config)
+			if tc.wantError {
+				assert.Error(t, err)
+				assert.Nil(t, analyzer)
+				return
+			}
+
+			assert.NoError(t, err)
+			assert.Equal(t, tc.expectedConfLevel, analyzer.config.minimumConfidenceLevel)
+
+			for _, report := range tc.reports {
+				assert.Equal(t, report.expected, analyzer.hasMinimumConfidence(&malysisv1.Report{
+					Inference: &malysisv1.Report_Inference{
+						Confidence: report.confidence,
+					},
+				}))
+			}
+		})
+	}
+}
+
 func TestMalwareAnalyzerDecision(t *testing.T) {
 	pkgDetail := models.NewPackageDetail(models.EcosystemNpm, "test", "1.0.0")
 	pkgManifest := models.NewPackageManifestFromLocal("test", models.EcosystemNpm)
@ -26,7 +109,7 @@ func TestMalwareAnalyzerDecision(t *testing.T) {
 				PackageDetails: pkgDetail,
 			},
 			assert: func(pkg *models.Package) {
-				assert.Nil(t, pkg.GetMalwareAnalysisResult())
+				assert.Nil(t, pkg.GetMalwareAnalysisResult(), "should not have malware analysis result")
 			},
 		},
 		{
@ -47,7 +130,7 @@ func TestMalwareAnalyzerDecision(t *testing.T) {
 				},
 			},
 			assert: func(pkg *models.Package) {
-				assert.True(t, pkg.GetMalwareAnalysisResult().IsMalware)
+				assert.True(t, pkg.GetMalwareAnalysisResult().IsMalware, "should be malware")
 			},
 		},
 		{
@ -65,8 +148,8 @@ func TestMalwareAnalyzerDecision(t *testing.T) {
 				},
 			},
 			assert: func(pkg *models.Package) {
-				assert.False(t, pkg.GetMalwareAnalysisResult().IsMalware)
-				assert.True(t, pkg.GetMalwareAnalysisResult().IsSuspicious)
+				assert.False(t, pkg.GetMalwareAnalysisResult().IsMalware, "should not be malware")
+				assert.True(t, pkg.GetMalwareAnalysisResult().IsSuspicious, "should be suspicious")
 			},
 		},
 		{
@ -85,7 +168,7 @@ func TestMalwareAnalyzerDecision(t *testing.T) {
 				},
 			},
 			assert: func(pkg *models.Package) {
-				assert.True(t, pkg.GetMalwareAnalysisResult().IsMalware)
+				assert.True(t, pkg.GetMalwareAnalysisResult().IsMalware, "should be malware")
 			},
 		},
 		{
@ -104,16 +187,18 @@ func TestMalwareAnalyzerDecision(t *testing.T) {
 				},
 			},
 			assert: func(pkg *models.Package) {
-				assert.False(t, pkg.GetMalwareAnalysisResult().IsMalware)
-				assert.True(t, pkg.GetMalwareAnalysisResult().IsSuspicious)
+				assert.False(t, pkg.GetMalwareAnalysisResult().IsMalware, "should not be malware")
+				assert.True(t, pkg.GetMalwareAnalysisResult().IsSuspicious, "should be suspicious")
 			},
 		},
 	}

 	for _, tc := range cases {
 		t.Run(tc.name, func(t *testing.T) {
-			a := &malwareAnalyzer{config: tc.config}
-			err := a.applyMalwareDecision(tc.pkg)
+			a, err := NewMalwareAnalyzer(tc.config)
+			assert.NoError(t, err)
+
+			err = a.applyMalwareDecision(tc.pkg)
 			assert.NoError(t, err)
 			tc.assert(tc.pkg)
 		})
--- a/scan.go
+++ b/scan.go
@ -25,57 +25,58 @@ import (
 )

 var (
-	manifests                      []string
-	manifestType                   string
-	lockfiles                      []string
-	lockfileAs                     string
-	enrich                         bool
-	enrichUsingInsightsV2          bool
-	enrichMalware                  bool
-	baseDirectory                  string
-	purlSpec                       string
-	vsxReader                      bool
-	vsxDirectories                 []string
-	githubRepoUrls                 []string
-	githubOrgUrl                   string
-	githubOrgMaxRepositories       int
-	githubSkipDependencyGraphAPI   bool
-	scanExclude                    []string
-	transitiveAnalysis             bool
-	transitiveDepth                int
-	dependencyUsageEvidence        bool
-	codeAnalysisDBPath             string
-	concurrency                    int
-	dumpJsonManifestDir            string
-	celFilterExpression            string
-	celFilterSuiteFile             string
-	celFilterFailOnMatch           bool
-	markdownReportPath             string
-	markdownSummaryReportPath      string
-	jsonReportPath                 string
-	consoleReport                  bool
-	summaryReport                  bool
-	summaryReportMaxAdvice         int
-	summaryReportGroupByDirectDeps bool
-	summaryReportUsedOnly          bool
-	csvReportPath                  string
-	reportDefectDojo               bool
-	defectDojoHostUrl              string
-	defectDojoProductID            int
-	sarifReportPath                string
-	silentScan                     bool
-	disableAuthVerifyBeforeScan    bool
-	syncReport                     bool
-	syncReportProject              string
-	syncEnableMultiProject         bool
-	graphReportDirectory           string
-	syncReportStream               string
-	listExperimentalParsers        bool
-	failFast                       bool
-	trustedRegistryUrls            []string
-	scannerExperimental            bool
-	malwareAnalyzerTrustToolResult bool
-	malwareAnalysisTimeout         time.Duration
+	manifests                        []string
+	manifestType                     string
+	lockfiles                        []string
+	lockfileAs                       string
+	enrich                           bool
+	enrichUsingInsightsV2            bool
+	enrichMalware                    bool
+	baseDirectory                    string
+	purlSpec                         string
+	vsxReader                        bool
+	vsxDirectories                   []string
+	githubRepoUrls                   []string
+	githubOrgUrl                     string
+	githubOrgMaxRepositories         int
+	githubSkipDependencyGraphAPI     bool
+	scanExclude                      []string
+	transitiveAnalysis               bool
+	transitiveDepth                  int
+	dependencyUsageEvidence          bool
+	codeAnalysisDBPath               string
+	concurrency                      int
+	dumpJsonManifestDir              string
+	celFilterExpression              string
+	celFilterSuiteFile               string
+	celFilterFailOnMatch             bool
+	markdownReportPath               string
+	markdownSummaryReportPath        string
+	jsonReportPath                   string
+	consoleReport                    bool
+	summaryReport                    bool
+	summaryReportMaxAdvice           int
+	summaryReportGroupByDirectDeps   bool
+	summaryReportUsedOnly            bool
+	csvReportPath                    string
+	reportDefectDojo                 bool
+	defectDojoHostUrl                string
+	defectDojoProductID              int
+	sarifReportPath                  string
+	silentScan                       bool
+	disableAuthVerifyBeforeScan      bool
+	syncReport                       bool
+	syncReportProject                string
+	syncEnableMultiProject           bool
+	graphReportDirectory             string
+	syncReportStream                 string
+	listExperimentalParsers          bool
+	failFast                         bool
+	trustedRegistryUrls              []string
+	scannerExperimental              bool
+	malwareAnalyzerTrustToolResult   bool
+	malwareAnalysisTimeout           time.Duration
+	malwareAnalysisMinimumConfidence string
 )

 func newScanCommand() *cobra.Command {
@ -189,6 +190,8 @@ func newScanCommand() *cobra.Command {
 		"Trust malicious package analysis tool result without verification record")
 	cmd.Flags().DurationVarP(&malwareAnalysisTimeout, "malware-analysis-timeout", "", 5*time.Minute,
 		"Timeout for malicious package analysis")
+	cmd.Flags().StringVarP(&malwareAnalysisMinimumConfidence, "malware-analysis-min-confidence", "", "HIGH",
+		"Minimum confidence level for malicious package analysis result to fail fast")

 	// Add validations that should trigger a fail fast condition
 	cmd.PreRun = func(cmd *cobra.Command, args []string) {
@ -381,6 +384,7 @@ func internalStartScan() error {
 	if enrichMalware {
 		config := analyzer.DefaultMalwareAnalyzerConfig()
 		config.TrustAutomatedAnalysis = malwareAnalyzerTrustToolResult
+		config.MinimumConfidence = malwareAnalysisMinimumConfidence
 		config.FailFast = failFast

 		task, err := analyzer.NewMalwareAnalyzer(config)