Refactor cel filter analyzer to extract CEL evaluator

This commit is contained in:
abhisek 2023-02-17 16:44:23 +05:30
parent 032d0770c7
commit d4884c0457
No known key found for this signature in database
GPG Key ID: CB92A4990C02A88F
6 changed files with 347 additions and 190 deletions

View File

@ -79,8 +79,8 @@ vet scan --lockfiles /path/to/pom.xml --report-summary=false \
> Use filtering along with `query` command for offline slicing and dicing of
> enriched package manifests. Read [filtering guide](docs/filtering.md)
[Common Expressions Language](https://github.com/google/cel-spec) is used to
evaluate filters on packages. Learn more about [filtering with vet](docs/filtering.md).
Learn more about [filtering with vet](docs/filtering.md).
Look at [filter input spec](api/filter_input_spec.proto) on attributes
available to the filter expression.

View File

@ -12,6 +12,8 @@ vet scan -D /path/to/repo \
--filter 'licenses.exists(p, p == "MIT")'
```
The scan will list only packages that use the `MIT` license.
## Input
Filter expressions work on packages (aka. dependencies) and evaluate to
@ -69,7 +71,7 @@ vet query --from /tmp/dump --report-summary
vet query --from /tmp/dump --filter 'scorecard.scores.Maintained == 0'
```
## Gating with Filters
## Security Gating with Filters
A simple security gate (in CI) can be achieved using the filters. The
`--filter-fail` argument tells the `Filter Analyzer` module to fail the command
@ -90,6 +92,62 @@ Subsequently, the command fails with `-1` exit code in case of match
255
```
## Filter Suite
A single filter is useful for identification of packages that meet some
specific criteria. While it helps solve various use-cases, it is not entirely
suitable for `security gating` where multiple filters may be required to
express an organization's acceptable OSS usage policy.
For example, an organization may define a filter to deny certain types of
packages:
1. Any package that has a high or critical vulnerability
2. Any package that does not match acceptable OSS licenses
3. Any package that has a low [OpenSSF scorecard score](https://github.com/ossf/scorecard)
To express this policy in a single filter, multiple conditions must be combined into one long expression, such as:
```
vulns.critical.exists(p, true) ||
licenses.exists(p,
(p != "MIT") && (p != "Apache-2.0")
) ||
(scorecard.scores.Maintained == 0)
```
To solve this problem, we introduce the concept of `Filter Suite`. It can be
represented as a YAML file containing multiple filters to match:
```yaml
name: Generic Filter Suite
description: Example filter suite with canned filters
filters:
  - name: critical-vuln
    value: |
      vulns.critical.exists(p, true)
  - name: safe-licenses
    value: |
      licenses.exists(p, (p != "MIT") && (p != "Apache-2.0"))
  - name: ossf-maintained
    value: |
      scorecard.scores.Maintained == 0
```
A scan or query operation can be invoked using the filter suite:
```bash
vet scan -D /path/to/repo --filter-suite /path/to/filters.yml --filter-fail
```
The filter suite will be evaluated as:
* Ordered list of filters as given in the suite file
* Stop on first rule match for a given package
* Stop on first evaluation error for a given package
## FAQ
### What does the filter input JSON look like?

View File

@ -1,34 +1,19 @@
package analyzer
import (
"encoding/json"
"fmt"
"os"
"reflect"
"strings"
"github.com/golang/protobuf/jsonpb"
"github.com/google/cel-go/cel"
"github.com/jedib0t/go-pretty/v6/table"
"github.com/jedib0t/go-pretty/v6/text"
"github.com/safedep/dry/utils"
"github.com/safedep/vet/gen/filterinput"
"github.com/safedep/vet/gen/insightapi"
"github.com/safedep/vet/pkg/analyzer/filter"
"github.com/safedep/vet/pkg/common/logger"
"github.com/safedep/vet/pkg/models"
)
// Names of the variables declared in the CEL environment and bound to the
// serialized filter input when a filter expression is evaluated.
const (
	// Bound to the complete serialized filter input.
	filterInputVarRoot = "_"
	// The remaining variables are bound to the top-level key of the same
	// name in the serialized filter input.
	filterInputVarPkg = "pkg"
	filterInputVarVulns = "vulns"
	filterInputVarScorecard = "scorecard"
	filterInputVarProjects = "projects"
	filterInputVarLicenses = "licenses"
)
type celFilterAnalyzer struct {
program cel.Program
evaluator filter.Evaluator
failOnMatch bool
packages map[string]*models.Package
@ -41,31 +26,19 @@ type celFilterAnalyzer struct {
}
}
func NewCelFilterAnalyzer(filter string, failOnMatch bool) (Analyzer, error) {
env, err := cel.NewEnv(
cel.Variable(filterInputVarPkg, cel.DynType),
cel.Variable(filterInputVarVulns, cel.DynType),
cel.Variable(filterInputVarProjects, cel.DynType),
cel.Variable(filterInputVarScorecard, cel.DynType),
cel.Variable(filterInputVarLicenses, cel.DynType),
cel.Variable(filterInputVarRoot, cel.DynType),
)
func NewCelFilterAnalyzer(fl string, failOnMatch bool) (Analyzer, error) {
evaluator, err := filter.NewEvaluator("single-filter")
if err != nil {
return nil, err
}
ast, issues := env.Compile(filter)
if issues != nil && issues.Err() != nil {
return nil, issues.Err()
}
prog, err := env.Program(ast)
err = evaluator.AddFilter("single-filter", fl)
if err != nil {
return nil, err
}
return &celFilterAnalyzer{program: prog,
return &celFilterAnalyzer{
evaluator: evaluator,
failOnMatch: failOnMatch,
packages: make(map[string]*models.Package),
}, nil
@ -84,40 +57,16 @@ func (f *celFilterAnalyzer) Analyze(manifest *models.PackageManifest,
for _, pkg := range manifest.Packages {
f.stat.packages += 1
filterInput, err := f.buildFilterInput(pkg)
res, err := f.evaluator.EvalPackage(pkg)
if err != nil {
f.stat.err += 1
logger.Errorf("Failed to convert package to filter input: %v", err)
continue
}
serializedInput, err := f.serializeFilterInput(filterInput)
if err != nil {
f.stat.err += 1
logger.Errorf("Failed to serialize filter input: %v", err)
continue
}
out, _, err := f.program.Eval(map[string]interface{}{
filterInputVarRoot: serializedInput,
filterInputVarPkg: serializedInput["pkg"],
filterInputVarProjects: serializedInput["projects"],
filterInputVarVulns: serializedInput["vulns"],
filterInputVarScorecard: serializedInput["scorecard"],
filterInputVarLicenses: serializedInput["licenses"],
})
if err != nil {
f.stat.err += 1
logger.Errorf("Failed to evaluate CEL for %s:%v : %v",
logger.Errorf("Failed to evaluate CEL for %s:%s : %v",
pkg.PackageDetails.Name,
pkg.PackageDetails.Version, err)
continue
}
if (reflect.TypeOf(out).Kind() == reflect.Bool) &&
(reflect.ValueOf(out).Bool()) {
if res.Matched() {
// Avoid duplicates added to the table
if _, ok := f.packages[pkg.Id()]; ok {
continue
@ -170,27 +119,6 @@ func (f *celFilterAnalyzer) notifyCaller(manifest *models.PackageManifest,
return nil
}
// serializeFilterInput converts the filter input message into a generic
// map[string]interface{} by marshalling it to JSON and decoding that JSON
// again, so the result can be handed to the CEL program as dynamic values.
//
// On any failure it returns a nil map together with the error; a partially
// decoded map is never returned.
//
// TODO: Fix this JSON round-trip problem by directly configuring CEL env to
// work with Protobuf messages
func (f *celFilterAnalyzer) serializeFilterInput(fi *filterinput.FilterInput) (map[string]interface{}, error) {
	// OrigName keeps the field names as declared in the .proto file and
	// EmitDefaults keeps zero-valued fields in the output, so filters can
	// reference every attribute even when it is empty.
	m := jsonpb.Marshaler{OrigName: true, EnumsAsInts: false, EmitDefaults: true}

	data, err := m.MarshalToString(fi)
	if err != nil {
		return nil, err
	}

	logger.Debugf("Serialized filter input: %s", data)

	var ret map[string]interface{}
	if err := json.Unmarshal([]byte(data), &ret); err != nil {
		return nil, err
	}

	return ret, nil
}
func (f *celFilterAnalyzer) pkgLatestVersion(pkg *models.Package) string {
insight := utils.SafelyGetValue(pkg.Insights)
return utils.SafelyGetValue(insight.PackageCurrentVersion)
@ -206,108 +134,3 @@ func (f *celFilterAnalyzer) pkgSource(pkg *models.Package) string {
return ""
}
// buildFilterInput maps a package and its (optional) insights onto the
// FilterInput message that filter expressions are evaluated against.
//
// Every collection in the result is initialised to an empty, non-nil value so
// that a package without insights still yields a fully populated input.
// Missing (nil) insight fields are read through utils.SafelyGetValue.
// The returned error is currently always nil.
func (f *celFilterAnalyzer) buildFilterInput(pkg *models.Package) (*filterinput.FilterInput, error) {
	fi := filterinput.FilterInput{
		Pkg: &filterinput.PackageVersion{
			// Ecosystem is lower-cased before it is exposed to filters.
			Ecosystem: strings.ToLower(string(pkg.PackageDetails.Ecosystem)),
			Name:      pkg.PackageDetails.Name,
			Version:   pkg.PackageDetails.Version,
		},
		Projects: []*filterinput.ProjectInfo{},
		Vulns: &filterinput.Vulnerabilities{
			All:      []*filterinput.Vulnerability{},
			Critical: []*filterinput.Vulnerability{},
			High:     []*filterinput.Vulnerability{},
			Medium:   []*filterinput.Vulnerability{},
			Low:      []*filterinput.Vulnerability{},
		},
		Scorecard: &filterinput.Scorecard{
			Scores: map[string]float32{},
		},
		Licenses: []string{},
	}

	// Safely get insight
	insight := utils.SafelyGetValue(pkg.Insights)

	// Add projects. Only "github" (case-insensitive) is recognised; any other
	// project type is reported as UNKNOWN.
	projectTypeMapper := func(tp string) filterinput.ProjectType {
		if strings.ToLower(tp) == "github" {
			return filterinput.ProjectType_GITHUB
		}
		return filterinput.ProjectType_UNKNOWN
	}

	for _, project := range utils.SafelyGetValue(insight.Projects) {
		fi.Projects = append(fi.Projects, &filterinput.ProjectInfo{
			Name:   utils.SafelyGetValue(project.Name),
			Stars:  int32(utils.SafelyGetValue(project.Stars)),
			Forks:  int32(utils.SafelyGetValue(project.Forks)),
			Issues: int32(utils.SafelyGetValue(project.Issues)),
			Type:   projectTypeMapper(utils.SafelyGetValue(project.Type)),
		})
	}

	// Add vulnerabilities. cveFilter returns the first alias that starts with
	// "CVE-" (case-insensitive), unchanged, or "" when there is none.
	cveFilter := func(aliases []string) string {
		for _, alias := range aliases {
			if strings.HasPrefix(strings.ToUpper(alias), "CVE-") {
				return alias
			}
		}
		return ""
	}

	for _, vuln := range utils.SafelyGetValue(insight.Vulnerabilities) {
		fiv := &filterinput.Vulnerability{
			Id:  utils.SafelyGetValue(vuln.Id),
			Cve: cveFilter(utils.SafelyGetValue(vuln.Aliases)),
		}

		fi.Vulns.All = append(fi.Vulns.All, fiv)

		// The first CVSS v3 or v2 severity entry decides the risk bucket.
		risk := insightapi.PackageVulnerabilitySeveritiesRiskUNKNOWN
		for _, s := range utils.SafelyGetValue(vuln.Severities) {
			sType := utils.SafelyGetValue(s.Type)
			if (sType == insightapi.PackageVulnerabilitySeveritiesTypeCVSSV3) ||
				(sType == insightapi.PackageVulnerabilitySeveritiesTypeCVSSV2) {
				risk = utils.SafelyGetValue(s.Risk)
				break
			}
		}

		// Go switch cases do not fall through, so no explicit break is
		// needed. Any other risk value leaves the vulnerability in All only.
		switch risk {
		case insightapi.PackageVulnerabilitySeveritiesRiskCRITICAL:
			fi.Vulns.Critical = append(fi.Vulns.Critical, fiv)
		case insightapi.PackageVulnerabilitySeveritiesRiskHIGH:
			fi.Vulns.High = append(fi.Vulns.High, fiv)
		case insightapi.PackageVulnerabilitySeveritiesRiskMEDIUM:
			fi.Vulns.Medium = append(fi.Vulns.Medium, fiv)
		case insightapi.PackageVulnerabilitySeveritiesRiskLOW:
			fi.Vulns.Low = append(fi.Vulns.Low, fiv)
		}
	}

	// Add licenses
	for _, lic := range utils.SafelyGetValue(insight.Licenses) {
		fi.Licenses = append(fi.Licenses, string(lic))
	}

	// Scorecard: one score per check, keyed by the check name.
	scorecard := utils.SafelyGetValue(insight.Scorecard)
	checks := utils.SafelyGetValue(utils.SafelyGetValue(scorecard.Content).Checks)
	for _, check := range checks {
		fi.Scorecard.Scores[string(utils.SafelyGetValue(check.Name))] =
			utils.SafelyGetValue(check.Score)
	}

	return &fi, nil
}

254
pkg/analyzer/filter/eval.go Normal file
View File

@ -0,0 +1,254 @@
package filter
import (
"encoding/json"
"errors"
"reflect"
"strings"
"github.com/golang/protobuf/jsonpb"
"github.com/google/cel-go/cel"
"github.com/safedep/dry/utils"
"github.com/safedep/vet/gen/filterinput"
"github.com/safedep/vet/gen/insightapi"
"github.com/safedep/vet/pkg/common/logger"
"github.com/safedep/vet/pkg/models"
)
// Names of the variables declared in the CEL environment and bound to the
// serialized filter input when a filter expression is evaluated.
const (
	// Bound to the complete serialized filter input.
	filterInputVarRoot = "_"
	// The remaining variables are bound to the top-level key of the same
	// name in the serialized filter input.
	filterInputVarPkg = "pkg"
	filterInputVarVulns = "vulns"
	filterInputVarScorecard = "scorecard"
	filterInputVarProjects = "projects"
	filterInputVarLicenses = "licenses"
	// Maximum number of filters a single evaluator accepts.
	// Soft limit to start with
	filterEvalMaxFilters = 50
)
var (
	// errMaxFilter is returned by AddFilter when the evaluator already holds
	// filterEvalMaxFilters filters.
	errMaxFilter = errors.New("max filter limit has reached")
)
// Evaluator holds a set of named filter expressions and evaluates packages
// against them.
type Evaluator interface {
	// AddFilter registers the filter expression under the given name.
	// It returns an error when the filter cannot be added.
	AddFilter(name, filter string) error
	// EvalPackage evaluates pkg against the registered filters and reports
	// whether it matched; use Matched() on the returned result.
	EvalPackage(pkg *models.Package) (*filterEvaluationResult, error)
}
// filterEvaluator is the CEL-backed implementation of Evaluator.
type filterEvaluator struct {
	// name is the label given to NewEvaluator.
	name string
	// env is the CEL environment every filter is compiled against.
	env *cel.Env
	// programs holds the compiled filters in the order they were added.
	programs []*filterProgram
}
// NewEvaluator creates an Evaluator labelled name with no filters registered.
//
// The underlying CEL environment declares each filter input variable with a
// dynamic type. An error is returned when the CEL environment cannot be
// created.
func NewEvaluator(name string) (Evaluator, error) {
	// Declared in the same order as the variables are listed here.
	inputVars := []string{
		filterInputVarPkg,
		filterInputVarVulns,
		filterInputVarProjects,
		filterInputVarScorecard,
		filterInputVarLicenses,
		filterInputVarRoot,
	}

	opts := make([]cel.EnvOption, 0, len(inputVars))
	for _, v := range inputVars {
		opts = append(opts, cel.Variable(v, cel.DynType))
	}

	celEnv, err := cel.NewEnv(opts...)
	if err != nil {
		return nil, err
	}

	evaluator := &filterEvaluator{
		name:     name,
		env:      celEnv,
		programs: []*filterProgram{},
	}

	return evaluator, nil
}
// AddFilter compiles the CEL expression filter and appends it, under name, to
// the list of programs evaluated by EvalPackage.
//
// It returns errMaxFilter when filterEvalMaxFilters filters are already
// registered, or the compile / program construction error reported by CEL.
// Nothing is added when an error is returned.
func (f *filterEvaluator) AddFilter(name, filter string) error {
	if len(f.programs) >= filterEvalMaxFilters {
		return errMaxFilter
	}

	checked, issues := f.env.Compile(filter)
	if issues != nil {
		if compileErr := issues.Err(); compileErr != nil {
			return compileErr
		}
	}

	compiled, err := f.env.Program(checked)
	if err != nil {
		return err
	}

	entry := &filterProgram{name: name, program: compiled}
	f.programs = append(f.programs, entry)

	return nil
}
// EvalPackage evaluates pkg against every registered filter, in the order the
// filters were added.
//
// The package is converted to the filter input and serialized once; the same
// variable bindings are then shared by all programs, since they do not depend
// on the individual filter.
//
// It returns a matching result, carrying the matching program, as soon as one
// filter evaluates to boolean true. A filter whose result is not a boolean is
// treated as not matching. If no filter matches, or none is registered, a
// non-matching result is returned. Evaluation stops at the first error (input
// conversion, serialization or CEL evaluation), which is returned with a nil
// result.
func (f *filterEvaluator) EvalPackage(pkg *models.Package) (*filterEvaluationResult, error) {
	filterInput, err := f.buildFilterInput(pkg)
	if err != nil {
		return nil, err
	}

	serializedInput, err := f.serializeFilterInput(filterInput)
	if err != nil {
		return nil, err
	}

	// Loop-invariant: the bindings are identical for every filter, so build
	// them once instead of once per program.
	activation := map[string]interface{}{
		filterInputVarRoot:      serializedInput,
		filterInputVarPkg:       serializedInput["pkg"],
		filterInputVarProjects:  serializedInput["projects"],
		filterInputVarVulns:     serializedInput["vulns"],
		filterInputVarScorecard: serializedInput["scorecard"],
		filterInputVarLicenses:  serializedInput["licenses"],
	}

	for _, prog := range f.programs {
		out, _, err := prog.program.Eval(activation)
		if err != nil {
			return nil, err
		}

		// Guard against a nil result: reflect.TypeOf(nil) is nil and calling
		// Kind() on it would panic.
		if out == nil {
			continue
		}

		if (reflect.TypeOf(out).Kind() == reflect.Bool) &&
			(reflect.ValueOf(out).Bool()) {
			return &filterEvaluationResult{
				match:   true,
				program: prog,
			}, nil
		}
	}

	return &filterEvaluationResult{
		match: false,
	}, nil
}
// serializeFilterInput converts the filter input message into a generic
// map[string]interface{} by marshalling it to JSON and decoding that JSON
// again, so the result can be handed to the CEL programs as dynamic values.
//
// On any failure it returns a nil map together with the error; a partially
// decoded map is never returned.
//
// TODO: Fix this JSON round-trip problem by directly configuring CEL env to
// work with Protobuf messages
func (f *filterEvaluator) serializeFilterInput(fi *filterinput.FilterInput) (map[string]interface{}, error) {
	// OrigName keeps the field names as declared in the .proto file and
	// EmitDefaults keeps zero-valued fields in the output, so filters can
	// reference every attribute even when it is empty.
	m := jsonpb.Marshaler{OrigName: true, EnumsAsInts: false, EmitDefaults: true}

	data, err := m.MarshalToString(fi)
	if err != nil {
		return nil, err
	}

	logger.Debugf("Serialized filter input: %s", data)

	var ret map[string]interface{}
	if err := json.Unmarshal([]byte(data), &ret); err != nil {
		return nil, err
	}

	return ret, nil
}
// buildFilterInput maps a package and its (optional) insights onto the
// FilterInput message that filter expressions are evaluated against.
//
// Every collection in the result is initialised to an empty, non-nil value so
// that a package without insights still yields a fully populated input.
// Missing (nil) insight fields are read through utils.SafelyGetValue.
// The returned error is currently always nil.
func (f *filterEvaluator) buildFilterInput(pkg *models.Package) (*filterinput.FilterInput, error) {
	fi := filterinput.FilterInput{
		Pkg: &filterinput.PackageVersion{
			// Ecosystem is lower-cased before it is exposed to filters.
			Ecosystem: strings.ToLower(string(pkg.PackageDetails.Ecosystem)),
			Name:      pkg.PackageDetails.Name,
			Version:   pkg.PackageDetails.Version,
		},
		Projects: []*filterinput.ProjectInfo{},
		Vulns: &filterinput.Vulnerabilities{
			All:      []*filterinput.Vulnerability{},
			Critical: []*filterinput.Vulnerability{},
			High:     []*filterinput.Vulnerability{},
			Medium:   []*filterinput.Vulnerability{},
			Low:      []*filterinput.Vulnerability{},
		},
		Scorecard: &filterinput.Scorecard{
			Scores: map[string]float32{},
		},
		Licenses: []string{},
	}

	// Safely get insight
	insight := utils.SafelyGetValue(pkg.Insights)

	// Add projects. Only "github" (case-insensitive) is recognised; any other
	// project type is reported as UNKNOWN.
	projectTypeMapper := func(tp string) filterinput.ProjectType {
		if strings.ToLower(tp) == "github" {
			return filterinput.ProjectType_GITHUB
		}
		return filterinput.ProjectType_UNKNOWN
	}

	for _, project := range utils.SafelyGetValue(insight.Projects) {
		fi.Projects = append(fi.Projects, &filterinput.ProjectInfo{
			Name:   utils.SafelyGetValue(project.Name),
			Stars:  int32(utils.SafelyGetValue(project.Stars)),
			Forks:  int32(utils.SafelyGetValue(project.Forks)),
			Issues: int32(utils.SafelyGetValue(project.Issues)),
			Type:   projectTypeMapper(utils.SafelyGetValue(project.Type)),
		})
	}

	// Add vulnerabilities. cveFilter returns the first alias that starts with
	// "CVE-" (case-insensitive), unchanged, or "" when there is none.
	cveFilter := func(aliases []string) string {
		for _, alias := range aliases {
			if strings.HasPrefix(strings.ToUpper(alias), "CVE-") {
				return alias
			}
		}
		return ""
	}

	for _, vuln := range utils.SafelyGetValue(insight.Vulnerabilities) {
		fiv := &filterinput.Vulnerability{
			Id:  utils.SafelyGetValue(vuln.Id),
			Cve: cveFilter(utils.SafelyGetValue(vuln.Aliases)),
		}

		fi.Vulns.All = append(fi.Vulns.All, fiv)

		// The first CVSS v3 or v2 severity entry decides the risk bucket.
		risk := insightapi.PackageVulnerabilitySeveritiesRiskUNKNOWN
		for _, s := range utils.SafelyGetValue(vuln.Severities) {
			sType := utils.SafelyGetValue(s.Type)
			if (sType == insightapi.PackageVulnerabilitySeveritiesTypeCVSSV3) ||
				(sType == insightapi.PackageVulnerabilitySeveritiesTypeCVSSV2) {
				risk = utils.SafelyGetValue(s.Risk)
				break
			}
		}

		// Go switch cases do not fall through, so no explicit break is
		// needed. Any other risk value leaves the vulnerability in All only.
		switch risk {
		case insightapi.PackageVulnerabilitySeveritiesRiskCRITICAL:
			fi.Vulns.Critical = append(fi.Vulns.Critical, fiv)
		case insightapi.PackageVulnerabilitySeveritiesRiskHIGH:
			fi.Vulns.High = append(fi.Vulns.High, fiv)
		case insightapi.PackageVulnerabilitySeveritiesRiskMEDIUM:
			fi.Vulns.Medium = append(fi.Vulns.Medium, fiv)
		case insightapi.PackageVulnerabilitySeveritiesRiskLOW:
			fi.Vulns.Low = append(fi.Vulns.Low, fiv)
		}
	}

	// Add licenses
	for _, lic := range utils.SafelyGetValue(insight.Licenses) {
		fi.Licenses = append(fi.Licenses, string(lic))
	}

	// Scorecard: one score per check, keyed by the check name.
	scorecard := utils.SafelyGetValue(insight.Scorecard)
	checks := utils.SafelyGetValue(utils.SafelyGetValue(scorecard.Content).Checks)
	for _, check := range checks {
		fi.Scorecard.Scores[string(utils.SafelyGetValue(check.Name))] =
			utils.SafelyGetValue(check.Score)
	}

	return &fi, nil
}

View File

@ -0,0 +1,12 @@
package filter
import "github.com/google/cel-go/cel"
// filterProgram pairs a compiled CEL program with the name of the filter it
// was built from.
type filterProgram struct {
	// name is the filter name supplied when the filter was added.
	name string
	// program is the compiled CEL program for the filter expression.
	program cel.Program
}

// Name returns the name of the filter this program was built from.
func (p *filterProgram) Name() string {
	return p.name
}

View File

@ -0,0 +1,10 @@
package filter
// filterEvaluationResult is the outcome of evaluating one package against an
// evaluator's filters.
type filterEvaluationResult struct {
	// match is true when one of the filters matched the package.
	match bool
	// program is the filter that matched; it is left nil when nothing matched.
	program *filterProgram
}

// Matched reports whether any filter matched the evaluated package.
func (r *filterEvaluationResult) Matched() bool {
	return r.match
}