feat: Add data structure to build dependency graph

feat: Add support for DG JSON serialization

test: Add test case for dependency graph structure

test: Add test case for dependency graph structure (GetDependents)

test: Add test case for dependency graph structure (PathToRoot, JSON)

refactor: Use factory to initialize package manifest with dependency graph
This commit is contained in:
abhisek 2023-12-26 08:47:16 +05:30
parent f6e055d5c5
commit be81848cc0
No known key found for this signature in database
GPG Key ID: CB92A4990C02A88F
6 changed files with 308 additions and 9 deletions

View File

@ -47,11 +47,11 @@ func (j *jsonDumperAnalyzer) Analyze(manifest *models.PackageManifest,
return fmt.Errorf("Failed to JSON serialize manifest: %w", err)
}
random := rand.NewSource(time.Now().UnixNano())
randomSource := rand.NewSource(time.Now().UnixNano())
path := filepath.Join(j.dir, fmt.Sprintf("%s-%s--%d-dump.json",
manifest.Ecosystem,
filepath.Base(manifest.Path),
random.Int63()))
randomSource.Int63()))
return os.WriteFile(path, data, 0600)
}

158
pkg/models/graph.go Normal file
View File

@ -0,0 +1,158 @@
package models
import (
	"encoding/json"
	"sort"
)
// DependencyGraphNodeType is the constraint for values stored in a
// DependencyGraph. Generics are used here so that the graph implementation is
// not too coupled with our model types.
type DependencyGraphNodeType interface {
	// Id returns the string under which the graph indexes this value. Two
	// values with the same Id are treated as the same node.
	Id() string
}
// DependencyGraphNode represents a node in the dependency graph. It must be
// serializable to JSON.
type DependencyGraphNode[T DependencyGraphNodeType] struct {
	// Data is the value this node was created for.
	Data T `json:"data"`
	// Children holds the values this node has outgoing edges to.
	Children []T `json:"children"`
}
// DependencyGraph is a Directed Acyclic Graph (DAG) representation of the
// package manifest.
// NOTE(review): nothing in this type rejects cyclic edges; callers are
// presumably expected to add acyclic data only — confirm.
type DependencyGraph[T DependencyGraphNodeType] struct {
	// present is a caller-controlled flag, read back through Present.
	present bool
	// nodes indexes every node of the graph by the Id of its Data.
	nodes map[string]*DependencyGraphNode[T]
}
// NewDependencyGraph returns an empty graph: the present flag is unset and the
// node index is allocated and ready to receive edges.
func NewDependencyGraph[T DependencyGraphNodeType]() *DependencyGraph[T] {
	graph := new(DependencyGraph[T])
	graph.nodes = map[string]*DependencyGraphNode[T]{}
	return graph
}
// Present returns true if the dependency graph is present, i.e. SetPresent
// was last called with true. A nil graph is reported as not present, so
// callers holding an optional *DependencyGraph do not need a separate nil
// check.
func (dg *DependencyGraph[T]) Present() bool {
	return dg != nil && dg.present
}
// Clear resets the dependency graph to its freshly constructed state: the
// present flag is unset and the node index is replaced by a new, empty map.
func (dg *DependencyGraph[T]) Clear() {
	*dg = DependencyGraph[T]{
		nodes: make(map[string]*DependencyGraphNode[T]),
	}
}
// SetPresent sets the present flag for the dependency graph.
// This is useful when we want to indicate that the graph is present
// because we are building it as an enhancement over our existing list of
// packages.
func (dg *DependencyGraph[T]) SetPresent(present bool) {
	dg.present = present
}
// AddDependency adds a dependency from one package to another, i.e. an edge
// from [from] to [to]. Nodes are created on first use and are identified by
// Id.
//
// If a node with to's Id already exists, the Data stored in that node is what
// gets recorded as the child. Adding an edge that already exists is a no-op,
// so repeated calls do not produce duplicate children.
func (dg *DependencyGraph[T]) AddDependency(from, to T) {
	// Make a zero-value graph usable; writing to a nil map would panic.
	if dg.nodes == nil {
		dg.nodes = make(map[string]*DependencyGraphNode[T])
	}

	// Id may be costly to compute, so evaluate it once per endpoint.
	fromID, toID := from.Id(), to.Id()

	parent, ok := dg.nodes[fromID]
	if !ok {
		parent = &DependencyGraphNode[T]{Data: from, Children: []T{}}
		dg.nodes[fromID] = parent
	}

	child, ok := dg.nodes[toID]
	if !ok {
		child = &DependencyGraphNode[T]{Data: to, Children: []T{}}
		dg.nodes[toID] = child
	}

	for _, existing := range parent.Children {
		if existing.Id() == toID {
			return
		}
	}
	parent.Children = append(parent.Children, child.Data)
}
// GetDependencies returns the list of dependencies for the given package,
// i.e. the targets of its outgoing edges. A package that is not part of the
// graph yields an empty, non-nil slice.
func (dg *DependencyGraph[T]) GetDependencies(pkg T) []T {
	if node, known := dg.nodes[pkg.Id()]; known {
		return node.Children
	}

	return []T{}
}
// GetDependents returns the list of dependents for the given package, i.e.
// the sources of its incoming edges.
//
// Each dependent appears once, even if it holds several edges to pkg, and the
// result is sorted by Id so that it does not depend on map iteration order.
// A package that is not part of the graph, or that nothing depends on, yields
// an empty, non-nil slice.
func (dg *DependencyGraph[T]) GetDependents(pkg T) []T {
	dependents := []T{}

	pkgID := pkg.Id()
	if _, ok := dg.nodes[pkgID]; !ok {
		return dependents
	}

	for _, node := range dg.nodes {
		if node == nil {
			continue
		}
		for _, child := range node.Children {
			if child.Id() == pkgID {
				dependents = append(dependents, node.Data)
				break // one entry per dependent is enough
			}
		}
	}

	// Map iteration order is random; sort to give callers a stable result.
	sort.Slice(dependents, func(i, j int) bool {
		return dependents[i].Id() < dependents[j].Id()
	})
	return dependents
}
// GetNodes returns the list of nodes in the graph.
// This is useful when enumerating all packages.
//
// The result is ordered by node Id so that repeated calls (and repeated runs)
// enumerate packages in the same order. An empty graph yields an empty,
// non-nil slice, matching GetDependencies and GetDependents.
func (dg *DependencyGraph[T]) GetNodes() []T {
	// The map keys are the node Ids, so sorting them avoids calling Id again.
	ids := make([]string, 0, len(dg.nodes))
	for id := range dg.nodes {
		ids = append(ids, id)
	}
	sort.Strings(ids)

	nodes := make([]T, 0, len(ids))
	for _, id := range ids {
		if node := dg.nodes[id]; node != nil {
			nodes = append(nodes, node.Data)
		}
	}
	return nodes
}
// GetPackages is an alias for GetNodes.
func (dg *DependencyGraph[T]) GetPackages() []T {
	return dg.GetNodes()
}
// PathToRoot returns a path from the given package to a root, i.e. to a
// package that nothing else depends on. The first element is the graph's node
// data for pkg and every following element is a dependent of the one before
// it. A nil slice is returned when pkg is not part of the graph.
//
// The walk is greedy: at each step it moves to the not-yet-visited dependent
// with the smallest Id, which keeps the result independent of map iteration
// order. Visited packages are tracked so the walk also terminates if the
// graph contains a cycle; in that case the path ends where every dependent
// has already been visited.
//
// In future, it is likely that we will use a more efficient algorithm like a
// weighted traversal, which is more relevant here because we want to update
// the minimum number of root packages.
func (dg *DependencyGraph[T]) PathToRoot(pkg T) []T {
	var path []T

	startID := pkg.Id()
	start, ok := dg.nodes[startID]
	if !ok || start == nil {
		return path
	}

	path = append(path, start.Data)
	visited := map[string]struct{}{startID: {}}

	for {
		var (
			next   T
			nextID string
			found  bool
		)
		for _, dependent := range dg.GetDependents(path[len(path)-1]) {
			id := dependent.Id()
			if _, seen := visited[id]; seen {
				continue
			}
			if !found || id < nextID {
				next, nextID, found = dependent, id, true
			}
		}
		if !found {
			return path
		}

		visited[nextID] = struct{}{}
		path = append(path, next)
	}
}
// MarshalJSON implements json.Marshaler. The unexported state is encoded as
// an object with a "present" boolean and a "nodes" object keyed by node Id.
func (dg *DependencyGraph[T]) MarshalJSON() ([]byte, error) {
	payload := struct {
		Present bool                               `json:"present"`
		Nodes   map[string]*DependencyGraphNode[T] `json:"nodes"`
	}{
		Present: dg.present,
		Nodes:   dg.nodes,
	}

	return json.Marshal(payload)
}
// UnmarshalJSON implements json.Unmarshaler for the format produced by
// MarshalJSON. On a decoding error the graph is left untouched.
//
// The decoded data is normalised so the graph stays usable afterwards:
//   - a missing or null "nodes" value results in an empty, non-nil node index
//     (a nil map would make AddDependency panic);
//   - null node entries are dropped (a nil node would crash traversals);
//   - null "children" become an empty slice, as for nodes created by
//     AddDependency.
func (dg *DependencyGraph[T]) UnmarshalJSON(b []byte) error {
	var data struct {
		Present bool                               `json:"present"`
		Nodes   map[string]*DependencyGraphNode[T] `json:"nodes"`
	}

	if err := json.Unmarshal(b, &data); err != nil {
		return err
	}

	nodes := make(map[string]*DependencyGraphNode[T], len(data.Nodes))
	for id, node := range data.Nodes {
		if node == nil {
			continue
		}
		if node.Children == nil {
			node.Children = []T{}
		}
		nodes[id] = node
	}

	dg.present = data.Present
	dg.nodes = nodes
	return nil
}

114
pkg/models/graph_test.go Normal file
View File

@ -0,0 +1,114 @@
package models
import (
"encoding/json"
"testing"
"github.com/stretchr/testify/assert"
)
// dgTestNode is a minimal graph payload used by the tests in this file.
type dgTestNode struct {
	Name string `json:"Name"`
}

// Id returns Name, so test nodes with equal names map to the same graph node.
func (n *dgTestNode) Id() string {
	return n.Name
}
// dependencyGraphAddTestData populates dg with the edges a->b, a->c, b->c and
// c->d, added in that order and using a fresh dgTestNode for every endpoint.
func dependencyGraphAddTestData(dg *DependencyGraph[*dgTestNode]) {
	edges := [][2]string{
		{"a", "b"},
		{"a", "c"},
		{"b", "c"},
		{"c", "d"},
	}

	for _, edge := range edges {
		dg.AddDependency(&dgTestNode{Name: edge[0]}, &dgTestNode{Name: edge[1]})
	}
}
// TestDependencyGraphIsPresent checks that a new graph starts out as not
// present and that SetPresent(true) flips the flag.
func TestDependencyGraphIsPresent(t *testing.T) {
	graph := NewDependencyGraph[*dgTestNode]()
	assert.NotNil(t, graph)

	// A freshly constructed graph has not been marked present yet.
	assert.False(t, graph.Present())

	graph.SetPresent(true)
	assert.True(t, graph.Present())
}
// TestDependencyGraphGetDependencies checks the outgoing edges of every node
// in the shared test graph, including the leaf, which has none.
func TestDependencyGraphGetDependencies(t *testing.T) {
	graph := NewDependencyGraph[*dgTestNode]()
	assert.NotNil(t, graph)

	dependencyGraphAddTestData(graph)

	cases := []struct {
		pkg      string
		expected []*dgTestNode
	}{
		{pkg: "a", expected: []*dgTestNode{{Name: "b"}, {Name: "c"}}},
		{pkg: "b", expected: []*dgTestNode{{Name: "c"}}},
		{pkg: "c", expected: []*dgTestNode{{Name: "d"}}},
		{pkg: "d", expected: []*dgTestNode{}},
	}

	for _, tc := range cases {
		assert.Equal(t, tc.expected, graph.GetDependencies(&dgTestNode{Name: tc.pkg}))
	}
}
// TestDependencyGraphGetDependents checks the incoming edges of every node in
// the shared test graph.
//
// Dependents are compared as sets: the graph collects them while iterating a
// map, so asserting a specific order (e.g. a before b for node c) would make
// the test pass or fail at random.
func TestDependencyGraphGetDependents(t *testing.T) {
	dg := NewDependencyGraph[*dgTestNode]()
	assert.NotNil(t, dg)

	dependencyGraphAddTestData(dg)

	cases := []struct {
		pkg      string
		expected []*dgTestNode
	}{
		{pkg: "a", expected: []*dgTestNode{}},
		{pkg: "b", expected: []*dgTestNode{{Name: "a"}}},
		{pkg: "c", expected: []*dgTestNode{{Name: "a"}, {Name: "b"}}},
		{pkg: "d", expected: []*dgTestNode{{Name: "c"}}},
	}

	for _, tc := range cases {
		assert.ElementsMatch(t, tc.expected, dg.GetDependents(&dgTestNode{Name: tc.pkg}),
			"dependents of %s", tc.pkg)
	}
}
// TestDependencyGraphGetNodes checks that every package mentioned in the
// shared test graph is returned by GetNodes, regardless of order.
func TestDependencyGraphGetNodes(t *testing.T) {
	graph := NewDependencyGraph[*dgTestNode]()
	assert.NotNil(t, graph)

	dependencyGraphAddTestData(graph)

	all := graph.GetNodes()
	for _, name := range []string{"a", "b", "c", "d"} {
		assert.Contains(t, all, &dgTestNode{Name: name})
	}
}
// TestDependencyGraphPathToRoot checks that PathToRoot starts at the requested
// package, ends at the root of the shared test graph and only ever steps from
// a package to one of its dependents.
//
// Node c has two dependents (a and b), so both d,c,a and d,c,b,a are valid
// paths. Pinning one of them would tie the test to whichever dependent the
// graph happens to visit first, which depends on map iteration order.
func TestDependencyGraphPathToRoot(t *testing.T) {
	dg := NewDependencyGraph[*dgTestNode]()
	assert.NotNil(t, dg)

	dependencyGraphAddTestData(dg)

	path := dg.PathToRoot(&dgTestNode{Name: "d"})
	if !assert.NotEmpty(t, path) {
		return
	}

	assert.Equal(t, &dgTestNode{Name: "d"}, path[0])
	assert.Equal(t, &dgTestNode{Name: "a"}, path[len(path)-1])

	for i := 1; i < len(path); i++ {
		assert.Contains(t, dg.GetDependents(path[i-1]), path[i],
			"%s is not a dependent of %s", path[i].Name, path[i-1].Name)
	}
}
// TestDependencyGraphMarshalJSON pins the exact JSON produced for the shared
// test graph once it has been marked present.
func TestDependencyGraphMarshalJSON(t *testing.T) {
	graph := NewDependencyGraph[*dgTestNode]()
	assert.NotNil(t, graph)

	dependencyGraphAddTestData(graph)
	graph.SetPresent(true)

	// The local is not called "json" so the encoding/json package stays visible.
	encoded, err := json.Marshal(graph)
	assert.Nil(t, err)

	expected := `{"present":true,"nodes":{"a":{"data":{"Name":"a"},"children":[{"Name":"b"},{"Name":"c"}]},"b":{"data":{"Name":"b"},"children":[{"Name":"c"}]},"c":{"data":{"Name":"c"},"children":[{"Name":"d"}]},"d":{"data":{"Name":"d"},"children":[]}}}`
	assert.Equal(t, expected, string(encoded))
}
// TestDependencyGraphUnmarshalJSON checks that a graph survives a JSON round
// trip: decoding the encoded form yields a graph equal to the original.
func TestDependencyGraphUnmarshalJSON(t *testing.T) {
	original := NewDependencyGraph[*dgTestNode]()
	assert.NotNil(t, original)

	dependencyGraphAddTestData(original)
	original.SetPresent(true)

	encoded, err := json.Marshal(original)
	assert.Nil(t, err)

	restored := NewDependencyGraph[*dgTestNode]()
	assert.NotNil(t, restored)

	err = json.Unmarshal(encoded, restored)
	assert.Nil(t, err)

	assert.Equal(t, original, restored)
}

View File

@ -45,10 +45,22 @@ type PackageManifest struct {
// List of packages obtained by parsing the manifest
Packages []*Package `json:"packages"`
// The package dependency graph representation
DependencyGraph *DependencyGraph[*Package] `json:"dependency_graph"`
// Lock to serialize updating packages
m sync.Mutex
}
// NewPackageManifest creates a manifest for the given path and ecosystem with
// an empty, non-nil package list and an empty dependency graph attached.
func NewPackageManifest(path, ecosystem string) *PackageManifest {
	manifest := new(PackageManifest)

	manifest.Path = path
	manifest.Ecosystem = ecosystem
	manifest.Packages = []*Package{}
	manifest.DependencyGraph = NewDependencyGraph[*Package]()

	return manifest
}
func (pm *PackageManifest) AddPackage(pkg *Package) {
pm.m.Lock()
defer pm.m.Unlock()
@ -74,6 +86,17 @@ func (pm *PackageManifest) GetDisplayPath() string {
return pm.GetPath()
}
// GetPackages returns the list of packages in this manifest.
// When a dependency graph is attached and marked as present, its nodes are
// returned; otherwise it falls back to the [Packages] field.
func (pm *PackageManifest) GetPackages() []*Package {
	graph := pm.DependencyGraph
	if graph == nil || !graph.Present() {
		return pm.Packages
	}

	return graph.GetNodes()
}
func (pm *PackageManifest) Id() string {
return hashedId(fmt.Sprintf("%s/%s",
pm.Ecosystem, pm.Path))
@ -132,6 +155,9 @@ type Package struct {
Manifest *PackageManifest `json:"-"`
}
// Id returns a unique identifier for this package within a manifest
// It is used to identify a package in the dependency graph
// It should be reproducible across multiple runs
func (p *Package) Id() string {
return hashedId(fmt.Sprintf("%s/%s/%s",
strings.ToLower(string(p.PackageDetails.Ecosystem)),

View File

@ -153,22 +153,20 @@ func (pw *parserWrapper) Ecosystem() string {
}
func (pw *parserWrapper) Parse(lockfilePath string) (*models.PackageManifest, error) {
pm := models.PackageManifest{Path: lockfilePath,
Ecosystem: pw.Ecosystem()}
logger.Infof("[%s] Parsing %s", pw.parseAs, lockfilePath)
pm := models.NewPackageManifest(lockfilePath, pw.Ecosystem())
packages, err := pw.parser(lockfilePath)
if err != nil {
return &pm, err
return pm, err
}
for _, pkg := range packages {
pm.AddPackage(&models.Package{
PackageDetails: pkg,
Manifest: &pm,
Manifest: pm,
})
}
return &pm, nil
return pm, nil
}

View File

@ -59,7 +59,10 @@ func (p *jsonDumpReader) EnumManifests(handler func(*models.PackageManifest,
return err
}
var manifest models.PackageManifest
manifest := models.PackageManifest{
DependencyGraph: models.NewDependencyGraph[*models.Package](),
}
err = json.Unmarshal(data, &manifest)
if err != nil {
return err