vet/docs/sqlite3-reporter-queries.md
Abhisek Datta 5f4cccbc85
feat: Add Support for Agentic Query and Analysis (#535)
* Add initial UI for agent mode

* fix: Cleanup and define agent contract

* Add react agent

* Add interactions memory

* Add support for stdio based MCP integration

* Add basic sqlite3 report generator

* fix: Persist vulnerabilities with package relation

* fix: Persist license information

* refactor: Agents into its own command package

* feat: Add support for tool calling introspection

* refactor: UI to hide implementation detail

* sqlite3 reporter persist dependency graph

* fix: Support multiple LLM provider for agent

* docs: Update agents doc

* docs: Remove deprecated query docs

* fix: UI tests

* fix: Linter issue

* Add support for prompt mode

* Improve UI with animation

* Fix UI tests after update

* Add OpenSSF scorecard persistence

* Add slsa provenances in sqlite3 reporter

* Add test cases for sqlite3 reporter

* Fix agent doc

* fix: Sqlite3 reporter use safe accessors

* feat: Add support for fast model

* feat: Simplify and streamline agent UI for better user experience

- Remove decorative borders and excessive styling to maximize output area
- Implement clean minimal design similar to modern TUI interfaces
- Add bordered input area for clear visual separation
- Move thinking indicator above input area for better visibility
- Enhance input field reset logic for proper line alignment
- Remove verbose help text and status messages
- Optimize layout calculations for full width utilization
- Add smooth animations for agent thinking state with spinner
- Clean up code structure and remove unused progress bar functionality

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

* fix: Improve agent status line

* test: Update UI tests

* fix: Use terminal safe rendering

* fix: Fix nil deref without storing empty strings in DB

* fix: Support overwriting sqlite3 database

* fix: Data model to use m2m between manifest and package

* style: Fix linter issue with unused variables

* Misc fixes

* Add test for agent memory

---------

Co-authored-by: Claude <noreply@anthropic.com>
2025-07-11 18:37:44 +05:30

34 KiB

SQLite3 Reporter Query Examples

The SQLite3 reporter generates a comprehensive database with package manifest and dependency data that can be queried using standard SQL. Here are some useful example queries:

Basic Queries

List all package manifests

SELECT manifest_id, ecosystem, display_path, created_at 
FROM report_package_manifests 
ORDER BY created_at;

List all packages with their basic information

SELECT name, version, ecosystem, is_direct, is_malware, is_suspicious 
FROM report_packages 
ORDER BY name;

Find all direct dependencies

SELECT p.name, p.version, p.ecosystem, m.display_path
FROM report_packages p
JOIN report_package_manifest_packages mp ON p.id = mp.report_package_id
JOIN report_package_manifests m ON mp.report_package_manifest_id = m.id
WHERE p.is_direct = 1
ORDER BY p.name;

Security Analysis Queries

Find all vulnerabilities

SELECT p.name, p.version, v.vulnerability_id, v.title, v.severity, v.cvss_score
FROM report_packages p
JOIN report_vulnerabilities v ON p.id = v.report_package_vulnerabilities
ORDER BY v.severity DESC, v.cvss_score DESC;

Find critical and high severity vulnerabilities

SELECT p.name, p.version, v.vulnerability_id, v.title, v.severity, m.display_path
FROM report_packages p
JOIN report_vulnerabilities v ON p.id = v.report_package_vulnerabilities
JOIN report_package_manifest_packages mp ON p.id = mp.report_package_id
JOIN report_package_manifests m ON mp.report_package_manifest_id = m.id
WHERE v.severity IN ('CRITICAL', 'HIGH')
ORDER BY v.severity DESC;

Count vulnerabilities by severity

SELECT v.severity, COUNT(*) as vulnerability_count
FROM report_vulnerabilities v
GROUP BY v.severity
ORDER BY vulnerability_count DESC;

Find packages with most vulnerabilities

SELECT p.name, p.version, p.ecosystem, COUNT(v.id) as vuln_count
FROM report_packages p
LEFT JOIN report_vulnerabilities v ON p.id = v.report_package_vulnerabilities
GROUP BY p.id, p.name, p.version, p.ecosystem
HAVING vuln_count > 0
ORDER BY vuln_count DESC;

Find vulnerability aliases

SELECT p.name, p.version, v.vulnerability_id, v.aliases
FROM report_packages p
JOIN report_vulnerabilities v ON p.id = v.report_package_vulnerabilities
WHERE v.aliases IS NOT NULL AND v.aliases != '[]';

License Analysis Queries

Find all licenses

SELECT p.name, p.version, l.license_id, l.name
FROM report_packages p
JOIN report_licenses l ON p.id = l.report_package_licenses
ORDER BY l.license_id, p.name;

Count packages by license

SELECT l.license_id, COUNT(DISTINCT p.id) as package_count
FROM report_licenses l
JOIN report_packages p ON l.report_package_licenses = p.id
GROUP BY l.license_id
ORDER BY package_count DESC;

Find packages with specific licenses

SELECT p.name, p.version, p.ecosystem, m.display_path
FROM report_packages p
JOIN report_licenses l ON p.id = l.report_package_licenses
JOIN report_package_manifest_packages mp ON p.id = mp.report_package_id
JOIN report_package_manifests m ON mp.report_package_manifest_id = m.id
WHERE l.license_id IN ('MIT', 'Apache-2.0', 'GPL-3.0')
ORDER BY l.license_id, p.name;

License compliance overview by manifest

SELECT 
    m.display_path,
    COUNT(DISTINCT p.id) as total_packages,
    COUNT(DISTINCT l.license_id) as unique_licenses,
    GROUP_CONCAT(DISTINCT l.license_id) as all_licenses
FROM report_package_manifests m
LEFT JOIN report_package_manifest_packages mp ON m.id = mp.report_package_manifest_id
LEFT JOIN report_packages p ON mp.report_package_id = p.id
LEFT JOIN report_licenses l ON p.id = l.report_package_licenses
GROUP BY m.id, m.display_path
ORDER BY unique_licenses DESC;

Find packages without license information

SELECT p.name, p.version, p.ecosystem, m.display_path
FROM report_packages p
JOIN report_package_manifest_packages mp ON p.id = mp.report_package_id
JOIN report_package_manifests m ON mp.report_package_manifest_id = m.id
LEFT JOIN report_licenses l ON p.id = l.report_package_licenses
WHERE l.id IS NULL;

Find potentially problematic licenses (copyleft, GPL, etc.)

SELECT p.name, p.version, l.license_id, m.display_path
FROM report_packages p
JOIN report_licenses l ON p.id = l.report_package_licenses
JOIN report_package_manifest_packages mp ON p.id = mp.report_package_id
JOIN report_package_manifests m ON mp.report_package_manifest_id = m.id
WHERE l.license_id LIKE '%GPL%' 
   OR l.license_id LIKE '%AGPL%' 
   OR l.license_id LIKE '%LGPL%'
   OR l.license_id LIKE '%Copyleft%'
ORDER BY l.license_id, p.name;

Find packages flagged as malware

SELECT p.name, p.version, p.ecosystem, m.display_path
FROM report_packages p
JOIN report_package_manifest_packages mp ON p.id = mp.report_package_id
JOIN report_package_manifests m ON mp.report_package_manifest_id = m.id
WHERE p.is_malware = 1;

Find suspicious packages

SELECT p.name, p.version, p.ecosystem, m.display_path
FROM report_packages p
JOIN report_package_manifest_packages mp ON p.id = mp.report_package_id
JOIN report_package_manifests m ON mp.report_package_manifest_id = m.id
WHERE p.is_suspicious = 1;

Get malware analysis details

SELECT p.name, p.version, ma.analysis_id, ma.is_malware, ma.confidence
FROM report_packages p
JOIN report_malwares ma ON p.id = ma.report_package_malware_analysis
WHERE ma.is_malware = 1;

Dependency Analysis Queries

Find packages with dependencies

SELECT p.name, p.version, COUNT(d.id) as dependency_count
FROM report_packages p
LEFT JOIN report_dependencies d ON p.id = d.report_package_dependencies
GROUP BY p.id, p.name, p.version
HAVING dependency_count > 0
ORDER BY dependency_count DESC;

Show dependency relationships

SELECT 
    parent.name as parent_package,
    parent.version as parent_version,
    d.dependency_name,
    d.dependency_version,
    d.dependency_ecosystem
FROM report_packages parent
JOIN report_dependencies d ON parent.id = d.report_package_dependencies
ORDER BY parent.name, d.dependency_name;

Ecosystem Analysis Queries

Count packages by ecosystem

SELECT ecosystem, COUNT(*) as package_count
FROM report_packages
GROUP BY ecosystem
ORDER BY package_count DESC;

Find npm packages only

SELECT name, version, is_direct
FROM report_packages
WHERE ecosystem = 'npm'
ORDER BY name;

Cross-Manifest Analysis Queries

Find packages used across multiple manifests

SELECT p.name, p.version, p.ecosystem, COUNT(DISTINCT mp.report_package_manifest_id) as manifest_count
FROM report_packages p
JOIN report_package_manifest_packages mp ON p.id = mp.report_package_id
GROUP BY p.name, p.version, p.ecosystem
HAVING manifest_count > 1
ORDER BY manifest_count DESC;

Compare direct dependencies across manifests

SELECT 
    m.display_path,
    p.name,
    p.version,
    p.ecosystem
FROM report_packages p
JOIN report_package_manifest_packages mp ON p.id = mp.report_package_id
JOIN report_package_manifests m ON mp.report_package_manifest_id = m.id
WHERE p.is_direct = 1
ORDER BY m.display_path, p.name;

OSS Project and OpenSSF Scorecard Queries

Find packages with project information

SELECT p.name, p.version, proj.name as project_name, proj.url, proj.stars, proj.forks
FROM report_packages p
JOIN report_projects proj ON p.id = proj.report_package_projects
ORDER BY proj.stars DESC;

Find packages with low scorecard scores

SELECT p.name, p.version, s.score, s.scorecard_version, proj.name as project_name
FROM report_packages p
JOIN report_projects proj ON p.id = proj.report_package_projects
JOIN report_scorecards s ON proj.id = s.report_project_scorecard
WHERE s.score < 5.0
ORDER BY s.score ASC;

Find packages failing specific scorecard checks

SELECT p.name, p.version, c.name as check_name, c.score, c.reason, proj.name as project_name
FROM report_packages p
JOIN report_projects proj ON p.id = proj.report_package_projects  
JOIN report_scorecards s ON proj.id = s.report_project_scorecard
JOIN report_scorecard_checks c ON s.id = c.report_scorecard_checks
WHERE c.name = 'Maintained' AND c.score < 5
ORDER BY c.score ASC;

OpenSSF scorecard check summary

SELECT 
    c.name as check_name,
    AVG(c.score) as avg_score,
    MIN(c.score) as min_score,
    MAX(c.score) as max_score,
    COUNT(*) as package_count
FROM report_scorecard_checks c
GROUP BY c.name
ORDER BY avg_score ASC;
SELECT DISTINCT p.name, p.version, c.name as failed_check, c.score, c.reason
FROM report_packages p
JOIN report_projects proj ON p.id = proj.report_package_projects  
JOIN report_scorecards s ON proj.id = s.report_project_scorecard
JOIN report_scorecard_checks c ON s.id = c.report_scorecard_checks
WHERE c.name IN ('Security-Policy', 'Vulnerabilities', 'Token-Permissions', 'Dangerous-Workflow')
  AND c.score < 5
ORDER BY c.score ASC;
SELECT 
    p.name, 
    p.version, 
    proj.name as project_name,
    proj.stars,
    s.score as scorecard_score,
    COUNT(c.id) as failed_checks
FROM report_packages p
JOIN report_projects proj ON p.id = proj.report_package_projects
JOIN report_scorecards s ON proj.id = s.report_project_scorecard
LEFT JOIN report_scorecard_checks c ON s.id = c.report_scorecard_checks AND c.score < 5
WHERE proj.stars > 1000 AND s.score < 6
GROUP BY p.id, proj.id, s.id
ORDER BY proj.stars DESC;

Scorecard overview by ecosystem

SELECT 
    p.ecosystem,
    COUNT(DISTINCT p.id) as packages_with_scorecard,
    AVG(s.score) as avg_scorecard_score,
    MIN(s.score) as min_score,
    MAX(s.score) as max_score,
    AVG(proj.stars) as avg_stars
FROM report_packages p
JOIN report_projects proj ON p.id = proj.report_package_projects
JOIN report_scorecards s ON proj.id = s.report_project_scorecard
GROUP BY p.ecosystem
ORDER BY avg_scorecard_score DESC;

Find packages with both vulnerabilities and poor scorecard scores

SELECT 
    p.name, 
    p.version,
    s.score as scorecard_score,
    COUNT(v.id) as vulnerability_count,
    SUM(CASE WHEN v.severity = 'CRITICAL' THEN 1 ELSE 0 END) as critical_vulns,
    proj.name as project_name,
    proj.stars
FROM report_packages p
JOIN report_projects proj ON p.id = proj.report_package_projects
JOIN report_scorecards s ON proj.id = s.report_project_scorecard
LEFT JOIN report_vulnerabilities v ON p.id = v.report_package_vulnerabilities
WHERE s.score < 6
GROUP BY p.id, proj.id, s.id
HAVING vulnerability_count > 0
ORDER BY critical_vulns DESC, vulnerability_count DESC;

Detailed scorecard check analysis

SELECT 
    p.name,
    p.version,
    proj.name as project_name,
    s.score as overall_score,
    c.name as check_name,
    c.score as check_score,
    c.reason
FROM report_packages p
JOIN report_projects proj ON p.id = proj.report_package_projects
JOIN report_scorecards s ON proj.id = s.report_project_scorecard
JOIN report_scorecard_checks c ON s.id = c.report_scorecard_checks
WHERE p.name = 'example-package'
ORDER BY c.score ASC;

Find projects with best security practices

SELECT 
    p.name,
    p.version,
    proj.name as project_name,
    proj.url,
    s.score as scorecard_score,
    proj.stars,
    proj.forks
FROM report_packages p
JOIN report_projects proj ON p.id = proj.report_package_projects
JOIN report_scorecards s ON proj.id = s.report_project_scorecard
WHERE s.score >= 8.0
ORDER BY s.score DESC, proj.stars DESC;

SLSA Provenance Queries

Find packages WITH SLSA provenance

SELECT p.name, p.version, p.ecosystem, COUNT(slsa.id) as provenance_count
FROM report_packages p
JOIN report_slsa_provenances slsa ON p.id = slsa.report_package_slsa_provenances
GROUP BY p.id, p.name, p.version, p.ecosystem
ORDER BY provenance_count DESC;

Find packages WITHOUT SLSA provenance (Missing Supply Chain Security)

SELECT p.name, p.version, p.ecosystem, m.display_path
FROM report_packages p
JOIN report_package_manifest_packages mp ON p.id = mp.report_package_id
JOIN report_package_manifests m ON mp.report_package_manifest_id = m.id
LEFT JOIN report_slsa_provenances slsa ON p.id = slsa.report_package_slsa_provenances
WHERE slsa.id IS NULL
ORDER BY p.name;

Find packages with verified SLSA provenance

SELECT p.name, p.version, slsa.source_repository, slsa.commit_sha, slsa.url
FROM report_packages p
JOIN report_slsa_provenances slsa ON p.id = slsa.report_package_slsa_provenances
WHERE slsa.verified = true
ORDER BY p.name;

Find packages with unverified SLSA provenance (Security Risk)

SELECT p.name, p.version, slsa.source_repository, slsa.commit_sha, slsa.verified
FROM report_packages p
JOIN report_slsa_provenances slsa ON p.id = slsa.report_package_slsa_provenances
WHERE slsa.verified = false
ORDER BY p.name;

SLSA provenance summary by ecosystem

SELECT 
    p.ecosystem,
    COUNT(DISTINCT p.id) as total_packages,
    COUNT(DISTINCT CASE WHEN slsa.id IS NOT NULL THEN p.id END) as packages_with_provenance,
    COUNT(DISTINCT CASE WHEN slsa.verified = true THEN p.id END) as packages_with_verified_provenance,
    ROUND(
        (COUNT(DISTINCT CASE WHEN slsa.id IS NOT NULL THEN p.id END) * 100.0) / COUNT(DISTINCT p.id), 2
    ) as provenance_coverage_percent,
    ROUND(
        (COUNT(DISTINCT CASE WHEN slsa.verified = true THEN p.id END) * 100.0) / COUNT(DISTINCT p.id), 2
    ) as verified_provenance_percent
FROM report_packages p
LEFT JOIN report_slsa_provenances slsa ON p.id = slsa.report_package_slsa_provenances
GROUP BY p.ecosystem
ORDER BY provenance_coverage_percent DESC;

Find direct dependencies without SLSA provenance (High Priority Security Risk)

SELECT p.name, p.version, p.ecosystem, m.display_path
FROM report_packages p
JOIN report_package_manifest_packages mp ON p.id = mp.report_package_id
JOIN report_package_manifests m ON mp.report_package_manifest_id = m.id
LEFT JOIN report_slsa_provenances slsa ON p.id = slsa.report_package_slsa_provenances
WHERE p.is_direct = 1 AND slsa.id IS NULL
ORDER BY p.ecosystem, p.name;

SLSA provenance source repository analysis

SELECT 
    slsa.source_repository,
    COUNT(DISTINCT p.id) as package_count,
    COUNT(CASE WHEN slsa.verified = true THEN 1 END) as verified_count,
    COUNT(CASE WHEN slsa.verified = false THEN 1 END) as unverified_count,
    GROUP_CONCAT(DISTINCT p.name) as packages
FROM report_slsa_provenances slsa
JOIN report_packages p ON slsa.report_package_slsa_provenances = p.id
GROUP BY slsa.source_repository
ORDER BY package_count DESC;

Combined security analysis: Vulnerabilities + Missing SLSA Provenance

SELECT 
    p.name,
    p.version,
    p.ecosystem,
    COUNT(v.id) as vulnerability_count,
    CASE WHEN slsa.id IS NULL THEN 'Missing' ELSE 'Present' END as slsa_provenance,
    CASE WHEN slsa.verified = true THEN 'Verified' 
         WHEN slsa.verified = false THEN 'Unverified' 
         ELSE 'Missing' END as slsa_status,
    SUM(CASE WHEN v.severity = 'CRITICAL' THEN 1 ELSE 0 END) as critical_vulns
FROM report_packages p
LEFT JOIN report_vulnerabilities v ON p.id = v.report_package_vulnerabilities
LEFT JOIN report_slsa_provenances slsa ON p.id = slsa.report_package_slsa_provenances
GROUP BY p.id, p.name, p.version, p.ecosystem, slsa.id, slsa.verified
HAVING vulnerability_count > 0 OR slsa_provenance = 'Missing'
ORDER BY critical_vulns DESC, vulnerability_count DESC;

Supply chain security scorecard

SELECT 
    m.display_path,
    COUNT(DISTINCT p.id) as total_packages,
    COUNT(DISTINCT CASE WHEN slsa.id IS NOT NULL THEN p.id END) as with_slsa_provenance,
    COUNT(DISTINCT CASE WHEN slsa.verified = true THEN p.id END) as with_verified_slsa,
    COUNT(DISTINCT CASE WHEN v.id IS NOT NULL THEN p.id END) as with_vulnerabilities,
    COUNT(DISTINCT CASE WHEN p.is_malware = 1 THEN p.id END) as malware_packages,
    ROUND(
        (COUNT(DISTINCT CASE WHEN slsa.verified = true THEN p.id END) * 100.0) / COUNT(DISTINCT p.id), 2
    ) as supply_chain_security_score
FROM report_package_manifests m
LEFT JOIN report_package_manifest_packages mp ON m.id = mp.report_package_manifest_id
LEFT JOIN report_packages p ON mp.report_package_id = p.id
LEFT JOIN report_slsa_provenances slsa ON p.id = slsa.report_package_slsa_provenances
LEFT JOIN report_vulnerabilities v ON p.id = v.report_package_vulnerabilities
GROUP BY m.id, m.display_path
ORDER BY supply_chain_security_score DESC;

Insights V2 Data Queries

Query packages with Insights V2 data

SELECT name, version, ecosystem, 
       json_extract(insights_v2, '$.deprecated') as is_deprecated
FROM report_packages
WHERE insights_v2 IS NOT NULL;

Extract vulnerability data from Insights V2

SELECT 
    name, 
    version,
    json_extract(insights_v2, '$.vulnerabilities') as vulnerabilities
FROM report_packages
WHERE json_extract(insights_v2, '$.vulnerabilities') IS NOT NULL;

Vulnerability Analysis Queries

Vulnerability summary by manifest

SELECT 
    m.display_path,
    m.ecosystem,
    COUNT(DISTINCT p.id) as total_packages,
    COUNT(v.id) as total_vulnerabilities,
    SUM(CASE WHEN v.severity = 'CRITICAL' THEN 1 ELSE 0 END) as critical_vulns,
    SUM(CASE WHEN v.severity = 'HIGH' THEN 1 ELSE 0 END) as high_vulns,
    SUM(CASE WHEN v.severity = 'MEDIUM' THEN 1 ELSE 0 END) as medium_vulns,
    SUM(CASE WHEN v.severity = 'LOW' THEN 1 ELSE 0 END) as low_vulns
FROM report_package_manifests m
LEFT JOIN report_package_manifest_packages mp ON m.id = mp.report_package_manifest_id
LEFT JOIN report_packages p ON mp.report_package_id = p.id
LEFT JOIN report_vulnerabilities v ON p.id = v.report_package_vulnerabilities
GROUP BY m.id, m.display_path, m.ecosystem
ORDER BY critical_vulns DESC, high_vulns DESC;

Vulnerability timeline analysis

SELECT 
    DATE(v.created_at) as scan_date,
    v.severity,
    COUNT(*) as vulnerability_count
FROM report_vulnerabilities v
GROUP BY DATE(v.created_at), v.severity
ORDER BY scan_date DESC, v.severity;

Extract vulnerability data from Insights v2 JSON

SELECT 
    p.name,
    p.version,
    json_extract(p.insights_v2, '$.vulnerabilities') as raw_vulnerabilities
FROM report_packages p
WHERE json_extract(p.insights_v2, '$.vulnerabilities') IS NOT NULL;

Extract license data from Insights v2 JSON

SELECT 
    p.name,
    p.version,
    json_extract(p.insights_v2, '$.licenses') as raw_licenses
FROM report_packages p
WHERE json_extract(p.insights_v2, '$.licenses') IS NOT NULL;

Combined security and license risk analysis

SELECT 
    p.name,
    p.version,
    COUNT(DISTINCT v.id) as vulnerability_count,
    GROUP_CONCAT(DISTINCT l.license_id) as licenses,
    SUM(CASE WHEN v.severity = 'CRITICAL' THEN 1 ELSE 0 END) as critical_vulns,
    SUM(CASE WHEN v.severity = 'HIGH' THEN 1 ELSE 0 END) as high_vulns
FROM report_packages p
LEFT JOIN report_vulnerabilities v ON p.id = v.report_package_vulnerabilities
LEFT JOIN report_licenses l ON p.id = l.report_package_licenses
GROUP BY p.id, p.name, p.version
HAVING vulnerability_count > 0 OR licenses IS NOT NULL
ORDER BY critical_vulns DESC, high_vulns DESC, vulnerability_count DESC;

Advanced Analysis Queries

Security risk overview by manifest

SELECT 
    m.display_path,
    m.ecosystem,
    COUNT(p.id) as total_packages,
    SUM(CASE WHEN p.is_malware = 1 THEN 1 ELSE 0 END) as malware_count,
    SUM(CASE WHEN p.is_suspicious = 1 THEN 1 ELSE 0 END) as suspicious_count,
    SUM(CASE WHEN p.is_direct = 1 THEN 1 ELSE 0 END) as direct_deps
FROM report_package_manifests m
LEFT JOIN report_package_manifest_packages mp ON m.id = mp.report_package_manifest_id
LEFT JOIN report_packages p ON mp.report_package_id = p.id
GROUP BY m.id, m.display_path, m.ecosystem
ORDER BY malware_count DESC, suspicious_count DESC;

Generate package inventory report

SELECT 
    p.ecosystem,
    p.name,
    p.version,
    p.is_direct,
    p.package_url,
    m.display_path as found_in_manifest,
    p.created_at
FROM report_packages p
JOIN report_package_manifest_packages mp ON p.id = mp.report_package_id
JOIN report_package_manifests m ON mp.report_package_manifest_id = m.id
ORDER BY p.ecosystem, p.name, m.display_path;

Working with JSON Data

Since some fields store JSON data, you can use SQLite's JSON functions to query nested data:

Extract specific fields from package details

SELECT 
    name,
    version,
    json_extract(package_details, '$.commit') as commit_hash,
    json_extract(package_details, '$.ecosystem') as detail_ecosystem
FROM report_packages
WHERE package_details IS NOT NULL;

Query code analysis data

SELECT 
    name,
    version,
    json_extract(code_analysis, '$.usage_evidences') as usage_evidences
FROM report_packages
WHERE code_analysis IS NOT NULL;

Tips for Querying

  1. Use EXPLAIN QUERY PLAN to optimize slow queries
  2. Create indexes on frequently queried columns for better performance
  3. Use JSON functions to extract data from JSON fields
  4. Join tables to get comprehensive views across related data
  5. Use aggregate functions to generate summary statistics

Dependency Graph Analysis Queries

The SQLite3 reporter includes a comprehensive dependency graph stored in the report_dependency_graphs table, enabling advanced dependency analysis including path tracing, dependent discovery, and graph traversal queries.

Dependency Graph Schema

The dependency graph is stored in the report_dependency_graphs table with the following structure:

CREATE TABLE report_dependency_graphs (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    
    -- Source package (dependent)
    from_package_id TEXT NOT NULL,
    from_package_name TEXT NOT NULL,
    from_package_version TEXT NOT NULL,
    from_package_ecosystem TEXT NOT NULL,
    
    -- Target package (dependency)
    to_package_id TEXT NOT NULL,
    to_package_name TEXT NOT NULL,
    to_package_version TEXT NOT NULL,
    to_package_ecosystem TEXT NOT NULL,
    
    -- Edge metadata
    dependency_type TEXT,
    version_constraint TEXT,
    depth INTEGER DEFAULT 0,
    is_direct BOOLEAN DEFAULT FALSE,
    is_root_edge BOOLEAN DEFAULT FALSE,
    manifest_id TEXT NOT NULL,
    
    created_at DATETIME,
    updated_at DATETIME
);

Find All Dependencies of a Package

SELECT 
    to_package_name,
    to_package_version,
    to_package_ecosystem,
    depth,
    is_direct,
    dependency_type
FROM report_dependency_graphs 
WHERE from_package_name = 'express' 
  AND from_package_ecosystem = 'npm'
ORDER BY depth, to_package_name;

Find All Dependents of a Package

SELECT 
    from_package_name,
    from_package_version,
    from_package_ecosystem,
    depth,
    is_direct
FROM report_dependency_graphs 
WHERE to_package_name = 'lodash' 
  AND to_package_ecosystem = 'npm'
ORDER BY depth, from_package_name;

Find Direct Dependencies Only

SELECT 
    to_package_name,
    to_package_version,
    to_package_ecosystem,
    dependency_type
FROM report_dependency_graphs 
WHERE from_package_name = 'my-app' 
  AND is_direct = 1
ORDER BY to_package_name;

Find Root Packages (Top-Level Dependencies)

SELECT DISTINCT
    from_package_name,
    from_package_version,
    from_package_ecosystem,
    manifest_id
FROM report_dependency_graphs 
WHERE is_root_edge = 1
ORDER BY from_package_name;

Trace Dependency Path (Simple Path from A to B)

WITH RECURSIVE dependency_path AS (
    -- Base case: find direct dependencies
    SELECT 
        from_package_id,
        from_package_name,
        to_package_id,
        to_package_name,
        depth,
        from_package_name || ' -> ' || to_package_name AS path,
        1 AS level
    FROM report_dependency_graphs 
    WHERE from_package_name = 'my-app'
    
    UNION ALL
    
    -- Recursive case: follow the dependency chain
    SELECT 
        rdg.from_package_id,
        rdg.from_package_name,
        rdg.to_package_id,
        rdg.to_package_name,
        rdg.depth,
        dp.path || ' -> ' || rdg.to_package_name,
        dp.level + 1
    FROM report_dependency_graphs rdg
    JOIN dependency_path dp ON rdg.from_package_id = dp.to_package_id
    WHERE dp.level < 10 -- Prevent infinite loops
)
SELECT path, level 
FROM dependency_path
WHERE to_package_name = 'target-package'
ORDER BY level;

Find Packages with Vulnerabilities in Dependency Chain

SELECT DISTINCT
    dg.from_package_name,
    dg.from_package_version,
    dg.to_package_name AS vulnerable_dep,
    dg.to_package_version AS vulnerable_version,
    rv.vulnerability_id,
    rv.severity,
    dg.depth
FROM report_dependency_graphs dg
JOIN report_packages rp ON dg.to_package_id = rp.package_id
JOIN report_vulnerabilities rv ON rp.id = rv.report_package_vulnerabilities
WHERE dg.from_package_name = 'my-app'
  AND rv.severity IN ('CRITICAL', 'HIGH')
ORDER BY dg.depth, rv.severity;

Find Dependency Depth Distribution

SELECT 
    depth,
    COUNT(*) as edge_count,
    COUNT(DISTINCT to_package_name) as unique_packages
FROM report_dependency_graphs
GROUP BY depth
ORDER BY depth;

Find Packages with Most Dependencies

SELECT 
    from_package_name,
    from_package_version,
    from_package_ecosystem,
    COUNT(*) as dependency_count
FROM report_dependency_graphs
GROUP BY from_package_id, from_package_name, from_package_version, from_package_ecosystem
ORDER BY dependency_count DESC
LIMIT 10;

Find Packages with Most Dependents

SELECT 
    to_package_name,
    to_package_version,
    to_package_ecosystem,
    COUNT(*) as dependent_count
FROM report_dependency_graphs
GROUP BY to_package_id, to_package_name, to_package_version, to_package_ecosystem
ORDER BY dependent_count DESC
LIMIT 10;

Find Circular Dependencies

WITH RECURSIVE circular_deps AS (
    SELECT 
        from_package_id,
        to_package_id,
        from_package_name,
        to_package_name,
        from_package_name || ' -> ' || to_package_name AS path,
        1 AS level
    FROM report_dependency_graphs
    
    UNION ALL
    
    SELECT 
        rdg.from_package_id,
        rdg.to_package_id,
        rdg.from_package_name,
        rdg.to_package_name,
        cd.path || ' -> ' || rdg.to_package_name,
        cd.level + 1
    FROM report_dependency_graphs rdg
    JOIN circular_deps cd ON rdg.from_package_id = cd.to_package_id
    WHERE cd.level < 50
    AND rdg.to_package_id NOT IN (
        SELECT from_package_id 
        FROM circular_deps c2 
        WHERE c2.path = cd.path
    )
)
SELECT path
FROM circular_deps
WHERE to_package_id = from_package_id
ORDER BY level;

Complex Vulnerability Impact Analysis

-- Find all packages that depend on packages with high severity vulnerabilities
SELECT DISTINCT
    root_pkg.from_package_name as root_package,
    vuln_pkg.to_package_name as vulnerable_package,
    rv.vulnerability_id,
    rv.severity,
    MIN(dg.depth) as min_depth
FROM report_dependency_graphs root_pkg
JOIN report_dependency_graphs dg ON root_pkg.from_package_id = dg.from_package_id
JOIN report_packages rp ON dg.to_package_id = rp.package_id
JOIN report_vulnerabilities rv ON rp.id = rv.report_package_vulnerabilities
JOIN report_dependency_graphs vuln_pkg ON dg.to_package_id = vuln_pkg.to_package_id
WHERE root_pkg.is_root_edge = 1
  AND rv.severity IN ('CRITICAL', 'HIGH')
GROUP BY root_pkg.from_package_name, vuln_pkg.to_package_name, rv.vulnerability_id
ORDER BY min_depth, rv.severity;

Find Transitive Dependency Chains

SELECT 
    dg1.from_package_name as root_pkg,
    dg1.to_package_name as direct_dep,
    dg2.to_package_name as transitive_dep,
    dg1.depth + dg2.depth as total_depth
FROM report_dependency_graphs dg1 
JOIN report_dependency_graphs dg2 ON dg1.to_package_id = dg2.from_package_id
WHERE dg1.is_root_edge = 1 AND dg2.depth > 0
ORDER BY total_depth
LIMIT 10;

Vulnerability Impact Summary

SELECT 
    rp.name as vulnerable_package,
    rv.vulnerability_id,
    rv.severity,
    COUNT(dg.from_package_id) as packages_affected
FROM report_packages rp
JOIN report_vulnerabilities rv ON rp.id = rv.report_package_vulnerabilities
LEFT JOIN report_dependency_graphs dg ON rp.package_id = dg.to_package_id
GROUP BY rp.package_id, rv.vulnerability_id
ORDER BY rv.severity, packages_affected DESC;

Performance Optimization

The dependency graph queries are optimized with the following indexes:

  • from_package_id - for finding dependencies
  • to_package_id - for finding dependents
  • manifest_id - for manifest-specific queries
  • is_direct - for direct dependency queries
  • is_root_edge - for root package queries
  • depth - for depth-based queries

For large dependency graphs, consider:

  • Using LIMIT clauses for exploratory queries
  • Adding WHERE clauses to filter by ecosystem or manifest
  • Using the depth field to limit traversal depth
  • Creating additional indexes for frequently queried patterns

OpenSSF Scorecard Database Schema

The OpenSSF scorecard data is stored in separate, normalized entities for optimal querying:

ReportProjects Table

CREATE TABLE report_projects (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT NOT NULL,
    url TEXT,
    description TEXT,
    stars INTEGER,
    forks INTEGER,
    created_at DATETIME,
    updated_at DATETIME,
    report_package_projects INTEGER REFERENCES report_packages(id)
);

ReportScorecards Table

CREATE TABLE report_scorecards (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    score REAL NOT NULL,
    scorecard_version TEXT NOT NULL,
    repo_name TEXT NOT NULL,
    repo_commit TEXT NOT NULL,
    date TEXT,
    created_at DATETIME,
    updated_at DATETIME,
    report_project_scorecard INTEGER REFERENCES report_projects(id)
);

ReportScorecardChecks Table

CREATE TABLE report_scorecard_checks (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT NOT NULL,
    score REAL NOT NULL,
    reason TEXT,
    created_at DATETIME,
    updated_at DATETIME,
    report_scorecard_checks INTEGER REFERENCES report_scorecards(id)
);

ReportSlsaProvenances Table

CREATE TABLE report_slsa_provenances (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    source_repository TEXT NOT NULL,
    commit_sha TEXT NOT NULL,
    url TEXT NOT NULL,
    verified BOOLEAN NOT NULL DEFAULT FALSE,
    created_at DATETIME,
    updated_at DATETIME,
    report_package_slsa_provenances INTEGER REFERENCES report_packages(id)
);

This normalized schema enables:

  • Direct queries on scorecard scores without JSON parsing
  • Individual analysis of each scorecard check
  • Efficient joins across packages → projects → scorecards → checks
  • SLSA provenance tracking for supply chain security analysis
  • Verification status monitoring to identify unverified provenance
  • Source repository analysis to understand package origins
  • Proper indexing on scorecard and provenance fields for performance

Creating Views for Common Queries

You can create views to simplify frequently used queries:

-- Create a view for security overview
CREATE VIEW security_overview AS
SELECT 
    p.name,
    p.version,
    p.ecosystem,
    p.is_direct,
    p.is_malware,
    p.is_suspicious,
    m.display_path,
    m.ecosystem as manifest_ecosystem
FROM report_packages p
JOIN report_package_manifest_packages mp ON p.id = mp.report_package_id
JOIN report_package_manifests m ON mp.report_package_manifest_id = m.id;

-- Create a view for vulnerability impact analysis
CREATE VIEW vulnerability_impact AS
SELECT 
    rp.name as vulnerable_package,
    rp.version as vulnerable_version,
    rv.vulnerability_id,
    rv.severity,
    dg.from_package_name as affected_package,
    dg.depth as dependency_depth,
    dg.is_direct,
    dg.is_root_edge
FROM report_packages rp
JOIN report_vulnerabilities rv ON rp.id = rv.report_package_vulnerabilities
LEFT JOIN report_dependency_graphs dg ON rp.package_id = dg.to_package_id;

-- Create a view for scorecard analysis
CREATE VIEW scorecard_analysis AS
SELECT 
    p.name as package_name,
    p.version as package_version,
    p.ecosystem,
    proj.name as project_name,
    proj.url as project_url,
    proj.stars,
    proj.forks,
    s.score as scorecard_score,
    s.scorecard_version,
    s.repo_name,
    s.date as scorecard_date
FROM report_packages p
JOIN report_projects proj ON p.id = proj.report_package_projects
LEFT JOIN report_scorecards s ON proj.id = s.report_project_scorecard;

-- Create a view for security posture overview
CREATE VIEW security_posture AS
SELECT 
    p.name,
    p.version,
    p.ecosystem,
    p.is_malware,
    p.is_suspicious,
    COUNT(DISTINCT v.id) as vulnerability_count,
    SUM(CASE WHEN v.severity = 'CRITICAL' THEN 1 ELSE 0 END) as critical_vulns,
    SUM(CASE WHEN v.severity = 'HIGH' THEN 1 ELSE 0 END) as high_vulns,
    s.score as scorecard_score,
    proj.stars
FROM report_packages p
LEFT JOIN report_vulnerabilities v ON p.id = v.report_package_vulnerabilities
LEFT JOIN report_projects proj ON p.id = proj.report_package_projects
LEFT JOIN report_scorecards s ON proj.id = s.report_project_scorecard
GROUP BY p.id, proj.id, s.id;

-- Create a view for SLSA provenance analysis
CREATE VIEW slsa_provenance_analysis AS
SELECT 
    p.name as package_name,
    p.version as package_version,
    p.ecosystem,
    p.is_direct,
    slsa.source_repository,
    slsa.commit_sha,
    slsa.url as provenance_url,
    slsa.verified,
    CASE 
        WHEN slsa.id IS NULL THEN 'Missing'
        WHEN slsa.verified = true THEN 'Verified'
        ELSE 'Unverified'
    END as provenance_status
FROM report_packages p
LEFT JOIN report_slsa_provenances slsa ON p.id = slsa.report_package_slsa_provenances;

-- Use the views
SELECT * FROM security_overview WHERE is_malware = 1;
SELECT * FROM vulnerability_impact WHERE severity IN ('CRITICAL', 'HIGH');
SELECT * FROM scorecard_analysis WHERE scorecard_score < 5.0;
SELECT * FROM security_posture WHERE scorecard_score < 6 AND vulnerability_count > 0;
SELECT * FROM slsa_provenance_analysis WHERE provenance_status = 'Missing';
SELECT * FROM slsa_provenance_analysis WHERE is_direct = 1 AND provenance_status != 'Verified';