Files
Jonathan Miller 18fc79fa0a
Generic: Repo Health / Site Health (push) Has been skipped
Generic: Repo Health / Access control (push) Successful in 1s
Branch Policy Check / Verify merge target (pull_request) Successful in 1s
Generic: Repo Health / Site Health (pull_request) Has been skipped
Universal: PR Check / Branch Policy (pull_request) Successful in 1s
Generic: Repo Health / Access control (pull_request) Successful in 1s
PR RC Release / Build RC Release (pull_request) Successful in 3s
Universal: PR Check / Validate PR (pull_request) Failing after 6s
Branch Cleanup / Delete merged branch (pull_request) Successful in 2s
Universal: Pre-Release / Build Pre-Release (${{ inputs.stability || 'development' }}) (pull_request) Successful in 1m22s
Generic: Repo Health / Scripts governance (push) Has been cancelled
Generic: Repo Health / Repository health (push) Has been cancelled
Generic: Repo Health / Report Issues (push) Has been cancelled
Universal: PR Check / Build RC Package (pull_request) Has been cancelled
Universal: PR Check / Report Issues (pull_request) Has been cancelled
Generic: Repo Health / Scripts governance (pull_request) Has been cancelled
Generic: Repo Health / Repository health (pull_request) Has been cancelled
Generic: Repo Health / Report Issues (pull_request) Has been cancelled
feat(security): add dependency vulnerability scanner (#551)
Add dependency scanner module that parses manifest files (go.mod,
package.json, composer.json, requirements.txt) and checks dependencies
against the OSV.dev API for known CVEs. Implements the existing Scanner
interface and wires into the orchestrator for push-time scanning.
2026-06-07 10:32:04 -05:00

542 lines
15 KiB
Go

// Copyright 2026 Moko Consulting <hello@mokoconsulting.tech>
// SPDX-License-Identifier: GPL-3.0-or-later
package security
import (
"bytes"
"crypto/sha256"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"time"
security_model "code.mokoconsulting.tech/MokoConsulting/MokoGitea/models/security"
"code.mokoconsulting.tech/MokoConsulting/MokoGitea/modules/git"
"code.mokoconsulting.tech/MokoConsulting/MokoGitea/modules/log"
)
// ──────────────────────────────────────────────────────────────────────
// Dependency manifest parsers
// ──────────────────────────────────────────────────────────────────────
// dependency is one package requirement read from a manifest file.
type dependency struct {
	Name, Version string // package name and the version text taken from the manifest
	Ecosystem     string // "Go", "npm", "PyPI", "Packagist"
	FilePath      string // manifest file the entry came from
}
// manifestParser ties a manifest file name to the routine that reads it.
type manifestParser struct {
	FileName, Ecosystem string // manifest base name, and the ecosystem label of its packages

	// Parse turns the manifest text into dependencies; filePath is the
	// manifest's path as supplied by the caller.
	Parse func(content, filePath string) []dependency
}
// manifestParsers is the table of manifest formats the scanner can read.
var manifestParsers = []manifestParser{
	{FileName: "go.mod", Ecosystem: "Go", Parse: parseGoMod},
	{FileName: "package.json", Ecosystem: "npm", Parse: parsePackageJSON},
	{FileName: "composer.json", Ecosystem: "Packagist", Parse: parseComposerJSON},
	{FileName: "requirements.txt", Ecosystem: "PyPI", Parse: parseRequirementsTxt},
}
// parseGoMod extracts the module requirements listed in a go.mod file.
//
// Both forms of the require directive are recognised: the single-line
// "require path version" and the parenthesised block. Trailing "//" comments
// (for example "// indirect") are removed before a line is interpreted, so
// they can neither hide a block opener or closer nor cause a requirement to be
// dropped. All other directives (module, go, replace, exclude, ...) are
// ignored.
//
// content is the text of the file; filePath is recorded on every returned
// dependency. The result is nil when no requirement is found.
func parseGoMod(content, filePath string) []dependency {
	var deps []dependency

	// add records one "path version" pair; shorter lines are malformed and skipped.
	add := func(fields []string) {
		if len(fields) < 2 {
			return
		}
		deps = append(deps, dependency{
			Name:      fields[0],
			Version:   fields[1],
			Ecosystem: "Go",
			FilePath:  filePath,
		})
	}

	inRequire := false
	for _, line := range strings.Split(content, "\n") {
		if i := strings.Index(line, "//"); i >= 0 {
			line = line[:i]
		}
		fields := strings.Fields(line)
		if len(fields) == 0 {
			continue
		}

		switch {
		case fields[0] == ")":
			// Closes whichever block is open; only require blocks are tracked.
			inRequire = false
		case inRequire:
			// Lines like: github.com/foo/bar v1.2.3
			add(fields)
		case strings.HasPrefix(fields[0], "require("),
			fields[0] == "require" && len(fields) > 1 && strings.HasPrefix(fields[1], "("):
			inRequire = true
		case fields[0] == "require":
			// Single-line form: require github.com/foo/bar v1.2.3
			add(fields[1:])
		}
	}
	return deps
}
// parsePackageJSON reads the "dependencies" and "devDependencies" sections of
// a package.json document. Every entry becomes an npm dependency whose version
// is the declared range reduced by cleanSemver. Content that is not valid JSON
// yields nil.
func parsePackageJSON(content, filePath string) []dependency {
	manifest := struct {
		Dependencies    map[string]string `json:"dependencies"`
		DevDependencies map[string]string `json:"devDependencies"`
	}{}
	if json.Unmarshal([]byte(content), &manifest) != nil {
		return nil
	}

	var found []dependency
	// Runtime dependencies first, then development ones.
	for _, section := range []map[string]string{manifest.Dependencies, manifest.DevDependencies} {
		for pkgName, spec := range section {
			found = append(found, dependency{
				Name:      pkgName,
				Version:   cleanSemver(spec),
				Ecosystem: "npm",
				FilePath:  filePath,
			})
		}
	}
	return found
}
// parseComposerJSON extracts dependencies from the "require" and
// "require-dev" sections of a composer.json document.
//
// Only entries named "vendor/package" are returned. Composer's platform
// packages (php, hhvm, ext-*, lib-*, composer-plugin-api,
// composer-runtime-api, ...) never contain a slash and are not installable
// Packagist packages, so they are skipped. Versions are the declared
// constraints reduced by cleanSemver. Content that is not valid JSON yields
// nil.
func parseComposerJSON(content, filePath string) []dependency {
	var pkg struct {
		Require    map[string]string `json:"require"`
		RequireDev map[string]string `json:"require-dev"`
	}
	if err := json.Unmarshal([]byte(content), &pkg); err != nil {
		return nil
	}

	var deps []dependency
	for _, section := range []map[string]string{pkg.Require, pkg.RequireDev} {
		for name, constraint := range section {
			if !strings.Contains(name, "/") {
				continue // platform requirement, not a Packagist package
			}
			deps = append(deps, dependency{
				Name:      name,
				Version:   cleanSemver(constraint),
				Ecosystem: "Packagist",
				FilePath:  filePath,
			})
		}
	}
	return deps
}
// parseRequirementsTxt extracts dependencies from a pip requirements.txt file.
//
// For each requirement line the package name and one representative version
// are returned. The version comes from the first specifier clause that uses
// "===", "==", "~=", ">=" or "<=". Clauses using "<", ">" or "!=" name a
// version that is excluded rather than installed, so they never supply the
// version. Lines without a usable clause are skipped.
//
// Before a line is interpreted, the following are removed:
//   - comments ("#" at the start of the line or after whitespace);
//   - environment markers (everything from ";");
//   - extras ("name[extra]" becomes "name");
//   - anything after the version token, such as "--hash=..." options.
//
// Option lines (starting with "-") are ignored.
func parseRequirementsTxt(content, filePath string) []dependency {
	// Longest operator first so "===" is not mistaken for "==".
	versionOps := []string{"===", "==", "~=", ">=", "<="}

	var deps []dependency
	for _, line := range strings.Split(content, "\n") {
		line = strings.TrimSpace(line)
		for i := 0; i < len(line); i++ {
			if line[i] == '#' && (i == 0 || line[i-1] == ' ' || line[i-1] == '\t') {
				line = line[:i]
				break
			}
		}
		if i := strings.IndexByte(line, ';'); i >= 0 {
			line = line[:i]
		}
		line = strings.TrimSpace(line)
		if line == "" || strings.HasPrefix(line, "-") {
			continue
		}

		// The name runs up to the first character that can start a specifier.
		specAt := strings.IndexAny(line, "=<>!~")
		if specAt <= 0 {
			continue
		}
		name := strings.TrimSpace(line[:specAt])
		if i := strings.IndexByte(name, '['); i >= 0 {
			name = strings.TrimSpace(name[:i])
		}
		if name == "" {
			continue
		}

		version := ""
	clauses:
		for _, clause := range strings.Split(line[specAt:], ",") {
			clause = strings.TrimSpace(clause)
			for _, op := range versionOps {
				if !strings.HasPrefix(clause, op) {
					continue
				}
				if fields := strings.Fields(clause[len(op):]); len(fields) > 0 {
					version = fields[0]
					break clauses
				}
				break
			}
		}
		if version == "" {
			continue
		}

		deps = append(deps, dependency{
			Name:      name,
			Version:   version,
			Ecosystem: "PyPI",
			FilePath:  filePath,
		})
	}
	return deps
}
// cleanSemver reduces an npm/Composer version constraint to a single plain
// version: leading range operators (^, ~, >, <, =, !) are stripped and only
// the first constraint is kept. Constraints may be separated by whitespace
// ("1.0 || 2.0", ">=1.0 <2.0", "1.0 - 2.0"), by "|" or "||" without spaces
// ("^1.0|^2.0"), or by a comma (">=1.0,<2.0").
func cleanSemver(v string) string {
	v = strings.TrimSpace(v)
	v = strings.TrimLeft(v, "^~>=<!")
	v = strings.TrimSpace(v)
	// Keep only the first constraint of a range or alternative list.
	if idx := strings.IndexAny(v, " \t|,"); idx >= 0 {
		v = v[:idx]
	}
	return v
}
// ──────────────────────────────────────────────────────────────────────
// OSV.dev API client
// ──────────────────────────────────────────────────────────────────────
const (
	// osvBatchURL is the OSV.dev batch query endpoint.
	osvBatchURL = "https://api.osv.dev/v1/querybatch"
	// osvMaxBatch is the OSV batch limit: the most dependencies sent per request.
	osvMaxBatch = 1000
)

// osvClient is the shared HTTP client for OSV requests; every request is
// bounded by a 30-second timeout.
var osvClient = &http.Client{Timeout: 30 * time.Second}
// JSON shapes exchanged with the OSV.dev API.
type (
	// osvQuery is one entry of a batch request: a package at a version.
	osvQuery struct {
		Package *osvPackage `json:"package"`
		Version string      `json:"version"`
	}

	// osvPackage names a package within an ecosystem.
	osvPackage struct {
		Name      string `json:"name"`
		Ecosystem string `json:"ecosystem"`
	}

	// osvBatchRequest is the body posted to the batch endpoint.
	osvBatchRequest struct {
		Queries []osvQuery `json:"queries"`
	}

	// osvBatchResponse is the body returned by the batch endpoint.
	osvBatchResponse struct {
		Results []osvResult `json:"results"`
	}

	// osvResult lists the vulnerabilities reported for one query.
	osvResult struct {
		Vulns []osvVuln `json:"vulns"`
	}

	// osvVuln is a vulnerability record.
	osvVuln struct {
		ID       string        `json:"id"`
		Summary  string        `json:"summary"`
		Details  string        `json:"details"`
		Severity []osvSeverity `json:"severity"`
		Aliases  []string      `json:"aliases"`
	}

	// osvSeverity is one severity rating attached to a vulnerability.
	osvSeverity struct {
		Type  string `json:"type"`  // "CVSS_V3", "CVSS_V2"
		Score string `json:"score"` // CVSS vector string
	}
)
// queryOSV posts one batch query to OSV.dev and returns the decoded response.
//
// Dependencies whose version is empty, "*" or "latest" are left out of the
// request, so the returned results line up with the remaining dependencies in
// their original order. When nothing is left to ask, an empty response is
// returned without any network call. A transport failure, a non-200 status or
// an undecodable body is reported as an error.
func queryOSV(deps []dependency) (*osvBatchResponse, error) {
	request := osvBatchRequest{Queries: make([]osvQuery, 0, len(deps))}
	for _, dep := range deps {
		switch dep.Version {
		case "", "*", "latest":
			// No concrete version to ask about.
			continue
		}
		request.Queries = append(request.Queries, osvQuery{
			Package: &osvPackage{Name: dep.Name, Ecosystem: dep.Ecosystem},
			Version: dep.Version,
		})
	}
	if len(request.Queries) == 0 {
		return &osvBatchResponse{}, nil
	}

	payload, err := json.Marshal(request)
	if err != nil {
		return nil, fmt.Errorf("marshal OSV request: %w", err)
	}

	httpResp, err := osvClient.Post(osvBatchURL, "application/json", bytes.NewReader(payload))
	if err != nil {
		return nil, fmt.Errorf("OSV API request: %w", err)
	}
	defer httpResp.Body.Close()

	if httpResp.StatusCode != http.StatusOK {
		// Include at most 1 KiB of the body in the error for diagnosis.
		snippet, _ := io.ReadAll(io.LimitReader(httpResp.Body, 1024))
		return nil, fmt.Errorf("OSV API returned %d: %s", httpResp.StatusCode, string(snippet))
	}

	decoded := new(osvBatchResponse)
	if err := json.NewDecoder(httpResp.Body).Decode(decoded); err != nil {
		return nil, fmt.Errorf("decode OSV response: %w", err)
	}
	return decoded, nil
}
// ──────────────────────────────────────────────────────────────────────
// Severity mapping
// ──────────────────────────────────────────────────────────────────────
// mapCVSSSeverity picks an AlertSeverity from a vulnerability's severity
// entries. The first "CVSS_V3" entry whose score (as computed by
// extractCVSSBaseScore) is above zero decides the result:
//
//	>= 9.0 critical, >= 7.0 high, >= 4.0 medium, otherwise low.
//
// Entries of other types, and CVSS_V3 entries scoring zero, are passed over.
// With no usable entry the result is medium.
func mapCVSSSeverity(vulnSeverities []osvSeverity) security_model.AlertSeverity {
	for _, entry := range vulnSeverities {
		if entry.Type != "CVSS_V3" {
			continue
		}
		base := extractCVSSBaseScore(entry.Score)
		if base >= 9.0 {
			return security_model.SeverityCritical
		}
		if base >= 7.0 {
			return security_model.SeverityHigh
		}
		if base >= 4.0 {
			return security_model.SeverityMedium
		}
		if base > 0 {
			return security_model.SeverityLow
		}
		// A zero score carries no information; keep looking.
	}
	// No CVSS score available - default to medium
	return security_model.SeverityMedium
}
// extractCVSSBaseScore computes the base score (0.0 to 10.0) of a CVSS v3.x
// vector string such as "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H".
//
// The score is calculated with the base-metric equations of the CVSS v3.1
// specification (metric weights, scope-dependent impact and privileges
// weights, and the specification's Roundup function), so it matches the
// published score of the vector rather than an approximation.
//
// It returns 0 when the vector is empty, when any of the eight base metrics
// (AV, AC, PR, UI, S, C, I, A) is missing or has an unknown value, or when the
// vector describes no impact at all.
func extractCVSSBaseScore(vector string) float64 {
	if vector == "" {
		return 0
	}

	metrics := make(map[string]string)
	for _, segment := range strings.Split(vector, "/") {
		kv := strings.SplitN(segment, ":", 2)
		if len(kv) == 2 {
			metrics[kv[0]] = kv[1]
		}
	}

	// weight looks a metric value up in its weight table and remembers
	// whether every lookup so far succeeded.
	valid := true
	weight := func(metric string, table map[string]float64) float64 {
		w, ok := table[metrics[metric]]
		if !ok {
			valid = false
		}
		return w
	}

	var scopeChanged bool
	switch metrics["S"] {
	case "U":
	case "C":
		scopeChanged = true
	default:
		valid = false
	}

	attackVector := weight("AV", map[string]float64{"N": 0.85, "A": 0.62, "L": 0.55, "P": 0.2})
	attackComplexity := weight("AC", map[string]float64{"L": 0.77, "H": 0.44})
	userInteraction := weight("UI", map[string]float64{"N": 0.85, "R": 0.62})

	// Privileges Required weighs more when the scope changes.
	privileges := map[string]float64{"N": 0.85, "L": 0.62, "H": 0.27}
	if scopeChanged {
		privileges = map[string]float64{"N": 0.85, "L": 0.68, "H": 0.5}
	}
	privilegesRequired := weight("PR", privileges)

	impactWeights := map[string]float64{"H": 0.56, "L": 0.22, "N": 0}
	confidentiality := weight("C", impactWeights)
	integrity := weight("I", impactWeights)
	availability := weight("A", impactWeights)

	if !valid {
		return 0
	}

	iss := 1 - (1-confidentiality)*(1-integrity)*(1-availability)
	exploitability := 8.22 * attackVector * attackComplexity * privilegesRequired * userInteraction

	var impact float64
	if scopeChanged {
		// (iss - 0.02)^15, multiplied out.
		base, pow15 := iss-0.02, 1.0
		for k := 0; k < 15; k++ {
			pow15 *= base
		}
		impact = 7.52*(iss-0.029) - 3.25*pow15
	} else {
		impact = 6.42 * iss
	}
	if impact <= 0 {
		return 0
	}

	raw := impact + exploitability
	if scopeChanged {
		raw *= 1.08
	}
	if raw > 10.0 {
		raw = 10.0
	}

	// Roundup from the CVSS v3.1 specification: the smallest number with one
	// decimal place that is >= raw, computed on integers to avoid
	// floating-point artefacts.
	scaled := int64(raw*100000 + 0.5)
	if scaled%10000 == 0 {
		return float64(scaled) / 100000.0
	}
	return float64(scaled/10000+1) / 10.0
}
// ──────────────────────────────────────────────────────────────────────
// DependencyScanner
// ──────────────────────────────────────────────────────────────────────
// DependencyScanner looks up a project's declared dependencies in a database
// of known vulnerabilities. It has no fields.
type DependencyScanner struct{}

// NewDependencyScanner returns a new dependency vulnerability scanner.
func NewDependencyScanner() *DependencyScanner {
	return new(DependencyScanner)
}
// Type identifies this scanner; it always returns
// security_model.ScannerDependency.
func (*DependencyScanner) Type() security_model.ScannerType {
	return security_model.ScannerDependency
}
// ScanCommit scans a commit by delegating to ScanTree, so the result covers
// every manifest in the commit's tree.
func (s *DependencyScanner) ScanCommit(commit *git.Commit) ([]Finding, error) {
	findings, err := s.ScanTree(commit)
	return findings, err
}
// ScanTree scans every dependency manifest in the commit's tree and returns
// one Finding per known vulnerability affecting a declared dependency.
//
// The scan runs in three steps:
//
//  1. Every regular file whose base name matches an entry of manifestParsers
//     is parsed, except files below a vendor, node_modules or testdata
//     directory. At most 5 MiB of each manifest is read.
//  2. The dependencies are sent to OSV.dev in batches of osvMaxBatch. A
//     failed batch is logged and skipped, so the scan is best-effort.
//  3. Each reported vulnerability becomes a Finding. The OSV batch endpoint is
//     documented to return only the id and modification time of each
//     vulnerability, so the full record (summary, severity, aliases) is
//     fetched once per distinct id. If that lookup fails, the finding is
//     still produced from the id alone.
//
// A nil commit, or a tree without dependencies, yields (nil, nil). An error is
// returned only when the tree itself cannot be listed.
func (s *DependencyScanner) ScanTree(commit *git.Commit) ([]Finding, error) {
	if commit == nil {
		return nil, nil
	}

	// Step 1: Find and parse manifest files
	entries, err := commit.ListEntriesRecursiveFast()
	if err != nil {
		return nil, fmt.Errorf("ListEntriesRecursiveFast: %w", err)
	}

	var allDeps []dependency
	for _, entry := range entries {
		if !entry.IsRegular() {
			continue
		}
		path := entry.Name()
		if depScanIsVendored(path) {
			continue
		}
		baseName := path[strings.LastIndex(path, "/")+1:]

		for _, parser := range manifestParsers {
			if baseName != parser.FileName {
				continue
			}
			reader, err := entry.Blob().DataAsync()
			if err != nil {
				log.Trace("DependencyScanner: skip %s: %v", path, err)
				break
			}
			content, err := io.ReadAll(io.LimitReader(reader, 5*1024*1024)) // 5MB limit
			reader.Close()
			if err != nil {
				log.Trace("DependencyScanner: skip %s: %v", path, err)
				break
			}
			allDeps = append(allDeps, parser.Parse(string(content), path)...)
			break
		}
	}

	if len(allDeps) == 0 {
		return nil, nil
	}
	log.Info("DependencyScanner: found %d dependencies across manifest files", len(allDeps))

	// Step 2: Query OSV in batches
	var findings []Finding
	details := make(map[string]*osvVuln) // full records fetched during this scan, by id
	for start := 0; start < len(allDeps); start += osvMaxBatch {
		end := start + osvMaxBatch
		if end > len(allDeps) {
			end = len(allDeps)
		}
		batch := allDeps[start:end]

		resp, err := queryOSV(batch)
		if err != nil {
			log.Error("DependencyScanner: OSV query failed: %v", err)
			continue
		}

		// Step 3: Map results to findings
		// OSV batch response indices correspond 1:1 with the query indices.
		// queryOSV skips deps without a concrete version, so rebuild the
		// queried subset to align indices.
		queryable := make([]dependency, 0, len(batch))
		for _, d := range batch {
			if d.Version != "" && d.Version != "*" && d.Version != "latest" {
				queryable = append(queryable, d)
			}
		}

		for j, result := range resp.Results {
			if j >= len(queryable) {
				break
			}
			for _, reported := range result.Vulns {
				vuln := depScanResolveVuln(details, reported)
				findings = append(findings, depScanFinding(commit.ID.String(), queryable[j], vuln))
			}
		}
	}
	return findings, nil
}

// depScanIsVendored reports whether path lies below a directory named
// vendor, node_modules or testdata (compared case-insensitively). Only whole
// directory names count, so "myvendor/go.mod" is not treated as vendored.
func depScanIsVendored(path string) bool {
	segments := strings.Split(strings.ToLower(path), "/")
	for _, dir := range segments[:len(segments)-1] {
		switch dir {
		case "vendor", "node_modules", "testdata":
			return true
		}
	}
	return false
}

// depScanResolveVuln returns the full record for a vulnerability reported by
// the batch endpoint. A record that already carries details is returned
// unchanged. Otherwise the record is fetched once per id and cached in cache;
// a failed lookup is cached as nil so it is not retried during the scan. The
// returned record always keeps the id reported by the batch endpoint.
func depScanResolveVuln(cache map[string]*osvVuln, reported osvVuln) osvVuln {
	if reported.Summary != "" || reported.Details != "" ||
		len(reported.Severity) > 0 || len(reported.Aliases) > 0 {
		return reported
	}

	full, seen := cache[reported.ID]
	if !seen {
		fetched, err := depScanFetchVuln(reported.ID)
		if err != nil {
			log.Error("DependencyScanner: OSV vulnerability lookup failed: %v", err)
		} else {
			full = fetched
		}
		cache[reported.ID] = full
	}
	if full == nil {
		return reported
	}

	resolved := *full
	resolved.ID = reported.ID
	return resolved
}

// depScanFetchVuln downloads one vulnerability record from OSV.dev. Ids
// containing anything other than ASCII letters, digits, '-', '_' or ':' are
// rejected rather than placed into the request URL.
func depScanFetchVuln(id string) (*osvVuln, error) {
	if id == "" {
		return nil, fmt.Errorf("empty OSV vulnerability id")
	}
	for _, r := range id {
		switch {
		case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z', r >= '0' && r <= '9',
			r == '-', r == '_', r == ':':
		default:
			return nil, fmt.Errorf("unexpected character %q in OSV vulnerability id %q", r, id)
		}
	}

	resp, err := osvClient.Get("https://api.osv.dev/v1/vulns/" + id)
	if err != nil {
		return nil, fmt.Errorf("OSV API request for %s: %w", id, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("OSV API returned %d for %s", resp.StatusCode, id)
	}

	var vuln osvVuln
	if err := json.NewDecoder(io.LimitReader(resp.Body, 16*1024*1024)).Decode(&vuln); err != nil {
		return nil, fmt.Errorf("decode OSV vulnerability %s: %w", id, err)
	}
	return &vuln, nil
}

// depScanFinding builds the Finding for one vulnerability affecting dep.
func depScanFinding(commitSHA string, dep dependency, vuln osvVuln) Finding {
	// Build CVE alias for rule ID (prefer CVE over GHSA)
	ruleID := vuln.ID
	for _, alias := range vuln.Aliases {
		if strings.HasPrefix(alias, "CVE-") {
			ruleID = alias
			break
		}
	}

	description := vuln.Summary
	if description == "" {
		description = vuln.Details
	}

	// Marshalling a map[string]string cannot fail, so the error is ignored.
	meta, _ := json.Marshal(map[string]string{
		"vuln_id":   vuln.ID,
		"ecosystem": dep.Ecosystem,
		"package":   dep.Name,
		"version":   dep.Version,
	})

	fingerprint := fmt.Sprintf("%x", sha256.Sum256([]byte(vuln.ID+":"+dep.Name+":"+dep.Version)))

	return Finding{
		Scanner:     security_model.ScannerDependency,
		Severity:    mapCVSSSeverity(vuln.Severity),
		RuleID:      ruleID,
		Title:       fmt.Sprintf("%s in %s@%s", ruleID, dep.Name, dep.Version),
		Description: depScanTruncate(description, 500),
		FilePath:    dep.FilePath,
		CommitSHA:   commitSHA,
		Fingerprint: fingerprint[:32],
		Metadata:    string(meta),
	}
}

// depScanTruncate shortens s to at most limit bytes, ending it with "..."
// when it had to be cut. The cut never falls inside a multi-byte UTF-8
// sequence.
func depScanTruncate(s string, limit int) string {
	if len(s) <= limit {
		return s
	}
	cut := limit - 3
	if cut < 0 {
		cut = 0
	}
	// Step back while the byte at the cut is a UTF-8 continuation byte.
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut] + "..."
}