Skip to content
Open
Show file tree
Hide file tree
Changes from 30 commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
6ceef5f
feat: migrate gguf parser to separate PR from oci
spiffcs Oct 14, 2025
f92b7d2
chore: lint-fix
spiffcs Oct 14, 2025
1ad4a27
test: migrate gguf tests over
spiffcs Oct 14, 2025
bcd47d1
chore: schema and test additions
spiffcs Oct 14, 2025
b702952
tests: account for epoch in dedupe test
spiffcs Oct 14, 2025
08c0572
test: fix local flake
spiffcs Oct 14, 2025
f664f9e
fix: first pass pr fixes
spiffcs Oct 17, 2025
c689dcf
chore: refactor to use gguf-parser-go; 50mb limit
spiffcs Oct 22, 2025
64dc451
fix: update gguf data to be GGUFFileHeader
spiffcs Oct 22, 2025
38c0e6e
chore: warn -> debug
spiffcs Oct 22, 2025
9a2a45f
chore: pr feedback
spiffcs Oct 22, 2025
9b31c04
wip: wip
spiffcs Nov 3, 2025
6daea43
fix: pr comments
spiffcs Nov 13, 2025
b18f7bb
chore: regenerate json schema
spiffcs Nov 13, 2025
cdb41b0
chore: ignore local agent files
spiffcs Nov 13, 2025
b80592f
chore: pr comments
spiffcs Nov 13, 2025
56761ce
fix: raise model version on package
spiffcs Nov 13, 2025
9609ce2
chore: remove test-binary
spiffcs Nov 13, 2025
2976df5
chore: schema and test additions
spiffcs Oct 14, 2025
7ed34c8
chore: refactor to use gguf-parser-go; 50mb limit
spiffcs Oct 22, 2025
efcfecb
wip: wip no lrg file oci client
spiffcs Nov 5, 2025
8031957
wip: wip
spiffcs Nov 6, 2025
ec978f0
fix: use OCI title annotation for virtual path in GGUF layer extraction
spiffcs Nov 13, 2025
1a85625
fix: update after rebase
spiffcs Nov 13, 2025
bfe63bb
fix: add green fixes before pr fixes
spiffcs Nov 13, 2025
ffdd219
Merge branch 'main' into 4184-pt2-oci-model-support
spiffcs Dec 19, 2025
f5fd311
chore: remove incorrect bump of schema
spiffcs Dec 19, 2025
ea50011
chore: remove old file from previous PR review
spiffcs Dec 19, 2025
2da3718
chore: add oci model resolver back into correct PR
spiffcs Dec 19, 2025
dbe0716
cleanup: cleanup
spiffcs Dec 19, 2025
39a48d9
chore: remove stale implementation; root location
spiffcs Dec 19, 2025
a4ef861
chore: small pr change with media type
spiffcs Dec 19, 2025
04365f3
pr: remove stale debug statements
spiffcs Dec 19, 2025
9e83ddc
wip: wip
spiffcs Dec 19, 2025
ea64192
chore: refactor resolver so cataloger can use FilesByMediaType
spiffcs Dec 23, 2025
28dbf2f
chore: refactor source/provider so provider wraps source correctly
spiffcs Dec 23, 2025
c1929fe
chore: lint-fix
spiffcs Dec 23, 2025
924c790
chore: update so ID is not affected by annotations
spiffcs Dec 23, 2025
11e744d
chore: do not export layerInfo
spiffcs Dec 23, 2025
2718e33
chore: small refactor
spiffcs Dec 23, 2025
1bcd85c
chore: update tests to have new method for file.Resolver
spiffcs Dec 23, 2025
74fdc90
chore: cut round trip requests in half
spiffcs Dec 23, 2025
80ada3c
tests: add tests for provider/source layer
spiffcs Dec 23, 2025
8e2ef24
chore: gosec warnings
spiffcs Dec 23, 2025
9e79513
chore: decouple media type from fileresolver
spiffcs Dec 23, 2025
e9a3b9e
fix: update WithParserByMediaType to be aware of OciLayerResolver
spiffcs Dec 23, 2025
4971537
fix: unexport client
spiffcs Dec 23, 2025
a141a73
chore: pr comments
spiffcs Dec 23, 2025
a69941e
chore: update to unmix layerInfo concerns
spiffcs Dec 23, 2025
aa4d2b5
chore: add comment flagging id:"-"
spiffcs Dec 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion internal/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,4 @@ const (
// Changelog
// 16.1.0 - reformulated the python pdm fields (added "URL" and removed the unused "path" field).
// 16.1.1 - correct elf package osCpe field according to the document of systemd (also add appCpe field)

)
4 changes: 4 additions & 0 deletions syft/create_sbom_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/sbom"
"github.com/anchore/syft/syft/source"
"github.com/anchore/syft/syft/source/ocimodelsource"
)

// CreateSBOMConfig specifies all parameters needed for creating an SBOM.
Expand Down Expand Up @@ -483,6 +484,9 @@ func findDefaultTags(src source.Description) ([]string, error) {
return []string{pkgcataloging.DirectoryTag, filecataloging.FileTag}, nil
case source.SnapMetadata:
return []string{pkgcataloging.InstalledTag, filecataloging.FileTag}, nil
case *ocimodelsource.OCIModelMetadata:
// OCI model artifacts should use image-like catalogers since they provide files to scan
return []string{pkgcataloging.ImageTag, filecataloging.FileTag}, nil
default:
return nil, fmt.Errorf("unable to determine default cataloger tag for source type=%T", m)
}
Expand Down
2 changes: 1 addition & 1 deletion syft/format/internal/spdxutil/helpers/source_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ func SourceInfo(p pkg.Package) string {
case pkg.TerraformPkg:
answer = "acquired package info from Terraform dependency lock file"
case pkg.ModelPkg:
answer = "acquired package info from AI artifact (e.g. GGUF File"
answer = "acquired package info from AI artifact (e.g. GGUF File)"
default:
answer = "acquired package info from the following paths"
}
Expand Down
157 changes: 155 additions & 2 deletions syft/pkg/cataloger/ai/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,165 @@ including support for GGUF (GPT-Generated Unified Format) model files.
package ai

import (
"context"
"fmt"
"io"
"os"

gguf_parser "github.com/gpustack/gguf-parser-go"

"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
"github.com/anchore/syft/syft/source/ocimodelsource"
)

const (
catalogerName = "gguf-cataloger"
)

// ggufCataloger implements pkg.Cataloger with support for both file-based and OCI layer-based discovery.
type ggufCataloger struct {
	// genericCataloger performs the glob-based ("**/*.gguf") discovery that
	// Catalog delegates to when the resolver is not an OCIResolver.
	genericCataloger pkg.Cataloger
}

// NewGGUFCataloger returns a new cataloger instance for GGUF model files.
// The returned cataloger wraps a generic glob-based cataloger ("**/*.gguf",
// parsed by parseGGUFModel) and additionally supports OCI layer-aware
// discovery when the resolver handed to Catalog implements OCIResolver.
func NewGGUFCataloger() pkg.Cataloger {
	// Only the wrapper is returned; returning the bare generic cataloger here
	// would bypass the OCI layer-aware path entirely.
	return &ggufCataloger{
		genericCataloger: generic.NewCataloger(catalogerName).
			WithParserByGlobs(parseGGUFModel, "**/*.gguf"),
	}
}

// Name returns the cataloger name, i.e. the package-level catalogerName
// constant ("gguf-cataloger").
func (c *ggufCataloger) Name() string {
	return catalogerName
}

// Catalog discovers GGUF model packages using the given resolver.
// Resolvers that do not implement OCIResolver are handed to the wrapped
// glob-based cataloger; resolvers that do are processed layer by layer.
func (c *ggufCataloger) Catalog(ctx context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) {
	layerAware, isOCI := resolver.(ocimodelsource.OCIResolver)
	if !isOCI {
		// No layer-level access available: rely on file globs.
		log.Debug("using glob-based discovery for GGUF models")
		return c.genericCataloger.Catalog(ctx, resolver)
	}

	log.Debug("using OCI layer-aware discovery for GGUF models")
	return c.catalogFromOCILayers(ctx, layerAware)
}

// catalogFromOCILayers discovers GGUF models by asking the resolver for every
// layer whose media type is GGUFLayerMediaType and parsing each one in turn.
//
// A layer that fails to parse is logged at warn level and skipped. If ctx is
// cancelled between layers, the packages collected so far are returned
// together with the context error. No relationships are produced.
func (c *ggufCataloger) catalogFromOCILayers(ctx context.Context, resolver ocimodelsource.OCIResolver) ([]pkg.Package, []artifact.Relationship, error) {
	layerDigests, err := resolver.LayerDigestsByMediaType(ocimodelsource.GGUFLayerMediaType)
	switch {
	case err != nil:
		return nil, nil, fmt.Errorf("failed to get GGUF layer digests: %w", err)
	case len(layerDigests) == 0:
		log.Debug("no GGUF layers found by media type")
		return nil, nil, nil
	}

	var found []pkg.Package
	for i, layerDigest := range layerDigests {
		// Stop early when the caller has cancelled the operation.
		if cancelErr := ctx.Err(); cancelErr != nil {
			return found, nil, cancelErr
		}

		log.WithFields("digest", layerDigest, "index", i).Debug("processing GGUF layer")

		model, parseErr := c.parseGGUFLayer(resolver, layerDigest, i)
		if parseErr != nil {
			log.WithFields("digest", layerDigest, "error", parseErr).Warn("failed to parse GGUF layer")
			continue
		}
		if model == nil {
			continue
		}
		found = append(found, *model)
	}

	return found, nil, nil
}

// parseGGUFLayer parses a single GGUF layer and returns the discovered package.
//
// The layer identified by digest is read through an io.LimitedReader capped at
// maxHeaderSize bytes and copied into a temporary file, which is then parsed
// with gguf-parser-go. layerIndex is used to build the virtual location
// ("/layer-<index>.gguf") and, when the GGUF metadata carries no model name,
// the fallback package name ("model-layer-<index>").
//
// An error is returned when the layer content cannot be fetched, the temporary
// file cannot be created, written, or closed, or the GGUF data cannot be parsed.
func (c *ggufCataloger) parseGGUFLayer(resolver ocimodelsource.OCIResolver, digest string, layerIndex int) (*pkg.Package, error) {
	// Fetch the layer content
	reader, err := resolver.LayerContentsByDigest(digest)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch layer content: %w", err)
	}
	defer internal.CloseAndLogError(reader, digest)

	// Create a temporary file for the gguf-parser library, which parses from a path
	tempFile, err := os.CreateTemp("", "syft-gguf-layer-*.gguf")
	if err != nil {
		return nil, fmt.Errorf("failed to create temp file: %w", err)
	}
	tempPath := tempFile.Name()
	// Best-effort cleanup: the temp file is only needed for the duration of this call.
	defer os.Remove(tempPath)

	// Copy the GGUF header using LimitedReader to prevent OOM
	limitedReader := &io.LimitedReader{R: reader, N: maxHeaderSize}
	copyErr := copyHeader(tempFile, limitedReader)
	// Close exactly once on every path, and keep the result: a failed close means
	// the header may not have been fully persisted, which would otherwise only
	// surface later as a confusing parse error.
	closeErr := tempFile.Close()
	if copyErr != nil {
		return nil, fmt.Errorf("failed to copy GGUF header: %w", copyErr)
	}
	if closeErr != nil {
		return nil, fmt.Errorf("failed to close temp file: %w", closeErr)
	}

	// Parse using gguf-parser-go
	ggufFile, err := gguf_parser.ParseGGUFFile(tempPath,
		gguf_parser.SkipLargeMetadata(),
	)
	if err != nil {
		return nil, fmt.Errorf("failed to parse GGUF file: %w", err)
	}

	// Extract metadata
	metadata := ggufFile.Metadata()
	modelVersion := extractVersion(ggufFile.Header.MetadataKV)

	// Convert to syft metadata structure
	syftMetadata := &pkg.GGUFFileHeader{
		Architecture:          metadata.Architecture,
		Quantization:          metadata.FileTypeDescriptor,
		Parameters:            uint64(metadata.Parameters),
		GGUFVersion:           uint32(ggufFile.Header.Version),
		TensorCount:           ggufFile.Header.TensorCount,
		RemainingKeyValues:    convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
		MetadataKeyValuesHash: computeKVMetadataHash(ggufFile.Header.MetadataKV),
	}

	// If model name is not in metadata, use a generated name
	modelName := metadata.Name
	if modelName == "" {
		modelName = fmt.Sprintf("model-layer-%d", layerIndex)
	}

	// Create a virtual location for the layer; the layer has no real path on disk
	virtualPath := fmt.Sprintf("/layer-%d.gguf", layerIndex)
	location := file.NewLocation(virtualPath).WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)

	// Create package from metadata
	p := newGGUFPackage(
		syftMetadata,
		modelName,
		modelVersion,
		metadata.License,
		location,
	)

	return &p, nil
}
39 changes: 39 additions & 0 deletions syft/source/ocimodelsource/metadata.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package ocimodelsource

import "github.com/anchore/syft/syft/source"

// OCIModelMetadata represents all static metadata that defines what an OCI model artifact is.
// This is similar to ImageMetadata but includes model-specific fields and OCI artifact annotations.
//
// Several JSON keys differ from the Go field names: ID is serialized as
// "artifactID", Size as "artifactSize", RawManifest as "manifest", RawConfig as
// "config", and Variant as "architectureVariant".
//
// NOTE(review): the core fields duplicate ImageMetadata; embedding that type may
// be simpler — confirm the serialized output and source ID stay unchanged first.
type OCIModelMetadata struct {
	// Core OCI artifact metadata (mirrors ImageMetadata)
	UserInput      string                 `json:"userInput"`
	ID             string                 `json:"artifactID"`
	ManifestDigest string                 `json:"manifestDigest"`
	MediaType      string                 `json:"mediaType"`
	Tags           []string               `json:"tags"`
	Size           int64                  `json:"artifactSize"`
	Layers         []source.LayerMetadata `json:"layers"`
	RawManifest    []byte                 `json:"manifest"`
	RawConfig      []byte                 `json:"config"`
	RepoDigests    []string               `json:"repoDigests"`
	Architecture   string                 `json:"architecture"`
	Variant        string                 `json:"architectureVariant,omitempty"`
	OS             string                 `json:"os"`
	Labels         map[string]string      `json:"labels,omitempty"`

	// OCI-specific metadata (omitted from JSON when empty)
	Annotations map[string]string `json:"annotations,omitempty"`

	// Model-specific metadata (both fields are omitted from JSON when empty)
	ModelFormat string          `json:"modelFormat,omitempty"` // e.g., "gguf"
	GGUFLayers  []GGUFLayerInfo `json:"ggufLayers,omitempty"`
}
Copy link
Contributor

@wagoodman wagoodman Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
type OCIModelMetadata struct {
// Core OCI artifact metadata (mirrors ImageMetadata)
UserInput string `json:"userInput"`
ID string `json:"artifactID"`
ManifestDigest string `json:"manifestDigest"`
MediaType string `json:"mediaType"`
Tags []string `json:"tags"`
Size int64 `json:"artifactSize"`
Layers []source.LayerMetadata `json:"layers"`
RawManifest []byte `json:"manifest"`
RawConfig []byte `json:"config"`
RepoDigests []string `json:"repoDigests"`
Architecture string `json:"architecture"`
Variant string `json:"architectureVariant,omitempty"`
OS string `json:"os"`
Labels map[string]string `json:"labels,omitempty"`
// OCI-specific metadata
Annotations map[string]string `json:"annotations,omitempty"`
// Model-specific metadata
ModelFormat string `json:"modelFormat,omitempty"` // e.g., "gguf"
GGUFLayers []GGUFLayerInfo `json:"ggufLayers,omitempty"`
}
type OCIModelMetadata{
ImageMetadata
Annotations map[string]string
}

or

Suggested change
type OCIModelMetadata struct {
// Core OCI artifact metadata (mirrors ImageMetadata)
UserInput string `json:"userInput"`
ID string `json:"artifactID"`
ManifestDigest string `json:"manifestDigest"`
MediaType string `json:"mediaType"`
Tags []string `json:"tags"`
Size int64 `json:"artifactSize"`
Layers []source.LayerMetadata `json:"layers"`
RawManifest []byte `json:"manifest"`
RawConfig []byte `json:"config"`
RepoDigests []string `json:"repoDigests"`
Architecture string `json:"architecture"`
Variant string `json:"architectureVariant,omitempty"`
OS string `json:"os"`
Labels map[string]string `json:"labels,omitempty"`
// OCI-specific metadata
Annotations map[string]string `json:"annotations,omitempty"`
// Model-specific metadata
ModelFormat string `json:"modelFormat,omitempty"` // e.g., "gguf"
GGUFLayers []GGUFLayerInfo `json:"ggufLayers,omitempty"`
}
type OCIModelMetadata ImageMetadata

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not saying this must be done, but given that the GGUF/model-format properties are redundant, the two types are essentially the same. The annotations are new, but they are still relevant to OCI images.

Copy link
Contributor

@wagoodman wagoodman Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should probably tag this with `id:"-"` to ensure it doesn't affect the ID of existing sources. It's probably worth adding a test to ensure the ID isn't changing.


// GGUFLayerInfo represents metadata about a GGUF layer in the OCI artifact.
// Size and FetchedBytes are recorded separately because only part of a layer
// blob may be fetched.
type GGUFLayerInfo struct {
	Digest       string            `json:"digest"`                // presumably the layer's content digest — TODO confirm
	Size         int64             `json:"size"`                  // Full blob size in registry
	MediaType    string            `json:"mediaType"`             // Should be "application/vnd.docker.ai.gguf.v3"
	Annotations  map[string]string `json:"annotations,omitempty"` // omitted from JSON when empty
	FetchedBytes int64             `json:"fetchedBytes"`          // How many bytes we actually fetched via range-GET
}
Loading
Loading