Skip to content
Open
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -288,10 +288,12 @@ feature is currently disabled by default. To enable this feature add the followi
```yaml
external-sources:
enable: true
abort-after: 10m
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can make the meaning clearer by changing the name. As written, this could be interpreted as either:
a. aborting looking up from external sources in general after the duration elapses
b. aborting a single request to an external source after the duration elapses

From the functionality implemented, (b) is the intended meaning.

Regarding naming and the above context, request-timeout feels like a more descriptive name.

maven:
search-upstream-by-sha1: true
base-url: https://search.maven.org/solrsearch/select
rate-limit: 300ms # Time between Maven API requests
abort-after: 5m # overrides the global external-sources abort-after for Maven
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cc @wagoodman - I know he's pretty sensitive to duplicate fields that override each other so I'd like him to chime in on where he sees this going or what his preference would be

```

You can also configure the base-url if you're using another registry as your maven endpoint.
Expand Down
27 changes: 22 additions & 5 deletions cmd/grype/cli/options/datasources.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,24 @@ import (

const (
defaultMavenBaseURL = "https://search.maven.org/solrsearch/select"
defaultAbortAfter = 10 * time.Minute
)

type externalSources struct {
Enable bool `yaml:"enable" json:"enable" mapstructure:"enable"`
Maven maven `yaml:"maven" json:"maven" mapstructure:"maven"`
Enable bool `yaml:"enable" json:"enable" mapstructure:"enable"`
AbortAfter *time.Duration `yaml:"abort-after" json:"abortAfter" mapstructure:"abort-after"`
Maven maven `yaml:"maven" json:"maven" mapstructure:"maven"`
}

var _ interface {
clio.FieldDescriber
} = (*externalSources)(nil)

type maven struct {
SearchUpstreamBySha1 bool `yaml:"search-upstream" json:"searchUpstreamBySha1" mapstructure:"search-maven-upstream"`
BaseURL string `yaml:"base-url" json:"baseUrl" mapstructure:"base-url"`
RateLimit time.Duration `yaml:"rate-limit" json:"rateLimit" mapstructure:"rate-limit"`
SearchUpstreamBySha1 bool `yaml:"search-upstream" json:"searchUpstreamBySha1" mapstructure:"search-maven-upstream"`
BaseURL string `yaml:"base-url" json:"baseUrl" mapstructure:"base-url"`
RateLimit time.Duration `yaml:"rate-limit" json:"rateLimit" mapstructure:"rate-limit"`
AbortAfter *time.Duration `yaml:"abort-after" json:"abortAfter" mapstructure:"abort-after"`
}

func defaultExternalSources() externalSources {
Expand All @@ -42,11 +45,25 @@ func (cfg externalSources) ToJavaMatcherConfig() java.ExternalSearchConfig {
if !cfg.Enable {
smu = cfg.Enable
}

cfg.Maven.AbortAfter = multiLevelOption[time.Duration](defaultAbortAfter, cfg.AbortAfter, cfg.Maven.AbortAfter)

return java.ExternalSearchConfig{
SearchMavenUpstream: smu,
MavenBaseURL: cfg.Maven.BaseURL,
MavenRateLimit: cfg.Maven.RateLimit,
AbortAfter: *cfg.Maven.AbortAfter,
}
}

// multiLevelOption resolves a setting that can be supplied at several levels.
//
// The options are given in increasing order of precedence: the last non-nil
// entry wins. When every entry is nil (or none are given) defaultValue is used.
// The returned pointer always refers to a fresh copy, never to one of the
// pointers passed in.
func multiLevelOption[T any](defaultValue T, option ...*T) *T {
	// Walk from the highest-precedence level down; the first value found is
	// the one a forward scan would have ended on.
	for i := len(option) - 1; i >= 0; i-- {
		if level := option[i]; level != nil {
			chosen := *level
			return &chosen
		}
	}
	return &defaultValue
}

func (cfg *externalSources) DescribeFields(descriptions clio.FieldDescriptionSet) {
Expand Down
9 changes: 9 additions & 0 deletions grype/matcher/java/matcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ type Matcher struct {
// ExternalSearchConfig holds the settings the Java matcher uses when it
// consults an external Maven search endpoint.
type ExternalSearchConfig struct {
	// SearchMavenUpstream gates the upstream lookup: Match only attempts
	// upstream Maven matching when this is true.
	SearchMavenUpstream bool

	// MavenBaseURL is the Maven search endpoint to query.
	MavenBaseURL string

	// AbortAfter is the timeout for upstream lookups. Match only derives a
	// timeout context from it when the value is greater than zero.
	//
	// NOTE(review): in the Match code visible alongside this type, the context
	// returned by context.WithTimeout is assigned to the blank identifier and
	// never passed on, so this timeout may not be enforced yet; confirm.
	AbortAfter time.Duration

	// MavenRateLimit is the time between Maven API requests.
	MavenRateLimit time.Duration
}

Expand Down Expand Up @@ -54,6 +55,14 @@ func (m *Matcher) Match(store vulnerability.Provider, p pkg.Package) ([]match.Ma
var matches []match.Match

if m.cfg.SearchMavenUpstream {
timeout := m.cfg.AbortAfter
ctx := context.Background()
if timeout > 0 {
var cancel context.CancelFunc
_, cancel = context.WithTimeout(ctx, m.cfg.AbortAfter)
defer cancel()
}

upstreamMatches, err := m.matchUpstreamMavenPackages(store, p)
if err != nil {
if strings.Contains(err.Error(), "no artifact found") {
Expand Down
40 changes: 37 additions & 3 deletions grype/matcher/java/matcher_mocks_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package java
import (
"context"
"errors"
"testing"
"time"

"github.com/anchore/grype/grype/pkg"
"github.com/anchore/grype/grype/version"
Expand Down Expand Up @@ -33,13 +35,45 @@ func newMockProvider() vulnerability.Provider {
}

type mockMavenSearcher struct {
pkg pkg.Package
tb testing.TB
pkg *pkg.Package
work *time.Duration
simulateRateLimiting bool
}

func (m mockMavenSearcher) GetMavenPackageBySha(context.Context, string) (*pkg.Package, error) {
// newMockSearcher builds a mockMavenSearcher bound to tb, which the mock uses
// for logging. All other fields are left at their zero values; chain the With*
// methods to configure them.
func newMockSearcher(tb testing.TB) mockMavenSearcher {
	var searcher mockMavenSearcher
	searcher.tb = tb
	return searcher
}

// WithPackage returns a copy of the mock whose pkg field points at a copy of p.
// The receiver is a value, so the searcher the method was called on is not
// modified.
func (m mockMavenSearcher) WithPackage(p pkg.Package) mockMavenSearcher {
	configured := m
	configured.pkg = &p
	return configured
}

// WithWorkDuration returns a copy of the mock whose work field is set to
// duration, the simulated time a lookup takes. The receiver is a value, so the
// searcher the method was called on is not modified.
func (m mockMavenSearcher) WithWorkDuration(duration time.Duration) mockMavenSearcher {
	configured := m
	configured.work = &duration
	return configured
}

func (m mockMavenSearcher) GetMavenPackageBySha(ctx context.Context, sha1 string) (*pkg.Package, error) {
if m.simulateRateLimiting {
return nil, errors.New("you been rate limited")
}
return &m.pkg, nil
deadline, ok := ctx.Deadline()

m.tb.Log("GetMavenPackageBySha called with deadline:", deadline, "deadline set:", ok)

if m.work != nil {
select {
case <-time.After(*m.work):
return m.pkg, nil
case <-ctx.Done():
// If the context is done before the sleep is over, return a context.DeadlineExceeded error
return m.pkg, ctx.Err()
}
} else {
return m.pkg, ctx.Err()
}
}
49 changes: 49 additions & 0 deletions grype/matcher/java/matcher_test.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package java

import (
"context"
"testing"
"time"

"github.com/google/uuid"
"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -44,7 +46,7 @@

t.Run("matching from maven search results", func(t *testing.T) {
matcher := newMatcher(mockMavenSearcher{
pkg: p,

Check failure on line 49 in grype/matcher/java/matcher_test.go

View workflow job for this annotation

GitHub Actions / Unit tests

cannot use p (variable of struct type "github.com/anchore/grype/grype/pkg".Package) as *"github.com/anchore/grype/grype/pkg".Package value in struct literal
})
actual, _ := matcher.matchUpstreamMavenPackages(store, p)

Expand Down Expand Up @@ -80,3 +82,50 @@
assert.Errorf(t, err, "should have gotten an error from the rate limiting")
})
}

// TestMatcherJava_TestMatchUpstreamMavenPackagesTimeout is meant to check that
// matchUpstreamMavenPackages reports context.DeadlineExceeded when an upstream
// Maven lookup runs out of time.
//
// NOTE(review): as written the subtest never gives the matcher a deadline.
// The context from context.WithTimeout is assigned to the blank identifier,
// and the matcher config only sets SearchMavenUpstream (AbortAfter stays zero).
// The mock is also built with simulateRateLimiting, which makes
// GetMavenPackageBySha return a plain "you been rate limited" error.
// Confirm whether the ErrorIs assertion below can pass in this form.
func TestMatcherJava_TestMatchUpstreamMavenPackagesTimeout(t *testing.T) {
// newMatcher wires the given searcher into a Matcher with upstream Maven
// search enabled and every other setting left at its zero value.
newMatcher := func(searcher MavenSearcher) *Matcher {
return &Matcher{
cfg: MatcherConfig{
ExternalSearchConfig: ExternalSearchConfig{
SearchMavenUpstream: true,
},
},
MavenSearcher: searcher,
}
}
store := newMockProvider()

// Java package fixture carrying a single sha1 archive digest.
p := pkg.Package{
ID: pkg.ID(uuid.NewString()),
Name: "org.springframework.spring-webmvc",
Version: "5.1.5.RELEASE",
Language: syftPkg.Java,
Type: syftPkg.JavaPkg,
Metadata: pkg.JavaMetadata{
ArchiveDigests: []pkg.Digest{
{
Algorithm: "sha1",
Value: "236e3bfdbdc6c86629237a74f0f11414adb4e211",
},
},
},
}

t.Run("handles context timeout", func(t *testing.T) {
// Create a mock searcher that simulates rate limiting
// NOTE(review): tb, pkg and work are left unset on this literal; only the
// rate-limiting branch of the mock is exercised.
searcher := mockMavenSearcher{
simulateRateLimiting: true,
}
matcher := newMatcher(searcher)

// Create a context with a very short timeout
// NOTE(review): the derived context is discarded here, so nothing below can
// observe this 1ms deadline; only cancel is kept to release the timer.
_, cancel := context.WithTimeout(context.Background(), 1*time.Millisecond)
defer cancel()

_, err := matcher.matchUpstreamMavenPackages(store, p)

require.Error(t, err, "expected an error due to timeout")
assert.ErrorIs(t, err, context.DeadlineExceeded, "should have gotten a deadline exceeded error")
})
}
122 changes: 76 additions & 46 deletions grype/matcher/java/maven_search.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
} `json:"response"`
}

func (ms *mavenSearch) GetMavenPackageBySha(ctx context.Context, sha1 string) (*pkg.Package, error) {

Check failure on line 55 in grype/matcher/java/maven_search.go

View workflow job for this annotation

GitHub Actions / Static analysis

Function 'GetMavenPackageBySha' is too long (87 > 70) (funlen)
if sha1 == "" {
return nil, errors.New("empty sha1 digest")
}
Expand All @@ -72,51 +72,81 @@
return nil, fmt.Errorf("rate limiter error: %w", err)
}

req, err := http.NewRequestWithContext(ctx, http.MethodGet, ms.baseURL, nil)
if err != nil {
return nil, fmt.Errorf("unable to initialize HTTP client: %w", err)
}

q := req.URL.Query()
q.Set("q", fmt.Sprintf(sha1Query, sha1))
q.Set("rows", "1")
q.Set("wt", "json")
req.URL.RawQuery = q.Encode()

resp, err := ms.client.Do(req)
if err != nil {
return nil, fmt.Errorf("sha1 search error: %w", err)
}
defer resp.Body.Close()

if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("status %s from %s", resp.Status, req.URL.String())
}

var res mavenAPIResponse
if err = json.NewDecoder(resp.Body).Decode(&res); err != nil {
return nil, fmt.Errorf("json decode error: %w", err)
resultChan := make(chan *pkg.Package)
errChan := make(chan error)

// Create a new context with cancelation to clean up resources
searchCtx, cancel := context.WithCancel(ctx)
defer cancel() // Ensure resources are cleaned up when function returns

go func() {
defer close(resultChan)
defer close(errChan)

// Create request with the proper context
req, err := http.NewRequestWithContext(searchCtx, http.MethodGet, ms.baseURL, nil)
if err != nil {
errChan <- fmt.Errorf("unable to initialize HTTP client: %w", err)
return
}

q := req.URL.Query()
q.Set("q", fmt.Sprintf(sha1Query, sha1))
q.Set("rows", "1")
q.Set("wt", "json")
req.URL.RawQuery = q.Encode()

resp, err := ms.client.Do(req)
if err != nil {
errChan <- fmt.Errorf("sha1 search error: %w", err)
return
}
defer resp.Body.Close()

if resp.StatusCode != http.StatusOK {
errChan <- fmt.Errorf("status %s from %s", resp.Status, req.URL.String())
return
}

var res mavenAPIResponse
if err = json.NewDecoder(resp.Body).Decode(&res); err != nil {
errChan <- fmt.Errorf("json decode error: %w", err)
return
}

if len(res.Response.Docs) == 0 {
errChan <- fmt.Errorf("digest %s: %w", sha1, errors.New("no artifact found"))
return
}

// artifacts might have the same SHA-1 digests.
// e.g. "javax.servlet:jstl" and "jstl:jstl"
docs := res.Response.Docs
sort.Slice(docs, func(i, j int) bool {
return docs[i].ID < docs[j].ID
})
d := docs[0]

resultChan <- &pkg.Package{
Name: fmt.Sprintf("%s:%s", d.GroupID, d.ArtifactID),
Version: d.Version,
Language: syftPkg.Java,
Metadata: pkg.JavaMetadata{
PomArtifactID: d.ArtifactID,
PomGroupID: d.GroupID,
},
}
}()

select {
case <-ctx.Done():
// The context was canceled or its deadline was exceeded
return nil, ctx.Err()
case res := <-resultChan:
// The work finished before the context was done
return res, nil
case err := <-errChan:
// There was an error getting the package
return nil, err
}

if len(res.Response.Docs) == 0 {
return nil, fmt.Errorf("digest %s: %w", sha1, errors.New("no artifact found"))
}

// artifacts might have the same SHA-1 digests.
// e.g. "javax.servlet:jstl" and "jstl:jstl"
docs := res.Response.Docs
sort.Slice(docs, func(i, j int) bool {
return docs[i].ID < docs[j].ID
})
d := docs[0]

return &pkg.Package{
Name: fmt.Sprintf("%s:%s", d.GroupID, d.ArtifactID),
Version: d.Version,
Language: syftPkg.Java,
Metadata: pkg.JavaMetadata{
PomArtifactID: d.ArtifactID,
PomGroupID: d.GroupID,
},
}, nil
}
Loading