Skip to content

Commit 69f7bf5

Browse files
authored
Merge pull request #128 from OscarScholten/master
Connection error when getting robots.txt leads to runtime error
2 parents 3439a67 + 8a6bf23 commit 69f7bf5

File tree

2 files changed

+17
-5
lines changed

2 files changed

+17
-5
lines changed

colly.go

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -565,17 +565,16 @@ func (c *Collector) isDomainAllowed(domain string) bool {
565565
}
566566

567567
func (c *Collector) checkRobots(u *url.URL) error {
568-
// var robot *robotstxt.RobotsData
569-
// var ok bool
570-
var err error
571-
572568
c.lock.RLock()
573569
robot, ok := c.robotsMap[u.Host]
574570
c.lock.RUnlock()
575571

576572
if !ok {
577573
// no robots file cached
578-
resp, _ := c.backend.Client.Get(u.Scheme + "://" + u.Host + "/robots.txt")
574+
resp, err := c.backend.Client.Get(u.Scheme + "://" + u.Host + "/robots.txt")
575+
if err != nil {
576+
return err
577+
}
579578
robot, err = robotstxt.FromResponse(resp)
580579
if err != nil {
581580
return err

colly_test.go

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -544,6 +544,19 @@ func TestIgnoreRobotsWhenDisallowed(t *testing.T) {
544544

545545
}
546546

547+
func TestConnectionErrorOnRobotsTxtResultsInError(t *testing.T) {
548+
ts := newTestServer()
549+
ts.Close() // immediately close the server to force a connection error
550+
551+
c := NewCollector()
552+
c.IgnoreRobotsTxt = false
553+
err := c.Visit(ts.URL)
554+
555+
if err == nil {
556+
t.Fatal("Error expected")
557+
}
558+
}
559+
547560
func TestEnvSettings(t *testing.T) {
548561
ts := newTestServer()
549562
defer ts.Close()

0 commit comments

Comments (0)