@@ -587,13 +587,19 @@ func (c *Collector) fetch(u, method string, depth int, requestData io.Reader, ct
587587
588588 c .handleOnResponse (response )
589589
590- c .handleOnHTML (response )
590+ err = c .handleOnHTML (response )
591+ if err != nil {
592+ c .handleOnError (response , err , request , ctx )
593+ }
591594
592- c .handleOnXML (response )
595+ err = c .handleOnXML (response )
596+ if err != nil {
597+ c .handleOnError (response , err , request , ctx )
598+ }
593599
594600 c .handleOnScraped (response )
595601
596- return nil
602+ return err
597603}
598604
599605func (c * Collector ) requestCheck (u , method string , depth int , checkRevisit bool ) error {
@@ -912,13 +918,13 @@ func (c *Collector) handleOnResponse(r *Response) {
912918 }
913919}
914920
915- func (c * Collector ) handleOnHTML (resp * Response ) {
921+ func (c * Collector ) handleOnHTML (resp * Response ) error {
916922 if len (c .htmlCallbacks ) == 0 || ! strings .Contains (strings .ToLower (resp .Headers .Get ("Content-Type" )), "html" ) {
917- return
923+ return nil
918924 }
919925 doc , err := goquery .NewDocumentFromReader (bytes .NewBuffer (resp .Body ))
920926 if err != nil {
921- return
927+ return err
922928 }
923929 if href , found := doc .Find ("base[href]" ).Attr ("href" ); found {
924930 resp .Request .baseURL , _ = url .Parse (href )
@@ -937,21 +943,22 @@ func (c *Collector) handleOnHTML(resp *Response) {
937943 }
938944 })
939945 }
946+ return nil
940947}
941948
942- func (c * Collector ) handleOnXML (resp * Response ) {
949+ func (c * Collector ) handleOnXML (resp * Response ) error {
943950 if len (c .xmlCallbacks ) == 0 {
944- return
951+ return nil
945952 }
946953 contentType := strings .ToLower (resp .Headers .Get ("Content-Type" ))
947954 if ! strings .Contains (contentType , "html" ) && ! strings .Contains (contentType , "xml" ) {
948- return
955+ return nil
949956 }
950957
951958 if strings .Contains (contentType , "html" ) {
952959 doc , err := htmlquery .Parse (bytes .NewBuffer (resp .Body ))
953960 if err != nil {
954- return
961+ return err
955962 }
956963 if e := htmlquery .FindOne (doc , "//base/@href" ); e != nil {
957964 for _ , a := range e .Attr {
@@ -977,7 +984,7 @@ func (c *Collector) handleOnXML(resp *Response) {
977984 } else if strings .Contains (contentType , "xml" ) {
978985 doc , err := xmlquery .Parse (bytes .NewBuffer (resp .Body ))
979986 if err != nil {
980- return
987+ return err
981988 }
982989
983990 for _ , cc := range c .xmlCallbacks {
@@ -993,13 +1000,14 @@ func (c *Collector) handleOnXML(resp *Response) {
9931000 })
9941001 }
9951002 }
1003+ return nil
9961004}
9971005
9981006func (c * Collector ) handleOnError (response * Response , err error , request * Request , ctx * Context ) error {
9991007 if err == nil && (c .ParseHTTPErrorResponse || response .StatusCode < 203 ) {
10001008 return nil
10011009 }
1002- if err == nil {
1010+ if err == nil && response . StatusCode >= 203 {
10031011 err = errors .New (http .StatusText (response .StatusCode ))
10041012 }
10051013 if response == nil {
0 commit comments