Skip to content

Commit 5f66efb

Browse files
committed
scrape: support textarea, radio, and checkbox
Improve form parsing to support textarea elements, as well as proper handling of radio and checkbox inputs (only include the value if the "checked" attribute is set). Also allow a nil setValues func for fetchAndSubmitForm: while I don't know of a case (yet) where we want to pass nil, there's no point in panicking when it happens.
1 parent d913de9 commit 5f66efb

File tree

2 files changed

+58
-5
lines changed

2 files changed

+58
-5
lines changed

scrape/forms.go

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"fmt"
1414
"net/http"
1515
"net/url"
16+
"strings"
1617

1718
"github.com/PuerkitoBio/goquery"
1819
"golang.org/x/net/html"
@@ -29,8 +30,8 @@ type htmlForm struct {
2930
}
3031

3132
// parseForms parses and returns all form elements beneath node. Form values
32-
// include all nested input elements within the form (textarea is not currently
33-
// supported).
33+
// include all input and textarea elements within the form. The values of radio
34+
// and checkbox inputs are included only if they are checked.
3435
//
3536
// In the future, we might want to allow a custom selector to be passed in to
3637
// further restrict what forms will be returned.
@@ -47,10 +48,28 @@ func parseForms(node *html.Node) (forms []htmlForm) {
4748

4849
s.Find("input").Each(func(_ int, s *goquery.Selection) {
4950
name, _ := s.Attr("name")
51+
if name == "" {
52+
return
53+
}
54+
55+
typ, _ := s.Attr("type")
56+
typ = strings.ToLower(typ)
57+
_, checked := s.Attr("checked")
58+
if (typ == "radio" || typ == "checkbox") && !checked {
59+
return
60+
}
61+
5062
value, _ := s.Attr("value")
51-
if name != "" {
52-
form.Values.Add(name, value)
63+
form.Values.Add(name, value)
64+
})
65+
s.Find("textarea").Each(func(_ int, s *goquery.Selection) {
66+
name, _ := s.Attr("name")
67+
if name == "" {
68+
return
5369
}
70+
71+
value := s.Text()
72+
form.Values.Add(name, value)
5473
})
5574
forms = append(forms, form)
5675
})
@@ -87,7 +106,9 @@ func fetchAndSubmitForm(client *http.Client, urlStr string, setValues func(url.V
87106
actionURL = resp.Request.URL.ResolveReference(actionURL)
88107

89108
// allow caller to fill out the form
90-
setValues(form.Values)
109+
if setValues != nil {
110+
setValues(form.Values)
111+
}
91112

92113
resp, err = client.PostForm(actionURL.String(), form.Values)
93114
if err != nil {

scrape/forms_test.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,38 @@ func Test_ParseForms(t *testing.T) {
3535
{Action: "a2", Method: "m2", Values: url.Values{"n2": {"v2"}}},
3636
},
3737
},
38+
{
39+
"form with radio buttons (none checked)",
40+
`<html><form>
41+
<input type="radio" name="n1" value="v1">
42+
<input type="radio" name="n1" value="v2">
43+
<input type="radio" name="n1" value="v3">
44+
</form></html>`,
45+
[]htmlForm{{Values: url.Values{}}},
46+
},
47+
{
48+
"form with radio buttons",
49+
`<html><form>
50+
<input type="radio" name="n1" value="v1">
51+
<input type="radio" name="n1" value="v2">
52+
<input type="radio" name="n1" value="v3" checked>
53+
</form></html>`,
54+
[]htmlForm{{Values: url.Values{"n1": {"v3"}}}},
55+
},
56+
{
57+
"form with checkboxes",
58+
`<html><form>
59+
<input type="checkbox" name="n1" value="v1" checked>
60+
<input type="checkbox" name="n2" value="v2">
61+
<input type="checkbox" name="n3" value="v3" checked>
62+
</form></html>`,
63+
[]htmlForm{{Values: url.Values{"n1": {"v1"}, "n3": {"v3"}}}},
64+
},
65+
{
66+
"single form with textarea",
67+
`<html><form><textarea name="n1">v1</textarea></form></html>`,
68+
[]htmlForm{{Values: url.Values{"n1": {"v1"}}}},
69+
},
3870
}
3971

4072
for _, tt := range tests {

0 commit comments

Comments
 (0)