Skip to content

Commit e345db7

Browse files
committed
add selects and textareas in form extraction & submission tutorial
1 parent 52cb6d0 commit e345db7

File tree

2 files changed

+63
-5
lines changed

2 files changed

+63
-5
lines changed

web-scraping/extract-and-fill-forms/form_extractor.py

+35-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ def get_form_details(form):
2121
including action, method and list of form controls (inputs, etc)"""
2222
details = {}
2323
# get the form action (requested URL)
24-
action = form.attrs.get("action").lower()
24+
action = form.attrs.get("action")
25+
if action:
26+
action = action.lower()
2527
# get the form method (POST, GET, DELETE, etc)
2628
# if not specified, GET is the default in HTML
2729
method = form.attrs.get("method", "get").lower()
@@ -36,6 +38,38 @@ def get_form_details(form):
3638
input_value =input_tag.attrs.get("value", "")
3739
# add everything to that list
3840
inputs.append({"type": input_type, "name": input_name, "value": input_value})
41+
for select in form.find_all("select"):
42+
# get the name attribute
43+
select_name = select.attrs.get("name")
44+
# set the type as select
45+
select_type = "select"
46+
select_options = []
47+
# the default select value
48+
select_default_value = ""
49+
# iterate over options and get the value of each
50+
for select_option in select.find_all("option"):
51+
# get the option value used to submit the form
52+
option_value = select_option.attrs.get("value")
53+
if option_value:
54+
select_options.append(option_value)
55+
if select_option.attrs.get("selected"):
56+
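# if the 'selected' attribute is set, use this option as the default (NOTE(review): a bare 'selected' attribute appears to parse to an empty string in BeautifulSoup, which is falsy in this check - confirm)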
57+
select_default_value = option_value
58+
if not select_default_value and select_options:
59+
# if the default is not set, and there are options, take the first option as default
60+
select_default_value = select_options[0]
61+
# add the select to the inputs list
62+
inputs.append({"type": select_type, "name": select_name, "values": select_options, "value": select_default_value})
63+
for textarea in form.find_all("textarea"):
64+
# get the name attribute
65+
textarea_name = textarea.attrs.get("name")
66+
# set the type as textarea
67+
textarea_type = "textarea"
68+
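# get the textarea value (NOTE(review): an HTML <textarea> holds its initial content as inner text rather than a 'value' attribute - confirm this lookup returns what is intended)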
69+
textarea_value = textarea.attrs.get("value", "")
70+
# add the textarea to the inputs list
71+
inputs.append({"type": textarea_type, "name": textarea_name, "value": textarea_value})
72+
3973
# put everything to the resulting dictionary
4074
details["action"] = action
4175
details["method"] = method

web-scraping/extract-and-fill-forms/form_submitter.py

+28-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
from bs4 import BeautifulSoup
2-
from requests_html import HTMLSession
32

43
from pprint import pprint
54
from urllib.parse import urljoin
@@ -10,25 +9,50 @@
109

1110
# get the URL from the command line
1211
url = sys.argv[1]
12+
all_forms = get_all_forms(url)
1313
# list every form found on the page and let the user choose which one to submit
14-
first_form = get_all_forms(url)[0]
14+
# first_form = get_all_forms(url)[0]
15+
for i, f in enumerate(all_forms, start=1):
16+
form_details = get_form_details(f)
17+
print(f"{i} #")
18+
pprint(form_details)
19+
print("="*50)
20+
21+
choice = int(input("Enter form indice: "))
1522
# extract all form details
16-
form_details = get_form_details(first_form)
23+
form_details = get_form_details(all_forms[choice-1])
1724
pprint(form_details)
1825
# the data body we want to submit
1926
data = {}
2027
for input_tag in form_details["inputs"]:
2128
if input_tag["type"] == "hidden":
2229
# if it's hidden, use the default value
2330
data[input_tag["name"]] = input_tag["value"]
31+
elif input_tag["type"] == "select":
32+
for i, option in enumerate(input_tag["values"], start=1):
33+
# iterate over available select options
34+
if option == input_tag["value"]:
35+
print(f"{i} # {option} (default)")
36+
else:
37+
print(f"{i} # {option}")
38+
choice = input(f"Enter the option for the select field '{input_tag['name']}' (1-{i}): ")
39+
try:
40+
choice = int(choice)
41+
except:
42+
# choice invalid, take the default
43+
value = input_tag["value"]
44+
else:
45+
value = input_tag["values"][choice-1]
46+
data[input_tag["name"]] = value
2447
elif input_tag["type"] != "submit":
2548
# all others except submit, prompt the user to set it
2649
value = input(f"Enter the value of the field '{input_tag['name']}' (type: {input_tag['type']}): ")
2750
data[input_tag["name"]] = value
51+
2852

2953
# join the url with the action (form request URL)
3054
url = urljoin(url, form_details["action"])
31-
55+
# pprint(data)
3256
if form_details["method"] == "post":
3357
res = session.post(url, data=data)
3458
elif form_details["method"] == "get":

0 commit comments

Comments
 (0)