In [70]:
import urllib.parse as urlparse
import urllib.request as urlreq
import glob
import sqlite3
import os
import http.cookiejar as cookielib
import json
import time
import lxml.html


LOGIN_EMAIL = 'b@gm.com'
LOGIN_PASSWORD = '1234567'
LOGIN_URL = 'http://example.webscraping.com/places/default/user/login'


def login_basic():
    """fails because not using formkey
    """
    data = {'email': LOGIN_EMAIL, 'password': LOGIN_PASSWORD}
    encoded_data = urlparse.urlencode(data).encode()
    request = urlreq.Request(LOGIN_URL, encoded_data)
    response = urlreq.urlopen(request)
    print(response.geturl())


def login_formkey():
    """fails because not using cookies to match formkey
    """
    html = urlreq.urlopen(LOGIN_URL).read()
    data = parse_form(html)
    data['email'] = LOGIN_EMAIL
    data['password'] = LOGIN_PASSWORD
    encoded_data = urlparse.urlencode(data).encode()
    request = urlreq.Request(LOGIN_URL, encoded_data)
    response = urlreq.urlopen(request)
    print (response.geturl())


def login_cookies():
    """working login
    """
    cj = cookielib.CookieJar()
    opener = urlreq.build_opener(urlreq.HTTPCookieProcessor(cj))
    html = opener.open(LOGIN_URL).read()
    data = parse_form(html)
    data['email'] = LOGIN_EMAIL
    data['password'] = LOGIN_PASSWORD
    encoded_data = urlparse.urlencode(data).encode()
    request = urlreq.Request(LOGIN_URL, encoded_data)
    response = opener.open(request)
    print (response.geturl())
    return opener


def login_firefox():
    """load cookies from firefox
    """
    session_filename = find_ff_sessions()
    cj = load_ff_sessions(session_filename)
    opener = urlreq.build_opener(urlreq.HTTPCookieProcessor(cj))
    html = opener.open(COUNTRY_URL).read()

    tree = lxml.html.fromstring(html)
    print (tree.cssselect('ul#navbar li a')[0].text_content())
    return opener


def parse_form(html):
    """extract all input properties from the form
    """
    tree = lxml.html.fromstring(html)
    data = {}
    for e in tree.cssselect('form input'):
        if e.get('name'):
            data[e.get('name')] = e.get('value')
    return data



def load_ff_sessions(session_filename):
    cj = cookielib.CookieJar()
    if os.path.exists(session_filename):  
        try: 
            json_data = json.loads(open(session_filename, 'rb').read())
        except ValueError as e:
            print ('Error parsing session JSON:', str(e))
        else:
            for window in json_data.get('windows', []):
                for cookie in window.get('cookies', []):
                    import pprint; pprint.pprint(cookie)
                    c = cookielib.Cookie(0, cookie.get('name', ''), cookie.get('value', ''), 
                        None, False, 
                        cookie.get('host', ''), cookie.get('host', '').startswith('.'), cookie.get('host', '').startswith('.'), 
                        cookie.get('path', ''), False,
                        False, str(int(time.time()) + 3600 * 24 * 7), False, 
                        None, None, {})
                    cj.set_cookie(c)
    else:
        print ('Session filename does not exist:', session_filename)
    return cj


def find_ff_sessions():
    paths = [
        '~/.mozilla/firefox/*.default',
        '~/Library/Application Support/Firefox/Profiles/*.default',
        '%APPDATA%/Roaming/Mozilla/Firefox/Profiles/*.default'
    ]
    for path in paths:
        filename = os.path.join(path, 'sessionstore.js')
        matches = glob.glob(os.path.expanduser(filename))
        if matches:
            return matches[0]

In [71]:
login_basic()

http://example.webscraping.com/places/default/user/login


In [72]:
login_formkey()

http://example.webscraping.com/places/default/user/login


In [73]:
opener = login_cookies()

http://example.webscraping.com/places/default/index


In [74]:
login_firefox()

TypeError: stat: path should be string, bytes, os.PathLike or integer, not NoneType

In [75]:
COUNTRY_URL = 'http://example.webscraping.com/places/default/edit/Afghanistan-1'

def edit_country():
    opener = login_cookies()
    country_html = opener.open(COUNTRY_URL).read()
    data = parse_form(country_html)
    import pprint; pprint.pprint(data)
    print('Population before: ' + data['population'])
    data['population'] = int(data['population']) + 1
    encoded_data = urlparse.urlencode(data).encode()
    request = urlreq.Request(COUNTRY_URL, encoded_data)
    response = opener.open(request)

    country_html = opener.open(COUNTRY_URL).read()
    data = parse_form(country_html)
    print('Population after:', data['population'])

In [76]:
edit_country()

http://example.webscraping.com/places/default/index
{'_formkey': '478274c2-7580-4fc0-b760-e5d15644df54',
 '_formname': 'places/2163421',
 'area': '647500.00',
 'capital': 'Kabul',
 'continent': 'AS',
 'country': 'Afghanistan',
 'currency_code': 'AFN',
 'currency_name': 'Afghani',
 'id': '2163421',
 'iso': 'AF',
 'languages': 'fa-AF,ps,uz-AF,tk',
 'neighbours': 'TM,CN,IR,TJ,PK,UZ',
 'phone': '93',
 'population': '29121286',
 'postal_code_format': '',
 'postal_code_regex': '',
 'tld': '.af'}
Population before: 29121286
Population after: 29121287


In [81]:
import bs4
import requests

def bs4_edit():
    """Use beautifulsoup4 to increment population
    """
    # login
    sess = requests.Session()
    soup = bs4.BeautifulSoup(sess.get(LOGIN_URL).content)
    form = soup.find('form')
    fields = form.findAll('input')
    print(fields)

    formdata = dict( (field.get('name'), field.get('value')) for field in fields)

    formdata['email'] = LOGIN_EMAIL
    formdata['password'] = LOGIN_PASSWORD

    print(formdata)
    posturl = urlparse.urljoin(LOGIN_URL, form['action'])
    print(posturl)

    resp = sess.post(posturl, data=formdata)
    print(resp.url) 

    # edit country
    soup = bs4.BeautifulSoup(sess.get(COUNTRY_URL).content)
    form = soup.find('form')
    fields = form.findAll('input')
    formdata = dict( (field.get('name'), field.get('value')) for field in fields)
    print('Population before:', formdata['population'])
    formdata['population'] = str(int(formdata['population']) + 1)
    print(formdata)
    posturl = urlparse.urljoin(COUNTRY_URL, form['action'])
    print(posturl)

    resp = sess.post(posturl, data=formdata)

    # check population increased
    soup = bs4.BeautifulSoup(sess.get(COUNTRY_URL).content)
    form = soup.find('form')
    fields = form.findAll('input')
    formdata = dict( (field.get('name'), field.get('value')) for field in fields)
    print('Population after:', formdata['population'])


In [82]:
bs4_edit()



 BeautifulSoup(YOUR_MARKUP})

to this:

 BeautifulSoup(YOUR_MARKUP, "lxml")

  markup_type=markup_type))


[<input class="string" id="auth_user_email" name="email" type="text" value=""/>, <input class="password" id="auth_user_password" name="password" type="password" value=""/>, <input class="boolean" id="auth_user_remember_me" name="remember_me" type="checkbox" value="on"/>, <input type="submit" value="Log In"/>, <input name="_next" type="hidden" value="/places/default/index"/>, <input name="_formkey" type="hidden" value="a5305586-f506-4736-9289-78b04883eacd"/>, <input name="_formname" type="hidden" value="login"/>]
{'email': 'b@gm.com', 'password': '1234567', 'remember_me': 'on', None: 'Log In', '_next': '/places/default/index', '_formkey': 'a5305586-f506-4736-9289-78b04883eacd', '_formname': 'login'}
http://example.webscraping.com/places/default/user/login
http://example.webscraping.com/places/default/index
Population before: 29121288
{'area': '647500.00', 'population': '29121289', 'iso': 'AF', 'country': 'Afghanistan', 'capital': 'Kabul', 'continent': 'AS', 'tld': '.af', 'currency_code'