# urllib.request

### Task 1 - Get my Public IP address

In [1]:
import urllib.request

In [2]:
# Open the IP-echo service. A timeout is passed explicitly because urlopen()
# otherwise falls back to the global socket default (no timeout), which can
# leave the cell hanging forever if the server never answers.
connection = urllib.request.urlopen('http://checkip.dyndns.org', timeout=10)

In [3]:
# Summarise the response with self-documenting f-string fields: the
# "{expr = }" form prints the expression text followed by the repr of its value.
print(f'''
    {connection.url    = }
    {connection.status = }
    {connection.reason = }
    {connection.msg    = }
    {connection.code   = }
''')


    connection.url    = 'http://checkip.dyndns.org'
    connection.status = 200
    connection.reason = 'OK'
    connection.msg    = 'OK'
    connection.code   = 200



In [5]:
# Read the response body before inspecting it. Without this assignment `data`
# is undefined on a fresh kernel and the cell fails with NameError.
data = connection.read()
type(data)

bytes

In [6]:
# Decode the raw bytes to text. The isinstance guard keeps the cell idempotent:
# re-running it after `data` is already a str would otherwise raise
# AttributeError ('str' object has no attribute 'decode').
if isinstance(data, bytes):
    data = data.decode('utf-8')
print(type(data))

<class 'str'>


In [7]:
# Display the decoded response text.
data

'<html><head><title>Current IP Check</title></head><body>Current IP Address: 103.41.99.225</body></html>\r\n'

#### Parsing the HTML
Method 1 - Brute force (string splitting)

In [8]:
# Keep everything that follows the 'Current IP Address: ' label
# (element [1] of the split is the text after the first occurrence).
data.split('Current IP Address: ')[1]

'103.41.99.227</body></html>\r\n'

In [9]:
# Two-step extraction: first drop everything up to and including the label,
# then cut the remainder at the closing </body> tag.
after_label = data.split('Current IP Address: ')[1]
my_public_ip = after_label.split('</body>')[0]
print("My Public IP Address is: ", my_public_ip)

My Public IP Address is:  103.41.99.227


Method 2 - Using regular expressions

In [10]:
import re

In [11]:
# Match a dotted-quad IPv4 address. The dots are escaped (\.) so they match a
# literal '.' only; an unescaped '.' matches ANY character, which would let the
# pattern accept strings such as '103a41b99c227'.
my_public_ip = re.findall(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}", data)[0]
print("My Public IP Address is: ", my_public_ip)

My Public IP Address is:  103.41.99.227


### Task 2 - Get all valid weblinks from a website

In [12]:
import urllib.request

In [13]:
# Fetch the Stack Overflow front page. The explicit timeout prevents the cell
# from blocking indefinitely when the server is slow or unreachable.
connection = urllib.request.urlopen('https://www.stackoverflow.com', timeout=10)

In [14]:
# Print the final URL and the HTTP status fields of the response; each
# "{expr = }" field shows the expression text followed by the repr of its value.
print(f'''
    {connection.url    = }
    {connection.status = }
    {connection.reason = }
    {connection.msg    = }
    {connection.code   = }
''')


    connection.url    = 'https://stackoverflow.com/'
    connection.status = 200
    connection.reason = 'OK'
    connection.msg    = 'OK'
    connection.code   = 200



In [15]:
# Read the whole response body; it is written to disk in binary mode and
# handed to the HTML parser in the cells that follow.
data = connection.read()

In [16]:
# Preview only the first 500 bytes: echoing the whole page floods the cell
# output and bloats the saved notebook.
data[:500]



In [17]:
# Save the raw page to disk in binary mode. The `with` block closes the file
# automatically on exit, so no explicit f.close() is needed.
with open('stackoverflow.html', 'wb') as f:
    f.write(data)

### Parsing HTML data

In [18]:
# Parse the downloaded page with lxml; `dom` is the element returned by
# lxml.html.fromstring() for the document held in `data`.
import lxml.html
dom = lxml.html.fromstring(data)

In [19]:
dom  # Document Object Model (DOM) of the parsed HTML page

<Element html at 0x1f785789680>

#### Select the URL in the `href` attribute of every `<a>` tag (link)

In [20]:
# Walk the href attribute of every <a> element and print only those whose
# text begins with 'https'; all other hrefs are skipped.
for link in dom.xpath('//a/@href'):
    if not str(link).startswith('https'):
        continue
    print(link)

https://stackoverflow.com
https://stackoverflow.com/talent
https://stackoverflow.com/advertising
https://stackoverflow.com/users/login?ssrc=head&returnurl=https%3a%2f%2fstackoverflow.com%2f
https://stackoverflow.com/users/signup?ssrc=head&returnurl=%2fusers%2fstory%2fcurrent
https://stackoverflow.com
https://stackoverflow.com
https://stackoverflow.com/help
https://chat.stackoverflow.com
https://meta.stackoverflow.com
https://stackoverflow.com/users/signup?ssrc=site_switcher&returnurl=%2fusers%2fstory%2fcurrent
https://stackoverflow.com/users/login?ssrc=site_switcher&returnurl=https%3a%2f%2fstackoverflow.com%2f
https://stackexchange.com/sites
https://stackoverflow.blog
https://stackoverflow.com/legal/cookie-policy
https://stackoverflow.com/legal/privacy-policy
https://stackoverflow.com/legal/terms-of-service/public
https://stackoverflow.com/teams
https://stackoverflow.com/teams
https://stackoverflow.com/talent
https://stackoverflow.com/advertising
https://www.g2.com/products/stack-overf

### Task 3 - Get Image data

In [21]:
# NOTE: despite its name, `connection` is a urllib.request.Request object here
# (a description of the request), not an opened response.
URL = 'https://http.cat/200'
connection = urllib.request.Request(URL)

In [48]:
# Inspect the Request object: URL components first, then the HTTP method and
# the header collections. Each "{expr =}" field prints the expression text
# followed by the repr of its value.
print(f'''
    {connection.unverifiable      =}
    {connection.type              =}
    {connection.full_url          =}
    {connection.host              =}
    {connection.origin_req_host   =}
    
    {connection.get_method()      =}
    {connection.headers           =}
    {connection.header_items()    =}
    {connection.unredirected_hdrs =}
''')


    connection.unverifiable      =False
    connection.type              ='https'
    connection.full_url          ='https://http.cat/200'
    connection.host              ='http.cat'
    connection.origin_req_host   ='http.cat'
    
    connection.get_method()      ='GET'
    connection.headers           ={}
    connection.header_items()    =[]
    connection.unredirected_hdrs ={}



In [49]:
# Body attached to the Request. No output was recorded for this cell --
# presumably the value is None because only a URL was passed to Request();
# TODO confirm.
connection.data

In [64]:
# Download the image itself. The explicit timeout keeps the cell from hanging
# forever if the host does not respond.
URL = 'https://http.cat/200.jpg'
connection = urllib.request.urlopen(URL, timeout=10)

In [65]:
# Print the URL and the HTTP status fields of the image response; each
# "{expr = }" field shows the expression text followed by the repr of its value.
print(f'''
    {connection.url    = }
    {connection.status = }
    {connection.reason = }
    {connection.msg    = }
    {connection.code   = }
''')


    connection.url    = 'https://http.cat/200.jpg'
    connection.status = 200
    connection.reason = 'OK'
    connection.msg    = 'OK'
    connection.code   = 200



In [67]:
# Read the response body of the image request into `data`.
data = connection.read()

In [68]:
# List every attribute and method name available on the response object.
print(dir(connection))

['__abstractmethods__', '__class__', '__del__', '__delattr__', '__dict__', '__dir__', '__doc__', '__enter__', '__eq__', '__exit__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__next__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '_abc_impl', '_checkClosed', '_checkReadable', '_checkSeekable', '_checkWritable', '_check_close', '_close_conn', '_get_chunk_left', '_method', '_peek_chunked', '_read1_chunked', '_read_and_discard_trailer', '_read_next_chunk_size', '_read_status', '_readall_chunked', '_readinto_chunked', '_safe_read', '_safe_readinto', 'begin', 'chunk_left', 'chunked', 'close', 'closed', 'code', 'debuglevel', 'detach', 'fileno', 'flush', 'fp', 'getcode', 'getheader', 'getheaders', 'geturl', 'headers', 'info', 'isatty', 'isclosed', 'length', 'msg', 'peek', 'read', 'read1', 'readable', 'readinto', 

In [69]:
# Response headers; the bare expression displays only the object's repr
# (an http.client.HTTPMessage), not the individual header values.
connection.headers

<http.client.HTTPMessage at 0x1f785859df0>

In [70]:
# Check the type of the body that was read from the image response.
type(data)

bytes