# In [2]:  (Jupyter cell prompt left over from the notebook export)
import requests
from bs4 import BeautifulSoup
import re
import math
import itertools
import json
import urllib
from pandas.io.json import json_normalize
import pandas as pd
import time


# Main URL - the page we scrape the `accessvariable` hash code from; the site
# wants this code to prove that we are human.
# Which page is used is not important: any page is fetched just for the hash.
main_url = 'https://resistancemap.cddep.org/AntibioticResistance.php'

# Data URL - the request parameters are sent here together with the hash code.
data_url = 'https://resistancemap.cddep.org/getData.php'

# Years to request: 2000 through 2015, followed by -99.
# NOTE(review): the server-side meaning of -99 is not visible here - confirm.
years = list(range(2000, 2016)) + [-99]

# Request parameters; every country ID is listed so the whole data set is
# returned.
parameters = {
    # All countries
    'countryId[]': [
        39, 13, 35, 65, 5, 38, 17, 33, 97, 83,
        73, 37, 44, 43, 34, 32, 31, 42, 30, 29,
        28, 27, 23, 25, 22, 21, 18, 19, 15, 14,
        12, 11, 10, 8, 9, 4, 6, 98, 2, 3,
        36, 93, 1,
    ],
    'charttype': 'chart',
    'defaultValues': 'true',
    'page': 'DRI',
}


def get_source(url):
    """Download *url* and return its body as a parsed BeautifulSoup tree.

    The raw response bytes are handed to BeautifulSoup using Python's
    built-in ``html.parser`` backend.
    """
    response = requests.get(url)
    return BeautifulSoup(response.content, 'html.parser')


def get_access_variable(url):
    """Return the ``accessvariable`` token embedded in the page at *url*.

    The page is downloaded and parsed, its third
    ``<script type="text/javascript">`` element is selected, and the text
    between the quotes of ``accessvariable: "..."`` is extracted from it.

    Args:
        url: Address of a page whose inline JavaScript contains the token.

    Returns:
        The token as a ``str``.

    Raises:
        IndexError: If the page has fewer than three ``text/javascript``
            script elements.
        ValueError: If the selected script contains no ``accessvariable``
            entry.
    """
    soup = get_source(url)
    # NOTE(review): index 2 is tied to the page layout at the time of
    # writing - confirm it still points at the script holding the token.
    script = soup.find_all('script', type="text/javascript")[2]
    # [^"]* stops at the first closing quote, so further quoted values on the
    # same line are not swallowed into the token.
    match = re.search(r'accessvariable: "([^"]*)"', str(script))
    if match is None:
        raise ValueError(
            'accessvariable not found in the selected script of ' + str(url)
        )
    return match.group(1)


def save_xlsx(data, excel_name):
    """Write *data* to ``<excel_name>.xlsx`` with a bold header row.

    The frame's values are written starting on the second row of
    ``Sheet1`` without pandas' own header; the column names are then
    written by hand into the first row using a custom cell format.

    Args:
        data: The ``pandas.DataFrame`` to export.
        excel_name: Output path without the ``.xlsx`` extension.
    """
    # The context manager saves and closes the workbook, also when an
    # exception is raised part-way through.
    with pd.ExcelWriter(excel_name + '.xlsx', engine='xlsxwriter') as writer:
        data.to_excel(writer,
                      sheet_name='Sheet1',
                      index=False,
                      startrow=1,
                      header=False)
        worksheet = writer.sheets['Sheet1']
        header_format = writer.book.add_format({
            'bold': True,
            'valign': 'top',
            'border': 0
        })
        worksheet.set_row(0, None, header_format)
        # Row 0 was left empty by startrow=1; fill it with the column names.
        for col_num, value in enumerate(data.columns.values):
            worksheet.write(0, col_num, value, header_format)


# Hash code: attach the access token scraped from the main page so that it is
# sent along with every data request.
parameters["accessvariable"] = get_access_variable(main_url)

# Get the JSON response for each year, one POST per year. The "year"
# parameter is set inside the loop, so no separate request is made up front.
dataframes = []
for y in years:
    parameters["year"] = y
    r = requests.post(data_url, data=parameters).json()
    # NOTE(review): element 1 of the decoded response is assumed to hold the
    # records for the requested year - confirm against the API.
    dataframes.append(pd.DataFrame(r[1]))
    # Pause two seconds between requests (presumably to go easy on the server).
    time.sleep(2)

# Concatenate all years into one frame with a fresh index.
# Named `all_years` so the builtin `all` is not shadowed.
all_years = pd.concat(dataframes, ignore_index=True)

# Write to Excel.
all_years.to_excel("DrugResistance.xlsx", index=False, header=True)