# MacroX
## Fetch Air-Pollution Data - India
---
The below code fetches **Air Pollution** data of Indian Cities from official government source. [CPC](https://app.cpcbccr.com/ccr/#/caaqm-dashboard-all/caaqm-landing/data).
Installations required prior running the code :-
1. Python 3.8
2. Selenium installed [`pip install selenium`]
3. Pandas installed [`pip install pandas`]
4. Latest geckodriver [version - 0.31.0, [GeckoLink](https://github.com/mozilla/geckodriver/releases)]
5. Latest version of Mozilla FireFox [FireFox](https://www.mozilla.org/en-US/firefox/)

## Code Flow

### Install Dependencies

In [1]:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.select import Select
from selenium.webdriver.common.action_chains import ActionChains
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import pandas as pd
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.alert import Alert
from selenium.webdriver.common.by import By

In [2]:
df = pd.read_excel("India_cities.xlsx")
df = df.iloc[3:7,:]
df

Unnamed: 0,STATE,CITY,NO2,SO2,PM2.5,CO,Stations,Comments,Single File,Two Files,Unnamed: 10
3,Karnataka,Bengaluru,Y,Y,Y,Y,10,,Y,,Rui
4,Tamil Nadu,Chennai,Y,Y,Y,,9,,Y,,
5,Maharashtra,Pune,Y,Y,Y,,7,SO2 Missing from Official Source,,,
6,West Bengal,Kolkata,Y,Y,Y,,6,,Y,,


### Fetch List of available stations for a given city



In [3]:
PATH = r'C:\Users\Rui\Desktop\Capstone_MacroX\geckodriver.exe' #Provide the path of geckodriver.exe
downloadDir= r"C:\Users\Rui\Desktop\Capstone_MacroX\Gov_data" #Change the download directory

def get_station(state, city):
    options = FirefoxOptions()
    options.add_argument("window-size=2400,1400")
    options.set_preference("dom.push.enabled", False)

    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'

    fp = webdriver.FirefoxProfile()
    fp.set_preference("browser.download.folderList", 2)
    fp.set_preference("browser.download.manager.showWhenStarting", False)
    fp.set_preference("browser.download.dir", downloadDir)
    fp.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain, application/octet-stream, application/binary, text/csv, application/csv, application/excel, text/comma-separated-values, text/xml, application/xml")
    fp.set_preference("general.useragent.override", user_agent)
    fp.set_preference("security.insecure_field_warning.contextual.enabled", False)
    fp.set_preference("browser.cache.disk.enable", False)
    fp.set_preference("browser.cache.memory.enable", False)
    fp.set_preference("browser.cache.offline.enable", False)
    fp.set_preference("network.http.use-cache", False)

    #Opens firefox with link mentioned in get
    driver = webdriver.Firefox(executable_path=PATH, options = options, firefox_profile=fp)
    driver.delete_all_cookies()
    driver.get('https://app.cpcbccr.com/ccr/#/caaqm-dashboard-all/caaqm-landing/data')
    time.sleep(15)

    l = driver.find_element("xpath","//*[contains(text(), 'Select ...')]")
    l.click()
    m = driver.find_element("xpath","//*[contains(text(), '" + state + "')]") #Select the State for ex: Maharashtra
    m.click()

    time.sleep(3)

    l = driver.find_element("xpath","//*[contains(text(), 'Select ...')]")
    l.click()

    m = driver.find_element("xpath","//*[contains(text(), '" + city + "')]") #Select City for ex: Mumbai
    m.click()

    time.sleep(3)

    l = driver.find_element("xpath","//*[contains(text(), 'Select ...')]")
    l.click()

    all_li = driver.find_elements("tag name","li")
    lis = []
    #Gives a list of all stations in the City
    for li in all_li:
        text = li.text
        print(text)
        lis.append(text)

    driver.quit()
    return lis

In [11]:
station = {}
for i in range(len(df)):
    state = df.STATE.iloc[i]
    city = df.CITY.iloc[i]
    station[state + ' ' + city] = get_station(state, city)

  fp = webdriver.FirefoxProfile()
  driver = webdriver.Firefox(executable_path=PATH, options = options, firefox_profile=fp)
  driver = webdriver.Firefox(executable_path=PATH, options = options, firefox_profile=fp)


BTM Layout, Bengaluru - CPCB
BWSSB Kadabesanahalli, Bengaluru - CPCB
Bapuji Nagar, Bengaluru - KSPCB
City Railway Station, Bengaluru - KSPCB
Hebbal, Bengaluru - KSPCB
Hombegowda Nagar, Bengaluru - KSPCB
Jayanagar 5th Block, Bengaluru - KSPCB
Peenya, Bengaluru - CPCB
Sanegurava Halli, Bengaluru - KSPCB
Silk Board, Bengaluru - KSPCB
Alandur Bus Depot, Chennai - CPCB
Arumbakkam, Chennai - TNPCB
Gandhi Nagar_Ennore, Chennai - TNPCB
Kodungaiyur, Chennai - TNPCB
Manali Village, Chennai - TNPCB
Manali, Chennai - CPCB
Perungudi, Chennai - TNPCB
Royapuram, Chennai - TNPCB
Velachery Res. Area, Chennai - CPCB
Alandi, Pune - IITM
Bhosari, Pune - IITM
Hadapsar, Pune - IITM
Karve Road, Pune - MPCB
MIT-Kothrud, Pune - IITM
Mhada Colony, Pune - IITM
Revenue Colony-Shivajinagar, Pune - IITM
Transport Nagar-Nigdi, Pune - IITM
Ballygunge, Kolkata - WBPCB
Bidhannagar, Kolkata - WBPCB
Fort William, Kolkata - WBPCB
Jadavpur, Kolkata - WBPCB
Rabindra Bharati University, Kolkata - WBPCB
Rabindra Sarobar, Kolk

In [4]:
import json
f = open('station.json')
station = json.load(f)

In [5]:
import os

### Iterate over the list fetched Above

In [7]:
#Iterate over the Cities found in the above code
for i in range(len(df)):
    state = df.STATE.iloc[i]
    city = df.CITY.iloc[i]
    for li in station[state + ' ' + city]:
        print(li)
        text = li
        #PATH = r'C:\Users\zhxum\OneDrive - University of Rochester\UR\Fall 2022\DSCC 383\AirvsEconomy\geckodriver.exe' #Provide the path of geckodriver.exe
        #downloadDir= r"C:\Users\zhxum\OneDrive - University of Rochester\UR\Fall 2022\DSCC 383\Capstone_MacroX\Gases_Official_Data" #Change the download directory - Provide the City name


        options = FirefoxOptions()
        #options.add_argument("--headless")
        options.add_argument("window-size=2400,1400")
        options.set_preference("dom.push.enabled", False)

        user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'

        fp = webdriver.FirefoxProfile()
        fp.set_preference("browser.download.folderList", 2)
        fp.set_preference("browser.download.manager.showWhenStarting", False)
        fp.set_preference("browser.download.dir", downloadDir)
        fp.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain, application/octet-stream, application/binary, text/csv, application/csv, application/excel, text/comma-separated-values, text/xml, application/xml")
        fp.set_preference("general.useragent.override", user_agent)
        fp.set_preference("security.insecure_field_warning.contextual.enabled", False)
        fp.set_preference("browser.cache.disk.enable", False)
        fp.set_preference("browser.cache.memory.enable", False)
        fp.set_preference("browser.cache.offline.enable", False)
        fp.set_preference("network.http.use-cache", False)

        #Opens firefox with link mentioned in get
        driver = webdriver.Firefox(executable_path=PATH, options = options, firefox_profile=fp)
        driver.delete_all_cookies()
        driver.get('https://app.cpcbccr.com/ccr/#/caaqm-dashboard-all/caaqm-landing/data')
        while(True):
            try:
                l = driver.find_element("xpath","//*[contains(text(), 'Select ...')]")
                l.click()
                break
            except:
                time.sleep(5)

        m = driver.find_element("xpath","//*[contains(text(), '" + state + "')]") #Select the State for ex: Maharashtra
        m.click()

        l = driver.find_element("xpath","//*[contains(text(), 'Select ...')]")
        l.click()
        m = driver.find_element("xpath","//*[contains(text(), '" + city + "')]") #Select City for ex: Mumbai
        m.click()

        l = driver.find_element("xpath","//*[contains(text(), 'Select ...')]")
        l.click()
        
        m = driver.find_element("xpath","//*[contains(text(),'" + text + "')]")
        m.click()

        l = driver.find_element("xpath","//*[contains(text(), 'Select Parameter')]")
        l.click()
        
        #SELECT THE POLLUTANTs YOU WANT
        #CO cuurently not available through code
        try:
            m = driver.find_element("xpath","//*[contains(text(), 'PM2.5')]")
            m.click()
        except:
            pass

        try:
            m = driver.find_element("xpath","//*[contains(text(), 'SO2')]")
            m.click()
        except:
            pass

        try:
            m = driver.find_element("xpath","//*[contains(text(), 'NO2')]")
            m.click()
        except:
            pass

        try:
            m = driver.find_element("xpath", "/html/body/app-root/app-caaqm-dashboard/div[1]/div/main/section/app-caaqm-view-data/div/div/div[2]/div[2]/div/div/multi-select/angular2-multiselect/div/div[2]/div[2]//*[contains(text(), 'CO')]")
            m.click()
        except:
            pass


        l = driver.find_element(By.CLASS_NAME,'wc-date-container')
        l.click()

        m = driver.find_element(By.CLASS_NAME,'month-year')
        m.click()

        n = driver.find_element(By.ID,'JAN')
        n.click()

        o = driver.find_element(By.CLASS_NAME,'year-dropdown')
        o.click()

        n = driver.find_element(By.ID,'2018')
        n.click()

        l = driver.find_element(By.CLASS_NAME,"calendar-day")
        l.click()

        m = driver.find_element("xpath","//*[contains(text(), 'Submit')]")
        m.click()

        while(True):
            try:
                m = driver.find_element("xpath",'//i[@class="fa fa-file-excel-o"]')
                m.click()
                break
            except:
                time.sleep(5)

        time.sleep(500) #Increase if the download is taking longer
        driver.quit()

BTM Layout, Bengaluru - CPCB


  fp = webdriver.FirefoxProfile()
  driver = webdriver.Firefox(executable_path=PATH, options = options, firefox_profile=fp)
  driver = webdriver.Firefox(executable_path=PATH, options = options, firefox_profile=fp)


BWSSB Kadabesanahalli, Bengaluru - CPCB
Bapuji Nagar, Bengaluru - KSPCB
City Railway Station, Bengaluru - KSPCB
Hebbal, Bengaluru - KSPCB
Hombegowda Nagar, Bengaluru - KSPCB
Jayanagar 5th Block, Bengaluru - KSPCB
Peenya, Bengaluru - CPCB
Sanegurava Halli, Bengaluru - KSPCB
Silk Board, Bengaluru - KSPCB
Alandur Bus Depot, Chennai - CPCB
Arumbakkam, Chennai - TNPCB
Gandhi Nagar_Ennore, Chennai - TNPCB
Kodungaiyur, Chennai - TNPCB
Manali Village, Chennai - TNPCB
Manali, Chennai - CPCB
Perungudi, Chennai - TNPCB
Royapuram, Chennai - TNPCB
Velachery Res. Area, Chennai - CPCB


In [6]:
dir_path = r'C:\Users\Rui\Desktop\Capstone_MacroX\Gov_data'
print(len([entry for entry in os.listdir(dir_path) if os.path.isfile(os.path.join(dir_path, entry))]))

2
