In [1]:
# Dependencies
%matplotlib inline
import matplotlib.pyplot as plt
import requests
import pandas as pd
import numpy
import os
import glob

#Oklahoma Latitude and Longitude Limits
minlatitude = 33.863,
maxlatitude = 37.004,
minlongitude = -99.976,
maxlongitude = -94.559

url = "https://earthquake.usgs.gov/fdsnws/event/1/query"
payload = {
    "format" : "geojson",
    "minlatitude" : minlatitude,
    "maxlatitude" : maxlatitude,
    "minlongitude" : minlongitude,
    "maxlongitude" : maxlongitude,
    "starttime" : "2010-01-01",
    "endtime" : "2020-01-01",
    "minmagnitude" : 2
    
}

In [2]:
new = requests.get(url, payload)

In [3]:
new.json()

{'type': 'FeatureCollection',
 'metadata': {'generated': 1578455583000,
  'url': 'https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&minlatitude=33.863&maxlatitude=37.004&minlongitude=-99.976&maxlongitude=-94.559&starttime=2010-01-01&endtime=2020-01-01&minmagnitude=2',
  'title': 'USGS Earthquakes',
  'status': 200,
  'api': '1.8.1',
  'count': 10626},
 'features': [{'type': 'Feature',
   'properties': {'mag': 2.04,
    'place': '18km SE of Helena, Oklahoma',
    'time': 1577693779710,
    'updated': 1577982621212,
    'tz': -360,
    'url': 'https://earthquake.usgs.gov/earthquakes/eventpage/ok2019znvz',
    'detail': 'https://earthquake.usgs.gov/fdsnws/event/1/query?eventid=ok2019znvz&format=geojson',
    'felt': None,
    'cdi': None,
    'mmi': None,
    'alert': None,
    'status': 'reviewed',
    'tsunami': 0,
    'sig': 64,
    'net': 'ok',
    'code': '2019znvz',
    'ids': ',ok2019znvz,',
    'sources': ',ok,',
    'types': ',geoserve,origin,phase-data,',
    'nst'

In [7]:
# List out the files needed using the glob module
excel_data = glob.glob("Injection_Volumes_Data/*.xlsx")
excel_data

['Injection_Volumes_Data/2015UICvolumes.xlsx',
 'Injection_Volumes_Data/2013UICvolumes.xlsx',
 'Injection_Volumes_Data/2014UICvolumes.xlsx',
 'Injection_Volumes_Data/2016UICvolumes.xlsx',
 'Injection_Volumes_Data/2018UICvolumes.xlsx',
 'Injection_Volumes_Data/2017UICvolumes.xlsx']

In [8]:
# Import all the files and combine them into one dataframe
all_data_df = pd.DataFrame()
for f in excel_data:
    df = pd.read_excel(f)
    all_data_df = all_data_df.append(df,ignore_index=True, sort=False)

In [9]:
all_data_df.head()

Unnamed: 0,API,OperatorName,WellName,WellNumber,Lat_Y,Long_X,CountyName,TotalDepth,Jan Vol,Feb Vol,Mar Vol,Apr Vol,May Vol,Jun Vol,Jul Vol,Aug Vol,Sep Vol,Oct Vol,Nov Vol,Dec Vol
0,3500300026,PHOENIX PETROCORP INC,SE EUREKA UNIT-TUCKER #1,21,36.900324,-98.21826,ALFALFA,5012.0,465.0,420.0,465.0,450.0,465.0,450.0,465.0,465.0,450.0,465.0,411.0,404.0
1,3500300163,CHAMPLIN EXPLORATION INC,CHRISTENSEN,1,36.896636,-98.17772,ALFALFA,5060.0,1063.0,779.0,1063.0,1063.0,1063.0,1063.0,1063.0,1063.0,1063.0,1063.0,1063.0,992.0
2,3500320145,CONTINENTAL RESOURCES INC,SINGREE,1,36.504849,-98.43324,ALFALFA,6700.0,4804.0,2111.0,3789.0,3444.0,360.0,1460.0,1342.0,1586.0,1546.0,300.0,0.0,0.0
3,3500320145,CONTINENTAL RESOURCES INC,SINGREE,1,36.504849,-98.43324,ALFALFA,6700.0,4804.0,2111.0,3789.0,3444.0,360.0,1460.0,1342.0,1586.0,1546.0,300.0,0.0,0.0
4,3500320786,LINN OPERATING LLC,NE CHEROKEE UNIT,85,36.806113,-98.32584,ALFALFA,5247.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
#Get total volumes per month per WellName
total_volume_per_month = all_data_df.groupby(["WellName", "Lat_Y", "Long_X", "CountyName", "TotalDepth"])["Jan Vol", "Feb Vol", "Mar Vol", 
                                                           "Apr Vol", "May Vol", "Jun Vol", 
                                                           "Jul Vol", "Aug Vol", "Sep Vol", 
                                                           "Oct Vol", "Nov Vol", "Dec Vol"].sum()

In [11]:
total_volume_per_month

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Jan Vol,Feb Vol,Mar Vol,Apr Vol,May Vol,Jun Vol,Jul Vol,Aug Vol,Sep Vol,Oct Vol,Nov Vol,Dec Vol
WellName,Lat_Y,Long_X,CountyName,TotalDepth,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
CALDWELL-LIONEL HARRIS,36.074014,-96.766570,PAYNE,3920.0,5460.0,5460.0,5460.0,5460.0,5460.0,5460.0,5460.0,5460.0,5460.0,5460.0,5460.0,5460.0
MARTHA HOLATHA AKA WACOHO,35.990140,-96.024770,TULSA,0.0,5300.0,3396.0,4172.0,3864.0,4237.0,3917.0,10167.0,4095.0,3694.0,3894.0,3770.0,2947.0
ROBINSON (NO MIT NEEDED),35.798333,-95.822111,OKMULGEE,1030.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
SEBU 1 (SARAH ADDISON 1),34.821354,-97.683180,GRADY,10473.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7593.0,8321.0,8075.0,8456.0,4998.0
SW VELMA SIMMS UNITUT#84446,34.428090,-97.716180,STEPHENS,6603.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZOLLARS,36.506905,-97.505177,GARFIELD,5200.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZOLLARS,36.507187,-97.504170,GARFIELD,5200.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZOMA TANNER,36.768306,-95.551167,NOWATA,683.0,32306.0,19664.0,21425.0,27846.0,32644.0,26609.0,32732.0,31346.0,35120.0,32423.0,33934.0,16548.0
ZOMA TANNER,36.769111,-95.551333,NOWATA,697.0,34924.0,27144.0,32961.0,34365.0,37775.0,38526.0,35534.0,38124.0,44359.0,44837.0,28907.0,25676.0


In [61]:
#Convert to dataframe 
total_volume_per_month_df = pd.DataFrame(total_volume_per_month).reset_index()
total_volume_per_month_df

Unnamed: 0,WellName,Lat_Y,Long_X,CountyName,TotalDepth,Jan Vol,Feb Vol,Mar Vol,Apr Vol,May Vol,Jun Vol,Jul Vol,Aug Vol,Sep Vol,Oct Vol,Nov Vol,Dec Vol
0,CALDWELL-LIONEL HARRIS,36.074014,-96.766570,PAYNE,3920.0,5460.0,5460.0,5460.0,5460.0,5460.0,5460.0,5460.0,5460.0,5460.0,5460.0,5460.0,5460.0
1,MARTHA HOLATHA AKA WACOHO,35.990140,-96.024770,TULSA,0.0,5300.0,3396.0,4172.0,3864.0,4237.0,3917.0,10167.0,4095.0,3694.0,3894.0,3770.0,2947.0
2,ROBINSON (NO MIT NEEDED),35.798333,-95.822111,OKMULGEE,1030.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,SEBU 1 (SARAH ADDISON 1),34.821354,-97.683180,GRADY,10473.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7593.0,8321.0,8075.0,8456.0,4998.0
4,SW VELMA SIMMS UNITUT#84446,34.428090,-97.716180,STEPHENS,6603.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14208,ZOLLARS,36.506905,-97.505177,GARFIELD,5200.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14209,ZOLLARS,36.507187,-97.504170,GARFIELD,5200.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14210,ZOMA TANNER,36.768306,-95.551167,NOWATA,683.0,32306.0,19664.0,21425.0,27846.0,32644.0,26609.0,32732.0,31346.0,35120.0,32423.0,33934.0,16548.0
14211,ZOMA TANNER,36.769111,-95.551333,NOWATA,697.0,34924.0,27144.0,32961.0,34365.0,37775.0,38526.0,35534.0,38124.0,44359.0,44837.0,28907.0,25676.0


In [59]:
#Delete data with zero volume (pick non-zero values which turns all zero values into nan and remove nan-values)
total_volume_per_month_nozero_df = total_volume_per_month_df[total_volume_per_month_df!=0].dropna()

In [60]:
total_volume_per_month_nozero_df

Unnamed: 0,WellName,Lat_Y,Long_X,CountyName,TotalDepth,Jan Vol,Feb Vol,Mar Vol,Apr Vol,May Vol,Jun Vol,Jul Vol,Aug Vol,Sep Vol,Oct Vol,Nov Vol,Dec Vol
0,CALDWELL-LIONEL HARRIS,36.074014,-96.766570,PAYNE,3920.0,5460.0,5460.00,5460.0,5460.00,5460.0,5460.00,5460.00,5460.0,5460.00,5460.0,5460.00,5460.0
6,(BEAN M#3) PHILLIPS,36.586872,-95.471970,ROGERS,428.0,800.0,240.25,132.5,110.25,111.5,397.75,404.75,465.5,261.25,40.5,74.25,112.0
7,(TDU) CD,34.509226,-97.533300,GARVIN,2866.0,155324.0,147081.00,158163.0,152159.00,155914.0,146281.00,157561.00,159548.0,155311.00,160369.0,156017.00,157821.0
8,(TDU) RI,34.508796,-97.527780,GARVIN,2995.0,166864.0,153156.00,166467.0,162559.00,165507.0,153073.00,161085.00,164530.0,158182.00,163275.0,160029.00,168990.0
9,(TDU) SH,34.512399,-97.543700,GARVIN,3760.0,219550.0,203017.00,222549.0,218057.00,213159.0,211810.00,222669.00,223606.0,216100.00,226540.0,218049.00,226425.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14206,ZOLDOSKE,36.421100,-97.364211,NOBLE,2364.0,5700.0,7500.00,9300.0,8400.00,4950.0,7350.00,8100.00,6450.0,4650.00,8250.0,6000.00,600.0
14207,ZOLLAR,35.711794,-96.625730,LINCOLN,4300.0,96019.0,85946.00,93886.0,92788.00,81547.0,78709.00,98854.00,92828.0,63743.00,58482.0,58618.00,55655.0
14210,ZOMA TANNER,36.768306,-95.551167,NOWATA,683.0,32306.0,19664.00,21425.0,27846.00,32644.0,26609.00,32732.00,31346.0,35120.00,32423.0,33934.00,16548.0
14211,ZOMA TANNER,36.769111,-95.551333,NOWATA,697.0,34924.0,27144.00,32961.0,34365.00,37775.0,38526.00,35534.00,38124.0,44359.00,44837.0,28907.00,25676.0
