In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import re
import psycopg2

In [2]:
earth_data = pd.read_csv('Mag6PlusEarthquakes_1900-2013.csv', parse_dates=True)

In [3]:
earth_data.describe()

Unnamed: 0,latitude,longitude,depth,mag,nst,gap,dmin,rms
count,8313.0,8313.0,8313.0,8312.0,8111.0,1098.0,164.0,1164.0
mean,4.95411,47.350161,76.855975,6.56487,245.151399,30.41867,4.264072,1.007775
std,29.898265,122.446592,134.029768,0.482917,204.853588,19.698842,4.018516,0.186605
min,-71.0,-179.99,0.0,6.0,0.0,8.7,0.017966,0.0
25%,-16.12,-72.051,15.0,6.2,63.0,17.9,1.47675,0.88
50%,-0.534,120.231,32.1,6.5,221.0,24.7,3.271137,0.99
75%,32.327,146.43,57.2,6.9,371.5,36.9,5.79,1.13
max,84.964,180.0,720.0,9.6,999.0,230.4,26.063,1.68


In [467]:
connection = psycopg2.connect(database='lab_4', user='alex', password='95qaz26plm')
cursor = connection.cursor()

![title](lab_4.png)

In [468]:
cursor.execute("DROP TABLE IF EXISTS net")
cursor.execute("DROP TABLE IF EXISTS places")
cursor.execute("DROP TABLE IF EXISTS locations")
cursor.execute("DROP TABLE IF EXISTS magnitude")
cursor.execute("DROP TABLE IF EXISTS stations")
cursor.execute("DROP TABLE IF EXISTS earthquake")

In [469]:
event_list = list(map(lambda i: (earth_data['place'][i]), xrange(len(earth_data['longitude']))))
print len(event_list) 
print len(set(event_list))

8313
755


In [470]:
cursor.execute("CREATE TABLE net("+
               "net_id SERIAL UNIQUE,"+
               "net varchar(15) PRIMARY KEY)")

In [471]:
cursor.execute("CREATE TABLE places("
               "place_id SERIAL PRIMARY KEY,"+
               "place varchar(255))")

### Earthquake may happen in same points, of course not so frequently, but its not good for database table. Column 'place' have 755 unique values. May be improved by creating event table. Now primary key is 'local_id', bad choise but it is give posipility to quick import data to database for some tests.

In [472]:
cursor.execute("CREATE TABLE locations("+
               "local_id SERIAL UNIQUE,"+
               "latitude real NOT NULL,"+
               "longitude real NOT NULL,"+
               "depth real,"+
               "place int4 REFERENCES places(place_id),"+
               "net int4 REFERENCES net(net_id),"+
               "PRIMARY KEY(local_id))")

In [473]:
cursor.execute("CREATE TABLE magnitude("+
               "mag_id SERIAL UNIQUE,"+
               "magtype varchar(5) PRIMARY KEY)")

In [474]:
cursor.execute("CREATE TABLE stations("+
               "station_id SERIAL PRIMARY KEY,"+
               "nst int4,"+
               "gap real,"+
               "dmin real,"+
               "rms real)")

In [475]:
cursor.execute("CREATE TABLE earthquake("+
               "id varchar(255) PRIMARY KEY,"+
               "time timestamp NOT NULL,"+
               "location int4 REFERENCES locations(local_id),"+
               "mag float NOT NULL,"+
               "magnitude int4 REFERENCES magnitude(mag_id),"+
               "receive_station int4 REFERENCES stations(station_id),"+
               "update_time timestamp NOT NULL)"
                )

In [476]:
for i in earth_data:
    print i

time
Date
Time
latitude
longitude
depth
mag
magType
nst
gap
dmin
rms
net
id
updated
place
type


In [477]:
mag_list = list(set(earth_data['magType']))
print mag_list

[nan, 'Mwc', 'mwp', 'mww', 'mwr', 'Mwb', 'Mt', 'mb', 'mw', 'mj', 'mwc', 'mwb', 'Mw', 'Mwp', 'uk', 'ms', 'Mww']


In [478]:
# insert data into magnitude table
for mag in mag_list:
    cursor.execute("INSERT INTO magnitude (magtype) VALUES (%s)", (mag,))

In [479]:
net_list = list(set(earth_data['net']))
print net_list

['official', 'nc', 'us', 'centennial', 'atlas', 'pde']


In [480]:
# insert data into net table
for net in net_list:
    cursor.execute("INSERT INTO net (net) VALUES (%s)", (net,))

### For insert data with key, python have great dictionary realization. And it is way to do complex key in database.

In [481]:
dict_maker = lambda data_list: {key:item for key,item in zip(data_list, range(1,len(data_list)+1))}

In [482]:
place_list = list(set(earth_data['place']))

In [483]:
for place in place_list:
    cursor.execute("INSERT INTO places (place) VALUES (%s)", (place,))

In [484]:
net_dict = dict_maker(net_list)
mag_dict = dict_maker(mag_list)
place_dict = dict_maker(place_list)

In [485]:
for i in range(len(earth_data['longitude'])):
    cursor.execute("INSERT INTO locations (latitude, longitude, depth, place, net) VALUES (%s, %s, %s, %s, %s)",
                  (earth_data['latitude'][i], earth_data['longitude'][i], earth_data['depth'][i], 
                   place_dict[earth_data['place'][i]], net_dict[earth_data['net'][i]],))

In [486]:
earth_data['nst'] = earth_data['nst'].replace(np.nan, 0)
earth_data['gap'] = earth_data['gap'].replace(np.nan, 0)
earth_data['dmin'] = earth_data['dmin'].replace(np.nan, 0)
earth_data['rms'] = earth_data['rms'].replace(np.nan, 0)

In [487]:
for i in range(len(earth_data['longitude'])):
        cursor.execute("INSERT INTO stations (nst, gap, dmin, rms) VALUES (%s, %s, %s, %s)", 
                       (earth_data['nst'][i],earth_data['gap'][i],earth_data['dmin'][i],earth_data['rms'][i]),)

In [488]:
for i in range(len(earth_data['longitude'])):
    print(earth_data['id'][i],earth_data['time'][i],place_dict[earth_data['place'][i]], 
           earth_data['mag'][i], mag_dict[earth_data['magType'][i]], i+1, earth_data['updated'][i])
    cursor.execute("INSERT INTO earthquake (id, time, location, mag, magnitude, receive_station, update_time) VALUES (%s, %s, %s, %s, %s, %s, %s)", 
                       (earth_data['id'][i],earth_data['time'][i],place_dict[earth_data['place'][i]], 
                        earth_data['mag'][i], mag_dict[earth_data['magType'][i]], i+1, earth_data['updated'][i]))

('usc000ndw9', '2014-03-17T05:11:34.980Z', 738, 6.2000000000000002, 4, 1, '2014-03-17T14:57:11.000Z')
('usc000ndnj', '2014-03-16T21:16:30.770Z', 52, 6.7000000000000002, 4, 2, '2014-03-17T18:43:29.249Z')
('usc000nd0c', '2014-03-15T23:51:30.610Z', 595, 6.2999999999999998, 3, 3, '2014-03-18T17:52:18.000Z')
('usc000ncbl', '2014-03-15T08:59:21.790Z', 109, 6.0999999999999996, 4, 4, '2014-03-16T05:34:44.387Z')
('usc000nabv', '2014-03-13T17:06:50.800Z', 583, 6.2999999999999998, 12, 5, '2014-03-17T23:24:32.491Z')
('usc000n8ez', '2014-03-11T22:03:11.450Z', 749, 6.0999999999999996, 4, 6, '2014-03-12T21:57:02.000Z')
('usc000n7bm', '2014-03-11T02:44:05.990Z', 8, 6.4000000000000004, 4, 7, '2014-03-12T00:57:32.000Z')
('nc72182046', '2014-03-10T05:18:13.400Z', 619, 6.7999999999999998, 13, 8, '2014-03-18T16:46:55.524Z')
('usb000n1ex', '2014-03-05T09:56:58.630Z', 49, 6.2999999999999998, 3, 9, '2014-03-05T17:59:33.305Z')
('usb000mzmn', '2014-03-02T20:11:22.640Z', 88, 6.5, 11, 10, '2014-03-06T09:06:38.195

In [489]:
connection.commit()
cursor.close()
connection.close()