### Pandas

In [1]:
#### Introducing Series ####
import pandas as pd
py_list = [3, 8, 15, 25, 11]
series = pd.Series(py_list)
series

0     3
1     8
2    15
3    25
4    11
dtype: int64

In [2]:
#### Change Indices ####
indices = ['A', 'B', 'C', 'D', 'E']
series = pd.Series(py_list, index = indices)
series

A     3
B     8
C    15
D    25
E    11
dtype: int64

In [3]:
#### Introducing DataFrame ####
data = {'Name': ['Brian', 'George', 'Kate', 'Amy', 'Joe'], 'Age': [23, 41, 26, 19, 35]}
data_frame = pd.DataFrame(data)
data_frame

Unnamed: 0,Age,Name
0,23,Brian
1,41,George
2,26,Kate
3,19,Amy
4,35,Joe


In [4]:
#### Print by Column Name ####
data_frame['Age']

0    23
1    41
2    26
3    19
4    35
Name: Age, dtype: int64

In [5]:
#### Sort by Column ####
data_frame.sort(columns = 'Age')

  from ipykernel import kernelapp as app


Unnamed: 0,Age,Name
3,19,Amy
0,23,Brian
2,26,Kate
4,35,Joe
1,41,George


In [6]:
#### Describe Data Frame ####
data_frame.describe()

Unnamed: 0,Age
count,5.0
mean,28.8
std,9.011104
min,19.0
25%,23.0
50%,26.0
75%,35.0
max,41.0


In [7]:
#### Read in From CSV ####
sales = pd.read_csv('sales.csv')
print("the shape of sale is {}".format(sales.shape))
print("First three columns are {}".format(sales.columns[:3]))
print("Fist five rows are \n{}".format(sales['1995_COUNT_ALL_TYPES'].head()))

the shape of sale is (348, 97)
First three columns are Index(['LA_Code', 'LA_Name', '1995_COUNT_ALL_TYPES'], dtype='object')
Fist five rows are 
0    1,188
1    1,652
2    1,684
3    2,314
4    1,558
Name: 1995_COUNT_ALL_TYPES, dtype: object


### Scikit-Learn

In [8]:
#### Load Dataset ####
from sklearn.datasets import load_diabetes
diabetes = load_diabetes()
print("The shape of diabetes data is {}".format(diabetes.data.shape))
diabetes.data

The shape of diabetes data is (442, 10)


array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
         0.01990842, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
        -0.06832974, -0.09220405],
       [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
         0.00286377, -0.02593034],
       ..., 
       [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
        -0.04687948,  0.01549073],
       [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
         0.04452837, -0.02593034],
       [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
        -0.00421986,  0.00306441]])

In [9]:
#### Model Evaluation ####
from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test  = train_test_split(diabetes.data, diabetes.target, random_state = 50)
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(X_train, y_train)
LinearRegression(copy_X = True, fit_intercept = True, normalize = False)
lr.coef_

array([  80.73490856, -195.84197988,  474.68083473,  371.06688824,
       -952.26675602,  611.63783483,  174.40777144,  159.78382579,
        832.01569658,   12.04749505])

In [10]:
#### Prediction ####
y_pred = lr.predict(X_test)
y_pred[:10]

array([  71.96974998,   82.55916305,  265.71560021,   79.37396336,
         72.48674613,   47.01580194,  149.11263906,  185.36563936,
         94.88688296,  132.08984366])

In [11]:
#### R2 ####
lr.score(X_test, y_test)

0.48699089712593369

### NetCDF4

In [12]:
#### Read in Dataset ####
import netCDF4 as nc
dataset = nc.Dataset('sresa1b_ncar_ccsm3-example.nc', 'r')
variables = [var for var in dataset.variables]
variables

['area',
 'lat',
 'lat_bnds',
 'lon',
 'lon_bnds',
 'msk_rgn',
 'plev',
 'pr',
 'tas',
 'time',
 'time_bnds',
 'ua']

In [13]:
#### Attributes of netCDF4 ####
precipitation = dataset.variables['pr']
print("Standard Name: {}".format(precipitation.standard_name))
print("Missing Value: {}".format(precipitation.missing_value))
print("Number of Dimensions: {}".format(precipitation.ndim))
print("Shape of Precipitation: {}".format(precipitation.shape))
precipitation[:, 1, :10]

Standard Name: precipitation_flux
Missing Value: 1.0000000200408773e+20
Number of Dimensions: 3
Shape of Precipitation: (1, 128, 256)


array([[  8.50919207e-07,   8.01471970e-07,   7.74396426e-07,
          7.74230614e-07,   7.47181844e-07,   7.21426375e-07,
          7.19294349e-07,   6.99790974e-07,   6.83397502e-07,
          6.74683179e-07]], dtype=float32)

In [14]:
#### Create a netCDF4 File ####
import numpy as np
time = np.arange(10)
lat = 54 + np.random.randn(8)
lon = np.random.randn(6)
data = np.random.randn(480).reshape(10, 8, 6)
output = nc.Dataset('test_output.nc', 'w')
#### Create Dimensions: time, latitude, longitude ####
output.createDimension('time', 10)
output.createDimension('lat', 8)
output.createDimension('lon', 6)
#### Declare Variable for Time ####
time_var = output.createVariable('time', 'f4', ('time',))
time_var[:] = time
#### Declare Variable for Space_Y (Latitude)
lat_var = output.createVariable('lat', 'f4', ('lat',))
lat_var[:] = lat
#### Declare Variable for Space_X (Longitude)
lon_var = output.createVariable('lon', 'f4', ('lon',))
lon_var[:] = lon
#### Create Variable based on Declared Dimensions ####
var = output.createVariable('test', 'f8', ('time', 'lat', 'lon'))
var[:] = data
#### Specify Time Variable Detail ####
time_var.standard_name = 'Time'
time_var.units = 'days since 2015-01-01 00:00:00'
time_var.calendar = 'gregorian'

In [15]:
#### Close netCDF4 Dataset ####
output.close()

### SciPy

In [16]:
#### Load Image ####
from scipy.misc import imread, imsave, ascent
import matplotlib.pyplot as plt
image_data = ascent()
image_data.shape

(512, 512)

In [17]:
#### Add Noise ####
noise_img = image_data +  image_data.std() * np.random.random(image_data.shape)
imsave('noise_img.png', noise_img)
plt.imshow(noise_img)
plt.show()

In [18]:
#### Apply Gaussian Filter ####
from scipy import ndimage
gaussian_denoised = ndimage.gaussian_filter(noise_img, 3)
imsave('gaussian_denoised.png', gaussian_denoised )
plt.imshow(gaussian_denoised)
plt.show()

In [19]:
#### Apply Uniform Filter ####
uniform_denoised = ndimage.uniform_filter(noise_img)
imsave('uniform_denoised.png', uniform_denoised)
plt.imshow(gaussian_denoised)
plt.show()