# Pandas
The Pandas library provides high-performance, easy-to-use data structures and analysis tools for tabular data

In [None]:
# Standard way to import the Pandas library
import pandas as pd

# Usually used together with NumPy and Matplotlib
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Build a Pandas Series from a NumPy array:
# 11 evenly spaced samples on [0, 1] of a sine wave with frequency f1
f1 = 1
x1 = np.linspace(start=0, stop=1, num=11)
y1 = np.sin(2 * np.pi * f1 * x1)

# Wrap the samples in a named Series
signal1 = pd.Series(data=y1, name='y1')
print(signal1)

In [None]:
# Re-index the Series by elapsed time, interpreting x1 as seconds
sample_times = pd.to_timedelta(x1, unit='s')
signal1.index = sample_times
print(signal1)

## Select data by index value

In [None]:
# Label-based lookup, just like a dictionary key
print(signal1['0.5s'])

# Position-based lookup, like an array. Use .iloc explicitly: on a
# non-integer index, an integer key passed to [] only works through a
# deprecated positional fallback, so signal1[5] is not reliable here.
print(signal1.iloc[5])

## Resample data

In [None]:
# Up-sampling: move to a 50 ms grid and fill the new points
# by linear interpolation between the existing samples
upsampler = signal1.resample('50ms')
signal2 = upsampler.interpolate(method='linear')
print(signal2)

In [None]:
# Down-sampling: keep only the values that fall on a 200 ms grid
downsampler = signal1.resample('200ms')
signal3 = downsampler.asfreq()
print(signal3)

## Invalid/Missing data can be easily filled with values

In [None]:
# Create invalid data: blank out the samples at positions 1 and 2.
# np.nan is the canonical spelling (the np.NaN alias was removed in
# NumPy 2.0), and .iloc makes the positional slice explicit.
signal1.iloc[1:3] = np.nan
print(signal1)

In [None]:
# Replace every missing (NaN) entry with zero
signal1 = signal1.fillna(value=0)
# An in-place alternative would be:
# signal1.fillna(0., inplace=True)

print(signal1)

## Pandas DataFrame

In [None]:
# Prepare the NumPy arrays that will become DataFrame columns:
# two sine waves (frequencies f1 and f2) sampled at the same 11 points
f1 = 0.5
f2 = 1
x = np.linspace(start=0, stop=1, num=11)
y1 = np.sin(2 * np.pi * f1 * x)
y2 = np.sin(2 * np.pi * f2 * x)

for samples in (y1, y2):
    print(samples)

In [None]:
# Assemble the two arrays into a DataFrame, one column per signal
signals = {'y1': y1, 'y2': y2}
df = pd.DataFrame(signals)
print(df)
# Swap the default index for time offsets (x interpreted as seconds)
df.index = pd.to_timedelta(x, unit='s')
print(df)

## DataFrame rows can be selected using the 'loc' (label-based) and 'iloc' (position-based) indexers

In [None]:
# Pick a single row by its index label
row_by_label = df.loc['0.8s']
print(row_by_label)

In [None]:
# Pick a single row by integer position (-3 is the third row from the end)
row_by_position = df.iloc[-3]
print(row_by_position)

## DataFrame columns can be selected by name or, like rows, using the 'loc' and 'iloc' indexers

In [None]:
# Dictionary-style access: look the column up by its name
column_by_name = df['y2']
print(column_by_name)

In [None]:
# Column at integer position 1 (positions are zero-based, so this is
# the second column, 'y2')
column_by_position = df.iloc[:, 1]
print(column_by_position)

## Pandas has built-in methods that simplify creating visualizations from DataFrame and Series

In [None]:
# plot the Series
# Series.plot() draws the values against the index using Matplotlib
signal2.plot()
# Display the figure
plt.show()

In [None]:
# plot the DataFrame
# Each point is marked with '*' and the figure is titled 'DataFrame Plot'
df.plot(marker='*', title='DataFrame Plot')
# Display the figure
plt.show()

## CSV and other common filetypes can be read with a single command

In [None]:
# Path of the CSV file (relative, so resolved against the working directory)
filepath = 'data/iris_data.csv'

# Load the file into a DataFrame
data = pd.read_csv(filepath_or_buffer=filepath)

# Show the first five rows
preview = data.iloc[:5]
print(preview)

## The groupby method calculates aggregated DataFrame statistics.

In [None]:
# Count the rows in each 'species' group.
# .size() gives the count for a grouped DataFrame; for a Series,
# use the .value_counts method instead.
by_species = data.groupby('species')
group_sizes = by_species.size()

print(group_sizes)

## Pandas contains a variety of statistical methods — mean, median, mode, standard deviation etc.

In [None]:
# Mean calculated on a DataFrame.
# numeric_only=True restricts the calculation to numeric columns; without
# it, pandas 2.0+ raises a TypeError when the frame contains a non-numeric
# column (presumably 'species' here).
print(data.mean(numeric_only=True))

In [None]:
# Median of a single column (a Series)
petal_median = data['petal_length'].median()
print(petal_median)

In [None]:
# Mode of a single column (a Series)
petal_mode = data['petal_length'].mode()
print(petal_mode)

In [None]:
# Standard deviation and variance of a single column (a Series)
petal_std = data['petal_length'].std()
petal_var = data['petal_length'].var()
print(petal_std, petal_var)

In [None]:
# Several statistical calculations can be presented together in one DataFrame
summary = data.describe()
print(summary)

## Data can be assigned to a DataFrame column

In [None]:
# Add a derived column: the element-wise product of the two sepal measurements
sepal_length = data['sepal_length']
sepal_width = data['sepal_width']
data['sepal_area'] = sepal_length * sepal_width

print(data)

## Two DataFrames can be concatenated along either dimension

In [None]:
# Stack the first two rows on top of the last two rows
first2, last2 = data.iloc[:2], data.iloc[-2:]
pieces = [first2, last2]
small_data = pd.concat(pieces)
print(small_data)