In [1]:
import numpy as np
import pandas as pd

### Basic series; default integer index

In [2]:
# No index is passed, so pandas labels the entries 0..4.
# The single NaN makes the otherwise-integer data come out as float64.
my_series = pd.Series(data=[1, 2, 3, np.nan, 6])
my_series

0    1.0
1    2.0
2    3.0
3    NaN
4    6.0
dtype: float64

### datetime index

In [3]:
# Six consecutive calendar days starting 2018-03-27 (daily frequency is the default).
my_dates = pd.date_range(start='20180327', periods=6, freq='D')
my_dates

DatetimeIndex(['2018-03-27', '2018-03-28', '2018-03-29', '2018-03-30',
               '2018-03-31', '2018-04-01'],
              dtype='datetime64[ns]', freq='D')

### sample NumPy data

In [7]:
# The integers 0..23 arranged as 6 rows x 4 columns.
# np.arange already returns an ndarray, so wrapping it in np.array() only made a needless copy.
sample_numpy_data = np.arange(24).reshape(6, 4)

In [8]:
# Display the array: the 24 values are laid out row by row, four per row.
sample_numpy_data

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23]])

In [6]:
### Sample data frame with column headers; uses our my_dates index

In [9]:
# Wrap the NumPy array in a DataFrame: the dates in my_dates label the rows,
# and the letters A-D label the columns.
sample_df = pd.DataFrame(
    data=sample_numpy_data,
    index=my_dates,
    columns=list('ABCD'),
)

In [10]:
# Display the frame: one row per date in my_dates, with columns A-D.
sample_df

Unnamed: 0,A,B,C,D
2018-03-27,0,1,2,3
2018-03-28,4,5,6,7
2018-03-29,8,9,10,11
2018-03-30,12,13,14,15
2018-03-31,16,17,18,19
2018-04-01,20,21,22,23


In [16]:
# Build a frame from a dict: each key becomes a column.
# The scalar entries ('float', 'time', 'dull') are repeated to match the length
# of the one array-like entry ('array'), so the frame has three rows.
df_from_dictionary = pd.DataFrame(
    data={
        'float': 1.0,
        'time': pd.Timestamp('20160825'),
        'array': np.arange(3),
        'dull': 'boringdata',
    }
)
df_from_dictionary

Unnamed: 0,array,dull,float,time
0,0,boringdata,1.0,2016-08-25
1,1,boringdata,1.0,2016-08-25
2,2,boringdata,1.0,2016-08-25


### pandas retains the data type of each column

In [17]:
# One dtype per column; the string column 'dull' is reported as object.
# NOTE: the integer width of 'array' (int32 in the output below) presumably reflects
# the platform's default NumPy integer and may read int64 elsewhere.
df_from_dictionary.dtypes

array             int32
dull             object
float           float64
time     datetime64[ns]
dtype: object

In [18]:
# head() shows at most the first five rows; this frame has only three, so all appear.
df_from_dictionary.head(n=5)

Unnamed: 0,array,dull,float,time
0,0,boringdata,1.0,2016-08-25
1,1,boringdata,1.0,2016-08-25
2,2,boringdata,1.0,2016-08-25


In [19]:
# Limit the preview to the first row only.
df_from_dictionary.head(n=1)

Unnamed: 0,array,dull,float,time
0,0,boringdata,1.0,2016-08-25


In [20]:
# The frame's contents as a 2-D NumPy array. Because the columns have different
# dtypes, the result is a single object-dtype array.
df_from_dictionary.values

array([[0, 'boringdata', 1.0, Timestamp('2016-08-25 00:00:00')],
       [1, 'boringdata', 1.0, Timestamp('2016-08-25 00:00:00')],
       [2, 'boringdata', 1.0, Timestamp('2016-08-25 00:00:00')]], dtype=object)

In [21]:
# No index was supplied when the frame was built, so the row labels are a default RangeIndex.
df_from_dictionary.index

RangeIndex(start=0, stop=3, step=1)

In [22]:
# Summary statistics. Only the numeric columns ('array' and 'float') appear in
# the output; 'dull' and 'time' are left out.
df_from_dictionary.describe()

Unnamed: 0,array,float
count,3.0,3.0
mean,1.0,1.0
std,1.0,0.0
min,0.0,1.0
25%,0.5,1.0
50%,1.0,1.0
75%,1.5,1.0
max,2.0,1.0


In [27]:
# Show floats with zero decimal places. This is a global display option: it
# stays in effect for every later cell until it is changed or reset
# (pd.reset_option('display.precision')), and it only affects how values are printed.
pd.options.display.precision = 0

In [28]:
# Same summary as before, now rendered with display.precision set to 0.
# Only the printed form changes: e.g. the 75% quantile of 'array' (1.5) is shown as 2.0.
df_from_dictionary.describe()

Unnamed: 0,array,float
count,3.0,3
mean,1.0,1
std,1.0,0
min,0.0,1
25%,0.5,1
50%,1.0,1
75%,2.0,1
max,2.0,1


In [29]:
# Transpose: the column names become the row labels and the row labels become the columns.
df_from_dictionary.transpose()


Unnamed: 0,0,1,2
array,0,1,2
dull,boringdata,boringdata,boringdata
float,1,1,1
time,2016-08-25 00:00:00,2016-08-25 00:00:00,2016-08-25 00:00:00


In [32]:
# Sort the columns by label in ascending order (the default direction).
# In the output shown here the columns were already alphabetical, so the frame looks unchanged.
df_from_dictionary.sort_index(axis='columns')

Unnamed: 0,array,dull,float,time
0,0,boringdata,1,2016-08-25
1,1,boringdata,1,2016-08-25
2,2,boringdata,1,2016-08-25


In [35]:
# Order the rows by the 'array' column, largest first. Row labels travel with
# their rows (2, 1, 0 in the output), and a new frame is returned rather than
# sorting df_from_dictionary in place.
df_from_dictionary.sort_values('array', ascending=False)

Unnamed: 0,array,dull,float,time
2,2,boringdata,1,2016-08-25
1,1,boringdata,1,2016-08-25
0,0,boringdata,1,2016-08-25


## Difference between interactive and production work

In [36]:
import pandas as pd
import numpy as np

In [39]:
# Rebuild the 6x4 demo frame in a single cell, indexed by six consecutive days
# starting 2016-01-01.
# NOTE: this rebinds sample_numpy_data and sample_df, replacing the objects
# created in the earlier cells of this notebook.
# np.arange already returns an ndarray, so no np.array(...) wrapper is needed.
sample_numpy_data = np.arange(24).reshape((6, 4))
dates_index = pd.date_range('20160101', periods=6)
sample_df = pd.DataFrame(sample_numpy_data, index=dates_index, columns=list('ABCD'))
sample_df

Unnamed: 0,A,B,C,D
2016-01-01,0,1,2,3
2016-01-02,4,5,6,7
2016-01-03,8,9,10,11
2016-01-04,12,13,14,15
2016-01-05,16,17,18,19
2016-01-06,20,21,22,23
