# Advantages of Pandas

* A fast and efficient DataFrame object for data manipulation with integrated indexing
* Tools for reading and writing data between in-memory data structures and different formats: CSV and text files, Microsoft Excel, SQL databases, and the fast HDF5 format 

In [2]:
import pandas as pd
import numpy as np
# Build a 100 x 50 integer grid holding 0..4999 in row-major order.
data = np.reshape(np.arange(5000), (100, 50))
data

array([[   0,    1,    2, ...,   47,   48,   49],
       [  50,   51,   52, ...,   97,   98,   99],
       [ 100,  101,  102, ...,  147,  148,  149],
       ...,
       [4850, 4851, 4852, ..., 4897, 4898, 4899],
       [4900, 4901, 4902, ..., 4947, 4948, 4949],
       [4950, 4951, 4952, ..., 4997, 4998, 4999]])

# Data Frames

In [3]:
# Display the NumPy array again as the starting point for this section.
data

array([[   0,    1,    2, ...,   47,   48,   49],
       [  50,   51,   52, ...,   97,   98,   99],
       [ 100,  101,  102, ...,  147,  148,  149],
       ...,
       [4850, 4851, 4852, ..., 4897, 4898, 4899],
       [4900, 4901, 4902, ..., 4947, 4948, 4949],
       [4950, 4951, 4952, ..., 4997, 4998, 4999]])

In [4]:
# One label per column: 'columns_0' through 'columns_49'.
columnName = [f'columns_{idx}' for idx in range(50)]
columnName

['columns_0',
 'columns_1',
 'columns_2',
 'columns_3',
 'columns_4',
 'columns_5',
 'columns_6',
 'columns_7',
 'columns_8',
 'columns_9',
 'columns_10',
 'columns_11',
 'columns_12',
 'columns_13',
 'columns_14',
 'columns_15',
 'columns_16',
 'columns_17',
 'columns_18',
 'columns_19',
 'columns_20',
 'columns_21',
 'columns_22',
 'columns_23',
 'columns_24',
 'columns_25',
 'columns_26',
 'columns_27',
 'columns_28',
 'columns_29',
 'columns_30',
 'columns_31',
 'columns_32',
 'columns_33',
 'columns_34',
 'columns_35',
 'columns_36',
 'columns_37',
 'columns_38',
 'columns_39',
 'columns_40',
 'columns_41',
 'columns_42',
 'columns_43',
 'columns_44',
 'columns_45',
 'columns_46',
 'columns_47',
 'columns_48',
 'columns_49']

In [5]:
# Put the data in tabular form by converting the array to a DataFrame.
# No column names are passed here, so pandas assigns default labels.
dataSamp = pd.DataFrame(data=data)

In [6]:
# Preview the first five rows (head() returns five rows by default).
dataSamp.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
1,50,51,52,53,54,55,56,57,58,59,...,90,91,92,93,94,95,96,97,98,99
2,100,101,102,103,104,105,106,107,108,109,...,140,141,142,143,144,145,146,147,148,149
3,150,151,152,153,154,155,156,157,158,159,...,190,191,192,193,194,195,196,197,198,199
4,200,201,202,203,204,205,206,207,208,209,...,240,241,242,243,244,245,246,247,248,249


In [7]:
# Check the type of the object: it is a pandas DataFrame.
type(dataSamp)

pandas.core.frame.DataFrame

In [8]:
# Select the column labelled 0 (the columns currently carry integer labels).
dataSamp[0]

0        0
1       50
2      100
3      150
4      200
      ... 
95    4750
96    4800
97    4850
98    4900
99    4950
Name: 0, Length: 100, dtype: int64

In [9]:
# A single column selected from a DataFrame is a pandas Series.
type(dataSamp[0])

pandas.core.series.Series

In [10]:
# Rebuild the DataFrame, this time labelling the columns with columnName.
dataSamp = pd.DataFrame(data=data, columns=columnName)

In [11]:
# Display the DataFrame; pandas abbreviates large frames with '...' rows/columns.
dataSamp

Unnamed: 0,columns_0,columns_1,columns_2,columns_3,columns_4,columns_5,columns_6,columns_7,columns_8,columns_9,...,columns_40,columns_41,columns_42,columns_43,columns_44,columns_45,columns_46,columns_47,columns_48,columns_49
0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
1,50,51,52,53,54,55,56,57,58,59,...,90,91,92,93,94,95,96,97,98,99
2,100,101,102,103,104,105,106,107,108,109,...,140,141,142,143,144,145,146,147,148,149
3,150,151,152,153,154,155,156,157,158,159,...,190,191,192,193,194,195,196,197,198,199
4,200,201,202,203,204,205,206,207,208,209,...,240,241,242,243,244,245,246,247,248,249
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,4750,4751,4752,4753,4754,4755,4756,4757,4758,4759,...,4790,4791,4792,4793,4794,4795,4796,4797,4798,4799
96,4800,4801,4802,4803,4804,4805,4806,4807,4808,4809,...,4840,4841,4842,4843,4844,4845,4846,4847,4848,4849
97,4850,4851,4852,4853,4854,4855,4856,4857,4858,4859,...,4890,4891,4892,4893,4894,4895,4896,4897,4898,4899
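98,4900,4901,4902,4903,4904,4905,4906,4907,4908,4909,...,4940,4941,4942,4943,4944,4945,4946,4947,4948,4949
99,4950,4951,4952,4953,4954,4955,4956,4957,4958,4959,...,4990,4991,4992,4993,4994,4995,4996,4997,4998,4999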


In [12]:
# Preview the first five rows of the DataFrame with its named columns.
dataSamp.head()

Unnamed: 0,columns_0,columns_1,columns_2,columns_3,columns_4,columns_5,columns_6,columns_7,columns_8,columns_9,...,columns_40,columns_41,columns_42,columns_43,columns_44,columns_45,columns_46,columns_47,columns_48,columns_49
0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
1,50,51,52,53,54,55,56,57,58,59,...,90,91,92,93,94,95,96,97,98,99
2,100,101,102,103,104,105,106,107,108,109,...,140,141,142,143,144,145,146,147,148,149
3,150,151,152,153,154,155,156,157,158,159,...,190,191,192,193,194,195,196,197,198,199
4,200,201,202,203,204,205,206,207,208,209,...,240,241,242,243,244,245,246,247,248,249


In [None]:
# Scalar string values keyed by name label.
dict1 = {
    'name1': 'XYZ1',
    'name2': 'XYZ2',
}

In [None]:
# Scalar integer values keyed by score label.
dict2 = {
    'score1': 15,
    'score2': 25,
}

In [17]:
# Passing a sequence of dicts makes each dict one row; the columns are the
# union of the keys, and a key absent from a row is left as a missing value
# (which is why the scores are shown as floats in the output).
pd.DataFrame(data=(dict1,dict2))

Unnamed: 0,name1,name2,score1,score2
0,XYZ1,XYZ2,,
1,,,15.0,25.0


In [30]:
# Two equal-length lists: 10 counting down to 1, and 10 counting up to 19.
dict1 = {
    'name1': [*range(10, 0, -1)],
    'name2': [*range(10, 20)],
}
dict1

{'name1': [10, 9, 8, 7, 6, 5, 4, 3, 2, 1],
 'name2': [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]}

In [31]:
# A dict of equal-length lists: each key becomes a column, each position a row.
pd.DataFrame(data=dict1)

Unnamed: 0,name1,name2
0,10,10
1,9,11
2,8,12
3,7,13
4,6,14
5,5,15
6,4,16
7,3,17
8,2,18
9,1,19


## Read Data

In [None]:
# Read a CSV file from the relative path 'data'.
# This rebinds `data`, which earlier in the notebook held the NumPy array.
# NOTE(review): the path has no file extension — confirm the intended file name.
data = pd.read_csv('data')