# Series Basic Functionality


![title](basic.PNG)

In [4]:
import pandas as pd
import numpy as np

#Create a series with 100 random numbers
s = pd.Series(np.random.randn(4))
print(s)

0   -1.049039
1   -0.958847
2    0.782916
3    0.951219
dtype: float64


# axes
Returns the list of the labels of the series.

In [5]:
print(s.axes)

[RangeIndex(start=0, stop=4, step=1)]


# empty
Returns the Boolean value saying whether the Object is empty or not. True indicates that the object is empty.

In [6]:
print(s.empty)

False


# ndim
Returns the number of dimensions of the object. By definition, a Series is a 1D data structure, so it returns

In [10]:
print(s)
print(" ")
print("Dimensions :",s.ndim)

0   -1.049039
1   -0.958847
2    0.782916
3    0.951219
dtype: float64
 
Dimensions : 1


# size
Returns the size(length) of the series

In [11]:
print(s.size)

4


# values
Returns the actual data in the series as an array.

In [12]:
print(s.values)

[-1.04903904 -0.9588469   0.78291576  0.95121883]


# Head & Tail
To view a small sample of a Series or the DataFrame object, use the head() and the tail() methods.

In [17]:
print("head")
print(s.head)

head
<bound method NDFrame.head of 0   -1.049039
1   -0.958847
2    0.782916
3    0.951219
dtype: float64>


In [18]:
print("Tail")
print(s.tail)

Tail
<bound method NDFrame.tail of 0   -1.049039
1   -0.958847
2    0.782916
3    0.951219
dtype: float64>


# DataFrame Basic Functionality


![title](f.PNG)

In [19]:
import pandas as pd
import numpy as np

#Create a Dictionary of series
d = {'Name':pd.Series(['Tom','James','Ricky','Vin','Steve','Smith','Jack']),
   'Age':pd.Series([25,26,25,23,30,29,23]),
   'Rating':pd.Series([4.23,3.24,3.98,2.56,3.20,4.6,3.8])}

#Create a DataFrame
df = pd.DataFrame(d)
print(df)

    Name  Age  Rating
0    Tom   25    4.23
1  James   26    3.24
2  Ricky   25    3.98
3    Vin   23    2.56
4  Steve   30    3.20
5  Smith   29    4.60
6   Jack   23    3.80


In [20]:
df.T

Unnamed: 0,0,1,2,3,4,5,6
Name,Tom,James,Ricky,Vin,Steve,Smith,Jack
Age,25,26,25,23,30,29,23
Rating,4.23,3.24,3.98,2.56,3.2,4.6,3.8


In [21]:
df.axes

[RangeIndex(start=0, stop=7, step=1),
 Index(['Name', 'Age', 'Rating'], dtype='object')]

In [22]:
df.dtypes

Name       object
Age         int64
Rating    float64
dtype: object

In [23]:
df.empty

False

In [24]:
df.shape

(7, 3)

In [25]:
df.ndim

2

In [26]:
df.size

21

In [27]:
df.head()

Unnamed: 0,Name,Age,Rating
0,Tom,25,4.23
1,James,26,3.24
2,Ricky,25,3.98
3,Vin,23,2.56
4,Steve,30,3.2


In [28]:
df.tail()

Unnamed: 0,Name,Age,Rating
2,Ricky,25,3.98
3,Vin,23,2.56
4,Steve,30,3.2
5,Smith,29,4.6
6,Jack,23,3.8


In [29]:
df.values

array([['Tom', 25, 4.23],
       ['James', 26, 3.24],
       ['Ricky', 25, 3.98],
       ['Vin', 23, 2.56],
       ['Steve', 30, 3.2],
       ['Smith', 29, 4.6],
       ['Jack', 23, 3.8]], dtype=object)

# Python Pandas - Reindexing


In [30]:
import pandas as pd
import numpy as np

N=20

df = pd.DataFrame({
   'A': pd.date_range(start='2016-01-01',periods=N,freq='D'),
   'x': np.linspace(0,stop=N-1,num=N),
   'y': np.random.rand(N),
   'C': np.random.choice(['Low','Medium','High'],N).tolist(),
   'D': np.random.normal(100, 10, size=(N)).tolist()
})

#reindex the DataFrame
df_reindexed = df.reindex(index=[0,2,5], columns=['A', 'C', 'B'])

print(df_reindexed)

           A       C   B
0 2016-01-01  Medium NaN
2 2016-01-03     Low NaN
5 2016-01-06     Low NaN


# Example 2

In [32]:
import pandas as pd
import numpy as np

df1 = pd.DataFrame(np.random.randn(10,3),columns=['col1','col2','col3'])
df2 = pd.DataFrame(np.random.randn(7,3),columns=['col1','col2','col3'])

df1 = df1.reindex_like(df2)
print(df1)

       col1      col2      col3
0 -0.621237  0.617255 -1.024003
1  1.980998  2.050104  0.426758
2  0.953572 -0.188998 -2.309905
3  0.244630  1.634965  0.543501
4  0.199498  0.888335  2.666225
5 -0.964161  1.329101  1.530655
6  0.257759  1.237729  0.661187


# Example3 

In [33]:
import pandas as pd
import numpy as np

df1 = pd.DataFrame(np.random.randn(6,3),columns=['col1','col2','col3'])
df2 = pd.DataFrame(np.random.randn(2,3),columns=['col1','col2','col3'])

# Padding NAN's
print(df2.reindex_like(df1))



       col1      col2      col3
0 -1.264701  0.432584  0.264099
1  0.400733  1.991422 -0.063198
2       NaN       NaN       NaN
3       NaN       NaN       NaN
4       NaN       NaN       NaN
5       NaN       NaN       NaN


In [35]:
# Now Fill the NAN's with preceding Values
print ("Data Frame with Forward Fill:")
print(df2.reindex_like(df1,method='ffill'))

Data Frame with Forward Fill:
       col1      col2      col3
0 -1.264701  0.432584  0.264099
1  0.400733  1.991422 -0.063198
2  0.400733  1.991422 -0.063198
3  0.400733  1.991422 -0.063198
4  0.400733  1.991422 -0.063198
5  0.400733  1.991422 -0.063198


# Python Pandas - Iteration


In [36]:
import pandas as pd
import numpy as np
 
N=20
df = pd.DataFrame({
   'A': pd.date_range(start='2016-01-01',periods=N,freq='D'),
   'x': np.linspace(0,stop=N-1,num=N),
   'y': np.random.rand(N),
   'C': np.random.choice(['Low','Medium','High'],N).tolist(),
   'D': np.random.normal(100, 10, size=(N)).tolist()
   })

print(df)
  

            A     x         y       C           D
0  2016-01-01   0.0  0.904610  Medium   99.264390
1  2016-01-02   1.0  0.796942    High  113.919772
2  2016-01-03   2.0  0.218437     Low   92.354216
3  2016-01-04   3.0  0.356472     Low   88.908327
4  2016-01-05   4.0  0.194077  Medium  115.036480
5  2016-01-06   5.0  0.534748    High  101.015795
6  2016-01-07   6.0  0.320662  Medium  101.572297
7  2016-01-08   7.0  0.163509     Low  108.442027
8  2016-01-09   8.0  0.533327     Low  101.061745
9  2016-01-10   9.0  0.343221    High   97.490935
10 2016-01-11  10.0  0.768779    High  110.307678
11 2016-01-12  11.0  0.971402     Low  119.599592
12 2016-01-13  12.0  0.190447    High  106.539687
13 2016-01-14  13.0  0.853924  Medium  102.502830
14 2016-01-15  14.0  0.057836     Low  122.565932
15 2016-01-16  15.0  0.797479     Low  102.368549
16 2016-01-17  16.0  0.382393  Medium  121.304715
17 2016-01-18  17.0  0.135384     Low   81.025957
18 2016-01-19  18.0  0.930352     Low  102.723052


In [37]:
for col in df:
    print(col)

A
x
y
C
D


In [40]:
for col in df:
    print(col[0])

A
x
y
C
D


# iteritems()
Iterates over each column as key, value pair with label as key and column value as a Series object.

In [42]:
import pandas as pd
import numpy as np
 
df = pd.DataFrame(np.random.randn(4,3),columns=['col1','col2','col3'])
for key,e in df.iteritems():
    print(key)
    print(value)
  

col1
0   -0.558246
1   -1.998198
2   -1.309356
3   -0.656238
Name: col1, dtype: float64
col2
0    1.568714
1    0.023446
2    0.582394
3    0.697011
Name: col2, dtype: float64
col3
0   -0.320420
1   -0.249520
2   -0.438061
3   -1.259055
Name: col3, dtype: float64


# iterrows() − iterate over the rows as (index,series) pairs

In [44]:
for row_index,value in df.iterrows():
    print(row_index)
    print(value)

0
col1   -0.558246
col2    1.568714
col3   -0.320420
Name: 0, dtype: float64
1
col1   -1.998198
col2    0.023446
col3   -0.249520
Name: 1, dtype: float64
2
col1   -1.309356
col2    0.582394
col3   -0.438061
Name: 2, dtype: float64
3
col1   -0.656238
col2    0.697011
col3   -1.259055
Name: 3, dtype: float64


# Python Pandas - Sorting

There are two kinds of sorting available in Pandas. They are −

1. By label
2. By Actual Value

In [46]:
import pandas as pd
import numpy as np

unsorted_df=pd.DataFrame(np.random.randn(10,2),index=[1,4,6,2,3,5,9,8,0,7],columns=['col2','col1'])
print(unsorted_df)

       col2      col1
1 -2.027636 -1.445947
4 -0.346513  0.557388
6  0.860799 -2.135852
2 -0.886623  0.787555
3  0.595734  1.065287
5 -0.845461  1.102211
9 -0.585511 -0.318334
8  0.250476 -0.937402
0  1.457797  0.837356
7 -1.114216  0.150068


# By Label
Using the sort_index() method

In [48]:
sorted_df = unsorted_df.sort_index()
print(sorted_df)

       col2      col1
0  1.457797  0.837356
1 -2.027636 -1.445947
2 -0.886623  0.787555
3  0.595734  1.065287
4 -0.346513  0.557388
5 -0.845461  1.102211
6  0.860799 -2.135852
7 -1.114216  0.150068
8  0.250476 -0.937402
9 -0.585511 -0.318334


# Sort the Columns

In [50]:
sorted_df = unsorted_df.sort_index(axis=1)
print(sorted_df)

       col1      col2
1 -1.445947 -2.027636
4  0.557388 -0.346513
6 -2.135852  0.860799
2  0.787555 -0.886623
3  1.065287  0.595734
5  1.102211 -0.845461
9 -0.318334 -0.585511
8 -0.937402  0.250476
0  0.837356  1.457797
7  0.150068 -1.114216


# By Value
Like index sorting, sort_values() is the method

In [55]:
import pandas as pd
import numpy as np

unsorted_df1 = pd.DataFrame({'col1':[2,1,1,1],'col2':[1,3,2,4]})
print(unsorted_df1)

   col1  col2
0     2     1
1     1     3
2     1     2
3     1     4


In [57]:
sorted_df1 = unsorted_df1.sort_values(by = 'col1')

In [58]:
sorted_df1

Unnamed: 0,col1,col2
1,1,3
2,1,2
3,1,4
0,2,1


In [59]:
unsorted_df1.sort_values(by = 'col2')

Unnamed: 0,col1,col2
0,2,1
2,1,2
1,1,3
3,1,4
