## Installation

In [2]:
pip install pandas

Note: you may need to restart the kernel to use updated packages.


### What is Pandas

Pandas is a fast, powerful, flexible and easy to use open source data analysis and manipulation tool,
built on top of the Python programming language.

### Pandas Series

A Pandas Series is like a column in a table. It is a 1-D array holding data of any type.

In [2]:
import pandas as pd
import numpy as np

In [14]:
# string
country = ['India', 'Nepal','Pakistan','Bhutan']
cnty = pd.Series(country)

In [5]:
# integerts
pd.Series([12.5,14.9,15.7,21.6,36.6])

0    12.5
1    14.9
2    15.7
3    21.6
4    36.6
dtype: float64

In [8]:
# custum index
alph = ['A','B','C','D']
num = [12.5,14.9,15.7,21.6]
pd.Series(num,index=alph)

A    12.5
B    14.9
C    15.7
D    21.6
dtype: float64

In [10]:
# Series from Dict
marks = {
    'maths':87,
    'english':56,
    'science':75,
    'hindi':88
}

marks_ser = pd.Series(marks)

In [11]:
marks_ser

maths      87
english    56
science    75
hindi      88
dtype: int64

# Series Attributes

In [12]:
# size
marks_ser.size

4

In [15]:
# dtype
cnty.dtype

dtype('O')

In [16]:
# is_unique
marks_ser.is_unique

True

In [17]:
pd.Series([1,1,2,23,3,3]).is_unique

False

In [18]:
# index
marks_ser.index

Index(['maths', 'english', 'science', 'hindi'], dtype='object')

In [19]:
# values
marks_ser.values

array([87, 56, 75, 88], dtype=int64)

## Series using read_csv

In [81]:
random_num = pd.Series(np.random.randint(1,50,25))
random_num

0     47
1     21
2     34
3     38
4     40
5      2
6     34
7     34
8     14
9     26
10    31
11    39
12    47
13    28
14    15
15    28
16     5
17    22
18     6
19    12
20    16
21    41
22    26
23    43
24    20
dtype: int32

In [34]:
movies = pd.read_csv('bollywood.csv', index_col='movie', squeeze=True)
movies



  movies = pd.read_csv('bollywood.csv', index_col='movie', squeeze=True)


movie
Uri: The Surgical Strike                   Vicky Kaushal
Battalion 609                                Vicky Ahuja
The Accidental Prime Minister (film)         Anupam Kher
Why Cheat India                            Emraan Hashmi
Evening Shadows                         Mona Ambegaonkar
                                              ...       
Hum Tumhare Hain Sanam                    Shah Rukh Khan
Aankhen (2002 film)                     Amitabh Bachchan
Saathiya (film)                             Vivek Oberoi
Company (film)                                Ajay Devgn
Awara Paagal Deewana                        Akshay Kumar
Name: lead, Length: 1500, dtype: object

In [35]:
movies.values

array(['Vicky Kaushal', 'Vicky Ahuja', 'Anupam Kher', ..., 'Vivek Oberoi',
       'Ajay Devgn', 'Akshay Kumar'], dtype=object)

### Series methods

In [47]:
# head and tail
movies.head(10)

movie
Uri: The Surgical Strike                       Vicky Kaushal
Battalion 609                                    Vicky Ahuja
The Accidental Prime Minister (film)             Anupam Kher
Why Cheat India                                Emraan Hashmi
Evening Shadows                             Mona Ambegaonkar
Soni (film)                             Geetika Vidya Ohlyan
Fraud Saiyaan                                   Arshad Warsi
Bombairiya                                      Radhika Apte
Manikarnika: The Queen of Jhansi              Kangana Ranaut
Thackeray (film)                         Nawazuddin Siddiqui
Name: lead, dtype: object

In [48]:
movies.tail(10)

movie
Raaz (2002 film)                    Dino Morea
Zameen (2003 film)                  Ajay Devgn
Waisa Bhi Hota Hai Part II        Arshad Warsi
Devdas (2002 Hindi film)        Shah Rukh Khan
Kaante                        Amitabh Bachchan
Hum Tumhare Hain Sanam          Shah Rukh Khan
Aankhen (2002 film)           Amitabh Bachchan
Saathiya (film)                   Vivek Oberoi
Company (film)                      Ajay Devgn
Awara Paagal Deewana              Akshay Kumar
Name: lead, dtype: object

In [40]:
# sample
random_num.sample(5)

7      5
13    25
9     14
3     32
12     4
dtype: int32

In [49]:
movies.sample(5)

movie
Manikarnika: The Queen of Jhansi        Kangana Ranaut
Student of the Year 2                     Tiger Shroff
Praan Jaye Par Shaan Na Jaye            Raveena Tandon
Dum Laga Ke Haisha                  Ayushmann Khurrana
Gangs of Wasseypur                      Manoj Bajpayee
Name: lead, dtype: object

In [55]:
# value_counts
movies.value_counts()

Akshay Kumar        48
Amitabh Bachchan    45
Ajay Devgn          38
Salman Khan         31
Sanjay Dutt         26
                    ..
Diganth              1
Parveen Kaur         1
Seema Azmi           1
Akanksha Puri        1
Edwin Fernandes      1
Name: lead, Length: 566, dtype: int64

In [54]:
# sort_values
random_num.sort_values(ascending=False)

10    47
14    47
6     44
4     43
23    42
18    41
1     41
5     33
16    32
3     32
20    30
17    29
15    27
21    26
13    25
0     23
22    21
11    20
8     18
2     18
9     14
19    14
7      5
12     4
24     4
dtype: int32

In [63]:
# method chaining
random_num.sort_values(ascending=False).head(1).values[0]

47

## Series Math Methods

In [64]:
# count
movies.count()

1500

In [65]:
a = pd.Series([12,25,45,np.nan,55,12,25])

In [66]:
a.size

7

In [67]:
a.count()

6

In [68]:
# sum()
random_num.sum()

680

In [71]:
# product()
random_num.product()

22200

In [73]:
# min/max
random_num.max()

37

In [79]:
# mean
random_num.mean()

23.6

In [78]:
# median
random_num.median()

19.0

In [82]:
# mode
random_num.mode()

0    34
dtype: int32

In [83]:
random_num.describe()

count    25.000000
mean     26.760000
std      13.182311
min       2.000000
25%      16.000000
50%      28.000000
75%      38.000000
max      47.000000
dtype: float64

## Series Indexing

In [87]:
x = pd.Series([12,15,13,45,85,79,14,19,75], index=['a','b','c','d','e','f','g','h','i'])
x

a    12
b    15
c    13
d    45
e    85
f    79
g    14
h    19
i    75
dtype: int64

In [90]:
x['g']

14

In [92]:
# slicing
x['b':'f']

b    15
c    13
d    45
e    85
f    79
dtype: int64

In [94]:
# fancy indexing
x[['c','f','a']]

c    13
f    79
a    12
dtype: int64

In [95]:
arr = np.arange(1,11).reshape(2,5)
df = pd.DataFrame(arr)

In [99]:
df

Unnamed: 0,0,1,2,3,4
0,1,2,3,4,5
1,6,7,8,9,10


In [98]:
df.to_csv('df1.csv')

In [103]:
arr1 = np.array([1,2,3,4,5,6])
file = open('file1.txt', 'w+')
save_file = str(arr1)
file.write(save_file)
file.close()


In [104]:
file = open('file1.txt', 'r')
read_file = file.read()
print(read_file)
file.close()

[1 2 3 4 5 6]


In [105]:
# editting
marks_ser

maths      87
english    56
science    75
hindi      88
dtype: int64

In [106]:
marks_ser[1]=75

In [113]:
marks_ser

maths        0
english     75
science      0
hindi      100
abc          0
dtype: int64

In [108]:
marks_ser['abc'] = 100

In [110]:
marks_ser[2:4] = [100,100]

In [112]:
# fancy indexing
marks_ser[[0,2,4]]=[0,0,0]