# Series

In [1]:
import numpy as np
import pandas as pd

## create series from array

In [2]:
# Ten random integers drawn from [1, 100) (the upper bound is exclusive).
random_int_arr = np.random.randint(low=1, high=100, size=10)

In [3]:
# Wrap the array in a Series; with no index given, labels default to 0..n-1.
pd.Series(data=random_int_arr)

0    66
1    22
2    35
3    17
4    75
5    26
6    44
7     4
8    95
9     3
dtype: int64

## create series from dictionary

In [5]:
# Mapping of hero name -> score; the keys become the Series index labels below.
score = dict(batman=100, superman=200, ironman=400, spiderman=500)

In [6]:
# Dictionary keys become the index labels and dictionary values become the data.
pd.Series(data=score)

batman       100
superman     200
ironman      400
spiderman    500
dtype: int64

## create series from a list of values and an index

In [11]:
# Data points 10, 20, 30, 40 and the label to attach to each of them.
values = list(range(10, 50, 10))
index = list("abcd")

In [12]:
# Pair each value with its label by passing the labels as the Series index.
pd.Series(data=values, index=index)

a    10
b    20
c    30
d    40
dtype: int64

## add two series

In [13]:
# Addition aligns the two Series on their index labels: labels found in both
# operands are summed, while a label present in only one of them ("d" or "e")
# yields NaN, which is also why the result comes back as float64.
s1 = pd.Series([1, 2, 3, 8], index=["a", "b", "c", "e"])
s2 = pd.Series([4, 5, 6, 7], index=["a", "b", "c", "d"])
s1.add(s2)

a    5.0
b    7.0
c    9.0
d    NaN
e    NaN
dtype: float64

# DataFrames

In [19]:
# Build a 5x5 array (5 rows, 5 columns) of random integers drawn from [1, 100).
data = np.random.randint(low=1, high=100, size=(5, 5))
data

array([[15, 23, 57, 83, 63],
       [74, 87, 22, 81, 50],
       [86, 54, 51, 10, 69],
       [14, 34, 60, 11, 67],
       [85, 84, 74, 21, 20]])

In [21]:
# Row labels: player1 .. player5
index = [f"player{n}" for n in range(1, 6)]
# Column labels: roman numerals I .. V
columns = ["I", "II", "III", "IV", "V"]

In [25]:
# Build the frame from the 2-D array, labelling rows with `index` and columns with `columns`.
df = pd.DataFrame(data=data, index=index, columns=columns)
df

Unnamed: 0,I,II,III,IV,V
player1,15,23,57,83,63
player2,74,87,22,81,50
player3,86,54,51,10,69
player4,14,34,60,11,67
player5,85,84,74,21,20


In [26]:
# Select a single column by its label; the result is a Series named after the column.
df["I"]

player1    15
player2    74
player3    86
player4    14
player5    85
Name: I, dtype: int64

In [40]:
# Select several columns by passing a list of labels; the result is a DataFrame.
df[["I", "II"]]

Unnamed: 0,I,II
player1,15,23
player2,74,87
player3,86,54
player4,14,34
player5,85,84


In [41]:
# Select a single row by its index label; the result is a Series named after the row.
df.loc["player1"]

I      15
II     23
III    57
IV     83
V      63
Name: player1, dtype: int64

In [43]:
# get row by integer position (0 is the first row), as opposed to .loc which selects by label
df.iloc[0]

I      15
II     23
III    57
IV     83
V      63
Name: player1, dtype: int64

In [44]:
# Select several rows by passing a list of index labels to .loc; the result is a DataFrame.
df.loc[["player1", "player2"]]

Unnamed: 0,I,II,III,IV,V
player1,15,23,57,83,63
player2,74,87,22,81,50


In [45]:
# Select a sub-table with .loc[row_labels, column_labels].
df.loc[["player1", "player2"], ["I", "II"]]

Unnamed: 0,I,II
player1,15,23
player2,74,87


## add/delete columns

In [37]:
# Assigning to a new label creates the column: 'VI' is the element-wise sum of 'I' and 'II'.
df["VI"] = df["I"].add(df["II"])
df

Unnamed: 0,I,II,III,IV,V,VI
player1,15,23,57,83,63,38
player2,74,87,22,81,50,161
player3,86,54,51,10,69,140
player4,14,34,60,11,67,48
player5,85,84,74,21,20,169


In [38]:
# drop() targets rows by default (axis=0); to remove a column, name it via
# columns=... (equivalent to passing the label with axis=1).
df.drop(columns="VI")

Unnamed: 0,I,II,III,IV,V
player1,15,23,57,83,63
player2,74,87,22,81,50
player3,86,54,51,10,69
player4,14,34,60,11,67
player5,85,84,74,21,20


In [39]:
# drop() does not modify df; it returns a new DataFrame without the dropped
# row/column. Rebind the result to df to make the removal stick. This is
# preferred over inplace=True, which mutates the existing frame and prevents
# method chaining.
df = df.drop(columns="VI")
df

Unnamed: 0,I,II,III,IV,V
player1,15,23,57,83,63
player2,74,87,22,81,50
player3,86,54,51,10,69
player4,14,34,60,11,67
player5,85,84,74,21,20
