In [1]:
# pandas provides two workhorse data structures: Series and DataFrame.

In [1]:
import pandas as pd
import numpy  as np

In [3]:
# Series
# A Series is a one-dimensional array-like object containing a sequence 
# of values (of similar types to NumPy types) and an associated array of 
# data labels, called its index.

In [2]:
# list() applied to a string yields one element per character.
list('abcbcbcbc')

['a', 'b', 'c', 'b', 'c', 'b', 'c', 'b', 'c']

In [3]:
# Same call on a shorter string; this is the data passed to series1 below.
list('abcd')

['a', 'b', 'c', 'd']

In [4]:
# Build a Series from four single-character strings. No index is supplied,
# so pandas assigns the default integer labels 0..3.
series1 = pd.Series(list('abcd'))
series1

0    a
1    b
2    c
3    d
dtype: object

In [5]:
# Series of city names with the default integer index.
# The list literal is passed directly; wrapping it in list() only made a
# redundant copy.
city = pd.Series(data=['Jberg', 'east london', 'cape town'])
city

0          Jberg
1    east london
2      cape town
dtype: object

In [8]:
# Integer Series with the default 0..n-1 index.
obj = pd.Series(data=[4, 7, -5, 3])
obj

0    4
1    7
2   -5
3    3
dtype: int64

In [6]:
# Series with explicit string labels. Labels need not be unique ('a' is used
# twice), and mixing ints with a float stores everything as float64.
obj2 = pd.Series([4, 7.0, -5, 3], index=['d', 'b', 'a', 'a'])
obj2

d    4.0
b    7.0
a   -5.0
a    3.0
dtype: float64

In [7]:
# The index holds the labels in order; the duplicate label 'a' is kept as-is.
obj2.index

Index(['d', 'b', 'a', 'a'], dtype='object')

In [8]:
# 'a' appears twice in the index, so a label lookup returns a Series holding
# both matching entries rather than a single scalar.
obj2['a']

a   -5.0
a    3.0
dtype: float64

In [9]:
# Assignment by label updates the Series in place; the integer is stored as
# 600000.0 because the Series dtype is float64.
# NOTE(review): this mutates obj2 for every later cell that uses it.
obj2['d'] = 600000
obj2

d    600000.0
b         7.0
a        -5.0
a         3.0
dtype: float64

In [11]:
# Select several labels at once by passing a list.
# NOTE(review): 'c' is not a label of obj2. The run recorded here only emitted
# a warning stating that a missing label will raise KeyError in the future,
# so this cell fails on pandas releases where that change has landed.
# The warning suggests .reindex(); that presumably does not work as-is here
# because obj2's index contains 'a' twice — verify before switching.
obj2[['c', 'a', 'd']]

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return self.loc[key]


c         NaN
a        -5.0
a         3.0
d    600000.0
dtype: float64

In [11]:
# Show obj2 again; print() writes the plain-text repr to stdout.
print(obj2)

d    600000.0
b         7.0
a        -5.0
a         3.0
dtype: float64


In [16]:
# Using NumPy functions or NumPy-like operations

In [12]:
# The comparison is applied element-wise and yields a boolean Series that
# carries the same index labels.
obj2 > 3

d     True
b     True
a    False
a    False
dtype: bool

In [13]:
# Boolean-mask filtering: keeps only the entries whose value is positive,
# together with their original labels.
obj2[obj2 > 0]

d    600000.0
b         7.0
a         3.0
dtype: float64

In [14]:
# Multiplying by a scalar applies to every element; labels are unchanged.
obj2 * 2

d    1200000.0
b         14.0
a        -10.0
a          6.0
dtype: float64

In [13]:
# One-dimensional NumPy array of city names; used as Series input below.
np_cities = np.array(
    ['Blore', 'delhi', 'chennai'],
)

In [14]:
# A Series can be built directly from a NumPy array; with no index given
# it receives the default integer labels.
series_cities = pd.Series(data=np_cities)
series_cities

0      Blore
1      delhi
2    chennai
dtype: object

In [22]:
# A Series can also be thought of as a kind of ordered dict,
# since it is a mapping of index values to data values.

In [22]:
# NOTE(review): the output recorded for this cell shows d = 6.0, but the
# earlier cells assign 600000 to 'd' and display 600000.0. The output looks
# stale (from a different run); re-execute the notebook top to bottom.
obj2

d    6.0
b    7.0
a   -5.0
a    3.0
dtype: float64

In [15]:
# As with a dict, `in` tests membership against the index labels,
# not against the stored values.
'b' in obj2

True

In [26]:
# 'e' is not one of the index labels, so the test is False.
'e' in obj2

False

In [21]:
# Building a Series from a Python dict:
#   dict keys   ==> index labels
#   dict values ==> Series values
# In the run recorded below the resulting Series keeps the dict's insertion
# order (Hybd, Bangalore, Delhi, Chennai); it is NOT sorted by key.
citydata = {'Hybd': 35000, 
            'Bangalore': 71000, 
            'Delhi': 16000, 
            'Chennai': 5000}

In [22]:
# Dict keys become the index labels and dict values become the data.
obj3 = pd.Series(data=citydata)
obj3

Hybd         35000
Bangalore    71000
Delhi        16000
Chennai       5000
dtype: int64

In [29]:
# Explicit index labels to use instead of the dict's own key order: passing
# them as `index=` alongside the dict fixes which keys appear and in what order.
citynames = ['Bangalore', 'Delhi', 'Hybd', 'Chennai']

In [31]:
# Same dict as obj3, but the values are looked up by the labels in
# citynames, so the Series follows that order.
obj4 = pd.Series(data=citydata, index=citynames)
obj4

Bangalore    71000
Delhi        16000
Hybd         35000
Chennai       5000
dtype: int64

In [32]:
# A useful Series feature for many applications is that it automatically
# aligns by index label in arithmetic operations,
# much like a join operation in an RDBMS.

In [33]:
# Values are paired by label, not by position: obj3 and obj4 list the cities
# in different orders, yet each city is added to itself.
obj3 + obj4

Bangalore    142000
Chennai       10000
Delhi         32000
Hybd          70000
dtype: int64

In [34]:
# Series from a scalar: the single value is repeated for every index label.
scalar_series = pd.Series(data=100, index=['pos1', 'pos2', 'pos3'])
scalar_series

pos1    100
pos2    100
pos3    100
dtype: int64

In [29]:
# Rebinds scalar_series to a Series built from a list, one value per label
# (despite the name, it no longer comes from a scalar).
scalar_series = pd.Series(data=[100, 200, 300], index=['pos1', 'pos2', 'pos3'])
scalar_series

pos1    100
pos2    200
pos3    300
dtype: int64

In [28]:
# Positional access: fetch the first element with .iloc.
# Plain scalar_series[0] on a string-labelled Series relies on pandas treating
# an integer key as a position, a fallback that recent releases deprecate.
scalar_series.iloc[0]

100

In [26]:
# Label-based access: fetch the element whose index label is 'pos1'.
scalar_series['pos1']

100

In [27]:
# Label-based slice: unlike a positional slice, the end label ('pos3')
# is included in the result.
scalar_series['pos1': 'pos3']

pos1    100
pos2    200
pos3    300
dtype: int64

In [28]:
# Two Series sharing the same labels, used to demonstrate vectorized
# (element-wise) arithmetic in the cells below.
vector1 = pd.Series(data=[1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
vector2 = pd.Series(data=[10, 20, 30, 40], index=['a', 'b', 'c', 'd'])

In [29]:
# Element-wise addition of entries that share a label.
vector1 + vector2

a    11
b    22
c    33
d    44
dtype: int64

In [30]:
# Element-wise multiplication of entries that share a label.
vector1 * vector2

a     10
b     40
c     90
d    160
dtype: int64

In [31]:
# Element-wise true division; the result is float64 even though both
# operands hold integers.
vector1 / vector2

a    0.1
b    0.1
c    0.1
d    0.1
dtype: float64

In [44]:
# Shares only labels 'a' and 'b' with vector1; 'e' and 'f' are new.
vector3 = pd.Series(data=[10, 20, 300, 400], index=['a', 'b', 'e', 'f'])

In [45]:
# Only 'a' and 'b' exist in both operands. A label present in just one of
# them yields NaN, and the result is float64 so that NaN can be represented.
vector1 + vector3

a    11.0
b    22.0
c     NaN
d     NaN
e     NaN
f     NaN
dtype: float64