# Pandas Series: Logical & String Operations

In [3]:
import numpy as np
import pandas as pd
import string 

pd.__version__

'0.25.3'

In [4]:
# Ten random integers in [1, 100), labelled 'a' through 'j'.
# The index is named 'name' and the Series itself is named 'age'.
s1 = pd.Series(
    np.random.randint(1, 100, 10),
    index=pd.Index(list(string.ascii_lowercase[:10]), name='name'),
    name='age',
)
s1

name
a     4
b    63
c    39
d    10
e    46
f    74
g    59
h     4
i    44
j    35
Name: age, dtype: int64

In [6]:
# Ten random integers in [50000, 100000), labelled 'a' through 'j'.
# The index is named 'name' and the Series itself is named 'income'.
s2 = pd.Series(
    np.random.randint(50000, 100000, 10),
    index=pd.Index(list(string.ascii_lowercase[:10]), name='name'),
    name='income',
)
s2

name
a    63352
b    92273
c    58160
d    61413
e    52845
f    86577
g    53405
h    92460
i    92381
j    54213
Name: income, dtype: int64

#### Which incomes are above 75K?

In [7]:
# Element-wise comparison: yields a boolean Series, True where the income exceeds 75000.
s2 > 75000

name
a    False
b     True
c    False
d    False
e    False
f     True
g    False
h     True
i     True
j    False
Name: income, dtype: bool

In [13]:
# all() behaves like a logical AND over the whole Series: True only if every income exceeds 75000.
(s2 > 75000).all()

False

In [9]:
# True only if every income exceeds 10000.
(s2 > 10000).all()

True

In [12]:
# any() behaves like a logical OR over the whole Series: True if at least one income exceeds 75000.
(s2 > 75000).any()

True

In [11]:
# True if at least one income is below 10000.
(s2 < 10000).any()

False

#### Let's get the details of those people

In [14]:
# Boolean indexing: keep only the rows whose income exceeds 75000.
s2[s2 > 75000]

name
b    92273
f    86577
h    92460
i    92381
Name: income, dtype: int64

In [15]:
# Boolean indexing: keep only the rows whose income is below 75000.
s2[s2 < 75000]

name
a    63352
c    58160
d    61413
e    52845
g    53405
j    54213
Name: income, dtype: int64

#### Let's create a Mask

In [23]:
import random

# Plain Python list holding one randomly chosen True/False flag per element of s2.
mask = [random.choice((True, False)) for _ in range(s2.size)]

mask

[True, True, True, True, False, False, False, True, False, False]

#### Use mask with the Series

In [30]:
# A list of booleans works as an indexer too: rows flagged True are kept.
s2[mask]

name
a    63352
b    92273
c    58160
d    61413
h    92460
Name: income, dtype: int64

#### So we can index with anything that returns booleans

In [31]:
# Element-wise comparison on the age Series: True where the age exceeds 30.
s1 > 30

name
a    False
b     True
c     True
d    False
e     True
f     True
g     True
h    False
i     True
j     True
Name: age, dtype: bool

In [32]:
# A boolean Series computed from s1 can filter s2: incomes of everyone older than 30.
s2[s1 > 30]

name
b    92273
c    58160
e    52845
f    86577
g    53405
i    92381
j    54213
Name: income, dtype: int64

### Logical operations work like we saw with NumPy

In [33]:
# Element-wise OR (|): incomes below 60000 or above 80000.
# Each comparison needs its own parentheses because | binds tighter than < and >.
s2[(s2 < 60000) | (s2 > 80000)]

name
b    92273
c    58160
e    52845
f    86577
g    53405
h    92460
i    92381
j    54213
Name: income, dtype: int64

In [34]:
# Element-wise AND (&): ages of people older than 50 whose income also exceeds 75000.
s1[(s1 > 50) & (s2 > 75000)]

name
b    63
f    74
Name: age, dtype: int64

### String Operations on Series values

In [35]:
# Series of name strings labelled 'a' through 'e', used for the string examples below.
s7 = pd.Series(
    ['Jim', 'jack', 'vivek ramachandran', 'kane', 'undertaker'],
    index=['a', 'b', 'c', 'd', 'e'],
)

s7

a                   Jim
b                  jack
c    vivek ramachandran
d                  kane
e            undertaker
dtype: object

In [40]:
# side_by_side function from Wes McKinney, author of Pandas

def side_by_side(*objs, **kwds):
    """Print the reprs of ``objs`` next to each other in columns.

    Each object's ``repr`` is split into lines and the resulting columns are
    joined with pandas' ``adjoin`` helper, then printed.

    Keyword arguments:
        space: number of padding spaces passed to ``adjoin`` (default 4).
            Any other keyword is ignored.
    """
    from pandas.io.formats.printing import adjoin

    gap = kwds.get('space', 4)
    columns = []
    for item in objs:
        columns.append(repr(item).split('\n'))
    print(adjoin(gap, *columns))
    

In [38]:
# The .str accessor applies string methods element-wise; here, the length of each name.
s7.str.len()

a     3
b     4
c    18
d     4
e    10
dtype: int64

In [41]:
# Print the names and their lengths in adjacent columns for comparison.
side_by_side(s7, s7.str.len())

a                   Jim    a     3     
b                  jack    b     4     
c    vivek ramachandran    c    18     
d                  kane    d     4     
e            undertaker    e    10     
dtype: object              dtype: int64


In [44]:
# Replace every space with a hyphen. regex=False requests a literal match explicitly,
# because the default value of `regex` is not the same across pandas versions.
s7.str.replace(' ', '-', regex=False)

a                   Jim
b                  jack
c    vivek-ramachandran
d                  kane
e            undertaker
dtype: object

In [45]:
# Apply a function to every value - great for cleaning up and wrangling data.
# Series.apply has many options; see
# https://pandas.pydata.org/docs/reference/api/pandas.Series.apply.html



def ReverseString(inp):
    """Return ``inp`` with its elements in reverse order (e.g. 'Jim' -> 'miJ')."""
    backwards = slice(None, None, -1)  # same as the [::-1] slice
    return inp[backwards]

# apply() calls ReverseString on each value; show the originals beside the reversed names.
side_by_side(s7, s7.apply(ReverseString))


a                   Jim    a                   miJ
b                  jack    b                  kcaj
c    vivek ramachandran    c    nardnahcamar keviv
d                  kane    d                  enak
e            undertaker    e            rekatrednu
dtype: object              dtype: object          


In [46]:
# Export the same Series as plain text, a Python list, a dict and a JSON string.
print('String: ', s7.to_string())
print('List: ', s7.tolist())
print('Dict: ', s7.to_dict())
print('JSON: ', s7.to_json())

String:  a                   Jim
b                  jack
c    vivek ramachandran
d                  kane
e            undertaker
List:  ['Jim', 'jack', 'vivek ramachandran', 'kane', 'undertaker']
Dict:  {'a': 'Jim', 'b': 'jack', 'c': 'vivek ramachandran', 'd': 'kane', 'e': 'undertaker'}
JSON:  {"a":"Jim","b":"jack","c":"vivek ramachandran","d":"kane","e":"undertaker"}


In [49]:
# Small integer Series indexed by city name; the dict keys become the index labels.
s = pd.Series({'London': 20, 'New York': 21, 'Helsinki': 12})
s

London      20
New York    21
Helsinki    12
dtype: int64

In [60]:
def add_custom_values(x, **kwargs):
    """Return ``x`` plus the value of every keyword argument.

    The keyword names (e.g. ``june=30``) serve only as labels; only their
    values are added, one after another.

    Parameters
    ----------
    x : any object supporting ``+`` with the keyword values
        The starting value.
    **kwargs
        Amounts to add to ``x``.

    Returns
    -------
    The accumulated sum. With no keyword arguments, ``x`` is returned as is.
    """
    total = x
    for amount in kwargs.values():
        # Use `+` rather than `+=`: an in-place add would modify a mutable
        # argument (such as a NumPy array) belonging to the caller.
        total = total + amount
    return total


In [61]:
# Extra keyword arguments given to apply() are passed on to add_custom_values for each element.
s.apply(add_custom_values, june=30, july=20, august=25)

London      95
New York    96
Helsinki    87
dtype: int64

In [59]:
# NumPy functions can be applied as well: natural logarithm of each value (float result).
s.apply(np.log)

London      2.995732
New York    3.044522
Helsinki    2.484907
dtype: float64