# Python Pandas - Working with Text Data


In [1]:
import pandas as pd
import numpy as np

# Sample data: plain names, a name with a symbol, a missing value,
# a digit-only string, and a name without a separating space.
raw_values = ['Tom', 'William Rick', 'John', 'Alber@t', np.nan, '1234', 'SteveSmith']
s = pd.Series(raw_values)

print(s)

0             Tom
1    William Rick
2            John
3         Alber@t
4             NaN
5            1234
6      SteveSmith
dtype: object


# lower()

In [2]:
# Lower-case each string element-wise via the .str accessor.
lowercased = s.str.lower()
print(lowercased)

0             tom
1    william rick
2            john
3         alber@t
4             NaN
5            1234
6      stevesmith
dtype: object


# upper()

In [3]:
# Upper-case each string element-wise via the .str accessor.
uppercased = s.str.upper()
print(uppercased)

0             TOM
1    WILLIAM RICK
2            JOHN
3         ALBER@T
4             NaN
5            1234
6      STEVESMITH
dtype: object


# len()

In [5]:
# Number of characters in each element of the Series.
lengths = s.str.len()
print(lengths)

0     3.0
1    12.0
2     4.0
3     7.0
4     NaN
5     4.0
6    10.0
dtype: float64


# split(pattern)

In [14]:
# Note the trailing space in 'Tom ' and the leading space in ' William Rick'.
padded_names = ['Tom ', ' William Rick', 'John', 'Alber@t']
s1 = pd.Series(padded_names)

# Splitting on a single literal space keeps the empty strings that the
# leading/trailing spaces produce (e.g. 'Tom ' -> ['Tom', '']).
tokens = s1.str.split(' ')

# One print call with a newline separator emits the same four chunks,
# in the same order, as four consecutive print calls would.
print(s1, "  ", "After Spliting", tokens, sep="\n")

0             Tom 
1     William Rick
2             John
3          Alber@t
dtype: object
  
After Spliting
0              [Tom, ]
1    [, William, Rick]
2               [John]
3            [Alber@t]
dtype: object


# get_dummies()


In [16]:
import pandas as pd
import numpy as np

# Rebind `s` to a four-element Series; two of the values carry a
# trailing or leading space.
sample_names = ['Tom ', ' William Rick', 'John', 'Alber@t']
s = pd.Series(sample_names)

print(s)

0             Tom 
1     William Rick
2             John
3          Alber@t
dtype: object


In [18]:
# Build a frame of 0/1 indicator columns from the string values in `s`.
indicator_frame = s.str.get_dummies()
print(indicator_frame)

    William Rick  Alber@t  John  Tom 
0              0        0     0     1
1              1        0     0     0
2              0        0     1     0
3              0        1     0     0


# replace(a,b)

In [21]:
import pandas as pd
# Fresh copy of the sample Series used by the replace() example.
names_for_replace = ['Tom ', ' William Rick', 'John', 'Alber@t']
s = pd.Series(data=names_for_replace)

print(s)



0             Tom 
1     William Rick
2             John
3          Alber@t
dtype: object


In [22]:
# Replace every literal "@" with "$".
# regex=False forces plain-substring matching, so the result does not depend
# on the pandas version's default for `regex` (True before pandas 2.0,
# False from 2.0 onward).
print("After replacing @ with $:")
print(s.str.replace('@', '$', regex=False))

After replacing @ with $:
0             Tom 
1     William Rick
2             John
3          Alber$t
dtype: object


# count(pattern)


In [23]:
import pandas as pd
 
# Sample Series for the count() example.
names_for_count = ['Tom ', ' William Rick', 'John', 'Alber@t']
s = pd.Series(names_for_count)

# Tally how many times the pattern 'm' occurs in each element.
m_counts = s.str.count('m')

print("The number of 'm's in each string:")
print(m_counts)

The number of 'm's in each string:
0    1
1    1
2    0
3    0
dtype: int64


# startswith(pattern)


In [25]:
# Test whether each element begins with a lowercase 'r'.
starts_with_lower_r = s.str.startswith('r')
print(starts_with_lower_r)

0    False
1    False
2    False
3    False
dtype: bool


In [26]:
# Test whether each element begins with an uppercase 'R'; an 'R' that
# appears later in the string does not count.
starts_with_upper_r = s.str.startswith('R')
print(starts_with_upper_r)

0    False
1    False
2    False
3    False
dtype: bool


# endswith(pattern)


In [28]:
# Flag the elements whose final character is "t"; the bare name on the
# last line makes the notebook display the result.
ends_with_t = s.str.endswith("t")
ends_with_t

0    False
1    False
2    False
3     True
dtype: bool

In [29]:
# Flag the elements whose final character is "m". A value with trailing
# whitespace (such as 'Tom ') ends with the space, not the letter.
ends_with_m = s.str.endswith("m")
ends_with_m

0    False
1    False
2    False
3    False
dtype: bool

# findall(pattern)


In [30]:
# Display the current contents of `s` for reference before the findall() examples.
s

0             Tom 
1     William Rick
2             John
3          Alber@t
dtype: object

In [32]:
# Collect every occurrence of "W" in each element as a list
# (an empty list where there is no match).
w_matches = s.str.findall("W")
print(w_matches)

0     []
1    [W]
2     []
3     []
dtype: object


In [33]:
# Collect every occurrence of "@" in each element as a list
# (an empty list where there is no match).
at_sign_matches = s.str.findall("@")
print(at_sign_matches)

0     []
1     []
2     []
3    [@]
dtype: object


In [34]:
# Element-wise check: is the whole string upper-case?
is_all_upper = s.str.isupper()
print(is_all_upper)

0    False
1    False
2    False
3    False
dtype: bool


In [36]:
# Element-wise check: is the whole string lower-case?
is_all_lower = s.str.islower()
print(is_all_lower)

0    False
1    False
2    False
3    False
dtype: bool


In [37]:
# Element-wise check: does the string consist solely of numeric characters?
is_all_numeric = s.str.isnumeric()
print(is_all_numeric)

0    False
1    False
2    False
3    False
dtype: bool


# Python Pandas - Indexing and Selecting Data


![title](c.png)

In [38]:
# Import pandas (aliased as pd) and NumPy (aliased as np)
import pandas as pd
import numpy as np

# Build an 8x4 frame of standard-normal draws. A seeded, local RandomState
# is used so that Restart & Run All reproduces the same numbers every time
# (the unseeded np.random.randn call produced different values on each run)
# without touching NumPy's global random state.
rng = np.random.RandomState(42)
df = pd.DataFrame(
    rng.randn(8, 4),
    index=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'],
    columns=['A', 'B', 'C', 'D'],
)

# Select all rows of a single column by label; the result is a Series.
print(df.loc[:, 'A'])

a    0.219711
b   -0.684575
c    0.440247
d   -1.498358
e    0.752049
f    0.749663
g    1.520816
h   -0.843354
Name: A, dtype: float64


In [39]:
# Passing a list of column labels to .loc selects those columns for all rows.
selected_columns = ['A', 'C']
print(df.loc[:, selected_columns])


          A         C
a  0.219711  0.644081
b -0.684575 -1.095837
c  0.440247  0.715629
d -1.498358 -1.111683
e  0.752049 -1.550021
f  0.749663  1.175886
g  1.520816  0.174725
h -0.843354  0.461372
