# Python Decorators

In [1]:
mylist = [1,2,10, 100, -1]

In [16]:
def operfunc(func):
    def wrapper(l):
        print([].append(l))
        print(len(l))
        func(l)
        print(tuple(l))
        print(set(l))
    return wrapper

In [17]:
@operfunc
def mainl(l):
    l.sort()
    print(l)

In [18]:
mainl(mylist)

None
5
[-1, 1, 2, 10, 100]
(-1, 1, 2, 10, 100)
{1, 2, 100, 10, -1}


In [13]:
mylist.sort

<function list.sort(*, key=None, reverse=False)>

# Python Generators

 - `Lazy Execution` - executes an element when needed
 - `memory saving` - when working with large files, entire file cannot be loaded into memory.
 - Example: `map`, `filter`

# `yield` Keyword

In [54]:
def fun_num(num):
    result=[]
    for i in num:
        result.append(i*i)
    return result

In [55]:
def fun1_num(num):
    result=[]
    for x in num:
        yield x*x

In [56]:
a=[1,2,3,4,5]

In [57]:
f1 = fun_num(a)
f2 = fun1_num(a)

In [34]:
f1

[1, 4, 9, 16, 25]

In [35]:
f2

<generator object fun1_num at 0x7f74dd75e040>

In [36]:
for x in f2:
    print(x)

1
4
9
16
25


In [38]:
for x in f2:
    print(x)

### Lazy execution works here (so no output) = runs only once

In [44]:
next(f2)

1

In [45]:
next(f2)

4

In [46]:
next(f2)

9

In [47]:
next(f2)

16

In [48]:
next(f2)

25

In [49]:
next(f2)

StopIteration: 

In [50]:
dir(f2)

['__class__',
 '__del__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__lt__',
 '__name__',
 '__ne__',
 '__new__',
 '__next__',
 '__qualname__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'close',
 'gi_code',
 'gi_frame',
 'gi_running',
 'gi_yieldfrom',
 'send',
 'throw']

In [58]:
list(f2)

[1, 4, 9, 16, 25]

In [60]:
[x*x for x in a]

[1, 4, 9, 16, 25]

In [66]:
new_gem = (x*x for x in a)

In [67]:
type(new_gem)

generator

### `iter()` Function - says that the generator is iterable

In [68]:
for x in iter(new_gem):
    print(x)

1
4
9
16
25


In [69]:
iter(new_gem)

<generator object <genexpr> at 0x7f74c3f67ac0>

In [72]:
{x:y for x,y in zip(range(5),range(6,11))}

{0: 6, 1: 7, 2: 8, 3: 9, 4: 10}

# `pandas` Library

- written on top of `numpy` for better memory management
- ### `pd.Series`
    - `pd.Series` are single column which has content, (name and datatype)->schema
- ### `pd.DataFrame`
    - `pd.DataFrame` are with `n rows` and `n columns`

In [76]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
import matplotlib.pyplot as plt
from numpy.random import *

In [79]:
country = ["India","UK", "USA"]
pop = [23423,345345,46545]

In [89]:
ser1 = Series(pop, dtype="int64")

In [173]:
Series([[1,2],[3,4]])

0    [1, 2]
1    [3, 4]
dtype: object

In [90]:
type(ser1)

pandas.core.series.Series

In [91]:
ser1

0     23423
1    345345
2     46545
dtype: int64

In [93]:
ser2 = Series(pop, index=country)

In [98]:
ser2.to_dict()

{'India': 23423, 'UK': 345345, 'USA': 46545}

In [102]:
"UK" in ser2

True

In [104]:
for x in ser2.index:
    print(x)

India
UK
USA


In [108]:
ser2["China"] = 934953459

In [109]:
ser2["India"] = 987238747387

In [110]:
ser2

India    987238747387
UK             345345
USA             46545
China       934953459
dtype: int64

In [112]:
ser2.dtype

dtype('int64')

In [113]:
ser2.ndim

1

In [114]:
ser2.astype(float)

India    9.872387e+11
UK       3.453450e+05
USA      4.654500e+04
China    9.349535e+08
dtype: float64

In [115]:
dict(ser2)

{'India': 987238747387, 'UK': 345345, 'USA': 46545, 'China': 934953459}

In [116]:
list(ser2)

[987238747387, 345345, 46545, 934953459]

## `pd.DataFrame`

In [118]:
nfl = pd.read_csv("nfl.csv")

In [120]:
import os
os.system('pwd')

/home/varunm15t38hedu/assets


0

In [121]:
nfl

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied[c],Pct.,First NFL season,Division
0,1,Dallas Cowboys,981,562,413,6 (0),0.576,1960,NFC East
1,2,Green Bay Packers,1435,800,598,38 (8),0.572,1921,NFC North
2,3,Baltimore Ravens,451,256,194,1,0.569,1996,AFC North
3,4,Chicago Bears,1469,793,634,42 (1),0.556,1920,NFC North
4,5,New England Patriots,983,541,433,9 (0),0.555,1960,AFC East
5,6,Miami Dolphins,899,496,399,4 (1),0.554,1966,AFC East
6,7,Kansas City Chiefs,983,532,439,12 (4),0.548,1960,AFC West
7,8,Minnesota Vikings,969,523,435,11 (4),0.546,1961,NFC North
8,9,San Francisco 49ers[b],1101,586,501,14 (3),0.539,1950,NFC West
9,10,Pittsburgh Steelers,1271,671,578,22 (4),0.537,1933,AFC North


In [122]:
nfl.head(5)

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied[c],Pct.,First NFL season,Division
0,1,Dallas Cowboys,981,562,413,6 (0),0.576,1960,NFC East
1,2,Green Bay Packers,1435,800,598,38 (8),0.572,1921,NFC North
2,3,Baltimore Ravens,451,256,194,1,0.569,1996,AFC North
3,4,Chicago Bears,1469,793,634,42 (1),0.556,1920,NFC North
4,5,New England Patriots,983,541,433,9 (0),0.555,1960,AFC East


In [123]:
nfl.tail(5)

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied[c],Pct.,First NFL season,Division
27,28,Atlanta Falcons,899,390,503,6 (2),0.437,1966,NFC South
28,29,Houston Texans,355,152,202,1,0.43,2002,AFC South
29,30,Jacksonville Jaguars,467,198,269,0,0.424,1995,AFC South
30,31,Arizona Cardinals,1429,585,803,41 (6),0.422,1920,NFC West
31,32,Tampa Bay Buccaneers,759,308,450,1,0.406,1976,NFC South


In [124]:
nfl.columns

Index(['Rank', 'Team', 'GP', 'Won', 'Lost', 'Tied[c]', 'Pct.',
       'First NFL season', 'Division'],
      dtype='object')

In [125]:
nfl['Won']

0     562
1     800
2     256
3     793
4     541
5     496
6     532
7     523
8     586
9     671
10    721
11    556
12    508
13    505
14    392
15    614
16    483
17    624
18    629
19    512
20    476
21    473
22    412
23    214
24    394
25    591
26    428
27    390
28    152
29    198
30    585
31    308
Name: Won, dtype: int64

In [3]:
import pandas as pd

In [6]:
df1 = pd.read_csv("assets/data_subplot.csv")

In [7]:
df1.describe()

Unnamed: 0,Age,All_Devs,Python,JavaScript
count,38.0,38.0,38.0,38.0
mean,36.5,69257.078947,75754.210526,69707.921053
std,11.113055,28739.996679,30104.281467,28899.282262
min,18.0,16500.0,17100.0,16446.0
25%,27.25,47394.0,54709.25,47440.5
50%,36.5,77616.0,84016.0,78754.0
75%,45.75,91653.25,99810.0,91660.0
max,55.0,108923.0,122870.0,108000.0


In [127]:
df1

Unnamed: 0,Age,All_Devs,Python,JavaScript
0,18,17784,20046,16446
1,19,16500,17100,16791
2,20,18012,20000,18942
3,21,20628,24744,21780
4,22,25206,30500,25704
5,23,30252,37732,29000
6,24,34368,41247,34372
7,25,38496,45372,37810
8,26,42000,48876,43515
9,27,46752,53850,46823


In [137]:
df2 = pd.read_json("Persons1_eramd.json")

In [129]:
df3 = pd.read_excel("supermarket_sales.xlsx")

In [131]:
df2.company[0]

{'title': 'YURTURE',
 'email': 'aureliagonzales@yurture.com',
 'phone': '+1 (940 501-3963',
 'location': {'country': 'USA', 'address': '694 Hewes Street'}}

In [138]:
def extract_company_info():
    for x in df2.company:
        df2["company_title"] = x["title"]
        df2["company_email"] = x["email"]
        df2["company_phone"] = x["phone"]
        df2["company_location"] = x["location"]["country"]
        df2["company_address"] = x["location"]["address"]

In [141]:
nfl.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Rank,32.0,16.5,9.380832,1.0,8.75,16.5,24.25,32.0
Won,32.0,497.65625,157.823832,152.0,407.5,510.0,587.25,800.0
Lost,32.0,488.84375,138.033916,194.0,434.5,495.0,583.0,803.0
Pct.,32.0,0.5005,0.049226,0.406,0.45825,0.5015,0.54075,0.576
First NFL season,32.0,1956.625,22.511287,1920.0,1936.0,1960.0,1966.25,2002.0


In [147]:
df3.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Unit price,1000.0,55.67213,26.49463,10.08,32.875,55.23,77.935,99.96
Quantity,1000.0,5.51,2.923431,1.0,3.0,5.0,8.0,10.0
Tax 5%,1000.0,15.379369,11.70883,0.5085,5.924875,12.088,22.44525,49.65
Total,1000.0,322.966749,245.8853,10.6785,124.422375,253.848,471.35025,1042.65
cogs,1000.0,307.58738,234.1765,10.17,118.4975,241.76,448.905,993.0
gross margin percentage,1000.0,4.761905,6.131498e-14,4.761905,4.761905,4.761905,4.761905,4.761905
gross income,1000.0,15.379369,11.70883,0.5085,5.924875,12.088,22.44525,49.65
Rating,1000.0,6.9727,1.71858,4.0,5.5,7.0,8.5,10.0


In [149]:
won=nfl[['Team','Won']]

In [150]:
won[won['Won']>500]

Unnamed: 0,Team,Won
0,Dallas Cowboys,562
1,Green Bay Packers,800
3,Chicago Bears,793
4,New England Patriots,541
6,Kansas City Chiefs,532
7,Minnesota Vikings,523
8,San Francisco 49ers[b],586
9,Pittsburgh Steelers,671
10,New York Giants,721
11,Indianapolis Colts[d],556


In [151]:
nfl.count()

Rank                32
Team                32
GP                  32
Won                 32
Lost                32
Tied[c]             32
Pct.                32
First NFL season    32
Division            32
dtype: int64

In [165]:
nfl[(nfl.Won>=200) & (nfl.Won<=500)]

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied[c],Pct.,First NFL season,Division
2,3,Baltimore Ravens,451,256,194,1,0.569,1996,AFC North
5,6,Miami Dolphins,899,496,399,4 (1),0.554,1966,AFC East
14,15,Seattle Seahawks,759,392,366,1,0.517,1976,NFC West
16,17,Los Angeles Chargers,983,483,489,11 (2),0.497,1960,AFC West
20,21,Tennessee Titans,983,476,501,6 (0),0.487,1960,AFC South
21,22,Buffalo Bills,982,473,501,8 (1),0.486,1960,AFC East
22,23,New Orleans Saints,870,412,468,5 (1),0.468,1967,NFC South
23,24,Carolina Panthers,467,214,252,1,0.459,1995,NFC South
24,25,Cincinnati Bengals,869,394,471,5 (4),0.456,1968,AFC North
26,27,New York Jets,983,428,547,8 (2),0.439,1960,AFC East


In [171]:
nfl[6:10]

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied[c],Pct.,First NFL season,Division
6,7,Kansas City Chiefs,983,532,439,12 (4),0.548,1960,AFC West
7,8,Minnesota Vikings,969,523,435,11 (4),0.546,1961,NFC North
8,9,San Francisco 49ers[b],1101,586,501,14 (3),0.539,1950,NFC West
9,10,Pittsburgh Steelers,1271,671,578,22 (4),0.537,1933,AFC North


In [179]:
nfl.iloc[:6]

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied[c],Pct.,First NFL season,Division
0,1,Dallas Cowboys,981,562,413,6 (0),0.576,1960,NFC East
1,2,Green Bay Packers,1435,800,598,38 (8),0.572,1921,NFC North
2,3,Baltimore Ravens,451,256,194,1,0.569,1996,AFC North
3,4,Chicago Bears,1469,793,634,42 (1),0.556,1920,NFC North
4,5,New England Patriots,983,541,433,9 (0),0.555,1960,AFC East
5,6,Miami Dolphins,899,496,399,4 (1),0.554,1966,AFC East


In [187]:
nfl.iloc[[6,9,13,18]]

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied[c],Pct.,First NFL season,Division
6,7,Kansas City Chiefs,983,532,439,12 (4),0.548,1960,AFC West
9,10,Pittsburgh Steelers,1271,671,578,22 (4),0.537,1933,AFC North
13,14,Las Vegas Raiders,983,505,467,11 (2),0.52,1960,AFC West
18,19,Washington Commanders,1301,629,643,29 (3),0.495,1932,NFC East


In [190]:
nfl.iloc[9,7]

1933

#### Excercise

In [174]:
df1.head(1)

Unnamed: 0,Age,All_Devs,Python,JavaScript
0,18,17784,20046,16446


In [175]:
df1.Python.mean()

75754.21052631579

In [177]:
df1.JavaScript.mean()

69707.92105263157

In [192]:
print(f"minimum python dev salary: {min(df1.Python)}")
print(f"minimum JavaScript dev salary: {min(df1.JavaScript)}")
print("="*50)
print(f"maximum Python dev salary: {max(df1.Python)}")
print(f"maximum JavaScript dev salary: {max(df1.JavaScript)}")

minimum python dev salary: 17100
minimum JavaScript dev salary: 16446
maximum Python dev salary: 122870
maximum JavaScript dev salary: 108000


In [196]:
df1[(df1.Age>=30)&(df1.Age<=40)]["JavaScript"].mean()

72656.63636363637

In [242]:
df1[((df1.Age>=18) & (df1.Age<=25)) | ((df1.Age>=30) & (df1.Age<=35))][["Age","Python","JavaScript"]]

Unnamed: 0,Age,Python,JavaScript
0,18,20046,16446
1,19,17100,16791
2,20,20000,18942
3,21,24744,21780
4,22,30500,25704
5,23,37732,29000
6,24,41247,34372
7,25,45372,37810
12,30,65998,56373
13,31,70003,62375


### Titanic Data

In [197]:
titanic = pd.read_csv("titanic.csv")

In [198]:
titanic.head(3)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S


In [232]:
titanic.groupby("Sex")["Sex"].count()

Sex
female    314
male      577
Name: Sex, dtype: int64

In [233]:
titanic.groupby("Sex")["Sex"].get_group("female")

1      female
2      female
3      female
8      female
9      female
        ...  
880    female
882    female
885    female
887    female
888    female
Name: Sex, Length: 314, dtype: object

In [204]:
titanic['Sex'].value_counts()

male      577
female    314
Name: Sex, dtype: int64

In [243]:
titanic.describe()

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
count,891.0,891.0,891.0,891.0,891.0,891.0,891.0
mean,446.0,0.383838,2.308642,29.560236,0.523008,0.381594,32.204208
std,257.353842,0.486592,0.836071,13.00501,1.102743,0.806057,49.693429
min,1.0,0.0,1.0,0.42,0.0,0.0,0.0
25%,223.5,0.0,2.0,22.0,0.0,0.0,7.9104
50%,446.0,0.0,3.0,29.0,0.0,0.0,14.4542
75%,668.5,1.0,3.0,35.0,1.0,0.0,31.0
max,891.0,1.0,3.0,80.0,8.0,6.0,512.3292


In [209]:
titanic.isnull().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age              0
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

In [208]:
titanic["Age"] = titanic["Age"].fillna(int(titanic.Age.mean()))

In [211]:
titanic[titanic.Survived==1]["Survived"].count()

342

In [212]:
titanic[titanic.Name in "Rose"]

TypeError: 'in <string>' requires string as left operand, not Series

In [222]:
name = titanic.Name

In [225]:
name = name[name.str.contains("rose")]

In [226]:
name.count()

0

In [244]:
titanic.Age

0      22.0
1      38.0
2      26.0
3      35.0
4      35.0
       ... 
886    27.0
887    19.0
888    29.0
889    26.0
890    32.0
Name: Age, Length: 891, dtype: float64

### Handling Null values using

- drop
- fill
- ffill
- bfill

In [245]:
titanic.dropna().count()

PassengerId    202
Survived       202
Pclass         202
Name           202
Sex            202
Age            202
SibSp          202
Parch          202
Ticket         202
Fare           202
Cabin          202
Embarked       202
dtype: int64

In [252]:
tit=titanic.dropna(subset=["Embarked"])

In [253]:
type(tit)

pandas.core.frame.DataFrame

In [262]:
titanic.dropna(thresh=12).count()

PassengerId    202
Survived       202
Pclass         202
Name           202
Sex            202
Age            202
SibSp          202
Parch          202
Ticket         202
Fare           202
Cabin          202
Embarked       202
dtype: int64

In [270]:
titanic.ffill().head(5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,C85,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,C123,S


In [271]:
titanic.bfill().head(5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,C85,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,C123,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,E46,S


In [273]:
titanic.bfill().tail(5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0,B42,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,29.0,1,2,W./C. 6607,23.45,C148,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0,C148,C
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q


In [291]:
testdf = pd.DataFrame({
    'A': [1, 2, np.nan, 4, 5,np.nan],
    'B': [np.nan, 2, 3, np.nan, 5,np.nan],
    'C': [1, np.nan, 3, 4, np.nan, np.nan],
    'D': [1, 2, 3, 4, 5, np.nan],
    'E': [1, 2, np.nan, 4, 5,np.nan],
    'F': [np.nan, 2, 3, np.nan, 5,np.nan],
    'G': [1, np.nan, 3, 4, np.nan,np.nan],
    'H': [1, 2, 3, 4, 5,np.nan],
    'I': [np.nan, np.nan,9.0,np.nan, np.nan,np.nan]
})

In [292]:
testdf

Unnamed: 0,A,B,C,D,E,F,G,H,I
0,1.0,,1.0,1.0,1.0,,1.0,1.0,
1,2.0,2.0,,2.0,2.0,2.0,,2.0,
2,,3.0,3.0,3.0,,3.0,3.0,3.0,9.0
3,4.0,,4.0,4.0,4.0,,4.0,4.0,
4,5.0,5.0,,5.0,5.0,5.0,,5.0,
5,,,,,,,,,


In [294]:
testdf.dropna(thresh=1)

Unnamed: 0,A,B,C,D,E,F,G,H,I
0,1.0,,1.0,1.0,1.0,,1.0,1.0,
1,2.0,2.0,,2.0,2.0,2.0,,2.0,
2,,3.0,3.0,3.0,,3.0,3.0,3.0,9.0
3,4.0,,4.0,4.0,4.0,,4.0,4.0,
4,5.0,5.0,,5.0,5.0,5.0,,5.0,


In [295]:
titanic.Sex = titanic.Sex.replace("female","F")
titanic.Sex = titanic.Sex.replace("male","M")

In [296]:
titanic

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",M,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",F,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",F,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",F,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",M,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",M,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",F,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",F,29.0,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",M,26.0,0,0,111369,30.0000,C148,C


In [297]:
titanic.fillna(value={"Cabin":"D15"})

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",M,22.0,1,0,A/5 21171,7.2500,D15,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",F,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",F,26.0,0,0,STON/O2. 3101282,7.9250,D15,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",F,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",M,35.0,0,0,373450,8.0500,D15,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",M,27.0,0,0,211536,13.0000,D15,S
887,888,1,1,"Graham, Miss. Margaret Edith",F,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",F,29.0,1,2,W./C. 6607,23.4500,D15,S
889,890,1,1,"Behr, Mr. Karl Howell",M,26.0,0,0,111369,30.0000,C148,C


In [303]:
titanic.drop(columns=['Cabin', 'Parch']).head(5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Ticket,Fare,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",M,22.0,1,A/5 21171,7.25,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",F,38.0,1,PC 17599,71.2833,C
2,3,1,3,"Heikkinen, Miss. Laina",F,26.0,0,STON/O2. 3101282,7.925,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",F,35.0,1,113803,53.1,S
4,5,0,3,"Allen, Mr. William Henry",M,35.0,0,373450,8.05,S


In [304]:
titanic.drop(index=0)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",F,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",F,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",F,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",M,35.0,0,0,373450,8.0500,,S
5,6,0,3,"Moran, Mr. James",M,29.0,0,0,330877,8.4583,,Q
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",M,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",F,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",F,29.0,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",M,26.0,0,0,111369,30.0000,C148,C


In [310]:
titanic.drop(labels=range(0,4)).head(8)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
4,5,0,3,"Allen, Mr. William Henry",M,35.0,0,0,373450,8.05,,S
5,6,0,3,"Moran, Mr. James",M,29.0,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",M,54.0,0,0,17463,51.8625,E46,S
7,8,0,3,"Palsson, Master. Gosta Leonard",M,2.0,3,1,349909,21.075,,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",F,27.0,0,2,347742,11.1333,,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",F,14.0,1,0,237736,30.0708,,C
10,11,1,3,"Sandstrom, Miss. Marguerite Rut",F,4.0,1,1,PP 9549,16.7,G6,S
11,12,1,1,"Bonnell, Miss. Elizabeth",F,58.0,0,0,113783,26.55,C103,S


In [314]:
titanic.drop_duplicates(['Sex', 'Age'])

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",M,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",F,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",F,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",F,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",M,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
819,820,0,3,"Skoog, Master. Karl Thorsten",M,10.0,3,2,347088,27.9000,,S
829,830,1,1,"Stone, Mrs. George Nelson (Martha Evelyn)",F,62.0,0,0,113572,80.0000,B28,
843,844,0,3,"Lemberopolous, Mr. Peter L",M,34.5,0,0,2683,6.4375,,C
851,852,0,3,"Svensson, Mr. Johan",M,74.0,0,0,347060,7.7750,,S


In [316]:
titanic['Sex'].duplicated()

0      False
1      False
2       True
3       True
4       True
       ...  
886     True
887     True
888     True
889     True
890     True
Name: Sex, Length: 891, dtype: bool

### Working with multiple dataframes

In [317]:
emp = pd.read_csv("employee.csv")

In [318]:
emp

Unnamed: 0,employee_id,first_name,last_name,email,phone_number,hire_date,job_id,salary,commission_pct,manager_id,department_id
0,100,Steven,King,SKING,515.123.4567,1987-06-17,AD_PRES,24000.0,0.0,0,90
1,101,Neena,Kochhar,NKOCHHAR,515.123.4568,1989-09-21,AD_VP,17000.0,0.0,100,90
2,102,Lex,De Haan,LDEHAAN,515.123.4569,1993-01-13,AD_VP,17000.0,0.0,100,90
3,103,Alexander,Hunold,AHUNOLD,590.423.4567,1990-01-03,IT_PROG,9000.0,0.0,102,60
4,104,Bruce,Ernst,BERNST,590.423.4568,1991-05-21,IT_PROG,6000.0,0.0,103,60
...,...,...,...,...,...,...,...,...,...,...,...
103,203,Susan,Mavris,SMAVRIS,515.123.7777,1994-06-07,HR_REP,6500.0,0.0,101,40
104,204,Hermann,Baer,HBAER,515.123.8888,1994-06-07,PR_REP,10000.0,0.0,101,70
105,205,Shelley,Higgins,SHIGGINS,515.123.8080,1994-06-07,AC_MGR,12000.0,0.0,101,110
106,206,William,Gietz,WGIETZ,515.123.8181,1994-06-07,AC_ACCOUNT,8300.0,0.0,205,110


In [324]:
dept = pd.read_json("department.json")

In [325]:
dept

Unnamed: 0,department_id,dname,managerId,locationId
0,10,Administration,200,1700
1,20,Marketing,201,1800
2,30,Purchasing,114,1700
3,40,Human Resources,203,2400
4,50,Shipping,121,1500
5,60,IT,103,1400
6,70,Public Relations,204,2700
7,80,Sales,145,2500


In [327]:
dept.describe()

Unnamed: 0,department_id,managerId,locationId
count,8.0,8.0,8.0
mean,45.0,161.375,1962.5
std,24.494897,44.979162,495.515604
min,10.0,103.0,1400.0
25%,27.5,119.25,1650.0
50%,45.0,172.5,1750.0
75%,62.5,201.5,2425.0
max,80.0,204.0,2700.0


### `pd.concat`

In [337]:
pd.concat([emp,dept])

Unnamed: 0,employee_id,first_name,last_name,email,phone_number,hire_date,job_id,salary,commission_pct,manager_id,department_id,dname,managerId,locationId
0,100.0,Steven,King,SKING,515.123.4567,1987-06-17,AD_PRES,24000.0,0.0,0.0,90,,,
1,101.0,Neena,Kochhar,NKOCHHAR,515.123.4568,1989-09-21,AD_VP,17000.0,0.0,100.0,90,,,
2,102.0,Lex,De Haan,LDEHAAN,515.123.4569,1993-01-13,AD_VP,17000.0,0.0,100.0,90,,,
3,103.0,Alexander,Hunold,AHUNOLD,590.423.4567,1990-01-03,IT_PROG,9000.0,0.0,102.0,60,,,
4,104.0,Bruce,Ernst,BERNST,590.423.4568,1991-05-21,IT_PROG,6000.0,0.0,103.0,60,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3,,,,,,,,,,,40,Human Resources,203.0,2400.0
4,,,,,,,,,,,50,Shipping,121.0,1500.0
5,,,,,,,,,,,60,IT,103.0,1400.0
6,,,,,,,,,,,70,Public Relations,204.0,2700.0


In [338]:
pd.concat([emp, dept], axis=1)

Unnamed: 0,employee_id,first_name,last_name,email,phone_number,hire_date,job_id,salary,commission_pct,manager_id,department_id,department_id.1,dname,managerId,locationId
0,100,Steven,King,SKING,515.123.4567,1987-06-17,AD_PRES,24000.0,0.0,0,90,10.0,Administration,200.0,1700.0
1,101,Neena,Kochhar,NKOCHHAR,515.123.4568,1989-09-21,AD_VP,17000.0,0.0,100,90,20.0,Marketing,201.0,1800.0
2,102,Lex,De Haan,LDEHAAN,515.123.4569,1993-01-13,AD_VP,17000.0,0.0,100,90,30.0,Purchasing,114.0,1700.0
3,103,Alexander,Hunold,AHUNOLD,590.423.4567,1990-01-03,IT_PROG,9000.0,0.0,102,60,40.0,Human Resources,203.0,2400.0
4,104,Bruce,Ernst,BERNST,590.423.4568,1991-05-21,IT_PROG,6000.0,0.0,103,60,50.0,Shipping,121.0,1500.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103,203,Susan,Mavris,SMAVRIS,515.123.7777,1994-06-07,HR_REP,6500.0,0.0,101,40,,,,
104,204,Hermann,Baer,HBAER,515.123.8888,1994-06-07,PR_REP,10000.0,0.0,101,70,,,,
105,205,Shelley,Higgins,SHIGGINS,515.123.8080,1994-06-07,AC_MGR,12000.0,0.0,101,110,,,,
106,206,William,Gietz,WGIETZ,515.123.8181,1994-06-07,AC_ACCOUNT,8300.0,0.0,205,110,,,,


In [339]:
pd.concat([emp, dept], axis=1).head(5)

Unnamed: 0,employee_id,first_name,last_name,email,phone_number,hire_date,job_id,salary,commission_pct,manager_id,department_id,department_id.1,dname,managerId,locationId
0,100,Steven,King,SKING,515.123.4567,1987-06-17,AD_PRES,24000.0,0.0,0,90,10.0,Administration,200.0,1700.0
1,101,Neena,Kochhar,NKOCHHAR,515.123.4568,1989-09-21,AD_VP,17000.0,0.0,100,90,20.0,Marketing,201.0,1800.0
2,102,Lex,De Haan,LDEHAAN,515.123.4569,1993-01-13,AD_VP,17000.0,0.0,100,90,30.0,Purchasing,114.0,1700.0
3,103,Alexander,Hunold,AHUNOLD,590.423.4567,1990-01-03,IT_PROG,9000.0,0.0,102,60,40.0,Human Resources,203.0,2400.0
4,104,Bruce,Ernst,BERNST,590.423.4568,1991-05-21,IT_PROG,6000.0,0.0,103,60,50.0,Shipping,121.0,1500.0


In [340]:
pd.concat([emp, dept], axis=1).tail(5)

Unnamed: 0,employee_id,first_name,last_name,email,phone_number,hire_date,job_id,salary,commission_pct,manager_id,department_id,department_id.1,dname,managerId,locationId
103,203,Susan,Mavris,SMAVRIS,515.123.7777,1994-06-07,HR_REP,6500.0,0.0,101,40,,,,
104,204,Hermann,Baer,HBAER,515.123.8888,1994-06-07,PR_REP,10000.0,0.0,101,70,,,,
105,205,Shelley,Higgins,SHIGGINS,515.123.8080,1994-06-07,AC_MGR,12000.0,0.0,101,110,,,,
106,206,William,Gietz,WGIETZ,515.123.8181,1994-06-07,AC_ACCOUNT,8300.0,0.0,205,110,,,,
107,900,Ram,,,,,AC_MGR,0.0,0.0,0,10,,,,


### `pd.merge`

In [335]:
pd.merge(emp,dept,on="department_id", how="outer")

Unnamed: 0,employee_id,first_name,last_name,email,phone_number,hire_date,job_id,salary,commission_pct,manager_id,department_id,dname,managerId,locationId
0,100,Steven,King,SKING,515.123.4567,1987-06-17,AD_PRES,24000.0,0.0,0,90,,,
1,101,Neena,Kochhar,NKOCHHAR,515.123.4568,1989-09-21,AD_VP,17000.0,0.0,100,90,,,
2,102,Lex,De Haan,LDEHAAN,515.123.4569,1993-01-13,AD_VP,17000.0,0.0,100,90,,,
3,103,Alexander,Hunold,AHUNOLD,590.423.4567,1990-01-03,IT_PROG,9000.0,0.0,102,60,IT,103.0,1400.0
4,104,Bruce,Ernst,BERNST,590.423.4568,1991-05-21,IT_PROG,6000.0,0.0,103,60,IT,103.0,1400.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103,202,Pat,Fay,PFAY,603.123.6666,1997-08-17,MK_REP,6000.0,0.0,201,20,Marketing,201.0,1800.0
104,203,Susan,Mavris,SMAVRIS,515.123.7777,1994-06-07,HR_REP,6500.0,0.0,101,40,Human Resources,203.0,2400.0
105,204,Hermann,Baer,HBAER,515.123.8888,1994-06-07,PR_REP,10000.0,0.0,101,70,Public Relations,204.0,2700.0
106,205,Shelley,Higgins,SHIGGINS,515.123.8080,1994-06-07,AC_MGR,12000.0,0.0,101,110,,,


In [343]:
empcopy = emp.copy()

In [344]:
emp.append(empcopy)

  emp.append(empcopy)


Unnamed: 0,employee_id,first_name,last_name,email,phone_number,hire_date,job_id,salary,commission_pct,manager_id,department_id
0,100,Steven,King,SKING,515.123.4567,1987-06-17,AD_PRES,24000.0,0.0,0,90
1,101,Neena,Kochhar,NKOCHHAR,515.123.4568,1989-09-21,AD_VP,17000.0,0.0,100,90
2,102,Lex,De Haan,LDEHAAN,515.123.4569,1993-01-13,AD_VP,17000.0,0.0,100,90
3,103,Alexander,Hunold,AHUNOLD,590.423.4567,1990-01-03,IT_PROG,9000.0,0.0,102,60
4,104,Bruce,Ernst,BERNST,590.423.4568,1991-05-21,IT_PROG,6000.0,0.0,103,60
...,...,...,...,...,...,...,...,...,...,...,...
103,203,Susan,Mavris,SMAVRIS,515.123.7777,1994-06-07,HR_REP,6500.0,0.0,101,40
104,204,Hermann,Baer,HBAER,515.123.8888,1994-06-07,PR_REP,10000.0,0.0,101,70
105,205,Shelley,Higgins,SHIGGINS,515.123.8080,1994-06-07,AC_MGR,12000.0,0.0,101,110
106,206,William,Gietz,WGIETZ,515.123.8181,1994-06-07,AC_ACCOUNT,8300.0,0.0,205,110


### `pd.join()`

## Types of Join
- ## Cross join
    - `E x D` (basic)
- ## Inner join
    - cross join + condition=(matching)
- ## Outer join
    - cross join + condition
        - left
        - right
        - full
- ## semi join
    - joins based on one condition
- ## anti join
    - cross join + condition = unmatch

In [359]:
pd.merge(emp, dept, how="cross")

Unnamed: 0,employee_id,first_name,last_name,email,phone_number,hire_date,job_id,salary,commission_pct,manager_id,department_id_x,department_id_y,dname,managerId,locationId
0,100,Steven,King,SKING,515.123.4567,1987-06-17,AD_PRES,24000.0,0.0,0,90,10,Administration,200,1700
1,100,Steven,King,SKING,515.123.4567,1987-06-17,AD_PRES,24000.0,0.0,0,90,20,Marketing,201,1800
2,100,Steven,King,SKING,515.123.4567,1987-06-17,AD_PRES,24000.0,0.0,0,90,30,Purchasing,114,1700
3,100,Steven,King,SKING,515.123.4567,1987-06-17,AD_PRES,24000.0,0.0,0,90,40,Human Resources,203,2400
4,100,Steven,King,SKING,515.123.4567,1987-06-17,AD_PRES,24000.0,0.0,0,90,50,Shipping,121,1500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
859,900,Ram,,,,,AC_MGR,0.0,0.0,0,10,40,Human Resources,203,2400
860,900,Ram,,,,,AC_MGR,0.0,0.0,0,10,50,Shipping,121,1500
861,900,Ram,,,,,AC_MGR,0.0,0.0,0,10,60,IT,103,1400
862,900,Ram,,,,,AC_MGR,0.0,0.0,0,10,70,Public Relations,204,2700


In [360]:
pd.merge(emp, dept, how="inner")

Unnamed: 0,employee_id,first_name,last_name,email,phone_number,hire_date,job_id,salary,commission_pct,manager_id,department_id,dname,managerId,locationId
0,103,Alexander,Hunold,AHUNOLD,590.423.4567,1990-01-03,IT_PROG,9000.0,0.0,102,60,IT,103,1400
1,104,Bruce,Ernst,BERNST,590.423.4568,1991-05-21,IT_PROG,6000.0,0.0,103,60,IT,103,1400
2,105,David,Austin,DAUSTIN,590.423.4569,1997-06-25,IT_PROG,4800.0,0.0,103,60,IT,103,1400
3,106,Valli,Pataballa,VPATABAL,590.423.4560,1998-02-05,IT_PROG,4800.0,0.0,103,60,IT,103,1400
4,107,Diana,Lorentz,DLORENTZ,590.423.5567,1999-02-07,IT_PROG,4200.0,0.0,103,60,IT,103,1400
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,900,Ram,,,,,AC_MGR,0.0,0.0,0,10,Administration,200,1700
92,201,Michael,Hartstein,MHARTSTE,515.123.5555,1996-02-17,MK_MAN,13000.0,0.0,100,20,Marketing,201,1800
93,202,Pat,Fay,PFAY,603.123.6666,1997-08-17,MK_REP,6000.0,0.0,201,20,Marketing,201,1800
94,203,Susan,Mavris,SMAVRIS,515.123.7777,1994-06-07,HR_REP,6500.0,0.0,101,40,Human Resources,203,2400


In [361]:
pd.merge(emp, dept, how="outer")

Unnamed: 0,employee_id,first_name,last_name,email,phone_number,hire_date,job_id,salary,commission_pct,manager_id,department_id,dname,managerId,locationId
0,100,Steven,King,SKING,515.123.4567,1987-06-17,AD_PRES,24000.0,0.0,0,90,,,
1,101,Neena,Kochhar,NKOCHHAR,515.123.4568,1989-09-21,AD_VP,17000.0,0.0,100,90,,,
2,102,Lex,De Haan,LDEHAAN,515.123.4569,1993-01-13,AD_VP,17000.0,0.0,100,90,,,
3,103,Alexander,Hunold,AHUNOLD,590.423.4567,1990-01-03,IT_PROG,9000.0,0.0,102,60,IT,103.0,1400.0
4,104,Bruce,Ernst,BERNST,590.423.4568,1991-05-21,IT_PROG,6000.0,0.0,103,60,IT,103.0,1400.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103,202,Pat,Fay,PFAY,603.123.6666,1997-08-17,MK_REP,6000.0,0.0,201,20,Marketing,201.0,1800.0
104,203,Susan,Mavris,SMAVRIS,515.123.7777,1994-06-07,HR_REP,6500.0,0.0,101,40,Human Resources,203.0,2400.0
105,204,Hermann,Baer,HBAER,515.123.8888,1994-06-07,PR_REP,10000.0,0.0,101,70,Public Relations,204.0,2700.0
106,205,Shelley,Higgins,SHIGGINS,515.123.8080,1994-06-07,AC_MGR,12000.0,0.0,101,110,,,


In [362]:
pd.merge(emp, dept, how="left")

Unnamed: 0,employee_id,first_name,last_name,email,phone_number,hire_date,job_id,salary,commission_pct,manager_id,department_id,dname,managerId,locationId
0,100,Steven,King,SKING,515.123.4567,1987-06-17,AD_PRES,24000.0,0.0,0,90,,,
1,101,Neena,Kochhar,NKOCHHAR,515.123.4568,1989-09-21,AD_VP,17000.0,0.0,100,90,,,
2,102,Lex,De Haan,LDEHAAN,515.123.4569,1993-01-13,AD_VP,17000.0,0.0,100,90,,,
3,103,Alexander,Hunold,AHUNOLD,590.423.4567,1990-01-03,IT_PROG,9000.0,0.0,102,60,IT,103.0,1400.0
4,104,Bruce,Ernst,BERNST,590.423.4568,1991-05-21,IT_PROG,6000.0,0.0,103,60,IT,103.0,1400.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103,203,Susan,Mavris,SMAVRIS,515.123.7777,1994-06-07,HR_REP,6500.0,0.0,101,40,Human Resources,203.0,2400.0
104,204,Hermann,Baer,HBAER,515.123.8888,1994-06-07,PR_REP,10000.0,0.0,101,70,Public Relations,204.0,2700.0
105,205,Shelley,Higgins,SHIGGINS,515.123.8080,1994-06-07,AC_MGR,12000.0,0.0,101,110,,,
106,206,William,Gietz,WGIETZ,515.123.8181,1994-06-07,AC_ACCOUNT,8300.0,0.0,205,110,,,


In [363]:
pd.merge(emp, dept, how="right")

Unnamed: 0,employee_id,first_name,last_name,email,phone_number,hire_date,job_id,salary,commission_pct,manager_id,department_id,dname,managerId,locationId
0,200,Jennifer,Whalen,JWHALEN,515.123.4444,1987-09-17,AD_ASST,4400.0,0.00,101,10,Administration,200,1700
1,900,Ram,,,,,AC_MGR,0.0,0.00,0,10,Administration,200,1700
2,201,Michael,Hartstein,MHARTSTE,515.123.5555,1996-02-17,MK_MAN,13000.0,0.00,100,20,Marketing,201,1800
3,202,Pat,Fay,PFAY,603.123.6666,1997-08-17,MK_REP,6000.0,0.00,201,20,Marketing,201,1800
4,114,Den,Raphaely,DRAPHEAL,515.127.4561,1994-12-07,PU_MAN,11000.0,0.00,100,30,Purchasing,114,1700
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,174,Ellen,Abel,EABEL,011.44.1644.429267,1996-05-11,SA_REP,11000.0,0.30,149,80,Sales,145,2500
92,175,Alyssa,Hutton,AHUTTON,011.44.1644.429266,1997-03-19,SA_REP,8800.0,0.25,149,80,Sales,145,2500
93,176,Jonathon,Taylor,JTAYLOR,011.44.1644.429265,1998-03-24,SA_REP,8600.0,0.20,149,80,Sales,145,2500
94,177,Jack,Livingston,JLIVINGS,011.44.1644.429264,1998-04-23,SA_REP,8400.0,0.20,149,80,Sales,145,2500


In [365]:
pd.merge(emp, dept, on="department_id", how="inner")

Unnamed: 0,employee_id,first_name,last_name,email,phone_number,hire_date,job_id,salary,commission_pct,manager_id,department_id,dname,managerId,locationId
0,103,Alexander,Hunold,AHUNOLD,590.423.4567,1990-01-03,IT_PROG,9000.0,0.0,102,60,IT,103,1400
1,104,Bruce,Ernst,BERNST,590.423.4568,1991-05-21,IT_PROG,6000.0,0.0,103,60,IT,103,1400
2,105,David,Austin,DAUSTIN,590.423.4569,1997-06-25,IT_PROG,4800.0,0.0,103,60,IT,103,1400
3,106,Valli,Pataballa,VPATABAL,590.423.4560,1998-02-05,IT_PROG,4800.0,0.0,103,60,IT,103,1400
4,107,Diana,Lorentz,DLORENTZ,590.423.5567,1999-02-07,IT_PROG,4200.0,0.0,103,60,IT,103,1400
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,900,Ram,,,,,AC_MGR,0.0,0.0,0,10,Administration,200,1700
92,201,Michael,Hartstein,MHARTSTE,515.123.5555,1996-02-17,MK_MAN,13000.0,0.0,100,20,Marketing,201,1800
93,202,Pat,Fay,PFAY,603.123.6666,1997-08-17,MK_REP,6000.0,0.0,201,20,Marketing,201,1800
94,203,Susan,Mavris,SMAVRIS,515.123.7777,1994-06-07,HR_REP,6500.0,0.0,101,40,Human Resources,203,2400


In [366]:
pd.merge(emp, dept, on="department_id", how="outer")

Unnamed: 0,employee_id,first_name,last_name,email,phone_number,hire_date,job_id,salary,commission_pct,manager_id,department_id,dname,managerId,locationId
0,100,Steven,King,SKING,515.123.4567,1987-06-17,AD_PRES,24000.0,0.0,0,90,,,
1,101,Neena,Kochhar,NKOCHHAR,515.123.4568,1989-09-21,AD_VP,17000.0,0.0,100,90,,,
2,102,Lex,De Haan,LDEHAAN,515.123.4569,1993-01-13,AD_VP,17000.0,0.0,100,90,,,
3,103,Alexander,Hunold,AHUNOLD,590.423.4567,1990-01-03,IT_PROG,9000.0,0.0,102,60,IT,103.0,1400.0
4,104,Bruce,Ernst,BERNST,590.423.4568,1991-05-21,IT_PROG,6000.0,0.0,103,60,IT,103.0,1400.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103,202,Pat,Fay,PFAY,603.123.6666,1997-08-17,MK_REP,6000.0,0.0,201,20,Marketing,201.0,1800.0
104,203,Susan,Mavris,SMAVRIS,515.123.7777,1994-06-07,HR_REP,6500.0,0.0,101,40,Human Resources,203.0,2400.0
105,204,Hermann,Baer,HBAER,515.123.8888,1994-06-07,PR_REP,10000.0,0.0,101,70,Public Relations,204.0,2700.0
106,205,Shelley,Higgins,SHIGGINS,515.123.8080,1994-06-07,AC_MGR,12000.0,0.0,101,110,,,


In [369]:
dept.columns = ["department_id","dname","manager_id","location_id"]

In [371]:
pd.merge(emp, dept, on="manager_id", how="outer")

Unnamed: 0,employee_id,first_name,last_name,email,phone_number,hire_date,job_id,salary,commission_pct,manager_id,department_id_x,department_id_y,dname,location_id
0,100.0,Steven,King,SKING,515.123.4567,1987-06-17,AD_PRES,24000.0,0.0,0,90.0,,,
1,900.0,Ram,,,,,AC_MGR,0.0,0.0,0,10.0,,,
2,101.0,Neena,Kochhar,NKOCHHAR,515.123.4568,1989-09-21,AD_VP,17000.0,0.0,100,90.0,,,
3,102.0,Lex,De Haan,LDEHAAN,515.123.4569,1993-01-13,AD_VP,17000.0,0.0,100,90.0,,,
4,114.0,Den,Raphaely,DRAPHEAL,515.127.4561,1994-12-07,PU_MAN,11000.0,0.0,100,30.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106,202.0,Pat,Fay,PFAY,603.123.6666,1997-08-17,MK_REP,6000.0,0.0,201,20.0,20.0,Marketing,1800.0
107,206.0,William,Gietz,WGIETZ,515.123.8181,1994-06-07,AC_ACCOUNT,8300.0,0.0,205,110.0,,,
108,,,,,,,,,,200,,10.0,Administration,1700.0
109,,,,,,,,,,203,,40.0,Human Resources,2400.0


In [375]:
dfemp = emp[["employee_id", "first_name", "manager_id"]]

In [381]:
jn1=pd.merge(dfemp, dfemp,left_on="manager_id", right_on="employee_id", suffixes=("emp","mgr"))

In [383]:
jn1[["employee_idemp","first_nameemp","employee_idmgr","first_namemgr"]]

Unnamed: 0,employee_idemp,first_nameemp,employee_idmgr,first_namemgr
0,101,Neena,100,Steven
1,102,Lex,100,Steven
2,114,Den,100,Steven
3,120,Matthew,100,Steven
4,121,Adam,100,Steven
...,...,...,...,...
101,177,Jack,149,Eleni
102,178,Kimberely,149,Eleni
103,179,Charles,149,Eleni
104,202,Pat,201,Michael


In [384]:
jn=emp.join(dept, on="department_id", rsuffix="dept", lsuffix="emp")

In [386]:
jn

Unnamed: 0,employee_id,first_name,last_name,email,phone_number,hire_date,job_id,salary,commission_pct,manager_idemp,department_idemp,department_iddept,dname,manager_iddept,location_id
0,100,Steven,King,SKING,515.123.4567,1987-06-17,AD_PRES,24000.0,0.0,0,90,,,,
1,101,Neena,Kochhar,NKOCHHAR,515.123.4568,1989-09-21,AD_VP,17000.0,0.0,100,90,,,,
2,102,Lex,De Haan,LDEHAAN,515.123.4569,1993-01-13,AD_VP,17000.0,0.0,100,90,,,,
3,103,Alexander,Hunold,AHUNOLD,590.423.4567,1990-01-03,IT_PROG,9000.0,0.0,102,60,,,,
4,104,Bruce,Ernst,BERNST,590.423.4568,1991-05-21,IT_PROG,6000.0,0.0,103,60,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103,203,Susan,Mavris,SMAVRIS,515.123.7777,1994-06-07,HR_REP,6500.0,0.0,101,40,,,,
104,204,Hermann,Baer,HBAER,515.123.8888,1994-06-07,PR_REP,10000.0,0.0,101,70,,,,
105,205,Shelley,Higgins,SHIGGINS,515.123.8080,1994-06-07,AC_MGR,12000.0,0.0,101,110,,,,
106,206,William,Gietz,WGIETZ,515.123.8181,1994-06-07,AC_ACCOUNT,8300.0,0.0,205,110,,,,


In [14]:
df1.pivot_table(values=["Python", "JavaScript"],index="Age")

Unnamed: 0_level_0,JavaScript,Python
Age,Unnamed: 1_level_1,Unnamed: 2_level_1
18,16446,20046
19,16791,17100
20,18942,20000
21,21780,24744
22,25704,30500
23,29000,37732
24,34372,41247
25,37810,45372
26,43515,48876
27,46823,53850


In [16]:
df1.JavaScript

0      16446
1      16791
2      18942
3      21780
4      25704
5      29000
6      34372
7      37810
8      43515
9      46823
10     49293
11     53437
12     56373
13     62375
14     66674
15     68745
16     68746
17     74583
18     79000
19     78508
20     79996
21     80403
22     83820
23     88833
24     91660
25     87892
26     96243
27     90000
28     99313
29     91660
30    102264
31    100000
32    100000
33     91660
34     99240
35    108000
36    105000
37    104000
Name: JavaScript, dtype: int64