# Pandas Where
- `DataFrame.where` keeps the values where your condition is True and replaces the values where it is False.
- It is useful when some values do not meet a criterion and need replacing.
- df.where(조건, 조건을 충족하지 못할 때 채울 값)

In [1]:
import pandas as pd
import numpy as np

# Fixed scores instead of a fresh np.random.randint(0, 100, (4, 3)) draw:
# these are the values shown in the recorded output, so the outputs that
# depend on df are reproduced on Restart & Run All. The examples below need
# some scores to be 90 or above (Bob/Test2 and Patty/Test1 here).
df = pd.DataFrame(data=[[55, 94, 54],
                        [30, 24, 86],
                        [47, 69, 44],
                        [98, 69, 27]],
                  columns=('Test1', 'Test2', 'Test3'),
                  index=['Bob', 'Sally', 'Frank', 'Patty']
                 )
df

Unnamed: 0,Test1,Test2,Test3
Bob,55,94,54
Sally,30,24,86
Frank,47,69,44
Patty,98,69,27


In [2]:
# Goal: replace the scores that are 90 or above with "A+".
# Build the boolean mask first: True where a score is below 90, False where it is 90 or above.
df<90

Unnamed: 0,Test1,Test2,Test3
Bob,True,False,True
Sally,True,True,True
Frank,True,True,True
Patty,False,True,True


In [3]:
# Keep the values where the mask is True (score < 90); replace the rest with "A+".
df.where(df<90, "A+")

Unnamed: 0,Test1,Test2,Test3
Bob,55,A+,54
Sally,30,24,86
Frank,47,69,44
Patty,A+,69,27


In [5]:
# Replace all values in a row based on one column's condition (Test2 < 90).
# False marks the rows where Test2 is 90 or above.
df.Test2< 90

Bob      False
Sally     True
Frank     True
Patty     True
Name: Test2, dtype: bool

# with lambda Function

In [6]:
# A Series condition is applied per row: every column of a row whose Test2 is
# 90 or above is replaced with "A+" (Bob's row in the output shown).
df.where(df["Test2"]<90, "A+")

Unnamed: 0,Test1,Test2,Test3
Bob,A+,A+,A+
Sally,30,24,86
Frank,47,69,44
Patty,98,69,27


In [9]:
# `other` may be a callable: its result (each value + 100) is used wherever
# the condition is False, e.g. 94 -> 194 in the output shown.
df.where(df<90, lambda x: x+100)

Unnamed: 0,Test1,Test2,Test3
Bob,55,194,54
Sally,30,24,86
Frank,47,69,44
Patty,198,69,27


In [None]:
# Using another DataFrame as the replacement values

In [12]:
# Fixed values instead of a fresh np.random.randint(0, 100, (4, 3)) draw:
# these are the values shown in this cell's recorded output, so the cell
# produces the same frame on every run. df2 has the same columns and index
# as df and supplies the replacement values further down.
df2 = pd.DataFrame(data=[[61, 67, 33],
                         [35, 63, 16],
                         [51, 44, 1],
                         [21, 69, 76]],
                  columns=('Test1', 'Test2', 'Test3'),
                  index=['Bob', 'Sally', 'Frank', 'Patty']
                 )
df2

Unnamed: 0,Test1,Test2,Test3
Bob,61,67,33
Sally,35,63,16
Frank,51,44,1
Patty,21,69,76


In [13]:
# Show df again so it can be compared with df2 above.
df

Unnamed: 0,Test1,Test2,Test3
Bob,55,94,54
Sally,30,24,86
Frank,47,69,44
Patty,98,69,27


In [14]:
# Where df is 90 or above (condition False), take the value from df2 at the
# same row/column position; values below 90 are kept from df.
df.where(df<90, df2)

Unnamed: 0,Test1,Test2,Test3
Bob,55,67,54
Sally,30,24,86
Frank,47,69,44
Patty,21,69,27


In [14]:
import pandas as pd

# Load the NBA player CSV into `data` and display it.
# NOTE(review): absolute, machine-specific Windows path — point it at your own copy of nba.csv.
data = pd.read_csv("C:\\LECTRUE\\dataSet\\nba.csv")
data

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...,...
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0


In [15]:
# Count the missing values in each column.
data.isnull().sum()

Name         1
Team         1
Number       1
Position     1
Age          1
Height       1
Weight       1
College     85
Salary      12
dtype: int64

In [5]:
# Select the rows whose Team is missing.
data[data.Team.isnull()]

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
457,,,,,,,,,


In [16]:
# Inspect the row labelled 457 (every field is NaN in the output shown).
data.loc[457,:]

Name        NaN
Team        NaN
Number      NaN
Position    NaN
Age         NaN
Height      NaN
Weight      NaN
College     NaN
Salary      NaN
Name: 457, dtype: object

In [17]:
# Sort the rows by team name. The sorted copy is assigned back to `data`
# rather than using inplace=True, so the cell reads as a plain expression
# and can be chained.
data = data.sort_values("Team")
data

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
317,Lamar Patterson,Atlanta Hawks,13.0,SG,24.0,6-5,225.0,Pittsburgh,525093.0
309,Kent Bazemore,Atlanta Hawks,24.0,SF,26.0,6-5,201.0,Old Dominion,2000000.0
310,Tim Hardaway Jr.,Atlanta Hawks,10.0,SG,24.0,6-6,205.0,Michigan,1304520.0
311,Kirk Hinrich,Atlanta Hawks,12.0,SG,35.0,6-4,190.0,Kansas,2854940.0
312,Al Horford,Atlanta Hawks,15.0,C,30.0,6-10,245.0,Florida,12000000.0
...,...,...,...,...,...,...,...,...,...
369,Bradley Beal,Washington Wizards,3.0,SG,22.0,6-5,207.0,Florida,5694674.0
368,Alan Anderson,Washington Wizards,6.0,SG,33.0,6-6,220.0,Michigan State,4000000.0
382,John Wall,Washington Wizards,2.0,PG,25.0,6-4,195.0,Kentucky,15851950.0
370,Jared Dudley,Washington Wizards,1.0,SF,30.0,6-7,225.0,Boston College,4375000.0


In [18]:
# Keep the rows whose Team is "Atlanta Hawks"; with no replacement value given,
# every other row becomes all-NaN. The result is assigned back to `data`
# instead of using inplace=True, so `data` still holds the masked frame for
# the following cells.
data = data.where(data["Team"] == "Atlanta Hawks")

In [19]:
# `data` now holds the masked result: rows that are not Atlanta Hawks are all NaN.
data

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
317,Lamar Patterson,Atlanta Hawks,13.0,SG,24.0,6-5,225.0,Pittsburgh,525093.0
309,Kent Bazemore,Atlanta Hawks,24.0,SF,26.0,6-5,201.0,Old Dominion,2000000.0
310,Tim Hardaway Jr.,Atlanta Hawks,10.0,SG,24.0,6-6,205.0,Michigan,1304520.0
311,Kirk Hinrich,Atlanta Hawks,12.0,SG,35.0,6-4,190.0,Kansas,2854940.0
312,Al Horford,Atlanta Hawks,15.0,C,30.0,6-10,245.0,Florida,12000000.0
...,...,...,...,...,...,...,...,...,...
369,,,,,,,,,
368,,,,,,,,,
382,,,,,,,,,
370,,,,,,,,,


In [20]:
# After the where() call, only 'Atlanta Hawks' and NaN remain in the Team column.
data.Team.unique()

array(['Atlanta Hawks', nan], dtype=object)

In [21]:
# Reload the CSV: the earlier where() call replaced `data` with the masked frame.
# NOTE(review): absolute, machine-specific Windows path — point it at your own copy of nba.csv.
data = pd.read_csv("C:\\LECTRUE\\dataSet\\nba.csv")
data

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...,...
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0


In [22]:
# Sort the rows by team name; assign the sorted copy back instead of using inplace=True.
data = data.sort_values("Team")

In [23]:
# Two row-wise boolean masks over `data`:
#   con1 - Team is exactly "Atlanta Hawks"
#   con2 - Age is greater than 24
con1 = data["Team"].eq("Atlanta Hawks")
con2 = data["Age"].gt(24)

In [24]:
# Keep only the rows that satisfy both masks; every other row becomes all-NaN.
# The result is assigned back to `data` instead of using inplace=True.
data = data.where(con1 & con2)

In [25]:
# Only Atlanta Hawks players older than 24 keep their values; all other rows are NaN.
data

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
317,,,,,,,,,
309,Kent Bazemore,Atlanta Hawks,24.0,SF,26.0,6-5,201.0,Old Dominion,2000000.0
310,,,,,,,,,
311,Kirk Hinrich,Atlanta Hawks,12.0,SG,35.0,6-4,190.0,Kansas,2854940.0
312,Al Horford,Atlanta Hawks,15.0,C,30.0,6-10,245.0,Florida,12000000.0
...,...,...,...,...,...,...,...,...,...
369,,,,,,,,,
368,,,,,,,,,
382,,,,,,,,,
370,,,,,,,,,


# Numpy where 
- np.where(조건, 참일때 값, 거짓일때 값)

In [2]:
import pandas as pd
import numpy as np
# Small demo frame for np.where: two integer columns, "a" and "b", with five rows.
df = pd.DataFrame(
    dict(
        a=[1, 2, 3, 4, 5],
        b=[10, 20, 30, 40, 50],
    )
)
df

Unnamed: 0,a,b
0,1,10
1,2,20
2,3,30
3,4,40
4,5,50


In [3]:
# Label each row by whether column a is below 3 or is 3 and above
# ("3미만" means "below 3", "3이상" means "3 or more"; the new column's name means "is it below 3?").

df["3미만인가"] = np.where(df["a"]<3, "3미만", "3이상")
df

Unnamed: 0,a,b,3미만인가
0,1,10,3미만
1,2,20,3미만
2,3,30,3이상
3,4,40,3이상
4,5,50,3이상


출처:https://dataindependent.com/pandas/pandas-where-pd-dataframe-where/