![image-2.png](attachment:image-2.png)

# <font color='green'><b>Importing Libraries</b></font>

In [2]:
import numpy as np
import pandas as pd

# <font color='green'><b>DataFrames</b></font>

In [2]:
# Small sample frame: one row per person, labelled A-D.
data = {
    "İsim": ["Ali", "Ayşe", "Fatma", "Veli"],
    "boy cm": [170, 160, 170, 180],
    "kilo-kg": [70, 60, 60, 80],
}
df = pd.DataFrame(data, index=list("ABCD"))

# BMI = weight (kg) / height (m) squared; the height column is in cm, hence the division by 100.
df["BMI"] = df["kilo-kg"] / (df["boy cm"] / 100) ** 2

# Helper column holding 0..n-1; it is removed again in the drop() examples.
df["new"] = np.arange(len(df))
df

Unnamed: 0,İsim,boy cm,kilo-kg,BMI,new
A,Ali,170,70,24.221453,0
B,Ayşe,160,60,23.4375,1
C,Fatma,170,60,20.761246,2
D,Veli,180,80,24.691358,3


## <font color='blue'><b>Removing Columns & Rows</b></font>

The drop() method is used to remove specified rows or columns from a Pandas DataFrame.

If a column is to be removed, the axis='columns' parameter is used, and the specified column is dropped.

Similarly, if a row is to be removed, the axis='index' parameter is used, and the specified row is dropped.

This method is frequently used to remove unwanted data and perform data manipulation on a DataFrame.

In [3]:
# The default value of the axis parameter is 0 (rows), so drop() looks for "new" among the
# row labels and raises KeyError. To drop a column, axis must be set to 1 (or "columns").
# The error is caught and printed so the notebook still runs from top to bottom.
try:
    df.drop("new")
except KeyError as err:
    print(f"KeyError: {err}")


KeyError: "['new'] not found in axis"

In [26]:
# axis="columns" is the same as axis=1. drop() returns a new DataFrame; df itself is not
# changed unless the result is assigned back or inplace=True is used.
df.drop("new", axis="columns")

Unnamed: 0,İsim,boy cm,kilo-kg,BMI
A,Ali,170,70,24.221453
B,Ayşe,160,60,23.4375
C,Fatma,170,60,20.761246
D,Veli,180,80,24.691358


In [27]:
df

Unnamed: 0,İsim,boy cm,kilo-kg,BMI,new
A,Ali,170,70,24.221453,0
B,Ayşe,160,60,23.4375,1
C,Fatma,170,60,20.761246,2
D,Veli,180,80,24.691358,3


In [28]:
# inplace=True modifies the DataFrame directly, so the change is permanent:
# the "kilo-kg" column is removed from df itself and nothing is returned.
df.drop("kilo-kg", axis="columns", inplace=True)

In [29]:
df

Unnamed: 0,İsim,boy cm,BMI,new
A,Ali,170,24.221453,0
B,Ayşe,160,23.4375,1
C,Fatma,170,20.761246,2
D,Veli,180,24.691358,3


In [30]:
# axis="index" (the default, same as axis=0) removes the row labelled "C".
# The result is a new DataFrame; df itself still contains the row.
df.drop("C", axis="index")

Unnamed: 0,İsim,boy cm,BMI,new
A,Ali,170,24.221453,0
B,Ayşe,160,23.4375,1
D,Veli,180,24.691358,3


In [31]:
df

Unnamed: 0,İsim,boy cm,BMI,new
A,Ali,170,24.221453,0
B,Ayşe,160,23.4375,1
C,Fatma,170,20.761246,2
D,Veli,180,24.691358,3


In [32]:
# Several rows can be removed at once by passing a list of labels;
# inplace=True applies the removal to df itself.
df.drop(["C", "B"], axis="index", inplace=True)

In [33]:
df

Unnamed: 0,İsim,boy cm,BMI,new
A,Ali,170,24.221453,0
D,Veli,180,24.691358,3


In [34]:
# Several columns can be removed at once by passing a list of names;
# inplace=True applies the removal to df itself.
df.drop(["boy cm", "BMI"], axis="columns", inplace=True)
df

Unnamed: 0,İsim,new
A,Ali,0
D,Veli,3


In [35]:
df

Unnamed: 0,İsim,new
A,Ali,0
D,Veli,3


## <font color='blue'><b>Selecting Rows and Columns using .loc[ ] and .iloc[ ]</b></font>

loc: label-based

iloc: integer position-based

loc is short for "location" and allows access to data by row and column labels.

loc is used to access data at a specific location by using row and column labels.

iloc is short for "integer location" and allows access to data by row and column numbers.

iloc is used to access data at a specific location by using row and column numbers.

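Label or Index Independence: Plain `[]` indexing can be ambiguous, because depending on what is passed it selects columns by name or rows by slice. `loc` always selects by label and `iloc` always selects by integer position. Therefore, selections made with `iloc` are not affected by changes to the index labels, while selections made with `loc` follow the labels even if the order of the rows changes.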

Fast Performance: The iloc indexer selects data using only integer row and column positions, without looking up labels, which can make it slightly faster than the loc indexer. This may be beneficial in large datasets or performance-critical applications.

Flexibility: Plain `[]` indexing works along one axis at a time (columns by name, or rows by slice). The loc and iloc indexers accept a row selector and a column selector together, each given as a single value, a list, or a slice: loc takes labels and iloc takes integer positions. For example, `df.loc["a":"c", "col3"]` and `df.iloc[:3, 2]` both select the first three rows of a single column in one step. This flexibility is useful in data processing operations.

More Readable Code: The loc and iloc functions help make the code more readable.

In [44]:
# NOTE(review): the array displayed below is the one created with np.random.randint in the
# next cell; when the notebook is run top to bottom, `data` at this point is still the dict
# defined in the DataFrames section.
data

array([[76, 31,  4, 33],
       [96, 62, 86, 36],
       [69, 16, 66, 15],
       [54, 58, 73, 88],
       [47,  9, 54, 13]])

In [3]:
# Fix the seed so the same 20 random integers are drawn on every run.
np.random.seed(45)

# 5 rows x 4 columns of integers drawn from 1..99 (the upper bound 100 is excluded).
data = np.random.randint(1, 100, size=(5, 4))

# Row labels 101..105 are deliberately different from the positions 0..4,
# so the difference between loc (labels) and iloc (positions) is visible.
df = pd.DataFrame(
    data,
    index=range(101, 106),
    columns=[f"col{i}" for i in range(1, 5)],
)

df

Unnamed: 0,col1,col2,col3,col4
101,76,31,4,33
102,96,62,86,36
103,69,16,66,15
104,54,58,73,88
105,47,9,54,13


In [45]:
df.loc[103]

col1    69
col2    16
col3    66
col4    15
Name: 103, dtype: int32

In [46]:
df.loc[102:104]  #last value is included

Unnamed: 0,col1,col2,col3,col4
102,96,62,86,36
103,69,16,66,15
104,54,58,73,88


In [48]:
df

Unnamed: 0,col1,col2,col3,col4
101,76,31,4,33
102,96,62,86,36
103,69,16,66,15
104,54,58,73,88
105,47,9,54,13


In [47]:
df.iloc[1:4]  #iloc slices by integer position, so the end position (4) is not included, as in regular Python slicing.

Unnamed: 0,col1,col2,col3,col4
102,96,62,86,36
103,69,16,66,15
104,54,58,73,88


In [50]:
df.loc[101]  #It is accessed using the label name.

col1    76
col2    31
col3     4
col4    33
Name: 101, dtype: int32

In [4]:
# iloc expects integer positions (0-4 for this frame), not index labels, so 101 is out of
# bounds and IndexError is raised. The error is caught and printed so the notebook still
# runs from top to bottom.
try:
    df.iloc[101]
except IndexError as err:
    print(f"IndexError: {err}")

IndexError: single positional indexer is out-of-bounds

In [52]:
df

Unnamed: 0,col1,col2,col3,col4
101,76,31,4,33
102,96,62,86,36
103,69,16,66,15
104,54,58,73,88
105,47,9,54,13


In [55]:
# Replace the numeric row labels with the letters a-e.
df.index = list("abcde")
df

Unnamed: 0,col1,col2,col3,col4
a,76,31,4,33
b,96,62,86,36
c,69,16,66,15
d,54,58,73,88
e,47,9,54,13


In [57]:
df.iloc[1:4]

Unnamed: 0,col1,col2,col3,col4
b,96,62,86,36
c,69,16,66,15
d,54,58,73,88


In [58]:
df.loc["b": "d"]

Unnamed: 0,col1,col2,col3,col4
b,96,62,86,36
c,69,16,66,15
d,54,58,73,88


In [60]:
df.loc["a": "c", "col3"]

a     4
b    86
c    66
Name: col3, dtype: int32

In [61]:
df.loc["a": "c"]  #There is no comma here, so only rows are selected and every column is returned. In general, the left side of the comma refers to rows, while the right side refers to columns. If only a column name is written on the right side (without square brackets), the result is returned as a Pandas Series.

Unnamed: 0,col1,col2,col3,col4
a,76,31,4,33
b,96,62,86,36
c,69,16,66,15


In [62]:
df.loc["a": "c", ["col3"]]  #If the column name is written inside square brackets ([]), the result is returned as a Pandas DataFrame.

Unnamed: 0,col3
a,4
b,86
c,66


In [64]:
df.iloc[:3, [2]]

Unnamed: 0,col3
a,4
b,86
c,66


In [65]:
df

Unnamed: 0,col1,col2,col3,col4
a,76,31,4,33
b,96,62,86,36
c,69,16,66,15
d,54,58,73,88
e,47,9,54,13


In [72]:
df.loc["d": "e", "col2":"col3"]

Unnamed: 0,col2,col3
d,58,73
e,9,54


In [75]:
df.iloc[3:,1:3]

Unnamed: 0,col2,col3
d,58,73
e,9,54


In [88]:
# Select rows and columns in a single .loc call instead of chaining two indexing operations:
# chaining builds an intermediate DataFrame first and cannot be used reliably for assignment.
df.loc[["d", "e"], ["col2", "col3"]]

Unnamed: 0,col2,col3
d,58,73
e,9,54


In [95]:
import seaborn as sns

In [96]:
sns.get_dataset_names()

['anagrams',
 'anscombe',
 'attention',
 'brain_networks',
 'car_crashes',
 'diamonds',
 'dots',
 'dowjones',
 'exercise',
 'flights',
 'fmri',
 'geyser',
 'glue',
 'healthexp',
 'iris',
 'mpg',
 'penguins',
 'planets',
 'seaice',
 'taxis',
 'tips',
 'titanic',
 'anagrams',
 'anagrams',
 'anscombe',
 'anscombe',
 'attention',
 'attention',
 'brain_networks',
 'brain_networks',
 'car_crashes',
 'car_crashes',
 'diamonds',
 'diamonds',
 'dots',
 'dots',
 'dowjones',
 'dowjones',
 'exercise',
 'exercise',
 'flights',
 'flights',
 'fmri',
 'fmri',
 'geyser',
 'geyser',
 'glue',
 'glue',
 'healthexp',
 'healthexp',
 'iris',
 'iris',
 'mpg',
 'mpg',
 'penguins',
 'penguins',
 'planets',
 'planets',
 'seaice',
 'seaice',
 'taxis',
 'taxis',
 'tips',
 'tips',
 'titanic',
 'titanic',
 'anagrams',
 'anscombe',
 'attention',
 'brain_networks',
 'car_crashes',
 'diamonds',
 'dots',
 'dowjones',
 'exercise',
 'flights',
 'fmri',
 'geyser',
 'glue',
 'healthexp',
 'iris',
 'mpg',
 'penguins',
 'plan

In [97]:
len(sns.get_dataset_names())

88

In [98]:
df = sns.load_dataset("titanic")
df

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [100]:
df1 = df.copy()

In [99]:
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [101]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 15 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   survived     891 non-null    int64   
 1   pclass       891 non-null    int64   
 2   sex          891 non-null    object  
 3   age          714 non-null    float64 
 4   sibsp        891 non-null    int64   
 5   parch        891 non-null    int64   
 6   fare         891 non-null    float64 
 7   embarked     889 non-null    object  
 8   class        891 non-null    category
 9   who          891 non-null    object  
 10  adult_male   891 non-null    bool    
 11  deck         203 non-null    category
 12  embark_town  889 non-null    object  
 13  alive        891 non-null    object  
 14  alone        891 non-null    bool    
dtypes: bool(2), category(2), float64(2), int64(4), object(5)
memory usage: 80.7+ KB


In [103]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
survived,891.0,0.383838,0.486592,0.0,0.0,0.0,1.0,1.0
pclass,891.0,2.308642,0.836071,1.0,2.0,3.0,3.0,3.0
age,714.0,29.699118,14.526497,0.42,20.125,28.0,38.0,80.0
sibsp,891.0,0.523008,1.102743,0.0,0.0,0.0,1.0,8.0
parch,891.0,0.381594,0.806057,0.0,0.0,0.0,0.0,6.0
fare,891.0,32.204208,49.693429,0.0,7.9104,14.4542,31.0,512.3292


In [104]:
df.describe(include="object").T

Unnamed: 0,count,unique,top,freq
sex,891,2,male,577
embarked,889,3,S,644
who,891,3,man,537
embark_town,889,3,Southampton,644
alive,891,2,no,549


In [105]:
df.describe(include="all").T

Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
survived,891.0,,,,0.383838,0.486592,0.0,0.0,0.0,1.0,1.0
pclass,891.0,,,,2.308642,0.836071,1.0,2.0,3.0,3.0,3.0
sex,891.0,2.0,male,577.0,,,,,,,
age,714.0,,,,29.699118,14.526497,0.42,20.125,28.0,38.0,80.0
sibsp,891.0,,,,0.523008,1.102743,0.0,0.0,0.0,1.0,8.0
parch,891.0,,,,0.381594,0.806057,0.0,0.0,0.0,0.0,6.0
fare,891.0,,,,32.204208,49.693429,0.0,7.9104,14.4542,31.0,512.3292
embarked,889.0,3.0,S,644.0,,,,,,,
class,891.0,3.0,Third,491.0,,,,,,,
who,891.0,3.0,man,537.0,,,,,,,


In [106]:
df.survived.value_counts()

survived
0    549
1    342
Name: count, dtype: int64

In [107]:
df.survived.value_counts(normalize=True)

survived
0    0.616162
1    0.383838
Name: proportion, dtype: float64

In [109]:
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [110]:
# Without axis=1, drop() looks for "alive" among the row labels and raises KeyError.
# The error is caught and printed so the notebook still runs from top to bottom.
try:
    df.drop("alive")
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: "['alive'] not found in axis"

In [112]:
df.drop("alive", axis= 1)  #Returns a new DataFrame without the 'alive' column; df itself is not changed because inplace=True was not used.

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,True


In [113]:
# inplace=True removes the "alive" column from df itself.
df.drop("alive", axis="columns", inplace=True)

In [114]:
df

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,True


In [115]:
df.iloc[:100]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0,3,male,,0,0,8.0500,S,Third,man,True,,Southampton,True
96,0,1,male,71.0,0,0,34.6542,C,First,man,True,A,Cherbourg,True
97,1,1,male,23.0,0,1,63.3583,C,First,man,True,D,Cherbourg,False
98,1,2,female,34.0,0,1,23.0000,S,Second,woman,False,,Southampton,False


In [116]:
df.iloc[:100, :2]

Unnamed: 0,survived,pclass
0,0,3
1,1,1
2,1,3
3,1,1
4,0,3
...,...,...
95,0,3
96,0,1
97,1,1
98,1,2


## <font color='blue'><b>Conditional Selection</b></font>

In [90]:
# `df` was rebound to the Titanic dataset in the previous section, so the small random
# frame used in the conditional-selection examples is rebuilt here. The same seed and
# the same a-e row labels are used, which makes this section runnable from top to bottom.
np.random.seed(45)
df = pd.DataFrame(
    np.random.randint(1, 100, 20).reshape(5, 4),
    columns=["col1", "col2", "col3", "col4"],
    index="a b c d e".split(),
)
df

Unnamed: 0,col1,col2,col3,col4
a,76,31,4,33
b,96,62,86,36
c,69,16,66,15
d,54,58,73,88
e,47,9,54,13


In [93]:
df > 50

Unnamed: 0,col1,col2,col3,col4
a,True,False,False,False
b,True,True,True,False
c,True,False,True,False
d,True,True,True,True
e,False,False,True,False


In [94]:
df[df > 50]  #Boolean mask with the same shape as df: every row is kept; values that satisfy the condition are shown and all other values become NaN (which is why the columns are displayed as floats).

Unnamed: 0,col1,col2,col3,col4
a,76.0,,,
b,96.0,62.0,86.0,
c,69.0,,66.0,
d,54.0,58.0,73.0,88.0
e,,,54.0,
