## Pandas

In [2]:
# pip install pandas
import pandas as pd
import numpy as np

### data

### Series

In [3]:
# A Series is a one-dimensional labelled array; with no explicit index,
# pandas assigns the default integer labels 0..n-1.
data = [1, 2, 4, 18, "python"]
s1 = pd.Series(data)
# Mixing ints with a str makes pandas fall back to the generic object dtype.
print(type(s1))
s1

<class 'pandas.core.series.Series'>


0         1
1         2
2         4
3        18
4    python
dtype: object

In [3]:
# Same values as the first Series, but with explicit string labels
# instead of the default 0..n-1 index.
index_ = list("abcdf")
data = [1, 2, 4, 18, "python"]
s2 = pd.Series(data=data, index=index_)
s2

a         1
b         2
c         4
d        18
f    python
dtype: object

In [4]:
data = [1, 2, 4, 18, "python"]
index_ = ['a', 'b', 'c', 'd', 'f']
s2 = pd.Series(data, index=index_)
# The index holds string labels, so `s2[0]` would rely on the deprecated
# integer-position fallback of `[]`; `.iloc` asks for position 0 explicitly.
s2.iloc[0]

1

In [5]:
# Two integer values labelled "x" and "y", given as parallel values/labels.
s2 = pd.Series([20, 40], index=["x", "y"])
s2

x    20
y    40
dtype: int64

In [6]:
# Arithmetic between Series (+, -, *, /) aligns on index labels, not position.
s_1 = pd.Series({'a': 1, 'b': 2, 'c': 3})
s_2 = pd.Series({'a': 4, 'f': 5, 'c': 6, 'b': 8})
# 'f' exists only in s_2, so its sum is NaN and the result is float.
s_3 = s_1.add(s_2)
s_3

a     5.0
b    10.0
c     9.0
f     NaN
dtype: float64

### DataFrame

1. Create DataFrame

In [5]:
# Column-oriented construction: each keyword is a column name,
# each list holds that column's values.
data = dict(
    Name=['Ali', 'sara', 'Diana'],
    Age=[19, 29, 27],
    City=['Shiraz', 'Tehran', 'Shiraz'],
)
# Explicit row labels replace the default 0..n-1 index.
df = pd.DataFrame(data=data, index=list("abc"))
df

Unnamed: 0,Name,Age,City
a,Ali,19,Shiraz
b,sara,29,Tehran
c,Diana,27,Shiraz


In [6]:
type(df)

pandas.core.frame.DataFrame

2. Access Data

In [7]:
df

Unnamed: 0,Name,Age,City
a,Ali,19,Shiraz
b,sara,29,Tehran
c,Diana,27,Shiraz


In [8]:
type(df['Age'])

pandas.core.series.Series

In [9]:
df['Age']

a    19
b    29
c    27
Name: Age, dtype: int64

In [10]:
df[['City', 'Age']]

Unnamed: 0,City,Age
a,Shiraz,19
b,Tehran,29
c,Shiraz,27


In [11]:
df['Age'] > 20

a    False
b     True
c     True
Name: Age, dtype: bool

In [12]:
df[df['Age'] > 20]

Unnamed: 0,Name,Age,City
b,sara,29,Tehran
c,Diana,27,Shiraz


In [13]:
# Select the matching rows and the column in a single .loc call rather than
# chaining df[mask]['City'], which indexes an intermediate frame.
df.loc[df['Age'] > 20, 'City']

b    Tehran
c    Shiraz
Name: City, dtype: object

## loc, iloc

In [14]:
# Wider six-row frame used for the loc / iloc examples that follow.
data = dict(
    Name=['Ali', 'sara', 'Diana', 'a', 'b', 'c'],
    Age=[19, 29, 27, 12, 3, 5],
    City=['Shiraz', 'Tehran', 'Shiraz', 'c1', 'c2', 'c3'],
    Name1=['Ali', 'sara', 'Diana', 'a', 'b', 'c'],
    Age1=[19, 29, 27, 12, 3, 5],
    City1=['Shiraz', 'Tehran', 'Shiraz', 'c1', 'c2', 'c3'],
)
# Rows are labelled "a" through "f".
df = pd.DataFrame(data=data, index=list("abcdef"))
df

Unnamed: 0,Name,Age,City,Name1,Age1,City1
a,Ali,19,Shiraz,Ali,19,Shiraz
b,sara,29,Tehran,sara,29,Tehran
c,Diana,27,Shiraz,Diana,27,Shiraz
d,a,12,c1,a,12,c1
e,b,3,c2,b,3,c2
f,c,5,c3,c,5,c3


In [15]:
# iloc selects by integer position; the end of each slice is exclusive.
# rows    ====> positions 1:3 (labels b, c)
# columns ====> positions 1:3 (Age, City)
df.iloc[1:3, 1:3]

Unnamed: 0,Age,City
b,29,Tehran
c,27,Shiraz


In [16]:
# loc selects by label; both endpoints of a label slice are included,
# so this returns rows b..c and columns Age..City.
df.loc["b":"c", "Age":"City"]

Unnamed: 0,Age,City
b,29,Tehran
c,27,Shiraz


In [20]:
# A single row label (instead of a slice) returns that row as a Series
# whose index is the selected column names.
df.loc["a", "Age":"City"]

Age         19
City    Shiraz
Name: a, dtype: object

In [21]:
df

Unnamed: 0,Name,Age,City,Name1,Age1,City1
a,Ali,19,Shiraz,Ali,19,Shiraz
b,sara,29,Tehran,sara,29,Tehran
c,Diana,27,Shiraz,Diana,27,Shiraz
d,a,12,c1,a,12,c1
e,b,3,c2,b,3,c2
f,c,5,c3,c,5,c3


In [22]:
# Every second row (step 2), restricted to an explicit list of columns.
df.loc[::2, ["Name", "Age", "Age1"]]

Unnamed: 0,Name,Age,Age1
a,Ali,19,19
c,Diana,27,27
e,b,3,3


3. Add

In [25]:
# Two-column frame with the default 0..n-1 row index.
data = dict(Name=['Ali', 'sara', 'Diana'], Age=[19, 29, 27])
df = pd.DataFrame(data=data)
df

Unnamed: 0,Name,Age
0,Ali,19
1,sara,29
2,Diana,27


In [27]:
# add column: assigning a list to a key that does not exist yet creates the
# column; the list supplies one value per existing row.
df["City"] = ["Shiraz", "Shiraz", "Tehran"]
df

Unnamed: 0,Name,Age,City
0,Ali,19,Shiraz
1,sara,29,Shiraz
2,Diana,27,Tehran


In [28]:
# add row: assigning to a label that is not in the index appends a row.
# len(df) is an unused label here only because the index is the default
# 0..n-1; re-running this cell appends one more row each time.
df.loc[len(df)] = ["Sara", 18, "Tehran"]
df

Unnamed: 0,Name,Age,City
0,Ali,19,Shiraz
1,sara,29,Shiraz
2,Diana,27,Tehran
3,Sara,18,Tehran


4. Remove

In [31]:
# Remove both columns and rebind `df` rather than mutating with inplace=True;
# drop() returns the reduced frame, which keeps the statement chainable.
df = df.drop(columns=["Age", "City"])

In [32]:
df

Unnamed: 0,Name
0,Ali
1,sara
2,Diana
3,Sara


In [33]:
# Remove the row labelled 0 and rebind `df` rather than mutating with
# inplace=True; the remaining rows keep their original labels.
df = df.drop(index=0)

In [34]:
df

Unnamed: 0,Name
1,sara
2,Diana
3,Sara


## CSV

In [35]:
df = pd.read_csv("CoffeeAndCode.csv")

In [36]:
df

Unnamed: 0,CodingHours,CoffeeCupsPerDay,CoffeeTime,CodingWithoutCoffee,CoffeeType,CoffeeSolveBugs,Gender,Country,AgeRange
0,8,2,Before coding,Yes,Caffè latte,Sometimes,Female,Lebanon,18 to 29
1,3,2,Before coding,Yes,Americano,Yes,Female,Lebanon,30 to 39
2,5,3,While coding,No,Nescafe,Yes,Female,Lebanon,18 to 29
3,8,2,Before coding,No,Nescafe,Yes,Male,Lebanon,
4,10,3,While coding,Sometimes,Turkish,No,Male,Lebanon,18 to 29
...,...,...,...,...,...,...,...,...,...
95,6,2,Before coding,Yes,Nescafe,Yes,Male,Lebanon,18 to 29
96,4,1,Before coding,Sometimes,Nescafe,Sometimes,Female,Lebanon,18 to 29
97,10,3,Before coding,Yes,Cappuccino,Yes,Male,Lebanon,Under 18
98,2,2,While coding,Sometimes,Espresso (Short Black),Sometimes,Female,Lebanon,18 to 29


In [37]:
type(df)

pandas.core.frame.DataFrame

In [38]:
df.columns

Index(['CodingHours', 'CoffeeCupsPerDay', 'CoffeeTime', 'CodingWithoutCoffee',
       'CoffeeType', 'CoffeeSolveBugs', 'Gender', 'Country', 'AgeRange'],
      dtype='object')

In [40]:
df.head()

Unnamed: 0,CodingHours,CoffeeCupsPerDay,CoffeeTime,CodingWithoutCoffee,CoffeeType,CoffeeSolveBugs,Gender,Country,AgeRange
0,8,2,Before coding,Yes,Caffè latte,Sometimes,Female,Lebanon,18 to 29
1,3,2,Before coding,Yes,Americano,Yes,Female,Lebanon,30 to 39
2,5,3,While coding,No,Nescafe,Yes,Female,Lebanon,18 to 29
3,8,2,Before coding,No,Nescafe,Yes,Male,Lebanon,
4,10,3,While coding,Sometimes,Turkish,No,Male,Lebanon,18 to 29


In [42]:
df.tail(4)

Unnamed: 0,CodingHours,CoffeeCupsPerDay,CoffeeTime,CodingWithoutCoffee,CoffeeType,CoffeeSolveBugs,Gender,Country,AgeRange
96,4,1,Before coding,Sometimes,Nescafe,Sometimes,Female,Lebanon,18 to 29
97,10,3,Before coding,Yes,Cappuccino,Yes,Male,Lebanon,Under 18
98,2,2,While coding,Sometimes,Espresso (Short Black),Sometimes,Female,Lebanon,18 to 29
99,10,4,Before coding,Sometimes,Double Espresso (Doppio),Sometimes,Male,Lebanon,18 to 29


In [45]:
# shape is a (row count, column count) tuple, unpacked into two names.
rows, columns = df.shape
columns

9

In [46]:
df.describe()

Unnamed: 0,CodingHours,CoffeeCupsPerDay
count,100.0,100.0
mean,6.41,2.89
std,2.644205,1.613673
min,1.0,1.0
25%,4.0,2.0
50%,7.0,2.5
75%,8.0,4.0
max,10.0,8.0


In [47]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 9 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   CodingHours          100 non-null    int64 
 1   CoffeeCupsPerDay     100 non-null    int64 
 2   CoffeeTime           100 non-null    object
 3   CodingWithoutCoffee  100 non-null    object
 4   CoffeeType           99 non-null     object
 5   CoffeeSolveBugs      100 non-null    object
 6   Gender               100 non-null    object
 7   Country              100 non-null    object
 8   AgeRange             98 non-null     object
dtypes: int64(2), object(7)
memory usage: 7.2+ KB


In [49]:
# Per-column count of missing values: isnull() marks each cell True/False
# and sum() adds up the True entries column by column.
df.isnull().sum()

CodingHours            0
CoffeeCupsPerDay       0
CoffeeTime             0
CodingWithoutCoffee    0
CoffeeType             1
CoffeeSolveBugs        0
Gender                 0
Country                0
AgeRange               2
dtype: int64

In [50]:
df.columns

Index(['CodingHours', 'CoffeeCupsPerDay', 'CoffeeTime', 'CodingWithoutCoffee',
       'CoffeeType', 'CoffeeSolveBugs', 'Gender', 'Country', 'AgeRange'],
      dtype='object')

In [52]:
# Number of distinct values; NaN is not counted, which is why this reports 5
# while unique() on the same column lists six entries including nan.
df["AgeRange"].nunique()

5

In [53]:
df["AgeRange"].unique()

array(['18 to 29', '30 to 39', nan, '40 to 49', 'Under 18', '50 to 59'],
      dtype=object)

In [54]:
# Frequency of each category, most common first; missing values are left out
# (the counts add up to the 98 non-null entries of this column).
df["AgeRange"].value_counts()

18 to 29    60
30 to 39    29
40 to 49     6
Under 18     2
50 to 59     1
Name: AgeRange, dtype: int64

In [55]:
df.nunique()

CodingHours            10
CoffeeCupsPerDay        8
CoffeeTime              7
CodingWithoutCoffee     3
CoffeeType              8
CoffeeSolveBugs         3
Gender                  2
Country                 1
AgeRange                5
dtype: int64

In [57]:
df["CoffeeTime"].unique()


array(['Before coding', 'While coding', 'Before and while coding',
       'In the morning', 'All the time', 'After coding',
       'No specific time'], dtype=object)

In [58]:
df["CoffeeTime"].value_counts()

While coding               61
Before coding              25
Before and while coding     4
All the time                4
In the morning              3
After coding                2
No specific time            1
Name: CoffeeTime, dtype: int64

#### Mapping

In [65]:
data = {
    'City': ['New York', 'London', 'Chicago', 'Manchester'],
    'Country': ['US', 'UK', 'US', 'UK']
}
df = pd.DataFrame(data)

# Series.map turns every value that is missing from the dict into NaN, so the
# mapping has to list each country code that occurs in the column.
country_mapping = {
    "US": "United States",
    "UK": "England"
}
df["Country"] = df["Country"].map(country_mapping)

# Series.replace leaves values without an entry untouched, so a partial
# mapping is enough when only some cities are renamed.
city_mapping = {
    "New York": "NYC"
}
df["City"] = df["City"].replace(city_mapping)
df

Unnamed: 0,City,Country
0,NYC,United States
1,London,England
2,Chicago,United States
3,Manchester,England


In [67]:
data = {
    'Product': ['Laptop', 'Phone', 'Tablet', 'Monitor', 'Headphones'],
    'Price': [1000, np.nan, 600, np.nan, 150],
    'Stock': ['A', 'B', np.nan, 'C', np.nan]
}

df = pd.DataFrame(data)

# Per-column fill values: missing prices become 0 and missing stock labels
# get a placeholder string. fillna returns the filled frame, so `df` is
# rebound instead of being mutated with inplace=True.
df = df.fillna({"Price": 0, "Stock": "Unknown"})
df

Unnamed: 0,Product,Price,Stock
0,Laptop,1000.0,A
1,Phone,0.0,B
2,Tablet,600.0,Unknowm
3,Monitor,0.0,C
4,Headphones,150.0,Unknowm


In [72]:
data = {
    'Product': ['Laptop', 'Phone', 'Tablet', 'Monitor', 'Headphones'],
    'Price': [np.nan, np.nan, 600, np.nan, np.nan],
    'Stock': ['A', 'B', np.nan, 'C', np.nan]
}

df = pd.DataFrame(data)
# Backward fill: each gap takes the next valid value further down the column;
# gaps with no later value (the trailing rows) stay missing.
# DataFrame.bfill() replaces the deprecated fillna(method="bfill").
df = df.bfill()
df

Unnamed: 0,Product,Price,Stock
0,Laptop,600.0,A
1,Phone,600.0,B
2,Tablet,600.0,C
3,Monitor,,C
4,Headphones,,


In [73]:
data = {
    'Product': ['Laptop', 'Phone', 'Tablet', 'Monitor', 'Headphones'],
    'Price': [np.nan, np.nan, 600, np.nan, np.nan],
    'Stock': ['A', 'B', np.nan, 'C', np.nan]
}

df = pd.DataFrame(data)
# Forward fill: each gap takes the last valid value above it; gaps with no
# earlier value (the leading rows) stay missing.
# DataFrame.ffill() replaces the deprecated fillna(method="ffill").
df = df.ffill()
df

Unnamed: 0,Product,Price,Stock
0,Laptop,,A
1,Phone,,B
2,Tablet,600.0,B
3,Monitor,600.0,C
4,Headphones,600.0,C


In [88]:
data = {
    'Product': ['Laptop', 'Phone', 'Tablet', 'Monitor', 'Headphones'],
    'Price': [np.nan, np.nan, 600, np.nan, np.nan],
    'Stock': ['A', 'B', np.nan, 'C', "C"]
}

df = pd.DataFrame(data)
# Fill both columns through one DataFrame-level fillna. Calling
# fillna(..., inplace=True) on df["col"] acts on an intermediate object and is
# not guaranteed to write back into df (it does not under copy-on-write).
df = df.fillna({
    "Price": df["Price"].mean(),     # mean of the non-missing prices
    "Stock": df["Stock"].mode()[0],  # most frequent stock label
})
df

Unnamed: 0,Product,Price,Stock
0,Laptop,600.0,A
1,Phone,600.0,B
2,Tablet,600.0,C
3,Monitor,600.0,C
4,Headphones,600.0,C


In [81]:
type(df["Price"].mean())

float

In [89]:
df

Unnamed: 0,Product,Price,Stock
0,Laptop,600.0,A
1,Phone,600.0,B
2,Tablet,600.0,C
3,Monitor,600.0,C
4,Headphones,600.0,C


In [90]:
# Scale every price down by a factor of 100, overwriting the column.
df["Price"] = df["Price"].div(100)
df

Unnamed: 0,Product,Price,Stock
0,Laptop,6.0,A
1,Phone,6.0,B
2,Tablet,6.0,C
3,Monitor,6.0,C
4,Headphones,6.0,C


In [91]:
df.to_csv("test.csv", index=False)