<img src="https://cdn.activestate.com/wp-content/uploads/2020/10/everything-about-pandas-1024x512.png" /> 

## 1) Setup

Import pandas (as `pd`) and check which version is installed, so the results below can be reproduced with the same release.


In [None]:
# pandas is imported under its conventional alias; every later cell refers to it as `pd`.
import pandas as pd

# Bare last expression: the notebook echoes the installed pandas version string.
pd.__version__

In [None]:
# Same version string as the previous cell, this time written to stdout via print().
print(pd.__version__)

## 2) Core Objects: `Series` and `DataFrame`

- A **Series** is a 1D labeled array (values + index).
- A **DataFrame** is a 2D table of columns (each a Series) sharing the same index.


In [None]:
# Series: a 1D labeled array built from a list of values plus an explicit index.
s = pd.Series([10, 20, 30], index=['a', 'b', 'c'], dtype='int64')
# The parts of a Series are exposed as s.values, s.index and s.dtype.
# Ending the cell on the bare name renders it as the cell output, so the cell
# does not depend on the IPython-injected display() helper.
s

In [None]:
# Build the DataFrame column by column: each keyword is a column name and
# each list holds that column's values, one entry per row.
df = pd.DataFrame(
    dict(
        city=["Baku", "Tbilisi", "Yerevan", "Baku", "Tbilisi", "Harat", "Kabul"],
        year=[2023, 2023, 2023, 2024, 2024, 2025, 2025],
        sales=[100, 80, 75, 120, 95, 100, 90],
        price=[1000, 1500, 500, 2000, 200, 1000, 1500],
    )
)
df

<div class="alert alert-block alert-success">
<h2 style="color:royalblue"> pd.read_csv() </h2>
</div>

In [None]:
# Load the students dataset from a CSV file in the notebook's working directory.
# Later cells read these columns from it: name, age, gender, city, subject,
# score, hours_studied and passed.
df1 = pd.read_csv("students.csv")


In [None]:
# Bare name as the last expression: the notebook renders the frame
# (pandas truncates long frames in the display).
df1

In [None]:
# read_csv returns a pandas DataFrame; type() confirms the class.
type(df1)

In [None]:
# head() with no argument returns the first 5 rows.
df1.head()

In [None]:
# tail() with no argument returns the last 5 rows.
df1.tail()

<div class="alert alert-block alert-success">
<h2 style="color:royalblue"> Shape </h2>
</div>

In [None]:
# shape is an attribute (no parentheses): a (rows, columns) tuple.
df1.shape

<div class="alert alert-block alert-success">
<h2 style="color:royalblue"> Columns </h2>
</div>

In [None]:
# columns is an Index holding the column labels in order.
df1.columns

<div class="alert alert-block alert-success">
<h2 style="color:royalblue"> dtypes </h2>
</div>

In [None]:
# dtypes is a Series mapping each column label to that column's data type.
df1.dtypes

<div class="alert alert-block alert-success">
<h2 style="color:royalblue"> info() </h2>
</div>

In [None]:
# info() prints a summary (index range, per-column non-null counts and dtypes,
# memory usage) and returns None, so the output is printed text, not a table.
df1.info()

<div class="alert alert-block alert-success">
<h2 style="color:royalblue"> describe() </h2>
</div>

In [None]:
# describe() with no arguments summarises the numeric columns:
# count, mean, std, min, quartiles and max.
df1.describe()

<div class="alert alert-block alert-success">
<h2 style="color:royalblue"> Choosing Rows, Columns </h2>
</div>

In [None]:
# Pick one column by label, then take its first five entries by position.
df1["subject"].iloc[:5]

In [None]:
# First five values of the city column.
df1["city"].head(5)

In [None]:
# First five values of the passed column, selected by position.
df1["passed"].iloc[0:5]

In [None]:
# Attribute-style column access (df1.city) is equivalent to df1["city"]
# when the column label is a valid Python identifier.
df1.city.head(5)

In [None]:
# A list of labels selects several columns (result is a DataFrame);
# the positional slice then keeps the first five rows.
df1.loc[:, ["city", "passed", "age"]].iloc[:5]

<div class="alert alert-block alert-success">
<h2 style="color:royalblue"> value_counts() </h2>
</div>

In [None]:
# How many rows fall in each city, most frequent first.
df1.city.value_counts()

In [None]:
# Row count for each distinct value of the passed column.
df1.passed.value_counts()

In [None]:
# normalize=True yields fractions that sum to 1; scale by 100 for percentages.
df1["passed"].value_counts(normalize=True).mul(100)

<div class="alert alert-block alert-success">
<h2 style="color:royalblue"> crosstab() </h2>
</div>

In [None]:
# Contingency table: one row per age, one column per passed value, cells are counts.
pd.crosstab(index=df1["age"], columns=df1["passed"])

In [None]:
# normalize="index": each row (age) is divided by its own total, so rows sum to 1.
pd.crosstab(index=df1["age"], columns=df1["passed"], normalize="index")

In [None]:
# normalize="columns": each column (passed value) is divided by its own total.
pd.crosstab(index=df1["age"], columns=df1["passed"], normalize="columns")

In [None]:
# normalize="all": every cell is divided by the grand total, so the table sums to 1.
pd.crosstab(index=df1["age"], columns=df1["passed"], normalize="all")

<div class="alert alert-block alert-success">
<h2 style="color:royalblue"> Conditions </h2>
</div>

In [None]:
# Keep only the rows whose age is at most 16, then preview the first three.
students_age = df1.loc[df1["age"].le(16)]
students_age.head(3)

In [None]:
# (rows, columns) of the filtered frame: the row count is the number of students aged 16 or under.
students_age.shape

In [None]:
# Filter rows and pick columns in one .loc call, then count each
# (gender, city) combination among students aged 16 or under.
df1.loc[df1["age"] <= 16, ["gender", "city"]].value_counts()

In [None]:
# Gender counts among students aged 16 or under.
df1.loc[df1["age"] <= 16, "gender"].value_counts()

In [None]:
# Same filter as above, reported as fractions of the filtered rows.
df1.loc[df1["age"] <= 16, "gender"].value_counts(normalize=True)

In [None]:
# Re-list the column labels before filtering on the name column below.
df1.columns

In [None]:
# Rows whose name contains the substring "Fuad".
# Bracket access is used because `.name` is also a regular attribute on pandas
# objects such as Series, so df1.name is easy to misread.
# na=False treats missing names as "no match"; without it a NaN in the column
# makes the mask non-boolean and the row filter raises.
df1[df1["name"].str.contains("Fuad", na=False)]

<div class="alert alert-block alert-success">
<h2 style="color:royalblue"> unique() </h2>
</div>

In [None]:
# Distinct age values in order of first appearance (not sorted).
df1["age"].unique()

In [None]:
# Combine two conditions with & (each side needs its own parentheses),
# select four columns in the same .loc call, then show the first three matches.
df1.loc[
    (df1["score"] >= 50) & (df1["hours_studied"] > 2),
    ["age", "city", "subject", "passed"],
].iloc[:3]

In [None]:
# Rows where gender equals "M" and passed is True; show three of them.
# "M" must be a quoted string: a bare M is an undefined name and raises NameError.
# NOTE(review): assumes gender is coded as "M" in students.csv — confirm against the data.
df1[(df1.gender == "M") &
    (df1.passed == True)][["age", "city","subject"]][0:3]

<div class="alert alert-block alert-success">
<h2 style="color:royalblue"> groupby() </h2>
</div>

In [None]:
# Split rows by city, then average the score within each city.
df1.groupby(by="city")["score"].mean()

In [None]:
# Average hours studied per city.
df1.groupby(by="city")["hours_studied"].mean()

In [None]:
# Average hours studied per age.
df1.groupby(by="age")["hours_studied"].mean()

In [None]:
# Average hours studied for each distinct score value.
df1.groupby(by="score")["hours_studied"].mean()

In [None]:
# Grouping by two keys gives a result indexed by (score, gender) pairs.
df1.groupby(by=["score", "gender"])["age"].mean()

<div class="alert alert-block alert-success">
<h2 style="color:royalblue"> Sort </h2>
</div>

In [None]:
# Returns a new frame ordered by score, lowest first; df1 itself is unchanged.
df1.sort_values(by="score", ascending=True)

In [None]:
# Same sort with the order reversed: highest score first.
df1.sort_values(by="score", ascending=False)

# iloc , loc

In [None]:
# Show the frame again so the positional lookups below can be checked against it.
df1

In [None]:
# iloc is position-based: a single integer returns the row at position 3
# (the fourth row) as a Series.
df1.iloc[3]

In [None]:
# A list of positions selects several rows at once. Every position must exist:
# iloc raises IndexError if the frame has fewer than 51 rows.
df1.iloc[[3,9,50]]

In [None]:
# First list = row positions, second list = column positions:
# rows 3 and 4, columns 1, 2 and 3.
df1.iloc[[3,4],[1,2,3]]

In [None]:
# Positional slices exclude the stop value: rows 2-4 and columns 3-6.
df1.iloc[2:5, 3:7]

In [None]:
# A bare ":" keeps every row; the column slice keeps positions 1-3.
df1.iloc[:, 1:4]

# loc

In [None]:
# df1 still has the default integer index; the next cells reload the file with name as the index.
df1

In [None]:
# df1

In [None]:
# Reload the same CSV, this time using the name column as the row index
# so that rows can be looked up by label with .loc.
df2 = pd.read_csv("students.csv", index_col="name")

In [None]:
# Same data as df1, but name is now the index rather than a regular column.
df2

In [None]:
# loc is label-based: look up the row(s) whose index label is 'Raul'.
# A unique label gives a Series; a repeated label gives a DataFrame.
# Raises KeyError if no row has that label.
df2.loc['Raul']

In [None]:
# Lists on both axes: row label(s) first, column labels second.
# Passing the row label inside a list keeps the result a DataFrame.
df2.loc[['Raul'],['subject','city','score']]

<div class="alert alert-block alert-success">
<h2 style="color:royalblue"> Visualization </h2>
</div>

In [None]:
import matplotlib.pyplot as plt

# Scatter plot: one point per student, hours studied on x and score on y.
# Both columns are passed straight from df1 so the x/y pairs stay row-aligned;
# the previously computed (and never used) NaN-free copy of score was removed.
fig, ax = plt.subplots()
ax.scatter(df1["hours_studied"], df1["score"])

ax.set_xlabel("hours_studied")
ax.set_ylabel("score")
ax.set_title("Score vs. Hours Studied")
fig.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt

# Histogram of the score column.
scores = df1["score"].dropna()  # drop NaN values before binning

fig, ax = plt.subplots()
ax.hist(scores, bins=15)  # raise or lower the number of bins as needed
ax.set_xlabel("score")
ax.set_ylabel("count")
ax.set_title("Score distribution")
fig.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt


def plot_histogram(values, label, title):
    """Draw one histogram of `values` on a new figure and show it.

    `label` is used for the x-axis, the y-axis is always "count",
    and `title` is the figure title. The default number of bins is used.
    """
    plt.figure()
    plt.hist(values)
    plt.xlabel(label)
    plt.ylabel("count")
    plt.title(title)
    plt.tight_layout()
    plt.show()


# Remove missing values from each column before plotting.
ages = df1["age"].dropna()
scores = df1["score"].dropna()
hours = df1["hours_studied"].dropna()

# One figure per column, in the order: age, score, hours studied.
plot_histogram(ages, "age", "Age - histogram")
plot_histogram(scores, "score", "Score - histogram")
plot_histogram(hours, "hours_studied", "Hours studied - histogram")
