In [None]:
# Importing necessary libraries
import numpy as np
import pandas as pd

In [None]:
# Build a labelled one-dimensional Series: five integers keyed by the letters a-e
data = pd.Series(
    data=[10, 20, 30, 40, 50],
    index=list("abcde"),
)
print(data)

a    10
b    20
c    30
d    40
e    50
dtype: int64


In [None]:
# Build a DataFrame from an in-memory mapping of column name -> list of values.
# Tabular data can also be loaded from a CSV or Excel file with
# pd.read_csv("file.csv") and pd.read_excel("file.xlsx") respectively.
data = {
    "Name": ["Alice", "Bob", "Charlie"],
    "Age": [25, 30, 35],
    "City": ["New York", "Los Angeles", "Chicago"],
}
df = pd.DataFrame(data=data)
print(df)

      Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago


In [None]:
# Preview the frame: each item below is printed on its own line(s), in order
print(
    df.head(n=2),  # first two rows
    df.tail(n=2),  # last two rows
    df.columns,    # column labels (the headers)
    sep="\n",
)


    Name  Age         City
0  Alice   25     New York
1    Bob   30  Los Angeles
      Name  Age         City
1      Bob   30  Los Angeles
2  Charlie   35      Chicago
Index(['Name', 'Age', 'City'], dtype='object')


In [None]:
# Column selection: every row of the "Name" column, returned as a Series
print(df.loc[:, "Name"])

# Row selection by integer position: position 1 is the second row
print(df.iloc[1, :])

# Row selection by index label: label 0 is the first row of this frame
print(df.loc[0, :])

# Single cell by (row position, column position): first row, second column
print(df.iloc[0, 1])

0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object
Name            Bob
Age              30
City    Los Angeles
Name: 1, dtype: object
Name       Alice
Age           25
City    New York
Name: 0, dtype: object
25


In [None]:
# Keep only the rows whose Age exceeds 25, using a boolean mask over the rows
is_over_25 = df["Age"] > 25
filtered_df = df.loc[is_over_25]
print(filtered_df)

      Name  Age         City
1      Bob   30  Los Angeles
2  Charlie   35      Chicago


In [None]:
# Attach a "Salary" column to df; the list supplies one value per existing row
df["Salary"] = [50_000, 60_000, 70_000]
print(df)

      Name  Age         City  Salary
0    Alice   25     New York   50000
1      Bob   30  Los Angeles   60000
2  Charlie   35      Chicago   70000


In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
print(df.describe())

# Structural overview: index range, per-column non-null counts and dtypes, memory usage.
# DataFrame.info() writes its report straight to stdout and returns None, so it is
# called bare here; wrapping it in print() would emit a stray "None" after the report.
df.info()

        Age   Salary
count   3.0      3.0
mean   30.0  60000.0
std     5.0  10000.0
min    25.0  50000.0
25%    27.5  55000.0
50%    30.0  60000.0
75%    32.5  65000.0
max    35.0  70000.0
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    3 non-null      object
 1   Age     3 non-null      int64 
 2   City    3 non-null      object
 3   Salary  3 non-null      int64 
dtypes: int64(2), object(2)
memory usage: 228.0+ bytes
None


In [None]:
# Order the rows by Age, largest first (descending); df itself is left untouched
df_sorted = df.sort_values("Age", ascending=False)
print(df_sorted)

      Name  Age         City  Salary
2  Charlie   35      Chicago   70000
1      Bob   30  Los Angeles   60000
0    Alice   25     New York   50000


In [None]:
# Mean Age per City: split the rows by their City value,
# then average the Age column within each group
grouped = (
    df.groupby(by="City")["Age"]
    .mean()
)
print(grouped)

City
Chicago        35.0
Los Angeles    30.0
New York       25.0
Name: Age, dtype: float64


In [None]:
# Conditional update: overwrite Salary with 80000 on every row where Age is above 30.
# A single .loc[rows, column] assignment writes into df itself.
over_30 = df["Age"] > 30
df.loc[over_30, "Salary"] = 80_000
print(df)

      Name  Age         City  Salary
0    Alice   25     New York   50000
1      Bob   30  Los Angeles   60000
2  Charlie   35      Chicago   80000


In [None]:
# Stack two frames that share the same columns on top of each other (row-wise).
# Each input keeps its own row labels, so the combined index reads 0, 1, 0, 1.
df1 = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
df2 = pd.DataFrame({"A": [5, 6], "B": [7, 8]})
result = pd.concat(objs=[df1, df2], axis=0)
print(result)

   A  B
0  1  3
1  2  4
0  5  7
1  6  8


In [None]:
# Join two frames on their shared "ID" key column.
# An inner join keeps only the IDs that appear in both inputs.
df1 = pd.DataFrame({"ID": [1, 2], "Name": ["Alice", "Bob"]})
df2 = pd.DataFrame({"ID": [1, 2], "Age": [25, 30]})
merged = pd.merge(left=df1, right=df2, how="inner", on="ID")
print(merged)

   ID   Name  Age
0   1  Alice   25
1   2    Bob   30
