In [None]:
import pandas as pd
import numpy as np

# Build a small example DataFrame from a dict of equal-length columns
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David'],
    'Age': [22, 27, 22, 32],
    'Salary': [50000, 60000, 55000, 70000],
}
df = pd.DataFrame(data)
print("DataFrame:", df, sep="\n")

# A Series is a one-dimensional labelled array; here the labels are 'a'..'d'
series_example = pd.Series([1, 2, 3, 4], index=list('abcd'))
print("\nSeries Example:", series_example, sep="\n")

# Three levels of access on the DataFrame: whole column, whole row, one cell
print("\nDataFrame Example:")
print("Accessing 'Age' column:", df.loc[:, 'Age'], sep="\n")
print("\nAccessing row with index 1:", df.loc[1], sep="\n")
print("\nAccessing specific cell (Name of row 2):", df.loc[2, 'Name'], sep="\n")

# Read CSV example
# 'data.csv' is a relative path, so it is looked up in the current working
# directory; pd.read_csv raises FileNotFoundError if it is missing.
df_csv = pd.read_csv('data.csv')
print("\nDataFrame from CSV (head):")
print(df_csv.head())
print("\nDataFrame from CSV (tail):")
print(df_csv.tail())
print("\nDataFrame from CSV (info):")
# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() would emit a stray "None" line after the summary.
df_csv.info()

# Set maximum number of rows to display
# This is a process-wide pandas option: every DataFrame printed afterwards
# that has more than 10 rows is shown in truncated form.
pd.set_option('display.max_rows', 10)
print("\nDataFrame with MAX_ROWS set to 10:")
print(df_csv)

# Arithmetic mean of the 'Age' column
average_age = df.loc[:, 'Age'].mean()
print("\nAverage Age:", average_age)

# Keep only the rows that have no missing value in any column
df_cleaned = df.dropna(axis=0, how='any')
print("\nCleaned DataFrame (dropped NA):", df_cleaned, sep="\n")

# Substitute 0 for missing values, but only in the 'Salary' column
df_filled = df.fillna(value={'Salary': 0})
print("\nDataFrame with filled NA:", df_filled, sep="\n")

# Turn empty-string cells into NaN so pandas treats them as missing
df_empty = df.replace(to_replace='', value=np.nan)
print("\nDataFrame with empty cells replaced with NaN:", df_empty, sep="\n")

# Impute missing ages with the column mean.
# assign() returns a new DataFrame, so df_empty itself is left untouched.
mean_age = df_empty['Age'].mean()
df_mean = df_empty.assign(Age=df_empty['Age'].fillna(mean_age))
print("\nDataFrame with NaN in 'Age' replaced with mean:", df_mean, sep="\n")

# Impute missing ages with the column median
median_age = df_empty['Age'].median()
df_median = df_empty.assign(Age=df_empty['Age'].fillna(median_age))
print("\nDataFrame with NaN in 'Age' replaced with median:", df_median, sep="\n")

# Impute missing ages with the mode; mode() returns a Series (there can be
# ties), and the entry labelled 0 is the one used here.
mode_age = df_empty['Age'].mode()[0]
df_mode = df_empty.assign(Age=df_empty['Age'].fillna(mode_age))
print("\nDataFrame with NaN in 'Age' replaced with mode:", df_mode, sep="\n")

# New DataFrame whose 'Age' column is cast to str; df itself is not modified
df_str_age = df.astype({'Age': str})
print("\nDataFrame with Age as string:", df_str_age, sep="\n")

# Keep only the rows whose 'Age' text consists solely of digits
df_corrected = df_str_age.loc[df_str_age['Age'].str.isdigit()]
print("\nDataFrame with corrected Age data:", df_corrected, sep="\n")

# Drop rows that are exact duplicates of an earlier row (first one is kept)
df_no_duplicates = df_str_age.drop_duplicates(keep='first')
print("\nDataFrame with duplicates removed:", df_no_duplicates, sep="\n")

# Pairwise correlation matrix over the numeric columns only
correlation = df.select_dtypes(include='number').corr()
print("\nDataFrame with correlations:", correlation, sep="\n")

# Write the DataFrame to 'output.csv' without the index column
df.to_csv('output.csv', index=False)
print("\nDataFrame saved to 'output.csv'.")


DataFrame:
      Name  Age  Salary
0    Alice   22   50000
1      Bob   27   60000
2  Charlie   22   55000
3    David   32   70000

Series Example:
a    1
b    2
c    3
d    4
dtype: int64

DataFrame Example:
Accessing 'Age' column:
0    22
1    27
2    22
3    32
Name: Age, dtype: int64

Accessing row with index 1:
Name        Bob
Age          27
Salary    60000
Name: 1, dtype: object

Accessing specific cell (Name of row 2):
Charlie
   Name  Age  Salary
  Alice   22   50000
    Bob   27   60000
Charlie   22   55000
  David   32   70000

DataFrame from CSV (head):
      Name  Age  Salary  Experience
0    Alice   24   50000           2
1      Bob   27   60000           5
2  Charlie   22   55000           0
3    David   32   70000          10
4      Eve   29   80000           7

DataFrame from CSV (tail):
     Name  Age  Salary  Experience
8     Ian   31   90000           8
9    Jack   25   62000           1
10  Kathy   23   58000           2
11   Liam   34   95000          12
12    Mia