##pandas practice 


In [1]:
import pandas as pd
import numpy as np

data = {
    "Name": ["Amit", "Rahul", "Priya", "Neha", "Rahul", "Amit"],
    "City": ["Delhi", "Mumbai", "Delhi", "Chennai", "Mumbai", "Delhi"],
    "Age": [25, 30, np.nan, 28, 30, 26],
    "Salary": [45000, 52000, 61000, 47000, np.nan, 43000],
    "Department": ["Sales", "HR", "IT", "IT", "HR", "Sales"]
}
df = pd.DataFrame(data)
df.to_csv("sample.csv", index=False)

df = pd.read_csv("sample.csv")
print("\nDATA\n", df)

print("\nColumns:", df.columns)
print("\nShape:", df.shape)
print("\nInfo:")
df.info()
print("\nDescribe:\n", df.describe())

print("\nSingle column:\n", df["Name"])
print("\nMultiple columns:\n", df[["Name", "Salary"]])
print("\nRows 0-2:\n", df.iloc[0:3])
print("\nCity = Delhi:\n", df[df["City"] == "Delhi"])

print("\nAge > 26:\n", df[df["Age"] > 26])
print("\nQuery Salary > 45000:\n", df.query("Salary > 45000"))

print("\nMissing counts:\n", df.isna().sum())
df["Age"].fillna(df["Age"].mean(), inplace=True)
df["Salary"].fillna(df["Salary"].median(), inplace=True)
print("\nAfter filling:\n", df)

df["Bonus"] = df["Salary"] * 0.10
df["Status"] = np.where(df["Salary"] > 50000, "High Paid", "Avg Paid")
print("\nNew columns:\n", df)

print("\nGroupBy Department Salary mean:\n", df.groupby("Department")["Salary"].mean())
print("\nGroupBy City summary:\n", df.groupby("City").agg({"Salary": "mean", "Age": "max"}))

print("\nSorted by Salary:\n", df.sort_values("Salary", ascending=False))
print("\nUnique Cities:", df["City"].unique())
print("\nCity Value Counts:\n", df["City"].value_counts())

df["Name_Length"] = df["Name"].apply(lambda x: len(x))
print("\nApply result:\n", df)

extra = pd.DataFrame({
    "Name": ["Amit", "Priya"],
    "Experience": [2, 4]
})
merged = df.merge(extra, on="Name", how="left")
print("\nMerged Data:\n", merged)

concat_df = pd.concat([df, merged], axis=0)
print("\nConcatenated Data:\n", concat_df)

pivot = pd.pivot_table(df, values="Salary", index="Department", columns="City", aggfunc="mean")
print("\nPivot Table:\n", pivot)

df.to_csv("final_output.csv", index=False)
df.to_excel("final_output.xlsx", index=False)
print("\nCompleted. Files exported: final_output.csv & final_output.xlsx")



DATA
     Name     City   Age   Salary Department
0   Amit    Delhi  25.0  45000.0      Sales
1  Rahul   Mumbai  30.0  52000.0         HR
2  Priya    Delhi   NaN  61000.0         IT
3   Neha  Chennai  28.0  47000.0         IT
4  Rahul   Mumbai  30.0      NaN         HR
5   Amit    Delhi  26.0  43000.0      Sales

Columns: Index(['Name', 'City', 'Age', 'Salary', 'Department'], dtype='object')

Shape: (6, 5)

Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Name        6 non-null      object 
 1   City        6 non-null      object 
 2   Age         5 non-null      float64
 3   Salary      5 non-null      float64
 4   Department  6 non-null      object 
dtypes: float64(2), object(3)
memory usage: 372.0+ bytes

Describe:
              Age        Salary
count   5.000000      5.000000
mean   27.800000  49600.000000
std     2.280351   7197.221686
mi

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Age"].fillna(df["Age"].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Salary"].fillna(df["Salary"].median(), inplace=True)


ModuleNotFoundError: No module named 'openpyxl'