<a href="https://colab.research.google.com/github/vamshibhukya58/Blood_Donation_Management_System/blob/main/furniture.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

# Read the Super Store CSV into a DataFrame.
# The file is decoded as ISO-8859-1 rather than the pandas default (UTF-8).
csv_path = "/content/Super_Store_data.csv"
df = pd.read_csv(csv_path, encoding='ISO-8859-1')

# Preview the first rows to sanity-check the columns that were parsed
preview = df.head()
print(preview)



In [None]:
# Report how many missing values each column contains
missing_per_column = df.isnull().sum()
print(missing_per_column)

# Discard rows with any missing value, then rows that are exact duplicates
deduped = df.dropna().drop_duplicates()

# Parse the order date so calendar parts can be extracted from it.
# NOTE(review): no explicit format is passed, so pandas infers it from the
# strings -- confirm the day/month order it picks matches the file.
order_dates = pd.to_datetime(deduped['Order Date'])

# Identifier, free-text and raw date columns that are not kept as features
unused_columns = [
    'Order Date',
    'Customer ID',
    'Customer Name',
    'Order ID',
    'Product ID',
    'Product Name',
    'Ship Date',
]

# Add the month and year of each order, then remove the unused columns
df = (
    deduped
    .assign(Month=order_dates.dt.month, Year=order_dates.dt.year)
    .drop(columns=unused_columns)
)


In [None]:
# Categorical columns to expand into one indicator column per distinct value.
# NOTE(review): 'City' presumably has many distinct values, so this step may
# add a large number of columns -- confirm that is acceptable downstream.
categorical_columns = [
    'Ship Mode',
    'Segment',
    'Country',
    'City',
    'State',
    'Region',
    'Category',
    'Sub-Category',
]

# Replace each listed column with its one-hot encoded indicator columns
df = pd.get_dummies(df, columns=categorical_columns)


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Plot sales over time.
# seaborn's lineplot aggregates rows that share an x value, so each point is
# the mean of 'Sales' for that year and the shaded band is its confidence
# interval -- the y-axis is labelled accordingly.
plt.figure(figsize=(10, 6))
sns.lineplot(data=df, x='Year', y='Sales')
plt.title('Sales Over Time')
plt.xlabel('Year')
plt.ylabel('Mean Sales')
# Years are integers; pin the ticks so the axis does not show fractional years
plt.xticks(sorted(df['Year'].unique()))
plt.show()

# Correlation matrix over every column currently in df
corr_matrix = df.corr()

# Annotating the full matrix is illegible (and slow to render) once df holds
# many columns, e.g. after one-hot encoding. The heatmap is therefore limited
# to the columns most strongly correlated with 'Sales' (which includes 'Sales'
# itself). Columns whose correlation with 'Sales' is undefined (NaN, as happens
# for constant columns) are skipped.
top_n = 15
sales_corr = corr_matrix['Sales'].abs().dropna()
top_features = sales_corr.nlargest(top_n).index

plt.figure(figsize=(12, 10))
sns.heatmap(
    corr_matrix.loc[top_features, top_features],
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
    vmin=-1,
    vmax=1,
)
plt.title(f'Correlation of the {len(top_features)} Features Most Correlated with Sales')
plt.tight_layout()
plt.show()


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Settings for the split and the model, named so they are easy to spot and tune
test_fraction = 0.2
seed = 42
n_trees = 100

# Target is the 'Sales' column; every other column in df is used as a feature
y = df['Sales']
X = df.drop(columns=['Sales'])

# Hold out a fraction of the rows for evaluation (seeded for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=test_fraction,
    random_state=seed,
)

# Build the random forest regressor, then fit it on the training rows
model = RandomForestRegressor(n_estimators=n_trees, random_state=seed)
model.fit(X_train, y_train)


In [None]:
# Predict the target for the held-out rows
y_pred = model.predict(X_test)

# Score the predictions against the true values with each metric in turn:
# mean absolute error, mean squared error and the R^2 coefficient
mae, mse, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

# Print one line per metric
for label, value in (('MAE', mae), ('MSE', mse), ('R2 Score', r2)):
    print(f'{label}: {value}')
