In [1]:
import pandas as pd
# Toy purchase log: one row per purchase, so a Customer_ID can repeat.
data = {
    'Customer_ID': [101, 102, 103, 101, 104, 102, 101, 105, 102, 103],
    'Purchase_Amount': [200, 150, 180, 220, 300, 200, 100, 400, 250, 300],
}
df = pd.DataFrame(data)

# Add up the spend of every customer; reset_index() turns Customer_ID back
# into an ordinary column with a fresh 0..n-1 index.
total_purchases = df.groupby('Customer_ID')['Purchase_Amount'].sum().reset_index()
print("Total Purchases per Customer:", total_purchases, sep="\n")

# Rank customers from highest to lowest total spend and keep the first three.
# NOTE(review): the heading printed below says "Frequent", but the ranking is
# by total amount spent, not by number of purchases - confirm which is intended.
ranked_by_spend = total_purchases.sort_values(by='Purchase_Amount', ascending=False)
top_customers = ranked_by_spend.iloc[:3]
print("\nTop 3 Frequent Customers:", top_customers, sep="\n")

Total Purchases per Customer:
   Customer_ID  Purchase_Amount
0          101              520
1          102              600
2          103              480
3          104              300
4          105              400

Top 3 Frequent Customers:
   Customer_ID  Purchase_Amount
1          102              600
0          101              520
2          103              480


In [2]:
from sklearn.linear_model import LinearRegression
import numpy as np
# Toy housing data: floor area in square feet and the matching sale price.
data = {
    'Square_Feet': [1500, 2000, 2500, 3000, 3500],
    'Price': [300000, 400000, 500000, 600000, 700000],
}

# The feature matrix must be 2-D (one row per sample, one column per feature);
# the target stays 1-D.
X = np.array(data['Square_Feet'])[:, np.newaxis]
y = np.array(data['Price'])

# fit() returns the estimator itself, so construction and training can chain.
model = LinearRegression().fit(X, y)

# Two unseen floor areas, shaped as a (2, 1) column like the training matrix.
X_test = np.array([1800, 2800]).reshape(-1, 1)
predicted_prices = model.predict(X_test)
print("Predicted Prices:", predicted_prices, sep="\n")

Predicted Prices:
[360000. 560000.]


In [3]:
import pandas as pd
# Toy data set: a category label and a numeric value for each record.
data = {
    'Category': ['A', 'B', 'C', 'A', 'D', 'B', 'A', 'E', 'B', 'C', 'C', 'A'],
    'Value': [10, 15, 20, 30, 25, 18, 22, 40, 35, 50, 45, 15],
}
df = pd.DataFrame(data)

# Count the rows in each category; naming the Series 'Count' before
# reset_index() yields the two columns Category / Count.
category_counts = df.groupby('Category').size().rename('Count').reset_index()
print("Total Occurrences per Category:", category_counts, sep="\n")

# Order the categories from most to least frequent and keep the first three.
ranked_by_count = category_counts.sort_values(by='Count', ascending=False)
top_categories = ranked_by_count.iloc[:3]
print("\nTop 3 Frequent Categories:", top_categories, sep="\n")

Total Occurrences per Category:
  Category  Count
0        A      4
1        B      3
2        C      3
3        D      1
4        E      1

Top 3 Frequent Categories:
  Category  Count
0        A      4
1        B      3
2        C      3


In [4]:
import pandas as pd
import numpy as np
# Small roster in which two ages are missing; None is stored as NaN, which
# makes the Age column float-typed.
data = {'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
        'Age': [25, 30, None, 35, None]}
df = pd.DataFrame(data)
print("Original Data:")
print(df)

# Series.mean() skips NaN by default, so this is the mean of the known ages.
mean_age = df['Age'].mean()

# Assign the filled column back instead of calling
# df['Age'].fillna(..., inplace=True): that form mutates a temporary selected
# from the frame, which raises a FutureWarning on pandas 2.x and leaves df
# unchanged once Copy-on-Write is enabled (the default in pandas 3.0).
df['Age'] = df['Age'].fillna(mean_age)
print("\nData after Imputation:")
print(df)

Original Data:
      Name   Age
0    Alice  25.0
1      Bob  30.0
2  Charlie   NaN
3    David  35.0
4      Eve   NaN

Data after Imputation:
      Name   Age
0    Alice  25.0
1      Bob  30.0
2  Charlie  30.0
3    David  35.0
4      Eve  30.0


In [5]:
import pandas as pd
from sklearn.linear_model import LinearRegression
import numpy as np
# Toy data: years of experience and the matching salary.
data = {'Experience': [1, 2, 3, 4, 5],
        'Salary': [30000, 35000, 40000, 45000, 50000]}
df = pd.DataFrame(data)

# The estimator needs a 2-D feature matrix, hence the reshape to one column.
X = df['Experience'].values.reshape(-1, 1)
y = df['Salary'].values

model = LinearRegression()
model.fit(X, y)

# predict() returns a float array with one entry per input row.
predicted_salary = model.predict(np.array([[6]]))

# Round to the nearest whole number rather than truncating with int():
# floating-point error in the fit can produce e.g. 54999.999999, which int()
# would display as 54999.
print(f"Predicted Salary for 6 years of experience: ${round(float(predicted_salary[0]))}")

Predicted Salary for 6 years of experience: $55000


In [6]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
import numpy as np
# Toy e-mail data: word count of each message and its ham/spam label.
data = {
    'word_count': [100, 150, 200, 120, 180, 220],
    'is_spam': ['ham', 'spam', 'spam', 'ham', 'spam', 'spam'],
}
df = pd.DataFrame(data)

# Selecting with a list of columns keeps the result 2-D: shape (n_samples, 1).
X = df[['word_count']].to_numpy()

# Encode the text labels as integers: ham -> 0, spam -> 1.
label_to_int = {'ham': 0, 'spam': 1}
y = df['is_spam'].map(label_to_int).to_numpy()

model = LogisticRegression()
model.fit(X, y)

# Classify a single message of 200 words, passed as a (1, 1) matrix.
prediction = model.predict(np.array([200]).reshape(1, -1))
if prediction[0] == 1:
    result = 'Spam'
else:
    result = 'Ham'
print(f"Prediction for email with 200 words: {result}")

Prediction for email with 200 words: Spam
