# progress-ml-model

Use the "Run" button to execute the code.

In [15]:
import pandas as pd
import numpy as np

In [16]:
df = pd.read_csv('data2.csv')

In [17]:
df

Unnamed: 0,ProjectID,TaskID,ProjectDuration,TaskDuration,TaskEffort,ResourceAllocation,StartDate,EndDate,Name,Domain Assigned
0,1,101,60,10,20,3,2023-01-01,2023-02-28,Shaswat Singh,Design
1,1,102,60,15,30,2,2023-03-01,2023-04-30,Nidhi Sharma,Design
2,1,103,60,30,40,5,2023-05-01,2023-07-01,Arjun Patel,ML
3,1,104,60,15,25,4,2023-07-02,2023-08-30,Aarna Singh,Design
4,2,201,45,8,18,2,2023-02-01,2023-03-18,Advait Reddy,Mobile
...,...,...,...,...,...,...,...,...,...,...
96,24,346,50,14,22,2,2023-07-16,2023-08-30,Rohit,Backend
97,25,347,70,14,28,4,2023-01-10,2023-03-20,Guru,Design
98,25,348,70,20,35,3,2023-03-21,2023-05-20,Tanishka,Frontend
99,25,349,70,38,50,6,2023-05-21,2023-08-05,Subh,Backend


In [18]:

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

df['StartDate'] = pd.to_datetime(df['StartDate'])
df['EndDate'] = pd.to_datetime(df['EndDate'])
df['ProjectDuration'] = (df['EndDate'] - df['StartDate']).dt.days
df['TaskStartDate'] = pd.to_datetime(df['StartDate']) + pd.to_timedelta(df['TaskDuration'], unit='D')


##  task completion percentage

In [19]:
df['TaskCompletion'] = (df['TaskDuration'] / df['ProjectDuration']) * 100

## Selecting relevant features for training the model

In [20]:
features = ['ProjectDuration', 'TaskDuration', 'ResourceAllocation']

## Applying Linear Regression and splitting training and testing data 

In [21]:

X_train, X_test, y_train, y_test = train_test_split(df[features], df['TaskCompletion'], test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print(f'Mean Absolute Error: {mae}')

Mean Absolute Error: 1.9567378146224197


## Task Completion prediction

In [22]:

def evaluate_team_performance(ProjectID=None):
    if ProjectID is not None:
        team_data = df[df['ProjectID'] == ProjectID]

        team_data.loc[:, 'StartDate'] = pd.to_datetime(team_data['StartDate'])
        team_data.loc[:, 'EndDate'] = pd.to_datetime(team_data['EndDate'])
        team_data.loc[:, 'ProjectDuration'] = (team_data['EndDate'] - team_data['StartDate']).dt.days
        team_data.loc[:, 'TaskStartDate'] = team_data['StartDate'] + pd.to_timedelta(team_data['TaskDuration'], unit='D')
        team_data.loc[:, 'TaskCompletion'] = (team_data['TaskDuration'] / team_data['ProjectDuration']) * 100

        team_features = team_data[features]

 
        team_data['PredictedTaskCompletion'] = model.predict(team_features)

        print(f'\nPredicted Task Completion for Project {ProjectID}:')
        print(team_data[['Domain Assigned', 'Name', 'TaskCompletion', 'PredictedTaskCompletion']])

In [23]:
evaluate_team_performance(ProjectID=20)


Predicted Task Completion for Project 20:
   Domain Assigned              Name  TaskCompletion  PredictedTaskCompletion
76              ML        Mira Menon       20.289855                19.520423
77          Mobile         Dev Mehta       33.333333                33.110310
78         Backend  Ishika Singhania       50.000000                54.219025
79         Backend        Yash Patel       22.857143                22.363791


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team_data['PredictedTaskCompletion'] = model.predict(team_features)


In [26]:
import pickle
pickle.dump(model, open('model.pkl','wb'))
pickle.dump(df,open('df.pkl','wb'))