# progress-ml-model

Use the "Run" button to execute the code.

In [59]:
import pandas as pd
import numpy as np

In [60]:
df = pd.read_csv('tableConvert.com_d5ibz0.csv')

In [61]:
df

Unnamed: 0,ProjectID,TaskID,TaskDescription,ProjectDuration,TaskDuration,TaskEffort,ResourceAllocation,StartDate,EndDate,AssignedPerson
0,1,101,Requirement Gathering,60,10,20,3,2023-01-01,2023-02-28,John Doe
1,1,102,Design,60,15,30,2,2023-03-01,2023-04-30,Jane Smith
2,1,103,Development,60,30,40,5,2023-05-01,2023-07-01,Bob Johnson
3,1,104,Testing,60,15,25,4,2023-07-02,2023-08-30,Alice Williams
4,2,201,Requirement Gathering,45,8,18,2,2023-02-01,2023-03-18,Charlie Brown
...,...,...,...,...,...,...,...,...,...,...
195,49,916,Testing,50,14,22,2,2023-07-16,2023-08-30,Henry Turner
196,50,917,Requirement Gathering,70,14,28,4,2023-01-10,2023-03-20,Elijah Walker
197,50,918,Design,70,20,35,3,2023-03-21,2023-05-20,Chloe Martinez
198,50,919,Development,70,38,50,6,2023-05-21,2023-08-05,Lucas Fisher


In [62]:

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score

df['StartDate'] = pd.to_datetime(df['StartDate'])
df['EndDate'] = pd.to_datetime(df['EndDate'])
df['ProjectDuration'] = (df['EndDate'] - df['StartDate']).dt.days
df['TaskStartDate'] = pd.to_datetime(df['StartDate']) + pd.to_timedelta(df['TaskDuration'], unit='D')


##  task completion percentage

In [63]:
df['TaskCompletion'] = (df['TaskDuration'] / df['ProjectDuration']) * 100

## Selecting relevant features for training the model

In [64]:
features = ['ProjectDuration', 'TaskDuration', 'ResourceAllocation']

## Applying Linear Regression and splitting training and testing data 

In [65]:

X_train, X_test, y_train, y_test = train_test_split(df[features], df['TaskCompletion'], test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print(f'Mean Absolute Error: {mae}')
r2 = r2_score(y_test, y_pred)
print(f'R2 Score: {r2}')

Mean Absolute Error: 1.6566857477098715
R2 Score: 0.9708548890480743


## Task Completion prediction

In [67]:

def evaluate_team_performance(ProjectID=None):
    if ProjectID is not None:
        team_data = df[df['ProjectID'] == ProjectID]

        team_data.loc[:, 'StartDate'] = pd.to_datetime(team_data['StartDate'])
        team_data.loc[:, 'EndDate'] = pd.to_datetime(team_data['EndDate'])
        team_data.loc[:, 'ProjectDuration'] = (team_data['EndDate'] - team_data['StartDate']).dt.days
        team_data.loc[:, 'TaskStartDate'] = team_data['StartDate'] + pd.to_timedelta(team_data['TaskDuration'], unit='D')
        team_data.loc[:, 'TaskCompletion'] = (team_data['TaskDuration'] / team_data['ProjectDuration']) * 100

        team_features = team_data[features]

 
        team_data['PredictedTaskCompletion'] = model.predict(team_features)

        print(f'\nPredicted Task Completion for Project {ProjectID}:')
        print(team_data[['TaskDescription', 'AssignedPerson', 'TaskCompletion', 'PredictedTaskCompletion']])


In [69]:
evaluate_team_performance(ProjectID=8)


Predicted Task Completion for Project 8:
          TaskDescription AssignedPerson  TaskCompletion  \
28  Requirement Gathering  Sophia Hughes       16.216216   
29                 Design     Mia Turner       30.000000   
30            Development   James Taylor       46.666667   
31                Testing   Grace Walker       20.000000   

    PredictedTaskCompletion  
28                13.639251  
29                29.899353  
30                49.679469  
31                18.000278  


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team_data['PredictedTaskCompletion'] = model.predict(team_features)
