## Setup

1. Importing libraries
2. Creating utility methods

In [1]:
import pandas as pd
import math


def ManhattanDistance(point_array_1:list, point_array_2:list):
    
    length_1 = len(point_array_1)
    length_2 = len(point_array_2)

    if length_2 != length_1:
        raise Exception("The dimensions of given points are not equal")
    

    else:
        sum = 0
        for i in range(length_1):
            sum += abs(point_array_1[i] - point_array_2[i])
        
        return sum
    
def Euclidean(x:list, y:list):
    distance = math.sqrt(sum([(a - b) ** 2 for a, b in zip(x, y)]))
    return distance

## Datasets

1. Loading datasets
2. Normaliznig datasets to use in models

In [2]:
# Loading training dataset
train_data_url = '../UCP-Plus.csv'
data = pd.read_csv(train_data_url)
df = data
efforts = data["Effort"] # Extracting effort column
values = df.values.tolist() # Converting dataframe to list
l = len(values) # Number of rows in the dataset


test_data_url = "../testData.csv"
testDF = pd.read_csv(test_data_url) # Loading test dataset
testEfforts = testDF["Real_Effort"] # Extracting effort column

testData = testDF.values.tolist() # Converting dataframe to list
l_test = len(testData) # Number of rows in test dataset




## Method for calulating closest analogies

In [3]:


def getClosestCases(datapoint, k=2, onlyAvg=True):
    
    manDistances = [] # List of manhattan distances
    eucDistances = [] # List of euclidean distances
    average = [] # List of average distances


    for i in range(1, l):

        # i-th point of the dataset
        compared = values[i]


        euc = Euclidean(datapoint, compared) # calculating euclidean distance
        eucDistances.append({"distance":euc, "index":i}) # appending result to list


        man = ManhattanDistance(datapoint, compared) # calculating manhattan distance
        manDistances.append({"distance":man, "index":i}) # appending result to list


        avg = (euc + man)/2 # calculating average of manhattan and euclidean distances
        average.append({"distance":avg, "index":i}) # appending result to list

    # Sorting all distances
    average = sorted(average, key = lambda i: i['distance'])
    manDistances = sorted(manDistances, key = lambda i: i['distance'])
    eucDistances = sorted(eucDistances, key = lambda i: i['distance'])

    # using the average of both distances
    if onlyAvg:
        return average[:k]
    else:
        return average[:k], manDistances[:k], eucDistances[:k]



## Results of Test Data #1

In [4]:
cases_1 = getClosestCases(testData[0])
print("Closest 2 Analogies")
for case in cases_1:
    print("Test Data Project #" + str(case["index"]) + "\t -- Project Effort: " + str(round(case["distance"], 1)))
    

Closest 2 Analogies
Test Data Project #96	 -- Project Effort: 38.9
Test Data Project #97	 -- Project Effort: 168.7


## Results of Test Data #2

In [5]:
cases_2 = getClosestCases(testData[1])
print("Closest 2 Analogies")
for case in cases_2:
    print("Test Data Project #" + str(case["index"]) + "\t -- Project Effort: " + str(round(case["distance"], 1)))
    

Closest 2 Analogies
Test Data Project #88	 -- Project Effort: 125.8
Test Data Project #72	 -- Project Effort: 126.7


## Results of Test Data #3

In [6]:
cases_3 = getClosestCases(testData[2])
print("Closest 2 Analogies")
for case in cases_3:
    print("Test Data Project #" + str(case["index"]) + "\t -- Project Effort: " + str(round(case["distance"], 1)))

Closest 2 Analogies
Test Data Project #89	 -- Project Effort: 120.6
Test Data Project #86	 -- Project Effort: 122.9


## Results of Test Data #4

In [7]:
cases_4 = getClosestCases(testData[3])
print("Closest 2 Analogies")
for case in cases_4:
    print("Test Data Project #" + str(case["index"]) + "\t -- Project Effort: " + str(round(case["distance"], 1)))
    

Closest 2 Analogies
Test Data Project #93	 -- Project Effort: 201.1
Test Data Project #98	 -- Project Effort: 264.2


## Calculation Results for Test Data Project #1

In [8]:
def printResults(datapoint):
    results = []

    for i in range(1, l):

        # i-th point of the dataset
        compared = values[i]


        euc = Euclidean(datapoint, compared) # calculating euclidean distance
        man = ManhattanDistance(datapoint, compared) # calculating manhattan distance
        avg = (euc + man)/2 # calculating average of manhattan and euclidean distances

        results.append([i for i in compared])
        results[i-1].append(man)
        results[i-1].append(euc)
        results[i-1].append(avg)


        
    results = pd.DataFrame(results, columns=["SA","AA","CA","UAW","SUC","AUC","CUC","UUCW","TCF","ECF","Real_Effort", "Manhattan", "Euclidean", "Average"])
    return results

    
    

# Calculation Results of Test Data

## Case #1

In [9]:
printResults(testData[0])

Unnamed: 0,SA,AA,CA,UAW,SUC,AUC,CUC,UUCW,TCF,ECF,Real_Effort,Manhattan,Euclidean,Average
0,1.0,2.0,1.0,8.0,4.0,20.0,15.0,445.0,0.990,0.990,7962.0,5451.33,5115.041062,5283.185531
1,0.0,0.0,3.0,9.0,1.0,5.0,20.0,355.0,1.030,0.800,7935.0,5321.18,5083.834283,5202.507142
2,0.0,1.0,2.0,8.0,5.0,10.0,15.0,350.0,0.900,0.910,7805.0,5185.16,4953.712045,5069.436022
3,1.0,2.0,1.0,8.0,1.0,10.0,16.0,345.0,0.900,0.910,7758.0,5141.16,4906.578138,5023.869069
4,2.0,0.0,2.0,8.0,1.0,13.0,14.0,345.0,0.990,0.990,7643.0,5025.33,4791.663705,4908.496853
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93,0.0,0.0,2.0,6.0,0.0,5.0,6.0,140.0,0.965,0.755,1696.0,1216.08,1159.461084,1187.770542
94,1.0,0.0,2.0,7.0,2.0,8.0,6.0,180.0,0.810,0.840,3670.0,873.16,815.610822,844.385411
95,1.0,0.0,2.0,7.0,0.0,11.0,3.0,155.0,0.900,0.940,2860.0,50.19,27.677292,38.933646
96,0.0,0.0,5.0,5.0,1.0,10.0,7.0,210.0,0.720,0.670,2740.0,209.26,128.144594,168.702297


## Case #2

In [10]:
printResults(testData[1])


Unnamed: 0,SA,AA,CA,UAW,SUC,AUC,CUC,UUCW,TCF,ECF,Real_Effort,Manhattan,Euclidean,Average
0,1.0,2.0,1.0,8.0,4.0,20.0,15.0,445.0,0.990,0.990,7962.0,6507.49,6152.884786,6330.187393
1,0.0,0.0,3.0,9.0,1.0,5.0,20.0,355.0,1.030,0.800,7935.0,6383.34,6121.731868,6252.535934
2,0.0,1.0,2.0,8.0,5.0,10.0,15.0,350.0,0.900,0.910,7805.0,6245.38,5991.625249,6118.502624
3,1.0,2.0,1.0,8.0,1.0,10.0,16.0,345.0,0.900,0.910,7758.0,6193.38,5944.467775,6068.923887
4,2.0,0.0,2.0,8.0,1.0,13.0,14.0,345.0,0.990,0.990,7643.0,6081.49,5829.554973,5955.522487
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93,0.0,0.0,2.0,6.0,0.0,5.0,6.0,140.0,0.965,0.755,1696.0,156.23,124.603528,140.416764
94,1.0,0.0,2.0,7.0,2.0,8.0,6.0,180.0,0.810,0.840,3670.0,1927.40,1853.145729,1890.272864
95,1.0,0.0,2.0,7.0,0.0,11.0,3.0,155.0,0.900,0.940,2860.0,1094.41,1042.784803,1068.597401
96,0.0,0.0,5.0,5.0,1.0,10.0,7.0,210.0,0.720,0.670,2740.0,1031.32,926.908332,979.114166


## Case #3

In [11]:
printResults(testData[2])

Unnamed: 0,SA,AA,CA,UAW,SUC,AUC,CUC,UUCW,TCF,ECF,Real_Effort,Manhattan,Euclidean,Average
0,1.0,2.0,1.0,8.0,4.0,20.0,15.0,445.0,0.990,0.990,7962.0,6222.27,5887.451236,6054.860618
1,0.0,0.0,3.0,9.0,1.0,5.0,20.0,355.0,1.030,0.800,7935.0,6098.12,5856.641871,5977.380936
2,0.0,1.0,2.0,8.0,5.0,10.0,15.0,350.0,0.900,0.910,7805.0,5956.20,5726.527397,5841.363698
3,1.0,2.0,1.0,8.0,1.0,10.0,16.0,345.0,0.900,0.910,7758.0,5912.20,5679.395921,5795.797960
4,2.0,0.0,2.0,8.0,1.0,13.0,14.0,345.0,0.990,0.990,7643.0,5798.27,5564.466197,5681.368098
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93,0.0,0.0,2.0,6.0,0.0,5.0,6.0,140.0,0.965,0.755,1696.0,423.02,386.581169,404.800585
94,1.0,0.0,2.0,7.0,2.0,8.0,6.0,180.0,0.810,0.840,3670.0,1646.22,1588.377797,1617.298898
95,1.0,0.0,2.0,7.0,0.0,11.0,3.0,155.0,0.900,0.940,2860.0,813.23,778.267971,795.748986
96,0.0,0.0,5.0,5.0,1.0,10.0,7.0,210.0,0.720,0.670,2740.0,754.32,661.018957,707.669478


## Case #4

In [12]:
printResults(testData[3])

Unnamed: 0,SA,AA,CA,UAW,SUC,AUC,CUC,UUCW,TCF,ECF,Real_Effort,Manhattan,Euclidean,Average
0,1.0,2.0,1.0,8.0,4.0,20.0,15.0,445.0,0.990,0.990,7962.0,5041.32,4761.648566,4901.484283
1,0.0,0.0,3.0,9.0,1.0,5.0,20.0,355.0,1.030,0.800,7935.0,4917.17,4731.176705,4824.173353
2,0.0,1.0,2.0,8.0,5.0,10.0,15.0,350.0,0.900,0.910,7805.0,4777.19,4601.051948,4689.120974
3,1.0,2.0,1.0,8.0,1.0,10.0,16.0,345.0,0.900,0.910,7758.0,4731.19,4553.951584,4642.570792
4,2.0,0.0,2.0,8.0,1.0,13.0,14.0,345.0,0.990,0.990,7643.0,4615.32,4438.998205,4527.159103
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93,0.0,0.0,2.0,6.0,0.0,5.0,6.0,140.0,0.965,0.755,1696.0,1624.06,1512.100527,1568.080263
94,1.0,0.0,2.0,7.0,2.0,8.0,6.0,180.0,0.810,0.840,3670.0,533.21,465.975345,499.592672
95,1.0,0.0,2.0,7.0,0.0,11.0,3.0,155.0,0.900,0.940,2860.0,442.22,352.167631,397.193816
96,0.0,0.0,5.0,5.0,1.0,10.0,7.0,210.0,0.720,0.670,2740.0,513.27,466.777297,490.023648
