# For Replication Purposes

This notebook describes the analysis of the data from the STRIDE experiment, which was performed at two universities, one in the Netherlands and one in China.



## Initialization

Mount Google Drive and import the raw experiment data.



The steps described in this Colab notebook show the data from one experiment; however, the same steps were followed in both analyses.

The data was stored in, and accessed via, Google Drive.

In [None]:
# Mount Google Drive inside the Colab runtime so the raw experiment file can be read.
# "DRIVE DIRECTORY" is a placeholder for the mount point; the recorded cell output
# shows the drive mounted at /content/drive.
from google.colab import drive
drive.mount("DRIVE DIRECTORY")

Mounted at /content/drive


In [None]:
# Change the working directory to the folder that holds the experiment workbook
# (placeholder path: replace it before running).
cd "ADD DIRECTORY TO CORRECT FOLDER"

Import the Excel file into a pandas DataFrame.

In [None]:
import pandas as pd

# Load the raw experiment export (placeholder file name) into a DataFrame;
# the later cells slice their columns out of `df` by position.
df = pd.read_excel("FileName.xlsx")

# Preview the first rows to confirm the sheet was parsed as expected.
print(df.head())

Select only the relevant columns (by index): Group, Student_ID and Duration, followed by the ten threat-answer columns.


In [None]:
# Select relevant columns: the first 13 columns of the sheet, i.e. group label,
# student id, duration and the ten threat answers (positions 3-12).
df1 = df.iloc[:, 0:13].copy()

# Rename the three identifying columns by position. The whole column list is
# reassigned because writing into `df1.columns.values` mutates the Index
# buffer behind pandas' back and is not guaranteed to take effect.
threat_frame_columns = list(df1.columns)
threat_frame_columns[0:3] = ["Group", "Student_ID", "Duration"]
df1.columns = threat_frame_columns

# Replace missing cells with 0, then store Student_ID as an integer.
# The result of astype() must be assigned back: astype returns a new Series,
# so calling it without assignment leaves the column unchanged.
df1 = df1.fillna(0)
df1["Student_ID"] = df1["Student_ID"].astype(int)

df1["Student_ID"]

Concatenate the above columns with the list of threats, grouped by treatment group.

For Group A.

In [None]:
# Group A: keep the rows of participants assigned to "Group A" together with
# the 13 columns of df1 (3 identifying columns + 10 threat answers).
df_A = df1[df1["Group"] == "Group A"].iloc[:, 0:13].copy()

# Rename the ten answer columns (positions 3-12) to Threat_1 ... Threat_10.
# A complete column list is assigned instead of mutating `columns.values`,
# which pandas does not guarantee to propagate to column lookups.
threat_names = ["Threat_" + str(number) for number in range(1, 11)]
df_A.columns = list(df_A.columns[:3]) + threat_names

df_A.head()

For Group B.

In [None]:
# Group B: keep the rows of participants assigned to "Group B" together with
# the 13 columns of df1 (3 identifying columns + 10 threat answers).
df_B = df1[df1["Group"] == "Group B"].iloc[:, 0:13].copy()

# Rename the ten answer columns (positions 3-12) to Threat_1 ... Threat_10.
# A complete column list is assigned instead of mutating `columns.values`,
# which pandas does not guarantee to propagate to column lookups.
threat_names = ["Threat_" + str(number) for number in range(1, 11)]
df_B.columns = list(df_B.columns[:3]) + threat_names

df_B.head()

## Analyzing Group performance

For Group A


Number of students belonging to Group A.

In [None]:
# Number of rows (participants) in the Group A frame
len(df_A.index)

Average time (in minutes) to complete the experiments for (Group A) students.

In [None]:
# Mean of the Duration column divided by 60 to report minutes
# (the raw values are presumably seconds — confirm against the export).
df_A["Duration"].mean() / 60

Slowest student:

In [None]:
# Longest Group A duration, divided by 60 to report minutes
df_A["Duration"].max() / 60

Fastest student:

In [None]:
# Shortest Group A duration, divided by 60 to report minutes
df_A["Duration"].min() / 60

We checked the rate of:


1.   TP - real threats correctly recognised as real
2.   TN - fabricated/fake threats correctly recognised as fake
3.   FP - fabricated threats that the participants recognised as real
4.   FN - real threats that the participants recognised as fabricated




In Group A 

In [None]:
# Ground truth for Threat_1 ... Threat_10 — real threat: 1, fake threat: 2
threat_type = [1, 2, 1, 1, 2, 2, 2, 1, 2, 1]

precision_list_A = []
recall_list_A = []

TP_list_A = []
FP_list_A = []
FN_list_A = []
TN_list_A = []

# Score every Group A participant against the ground truth.
# Any answer other than 1 on a real threat is an FN and any answer other than
# 2 on a fake threat is an FP, so a missing answer (stored as 0 by the earlier
# fillna(0)) is scored as wrong.
for index, row in df_A.iterrows():
  TP = 0 # True Positive  - real threat answered 1 (real)
  FN = 0 # False Negative - real threat not answered 1
  FP = 0 # False Positive - fake threat not answered 2
  TN = 0 # True Negative  - fake threat answered 2 (fake)
  # Positions 3-12 hold the ten threat answers; .iloc makes the positional
  # slice explicit because the row is labelled by column name.
  for truth, given in zip(threat_type, row.iloc[3:13]):
    if truth == 1:
      if given == 1:
        TP += 1
      else:
        FN += 1
    else:
      if given == 2:
        TN += 1
      else:
        FP += 1

  TP_list_A.append(TP)
  FN_list_A.append(FN)
  TN_list_A.append(TN)
  FP_list_A.append(FP)
  # Precision is undefined when nothing was scored as a positive
  # (TP + FP == 0); record 0.0 instead of raising ZeroDivisionError.
  precision_list_A.append(TP / (TP + FP) if (TP + FP) else 0.0)
  recall_list_A.append(TP / (TP + FN) if (TP + FN) else 0.0)

# Precision and recall are kept unrounded, exactly as for Group B, so both
# groups enter the later box plots on the same footing. The counts are
# integers and need no rounding.

# Per-participant number of correct answers (TP + TN)
TP_TN_list_A = [tp + tn for tp, tn in zip(TP_list_A, TN_list_A)]

TP_avg_A = sum(TP_list_A) / len(TP_list_A)
FP_avg_A = sum(FP_list_A) / len(FP_list_A)
FN_avg_A = sum(FN_list_A) / len(FN_list_A)
TN_avg_A = sum(TN_list_A) / len(TN_list_A)
TP_TN_Avg_A = sum(TP_TN_list_A) / len(TP_TN_list_A) # TP_TN_Avg = TP_avg + TN_avg

print(TP_list_A)
print(TN_list_A)
print(TP_TN_list_A)
print(TP_avg_A)
print(FN_avg_A)
print(TN_avg_A)
print(FP_avg_A)
print(TP_TN_Avg_A)


Performance Bar Plot for group A

In [None]:
import matplotlib.pyplot as plt

# Mean count of each outcome per Group A participant, in display order
metrics = ["TP", "FN", "TN", "FP"]
means = [TP_avg_A, FN_avg_A, TN_avg_A, FP_avg_A]

# One grey bar per metric on a 6x5 inch figure
fig, ax = plt.subplots(figsize=(6, 5))
ax.bar(metrics, means, color="grey", width=0.666)
ax.set_xlabel("Metrics of success")
ax.set_ylabel("Mean of metric values")
ax.set_title("Performance of Group A")
plt.show()

**Checked how many participants guessed the actual/real threats correctly and incorrectly**

In [None]:
# Select the 5 real threats: Threat_1, 3, 4, 8 and 10 sit at column positions 3, 5, 6, 10 and 12
df_A_real = df_A.iloc[:, [3, 5, 6, 10, 12]]
# print(df_real)


# Count, per threat, how many students gave each answer
print(df_A_real[df_A_real == 1].count()) # correct: real threat marked as real
print(df_A_real[df_A_real == 2].count()) # wrong: real threat marked as fake

Bar Chart representing the number of correct and wrong answers for each **real threat** for Group A.

In [None]:
# https://colab.research.google.com/notebooks/charts.ipynb#scrollTo=bZv4MenQpYOF

import matplotlib.pyplot as plt

# IDs of the real threats, in the column order of df_A_real
threat_ids = ["1", "3", "4", "8", "10"]

# Derive the bar heights from the data rather than typing them in, so the
# chart stays correct when the notebook is re-run on the other experiment.
correct_counts = (df_A_real == 1).sum().tolist() # students who marked them as real
wrong_counts = (df_A_real == 2).sum().tolist()   # students who marked them as fake

# Stacked bars: wrong answers sit on top of the correct ones.
plt.bar(threat_ids, correct_counts, label="Correct Answers", color='g', width = 0.666)
plt.bar(threat_ids, wrong_counts, bottom=correct_counts, label="Wrong Answers", color='r', width = 0.666)

plt.xlabel("Threat ID")
plt.ylabel("Number of Students")
plt.title("Real Threats Bar Plot from Group A")
plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)
plt.show()

Repeat the same process for the fake threats.

In [None]:
# The 5 fake threats: Threat_2, 5, 6, 7 and 9 sit at column positions 4, 7, 8, 9 and 11
df_A_fake = df_A.iloc[:, [4, 7, 8, 9, 11]]

# Per-threat tally of each answer
print((df_A_fake == 2).sum()) # correct, fake as fake
print((df_A_fake == 1).sum()) # wrong, fake as real

Bar Chart representing the number of correct and wrong answers for each **fake threat** for Group A.

In [None]:
import matplotlib.pyplot as plt

# IDs of the fake threats, in the column order of df_A_fake
threat_ids = ["2", "5", "6", "7", "9"]

# Derive the bar heights from the data rather than typing them in, so the
# chart stays correct when the notebook is re-run on the other experiment.
correct_counts = (df_A_fake == 2).sum().tolist() # students who marked them as fake
wrong_counts = (df_A_fake == 1).sum().tolist()   # students who marked them as real

# Stacked bars: wrong answers sit on top of the correct ones.
plt.bar(threat_ids, correct_counts, label="Correct Answers", color='g', width = 0.666)
plt.bar(threat_ids, wrong_counts, bottom=correct_counts, label="Wrong Answers", color='r', width = 0.666)

plt.xlabel("Threat ID")
plt.ylabel("Number of Students")
plt.title("Fake Threats Bar Plot from Group A")
plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)
plt.show()

## Analyze Group B performance

Number of students belonging to Group B.

In [None]:
# Number of rows (participants) in the Group B frame
len(df_B.index)

Average time (in minutes) to complete the experiments for (Group B) students. The average time in Group A is 16.58min

In [None]:
# Mean of the Duration column divided by 60 to report minutes
# (the raw values are presumably seconds — confirm against the export).
df_B["Duration"].mean() / 60

Slowest student:

In [None]:
# Longest Group B duration, divided by 60 to report minutes
df_B["Duration"].max() / 60

Fastest student:

In [None]:
# Shortest Group B duration, divided by 60 to report minutes
df_B["Duration"].min() / 60

We checked the rate of:


1.   TP - real threats correctly recognised as real
2.   TN - fabricated/fake threats correctly recognised as fake
3.   FP - fabricated threats that the participants recognised as real
4.   FN - real threats that the participants recognised as fabricated




In Group B

In [None]:
# Ground truth for Threat_1 ... Threat_10 — real threat: 1, fake threat: 2
threat_type = [1, 2, 1, 1, 2, 2, 2, 1, 2, 1]

precision_list_B = []
recall_list_B = []

TP_list_B = []
FP_list_B = []
FN_list_B = []
TN_list_B = []

# Score every Group B participant against the ground truth.
# Any answer other than 1 on a real threat is an FN and any answer other than
# 2 on a fake threat is an FP, so a missing answer (stored as 0 by the earlier
# fillna(0)) is scored as wrong.
for index, row in df_B.iterrows():
  TP = 0 # True Positive  - real threat answered 1 (real)
  FN = 0 # False Negative - real threat not answered 1
  FP = 0 # False Positive - fake threat not answered 2
  TN = 0 # True Negative  - fake threat answered 2 (fake)
  # Positions 3-12 hold the ten threat answers; .iloc makes the positional
  # slice explicit because the row is labelled by column name.
  for truth, given in zip(threat_type, row.iloc[3:13]):
    if truth == 1:
      if given == 1:
        TP += 1
      else:
        FN += 1
    else:
      if given == 2:
        TN += 1
      else:
        FP += 1

  TP_list_B.append(TP)
  FN_list_B.append(FN)
  TN_list_B.append(TN)
  FP_list_B.append(FP)
  # Precision is undefined when nothing was scored as a positive
  # (TP + FP == 0); record 0.0 instead of raising ZeroDivisionError.
  precision_list_B.append(TP / (TP + FP) if (TP + FP) else 0.0)
  recall_list_B.append(TP / (TP + FN) if (TP + FN) else 0.0)

# Precision and recall are kept unrounded; the counts are integers and need
# no rounding.

# Per-participant number of correct answers (TP + TN)
TP_TN_list_B = [tp + tn for tp, tn in zip(TP_list_B, TN_list_B)]

TP_avg_B = sum(TP_list_B) / len(TP_list_B)
FP_avg_B = sum(FP_list_B) / len(FP_list_B)
FN_avg_B = sum(FN_list_B) / len(FN_list_B)
TN_avg_B = sum(TN_list_B) / len(TN_list_B)
TP_TN_Avg_B = sum(TP_TN_list_B) / len(TP_TN_list_B) # TP_TN_Avg = TP_avg + TN_avg

print(TP_list_B)
print(TN_list_B)
print(TP_TN_list_B)
print(TP_avg_B)
print(FN_avg_B)
print(TN_avg_B)
print(FP_avg_B)
print(TP_TN_Avg_B)

In [None]:
# Per-participant precision and recall for Group B ...
for per_student_scores in (precision_list_B, recall_list_B):
  print(per_student_scores)

# ... followed by their arithmetic means
for per_student_scores in (precision_list_B, recall_list_B):
  print(sum(per_student_scores)/len(per_student_scores))

Performance Bar plot for Group B

In [None]:
import matplotlib.pyplot as plt

# Mean count of each outcome per Group B participant, in display order
metrics = ["TP", "FN", "TN", "FP"]
means = [TP_avg_B, FN_avg_B, TN_avg_B, FP_avg_B]

# One grey bar per metric on a 6x5 inch figure
fig, ax = plt.subplots(figsize=(6, 5))
ax.bar(metrics, means, color="grey", width=0.666)
ax.set_xlabel("Metrics of success")
ax.set_ylabel("Mean of metric values")
ax.set_title("Performance of Group B")
plt.show()

**Checked how many participants guessed the actual/real threats correctly and incorrectly**

In [None]:
# Select the 5 real threats: Threat_1, 3, 4, 8 and 10 sit at column positions 3, 5, 6, 10 and 12
df_B_real = df_B.iloc[:, [3, 5, 6, 10, 12]]

# print(df_real)
# Count, per threat, how many students gave each answer
print(df_B_real[df_B_real == 1].count()) # correct: real threat marked as real
print(df_B_real[df_B_real == 2].count()) # wrong: real threat marked as fake

Bar Chart representing the number of correct and wrong answers for each **real threat** for Group B.

In [None]:
import matplotlib.pyplot as plt

# IDs of the real threats, in the column order of df_B_real
threat_ids = ["1", "3", "4", "8", "10"]

# Derive the bar heights from the data rather than typing them in, so the
# chart stays correct when the notebook is re-run on the other experiment.
correct_counts = (df_B_real == 1).sum().tolist() # students who marked them as real
wrong_counts = (df_B_real == 2).sum().tolist()   # students who marked them as fake

# Stacked bars: wrong answers sit on top of the correct ones.
plt.bar(threat_ids, correct_counts, label="Correct Answers", color='g', width = 0.666)
plt.bar(threat_ids, wrong_counts, bottom=correct_counts, label="Wrong Answers", color='r', width = 0.666)

plt.xlabel("Threat ID")
plt.ylabel("Number of Students")
plt.title("Real Threats Bar Plot from Group B")
plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)
plt.show()

In [None]:
# Students who recognised all five real threats, i.e. answered 1 ("real") for
# every one of them. Testing for == 1 rather than != 2 keeps a missing answer
# (stored as 0 by the earlier fillna(0)) from being counted as correct.
all_real_recognised = (df_B_real == 1).all(axis=1)
df_B_real_all_correct = df_B_real[all_real_recognised]
print(df_B_real_all_correct.shape[0])
print(df_B_real_all_correct.index) # showing the student index

Repeat the same process for the fake threats.

In [None]:
# Select the 5 fake threats: Threat_2, 5, 6, 7 and 9 sit at column positions 4, 7, 8, 9 and 11
df_B_fake  = df_B.iloc[:, [4, 7, 8, 9, 11]]

# Count, per threat, how many students gave each answer
print(df_B_fake[df_B_fake == 2].count()) # correct: fake threat marked as fake
print(df_B_fake[df_B_fake == 1].count()) # wrong: fake threat marked as real


Bar Chart representing the number of correct and wrong answers for each **fake threat** for Group B.

In [None]:
import matplotlib.pyplot as plt

# IDs of the fake threats, in the column order of df_B_fake
threat_ids = ["2", "5", "6", "7", "9"]

# Derive the bar heights from the data rather than typing them in, so the
# chart stays correct when the notebook is re-run on the other experiment.
correct_counts = (df_B_fake == 2).sum().tolist() # students who marked them as fake
wrong_counts = (df_B_fake == 1).sum().tolist()   # students who marked them as real

# Stacked bars: wrong answers sit on top of the correct ones.
plt.bar(threat_ids, correct_counts, label="Correct Answers", color='g', width = 0.666)
plt.bar(threat_ids, wrong_counts, bottom=correct_counts, label="Wrong Answers", color='r', width = 0.666)

plt.xlabel("Threat ID")
plt.ylabel("Number of Students")
plt.title("Fake Threats Bar Plot from Group B")
plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)
plt.show()

In [None]:
# Students who recognised all five fake threats, i.e. answered 2 ("fake") for
# every one of them. Testing for == 2 rather than != 1 keeps a missing answer
# (stored as 0 by the earlier fillna(0)) from being counted as correct.
all_fake_recognised = (df_B_fake == 2).all(axis=1)
df_B_fake_all_correct = df_B_fake[all_fake_recognised]
print(df_B_fake_all_correct.shape[0])
print(df_B_fake_all_correct.index) # showing the student index

# Show those students' full rows with every answer of 2 blanked out (NaN).
# Rows are looked up by index label; subtracting len(df_A) to get a position
# is only right when every Group A row precedes Group B in the sheet.
df_B[df_B != 2].loc[df_B_fake_all_correct.index, :]

## Comparison of the two groups

Box plot for Group A and Group B of Precision and Recall

In [None]:
import matplotlib.pyplot as plt
import warnings
import numpy as np

# NumPy 2.0 removed VisibleDeprecationWarning from the top-level namespace;
# it lives in np.exceptions (NumPy >= 1.25). Fall back to the old location
# on older NumPy so the cell runs on both.
_warning_home = getattr(np, "exceptions", np)
warnings.filterwarnings("ignore", category=_warning_home.VisibleDeprecationWarning)

print(precision_list_A)
print(precision_list_B)

print(recall_list_A)
print(recall_list_B)

# Precision (P) and recall (R) per participant, one box per group
box_plot_data=[precision_list_A, precision_list_B, recall_list_A, recall_list_B]
box_labels = ['P(Group A)','P(Group B)','R(Group A)','R(Group B)']

plt.boxplot(box_plot_data, patch_artist=True)
# Tick labels are set through xticks because boxplot's `labels` keyword is
# deprecated since Matplotlib 3.9; boxes are drawn at positions 1..n.
plt.xticks(range(1, len(box_labels) + 1), box_labels)
plt.show()

In [None]:
# Descriptive statistics of the per-participant counts, printed under the
# same headings and in the same order for both groups.
performance_summaries = [
  ("==== Group A  TP ====", TP_list_A),
  ("==== Group A  TN ====", TN_list_A),
  ("==== Group B  TP ====", TP_list_B),
  ("==== Group B  TN ====", TN_list_B),
  ("\n\n==== Group A  TP+TN ====", TP_TN_list_A),
  ("==== Group B  TP+TN ====", TP_TN_list_B),
]

for heading, counts in performance_summaries:
  print(heading)
  print(pd.Series(counts).describe())

**Box plot of TP and TN of both groups** 

In [None]:
import matplotlib.pyplot as plt
import warnings
import numpy as np

# NumPy 2.0 removed VisibleDeprecationWarning from the top-level namespace;
# it lives in np.exceptions (NumPy >= 1.25). Fall back to the old location
# on older NumPy so the cell runs on both.
_warning_home = getattr(np, "exceptions", np)
warnings.filterwarnings("ignore", category=_warning_home.VisibleDeprecationWarning)

print(TP_list_A)
print(TP_list_B)

print(TN_list_A)
print(TN_list_B)

# TP and TN counts per participant, one box per group
box_plot_data=[TP_list_A, TP_list_B, TN_list_A, TN_list_B]
box_labels = ['TP(Group A)','TP(Group B)','TN(Group A)','TN(Group B)']

plt.boxplot(box_plot_data, patch_artist=True)
# Tick labels are set through xticks because boxplot's `labels` keyword is
# deprecated since Matplotlib 3.9; boxes are drawn at positions 1..n.
plt.xticks(range(1, len(box_labels) + 1), box_labels)
plt.show()

# **Performance of Groups with TP, TN, TP+TN vector**

In [None]:
#performance of group A

import matplotlib.pyplot as plt
import warnings
import numpy as np
from google.colab import files

# NumPy 2.0 removed VisibleDeprecationWarning from the top-level namespace;
# it lives in np.exceptions (NumPy >= 1.25). Fall back to the old location
# on older NumPy so the cell runs on both.
_warning_home = getattr(np, "exceptions", np)
warnings.filterwarnings("ignore", category=_warning_home.VisibleDeprecationWarning)

print(TP_list_A)
print(TN_list_A)
print(TP_TN_list_A)

# TP, TN and their per-participant sum for Group A
box_plot_data=[TP_list_A, TN_list_A, TP_TN_list_A]
box_labels = ['TP(Group A)','TN(Group A)', 'TP+TN(Group A)']

fig = plt.figure()
plt.boxplot(box_plot_data, patch_artist=True)
# Tick labels are set through xticks because boxplot's `labels` keyword is
# deprecated since Matplotlib 3.9; boxes are drawn at positions 1..n.
plt.xticks(range(1, len(box_labels) + 1), box_labels)
plt.title('Group A Performance')
# Dashed divider between the TP/TN boxes and the TP+TN box
plt.axvline(x = 2.5, color = 'b', linestyle='--')
plt.ylim(-1,11)

# Optional export of the figure (uncomment to save and download it):
# fig.savefig('CN Group A performance with TP+TN.jpg', bbox_inches='tight', dpi=250)
# files.download('CN Group A performance with TP+TN.jpg')
plt.show()

In [None]:
#performance of group B

import matplotlib.pyplot as plt
import warnings
import numpy as np
from google.colab import files

# NumPy 2.0 removed VisibleDeprecationWarning from the top-level namespace;
# it lives in np.exceptions (NumPy >= 1.25). Fall back to the old location
# on older NumPy so the cell runs on both.
_warning_home = getattr(np, "exceptions", np)
warnings.filterwarnings("ignore", category=_warning_home.VisibleDeprecationWarning)

print(TP_list_B)
print(TN_list_B)
print(TP_TN_list_B)

# TP, TN and their per-participant sum for Group B
box_plot_data=[TP_list_B, TN_list_B, TP_TN_list_B]
box_labels = ['TP(Group B)','TN(Group B)', 'TP+TN(Group B)']

fig = plt.figure()
plt.boxplot(box_plot_data, patch_artist=True)
# Tick labels are set through xticks because boxplot's `labels` keyword is
# deprecated since Matplotlib 3.9; boxes are drawn at positions 1..n.
plt.xticks(range(1, len(box_labels) + 1), box_labels)
plt.title('Group B Performance')
# Dashed divider between the TP/TN boxes and the TP+TN box
plt.axvline(x = 2.5, color = 'b', linestyle='--')

plt.ylim(-1,11)
# Optional export of the figure (uncomment to save and download it):
# fig.savefig('CN Group B performance with TP+TN.jpg', bbox_inches='tight', dpi=250)
# files.download('CN Group B performance with TP+TN.jpg')
plt.show()


In [None]:
# Compact grey-scale version of the precision/recall box plot
plt.figure(figsize=(4,3))

box_plot_data=[precision_list_A, precision_list_B, recall_list_A, recall_list_B]
box_labels = ['P(Group A)','P(Group B)','R(Group A)','R(Group B)']

# Single colour for boxes, caps, whiskers and outliers; medians in black
c = "grey"
plt.boxplot(box_plot_data, patch_artist=True,
            boxprops=dict(facecolor=c, color=c),
            capprops=dict(color=c),
            whiskerprops=dict(color=c),
            flierprops=dict(color=c, markeredgecolor=c),
            medianprops=dict(color="black"),
            )
# Tick labels are set through xticks because boxplot's `labels` keyword is
# deprecated since Matplotlib 3.9; boxes are drawn at positions 1..n.
plt.xticks(range(1, len(box_labels) + 1), box_labels)

plt.show()

Merge the two groups dataframes.

In [None]:
# Combine both groups into one frame. With no `on=` given, merge joins on
# every column the two frames share; how='outer' keeps the rows of both sides.
# NOTE(review): the intent looks like a plain row-wise union, for which
# pd.concat([df_A, df_B]) is the usual tool — confirm before changing.
df_all = df_A.merge(df_B, how='outer')

# df_all

Total number of students:

In [None]:
# Number of rows (participants) in the combined frame
len(df_all.index)

Group A performance on real threats:

In [None]:
# Real threats: Threat_1, 3, 4, 8 and 10 (column positions 3, 5, 6, 10, 12)
df_A_real = df_A.iloc[:, [3, 5, 6, 10, 12]]

# Share of answers on real threats that were "real" (1): correct / all answers
(df_A_real == 1).sum().sum() / df_A_real.count().sum()

Group A performance on fake threats:

In [None]:
# Fake threats: Threat_2, 5, 6, 7 and 9 (column positions 4, 7, 8, 9, 11)
df_A_fake = df_A.iloc[:, [4, 7, 8, 9, 11]]

# Share of answers on fake threats that were "fake" (2): correct / all answers
(df_A_fake == 2).sum().sum() / df_A_fake.count().sum()

Group B performance on real threats:

In [None]:
# Real threats: Threat_1, 3, 4, 8 and 10 (column positions 3, 5, 6, 10, 12)
df_B_real = df_B.iloc[:, [3, 5, 6, 10, 12]]

# Share of answers on real threats that were "real" (1): correct / all answers
(df_B_real == 1).sum().sum() / df_B_real.count().sum()

Group B performance on fake threats:

In [None]:
# Fake threats: Threat_2, 5, 6, 7 and 9 (column positions 4, 7, 8, 9, 11)
df_B_fake = df_B.iloc[:, [4, 7, 8, 9, 11]]

# Share of answers on fake threats that were "fake" (2): correct / all answers
(df_B_fake == 2).sum().sum() / df_B_fake.count().sum()

# **Analysis of Perception Questions**

**Group A**

In [None]:
# Select the three identifying columns plus the perception-question columns
# (positions 13-25 of the raw sheet).
# NOTE: this reuses the name `df1`, replacing the threat-answer frame built
# earlier; re-running the earlier group cells after this one would pick up
# this frame instead.
df1 = df.iloc[:, [0, 1, 2]].join(df.iloc[:, 13:26])

# Rename the identifying columns by position. The whole column list is
# reassigned because writing into `df1.columns.values` mutates the Index
# buffer behind pandas' back and is not guaranteed to take effect.
perception_frame_columns = list(df1.columns)
perception_frame_columns[0:3] = ["Group", "Student_ID", "Duration"]
df1.columns = perception_frame_columns
df1.head()

**Concatenate the relevant columns selected above with the perception questions**

In [None]:
# Group A: identifying columns plus the seven perception questions
# (first ten columns of df1).
df_A_part2 = df1[df1["Group"] == "Group A"].iloc[:, 0:10].copy()

# Name the perception questions at positions 3-9. A complete column list is
# assigned instead of mutating `columns.values`, which pandas does not
# guarantee to propagate to column lookups such as df_A_part2["DFD"].
part2_columns_A = list(df_A_part2.columns)
part2_columns_A[3:10] = [
  "Case description",
  "Sequence diagram",
  "DFD",
  "Threat asumptions",
  "Threat description",
  "Threat category",
  "Affected components",
]
df_A_part2.columns = part2_columns_A

df_A_part2.head()

In [None]:
#Group B: identifying columns plus six perception questions. Position 5 (the
# column named "DFD" in Group A's frame) is skipped — presumably Group B was
# not asked about a DFD; confirm against the questionnaire.
df_B_part2 = df1[df1["Group"] == "Group B"].iloc[:, [0, 1, 2, 3, 4, 6, 7, 8, 9]].copy()

# Name the perception questions at positions 3-8. A complete column list is
# assigned instead of mutating `columns.values`, which pandas does not
# guarantee to propagate to column lookups.
part2_columns_B = list(df_B_part2.columns)
part2_columns_B[3:9] = [
  "Case description",
  "Sequence diagram",
  "Threat asumptions",
  "Threat description",
  "Threat category",
  "Affected components",
]
df_B_part2.columns = part2_columns_B

df_B_part2.head()

In [None]:
# Summary statistics of the numeric columns in Group A's perception frame
df_A_part2.describe()

In [None]:
# Summary statistics of the numeric columns in Group B's perception frame
df_B_part2.describe()

## **Perception of Sequence Diagram**

In [None]:
# Perception of Sequence Diagram in Group A and Group B

import matplotlib.pyplot as plt
import warnings
import numpy as np
from google.colab import files

# NumPy 2.0 removed VisibleDeprecationWarning from the top-level namespace;
# it lives in np.exceptions (NumPy >= 1.25). Fall back to the old location
# on older NumPy so the cell runs on both.
_warning_home = getattr(np, "exceptions", np)
warnings.filterwarnings("ignore", category=_warning_home.VisibleDeprecationWarning)

# Ratings of the sequence diagram from each group; later cells reuse these.
seq_A = df_A_part2["Sequence diagram"]
seq_B = df_B_part2["Sequence diagram"]

print(seq_A.describe())
print(seq_B.describe())

box_plot_data=[seq_A, seq_B]
box_labels = ['Group A','Group B']

fig = plt.figure()
plt.boxplot(box_plot_data, patch_artist=True)
# Tick labels are set through xticks because boxplot's `labels` keyword is
# deprecated since Matplotlib 3.9; boxes are drawn at positions 1..n.
plt.xticks(range(1, len(box_labels) + 1), box_labels)
plt.title('Perception on the usefulness of Sequence Diagram')

fig.savefig('sequence diagram perception.jpg', bbox_inches='tight', dpi=250)
plt.show()

## **Test of Equivalence (TOST) between Group A and Group B (SeqD)**

In [None]:
from scipy.stats import mannwhitneyu

# Equivalence margin: the groups count as equivalent when Group B's ratings
# lie within +/- delta of Group A's.
delta = 0.8

# Two one-sided tests (TOST), each trying to reject "outside the margin".
# Lower bound — H1: seq_B is greater than seq_A - delta.
p_low = mannwhitneyu(seq_B, [x - delta for x in seq_A], alternative = "greater")
# Upper bound — H1: seq_A + delta is greater than seq_B.
p_up = mannwhitneyu([x + delta for x in seq_A], seq_B, alternative = "greater")

# The TOST p-value is the larger of the two one-sided p-values. Compare the
# .pvalue fields: max() on the result objects themselves compares them as
# tuples, i.e. by U statistic first, and can pick the smaller p-value.
p_TOST = max(p_low.pvalue, p_up.pvalue)

print(p_low)
print(p_up)
print(p_TOST)

MannwhitneyuResult(statistic=178.0, pvalue=0.9958869500546135)
MannwhitneyuResult(statistic=312.0, pvalue=0.5078088583905803)
MannwhitneyuResult(statistic=312.0, pvalue=0.5078088583905803)


# **Perception of usefulness of DFD and Sequence diagram in Group A only**

Scatter plot

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
from google.colab import files

# Group A's DFD ratings. No cell defined `dfd_A`, so it is taken from the
# perception frame here, mirroring how seq_A is built from "Sequence diagram".
dfd_A = df_A_part2["DFD"]

# 5x5 grid: cell [i][j] counts the participants whose sequence-diagram rating
# is i+1 and whose DFD rating is j+1.
scatterplot_data = np.zeros((5, 5), dtype=int)
[rows, cols] = scatterplot_data.shape

for seq_score, dfd_score in zip(seq_A, dfd_A):
  scatterplot_data[int(seq_score - 1)][int(dfd_score - 1)] += 1

# Flatten the non-empty cells into point coordinates, counts and marker areas.
x = []
y = []
area = []
count = []

for i in range(rows):
  for j in range(cols):
    if (scatterplot_data[i, j]):
      x.append(i+1)
      y.append(j+1)
      count.append(scatterplot_data[i, j])
      area.append(scatterplot_data[i, j]*100) # marker area = 100 per participant

fig = plt.figure()

scatter = plt.scatter(x, y, s=area, c="blue", alpha=0.2)

# Write the participant count next to each bubble.
for i, txt in enumerate(count):
  plt.annotate(txt, (x[i], y[i]), color='blue')

# Size legend; `func` converts marker area back to a participant count.
kw = dict(prop="sizes", num=2, color="blue", alpha=0.2, fmt="{x:.0f}", func=lambda s: s/100)
plt.legend(*scatter.legend_elements(**kw), loc="lower right", title="Numbers", labelspacing=1.5, borderpad=0.8)

# One tick per rating step on both axes. The locators get their own names so
# the coordinate lists x and y are not overwritten.
x_locator = MultipleLocator(1)
y_locator = MultipleLocator(1)
ax = plt.gca()
ax.xaxis.set_major_locator(x_locator)
ax.yaxis.set_major_locator(y_locator)

plt.title('Perception on the usefulness of Sequence Diagram and DFD in Group A')
plt.xlabel('Sequence Diagram')
plt.ylabel('DFD')

fig.savefig('sequence diagram and DFD perception.jpg', bbox_inches='tight', dpi=250)
files.download('sequence diagram and DFD perception.jpg')

plt.show()

Box plot

In [None]:
# Perception of Sequence Diagram and DFD in Group A

import matplotlib.pyplot as plt
import warnings
import numpy as np
from google.colab import files

# NumPy 2.0 removed VisibleDeprecationWarning from the top-level namespace;
# it lives in np.exceptions (NumPy >= 1.25). Fall back to the old location
# on older NumPy so the cell runs on both.
_warning_home = getattr(np, "exceptions", np)
warnings.filterwarnings("ignore", category=_warning_home.VisibleDeprecationWarning)

# Group A's DFD ratings. No cell defined `dfd_A`, so it is taken from the
# perception frame here, mirroring how seq_A is built from "Sequence diagram".
dfd_A = df_A_part2["DFD"]

box_plot_data=[seq_A, dfd_A]
box_labels = ['Sequence Diagram','DFD']

fig = plt.figure()
plt.boxplot(box_plot_data, patch_artist=True)
# Tick labels are set through xticks because boxplot's `labels` keyword is
# deprecated since Matplotlib 3.9; boxes are drawn at positions 1..n.
plt.xticks(range(1, len(box_labels) + 1), box_labels)
plt.title('Perception on the usefulness in Group A')

fig.savefig('sequence diagram and DFD perception-boxplot.jpg', bbox_inches='tight', dpi=250)
files.download('sequence diagram and DFD perception-boxplot.jpg')
plt.show()

# **Test of Equivalence with Mann Whitney U**

**For Performance**

In [None]:
#TOST for TP only

# Equivalence margin (delta) and the range the shifted values are clipped to
min_val = 0
max_val = 5
delta = 1

print("This is the original data frame", TP_list_A)

# Group A shifted down by delta, then clipped to [min_val, max_val]
xd = [value - delta for value in TP_list_A]
print("This is the data frame (x - delta)", xd)

newxd = [
  min_val if value <= min_val else max_val if value >= max_val else value
  for value in xd
]
print("This is the data frame (x - delta) and min max applied", newxd)

# Group A shifted up by delta, then clipped to [min_val, max_val]
xd1 = [value + delta for value in TP_list_A]
print("This is the data frame (x + delta)", xd1)

newxd1 = [
  min_val if value <= min_val else max_val if value >= max_val else value
  for value in xd1
]
print("This is the data frame (x + delta) and min max applied", newxd1) 

# Two one-sided tests: (A - delta) below B, and B below (A + delta);
# the TOST p-value is the larger of the two p-values.
p_low = mannwhitneyu(newxd, TP_list_B, alternative = "less")
p_up = mannwhitneyu(TP_list_B, newxd1,  alternative = "less")
p_TOST = max(p_low.pvalue, p_up.pvalue)

for outcome in (p_low, p_up, p_TOST):
  print(outcome)

In [None]:
#TOST FOR TN

# Equivalence margin (delta) and the range the shifted values are clipped to
min_val = 0
max_val = 5
delta = 1

print("This is the original data frame", TN_list_A)

# Group A shifted down by delta, then clipped to [min_val, max_val]
xd = [value - delta for value in TN_list_A]
print("This is the data frame (x - delta)", xd)

newxd = [
  min_val if value <= min_val else max_val if value >= max_val else value
  for value in xd
]
print("This is the data frame (x - delta) and min max applied", newxd) #with - delta

# Group A shifted up by delta, then clipped to [min_val, max_val]
xd1 = [value + delta for value in TN_list_A]
print("This is the data frame (x + delta)", xd1)

newxd1 = [
  min_val if value <= min_val else max_val if value >= max_val else value
  for value in xd1
]
print("This is the data frame (x + delta) and min max applied", newxd1) #with + delta

# Two one-sided tests: (A - delta) below B, and B below (A + delta);
# the TOST p-value is the larger of the two p-values.
p_low = mannwhitneyu(newxd, TN_list_B, alternative = "less")
p_up = mannwhitneyu(TN_list_B, newxd1, alternative = "less")
p_TOST = max(p_low.pvalue, p_up.pvalue)

for outcome in (p_low, p_up, p_TOST):
  print(outcome)

In [None]:
#TOST FOR TP_TN
# Equivalence margin (delta) and the range the shifted values are clipped to.
# TP+TN ranges over 0-10 because there are ten threats in total.
min_val = 0
max_val = 10
delta = 1

# The per-participant TP+TN lists are named TP_TN_list_A / TP_TN_list_B where
# they are built; the `sum_`-prefixed names used here before were never
# defined and raised NameError.
print("This is the original data frame", TP_TN_list_A)

# Group A shifted down by delta, then clipped to [min_val, max_val]
xd = [value - delta for value in TP_TN_list_A]
print("This is the data frame (x - delta)", xd)

newxd = [
  min_val if value <= min_val else max_val if value >= max_val else value
  for value in xd
]
print("This is the data frame (x - delta) and min max applied", newxd) #with - delta

# Group A shifted up by delta, then clipped to [min_val, max_val]
xd1 = [value + delta for value in TP_TN_list_A]
print("This is the data frame (x + delta)", xd1)

newxd1 = [
  min_val if value <= min_val else max_val if value >= max_val else value
  for value in xd1
]
print("This is the data frame (x + delta) and min max applied", newxd1) #with + delta

# Two one-sided tests: (A - delta) below B, and B below (A + delta);
# the TOST p-value is the larger of the two p-values.
p_low = mannwhitneyu(newxd, TP_TN_list_B, alternative = "less")
p_up = mannwhitneyu(TP_TN_list_B, newxd1,  alternative = "less")
p_TOST = max(p_low.pvalue, p_up.pvalue)

print(p_low)
print(p_up)
print(p_TOST)

TOST for the perceived usefulness of the sequence diagram: Group A vs. Group B.

In [None]:
#TOST FOR Sequence Diagram of group A and B
# Equivalence margin (delta) and the range the shifted ratings are clipped to
min_val = 1
max_val = 5
delta = 1

print("This is the original data frame", seq_A)

# Group A shifted down by delta, then clipped to [min_val, max_val]
xd = [rating - delta for rating in seq_A]
print("This is the data frame (x - delta)", xd)

newxd = [
  min_val if rating <= min_val else max_val if rating >= max_val else rating
  for rating in xd
]
print("This is the data frame (x - delta) and min max applied", newxd) #with - delta

# Group A shifted up by delta, then clipped to [min_val, max_val]
xd1 = [rating + delta for rating in seq_A]
print("This is the data frame (x + delta)", xd1)

newxd1 = [
  min_val if rating <= min_val else max_val if rating >= max_val else rating
  for rating in xd1
]
print("This is the data frame (x + delta) and min max applied", newxd1) #with + delta

# Two one-sided tests: (A - delta) below B, and B below (A + delta);
# the TOST p-value is the larger of the two p-values.
p_low = mannwhitneyu(newxd, seq_B, alternative = "less")
p_up = mannwhitneyu(seq_B, newxd1,  alternative = "less")
p_TOST = max(p_low.pvalue, p_up.pvalue)

for outcome in (p_low, p_up, p_TOST):
  print(outcome)

# **Test of difference with Mann Whitney U**

**For performance**

In [None]:
from scipy.stats import mannwhitneyu
from scipy.stats import wilcoxon

# One-sided Mann-Whitney U tests (H1: Group A greater than Group B) on the
# per-participant TP, TN and TP+TN counts.
p_mwu1 = mannwhitneyu(TP_list_A, TP_list_B, alternative="greater")
p_mwu2 = mannwhitneyu(TN_list_A, TN_list_B, alternative="greater")
p_mwu3 = mannwhitneyu(TP_TN_list_A, TP_TN_list_B, alternative="greater")

for test_result in (p_mwu1, p_mwu2, p_mwu3):
  print(test_result)

For sequence diagram only. Group A and B

In [None]:
from scipy.stats import mannwhitneyu
from scipy.stats import wilcoxon

# Two-sided Mann-Whitney U test on the sequence-diagram ratings of both groups.
# SciPy spells the option "two-sided" (with a hyphen); "two sided" is rejected
# with a ValueError.
p_mwu = mannwhitneyu(seq_A, seq_B, alternative = "two-sided")


print(p_mwu)

# **T-Test paired sample for perception of DFD vs sequence diagram in group A only**

In [None]:
#t-test paired sample, test of difference for perception of DFD vs SeqD in group A only

import scipy.stats as stats

# Group A's DFD ratings. No cell defined `dfd_A`, so it is taken from the
# perception frame here, mirroring how seq_A is built from "Sequence diagram".
dfd_A = df_A_part2["DFD"]

# Paired one-sided test — H1: the DFD ratings are greater than the
# sequence-diagram ratings given by the same participants.
stats.ttest_rel(dfd_A, seq_A, alternative='greater')