In [None]:
import os
from dotenv import load_dotenv
load_dotenv()
import constants as c
import networkx as nx
import graphs
import utils
import networkx as nx
import matplotlib.pyplot as plt
from matplotlib import cm
import matplotlib.colors as mcolors
import pandas as pd
import processing

In [None]:
# Flags
# When True, the two "full dependency graph" cells call graphs.draw_problem_graph;
# when False those cells do nothing.
DRAW_RAW_DEPENDENCY_GRAPH = False

In [None]:
# Read the Azure token from the environment variable whose name is stored in
# c.AZURE_TOKEN_ENV_KEY; os.getenv returns None when that variable is not set.
# NOTE(review): azure_pat is not referenced by any other visible cell — confirm it is still needed.
azure_pat = os.getenv(c.AZURE_TOKEN_ENV_KEY)

## Drawing of the full dependency graph 

In [None]:
# Optional (flag-guarded): draw the problem graph with as_barpartite=False and keep
# whatever graphs.draw_problem_graph returns in G.
# NOTE(review): no graph is passed here, while the last cell of the notebook passes G
# positionally — confirm draw_problem_graph accepts both call shapes.
# G is reassigned later by graphs.create_digraph_files_to_namespace.
if DRAW_RAW_DEPENDENCY_GRAPH:
    G = graphs.draw_problem_graph(as_barpartite=False)

In [None]:
# Optional (flag-guarded): same drawing call as the previous cell, but with
# as_barpartite=True; the returned value again overwrites G.
# NOTE(review): presumably this selects a bipartite layout — confirm in graphs.draw_problem_graph.
if DRAW_RAW_DEPENDENCY_GRAPH:
    G = graphs.draw_problem_graph(as_barpartite=True)


In [None]:
def get_levels(G: nx.DiGraph) -> dict[str, int]:
    """Map every node of ``G`` to a level derived from its dotted name.

    The level of a node is the number of ``.``-separated segments in its
    name minus 2, e.g. ``"a.b"`` -> 0 and ``"a.b.c"`` -> 1. Names with fewer
    than two segments therefore get a negative level.

    Args:
        G: Graph whose node identifiers are strings (each one is split on ".").

    Returns:
        A dict mapping each node name to its integer level.
    """
    # The previous annotation claimed list[list[str]], but a dict has always been returned.
    return {node: len(node.split(".")) - 2 for node in G.nodes()}


## Filtering

In [None]:
# excel_sheet = pd.read_excel("../data/income_base_metrics.xlsx", sheet_name="Ark1")
# types = excel_sheet[excel_sheet["Scope"]=="Type"]
# columns_to_drop = ['Project', 'Scope', 'Member']
# clean = types.drop(columns=columns_to_drop)

In [None]:
# clean

In [None]:
# for node, attr in G.nodes(data=True):
#     if attr["type"] != "file":
#         continue
#     namespace, class_name = get_class_name(node)
#     cyc = utils.get_cyclomatic_complexity(clean, namespace, class_name)
#     attr["cc"] = cyc

### Filter - Only the top 10 critical files

In [None]:
# Build the directed graph for the income-base project (c.PROJECT_NAME_INCOME_BASE).
# The following cells read node attributes 'type' ("file" / "namespace") and, for files, 'lines'.
G = graphs.create_digraph_files_to_namespace(c.PROJECT_NAME_INCOME_BASE)

In [None]:
# Combine the per-file metrics into a single 'combined' score: the mean of the
# normalized line count and the normalized out-degree of each file node.
# (The cyclomatic-complexity and in-degree metrics are currently not part of the score.)
nodes_data = G.nodes(data=True)

# Collect the file nodes once so that metrics and scores stay aligned by position.
file_nodes = [(node, attr) for node, attr in nodes_data if attr['type'] == "file"]
files_size = [attr['lines'] for _, attr in file_nodes]
file_out_degree = [G.out_degree(node) for node, _ in file_nodes]

# processing.normalize returns a callable that maps a raw value to its normalized value.
normalized_file_size = processing.normalize(files_size)
normalized_file_out_degree = processing.normalize(file_out_degree)

# Apply the normalizers to every raw value.
norm_file_sizes = list(map(normalized_file_size, files_size))
norm_file_out_degrees = list(map(normalized_file_out_degree, file_out_degree))

comb = [
    (size + out_degree) / 2
    for size, out_degree in zip(norm_file_sizes, norm_file_out_degrees)
]

# Store the score on each file node; the attr dicts are the graph's own attribute dicts.
for (node, attr), score in zip(file_nodes, comb):
    attr["combined"] = score




In [None]:
# Draw the critical-files view of G, requesting 10 critical files with node labels,
# then save the current matplotlib figure to the path in c.CRITICAL_FILES_IMG_PAHT.
# NOTE(review): presumably the ranking uses the 'combined' attribute set in the previous
# cell — confirm in graphs.draw_critcal_files_graph.
graphs.draw_critcal_files_graph(G=G, n_critical_files=10, with_labels=True)
plt.savefig(c.CRITICAL_FILES_IMG_PAHT, dpi=300, bbox_inches='tight')

### Filter by each metric
This shows what the graph looks like after it is filtered by the thresholds set in the 'filter values' cell.

In [None]:
# Capture the normalized metrics of the unfiltered graph before the filtering cells
# start removing nodes from G.
# NOTE(review): the meaning of the four returned values is defined by
# processing.get_normalized; none of them is used in the visible cells — confirm.
data, norm_files, data2, norm_namespace = processing.get_normalized(G)

In [None]:
# Filter values: thresholds used by the filtering cells
n_lines = 1000 # file nodes with fewer lines than this are removed
critical_file_out_degree = 10 # file nodes with a smaller out-degree (namespaces the file depends on) are removed
namespace_in_degree = 10 # namespace nodes with a smaller in-degree are removed
# namespace_out_degree = 5

In [None]:
def remove_file_node(G, node, attr, min_lines=None, min_out_degree=None) -> bool:
    """Decide whether a node should be removed by the file filter.

    Only nodes whose ``attr['type']`` is ``"file"`` are candidates. A file is
    removed when it has fewer than ``min_lines`` lines or when its out-degree
    in ``G`` is below ``min_out_degree``.

    Args:
        G: Graph containing ``node``; only ``G.out_degree(node)`` is used.
        node: Identifier of the node being examined.
        attr: Attribute dict of the node; must contain ``'type'`` and, for
            file nodes, ``'lines'``.
        min_lines: Line-count threshold. Defaults to the notebook-level
            ``n_lines`` when omitted.
        min_out_degree: Out-degree threshold. Defaults to the notebook-level
            ``critical_file_out_degree`` when omitted.

    Returns:
        True if the node is a file that fails either threshold, otherwise False.
    """
    if attr['type'] != "file":
        return False

    # Fall back to the notebook globals lazily so existing three-argument
    # calls behave exactly as before.
    if min_lines is None:
        min_lines = n_lines
    if attr['lines'] < min_lines:
        return True

    if min_out_degree is None:
        min_out_degree = critical_file_out_degree
    return bool(G.out_degree(node) < min_out_degree)

In [None]:
# Drop every file node rejected by remove_file_node, i.e. files with fewer than
# n_lines lines or with an out-degree below critical_file_out_degree.
all_nodes = list(G.nodes(data=True))  # snapshot, because G is mutated inside the loop
nodes_before = len(all_nodes)

for node, attr in all_nodes:
    if not remove_file_node(G, node, attr):
        continue
    G.remove_node(node)

nodes_after = G.number_of_nodes()
print("Nodes before:", nodes_before)
print("Nodes after:", nodes_after)

In [None]:
# Drop namespace nodes whose in-degree is below namespace_in_degree.
all_nodes = list(G.nodes(data=True))  # snapshot, because G is mutated inside the loop
nodes_before = len(all_nodes)

for node, attr in all_nodes:
    if attr['type'] != "namespace":
        continue
    # In-degree is read at this point, i.e. after any removals made earlier in the loop.
    if G.in_degree(node) < namespace_in_degree:
        G.remove_node(node)

nodes_after = G.number_of_nodes()
print("Nodes before:", nodes_before)
print("Nodes after:", nodes_after)


In [None]:
# Draw the filtered graph G with node labels (as_barpartite=False), then save the
# current matplotlib figure to ../img/filtered_graph.png at 300 dpi with a tight bounding box.
graphs.draw_problem_graph(G, as_barpartite=False, with_labels=True)
plt.savefig("../img/filtered_graph.png", dpi=300, bbox_inches='tight')