In [11]:
import os
import subprocess
import pandas as pd
import ast

In [12]:
# Base directory under which repositories are cloned (see clone_repository).
repos_dir = "./java_repos"
# Base directory under which the CK output for each repository is stored.
results_dir = "./ck_results"
ck_jar_path = "./ck/target/ck-0.7.1-SNAPSHOT-jar-with-dependencies.jar"  # Correct path to the ck.jar

In [13]:
# Repository listing used by the rest of the notebook; the columns used later
# are `name` and `owner`.
repos_csv = "top_1000_java_repos.csv"
df = pd.read_csv(repos_csv)
df

Unnamed: 0,name,owner,createdAt,updatedAt,stargazers,releases
0,JavaGuide,{'login': 'Snailclimb'},2018-05-07T13:27:00Z,2025-03-15T14:00:54Z,148698,0
1,hello-algo,{'login': 'krahets'},2022-11-04T11:08:34Z,2025-03-15T18:28:44Z,110186,9
2,java-design-patterns,{'login': 'iluwatar'},2014-08-09T16:45:18Z,2025-03-15T18:26:47Z,90999,0
3,mall,{'login': 'macrozheng'},2018-04-04T01:11:44Z,2025-03-15T14:56:03Z,79471,3
4,advanced-java,{'login': 'doocs'},2018-10-06T11:38:30Z,2025-03-15T16:17:07Z,77236,1
...,...,...,...,...,...,...
995,concurrency-limits,{'login': 'Netflix'},2017-12-11T19:32:41Z,2025-03-13T01:02:42Z,3307,102
996,SuperTextView,{'login': 'chenBingX'},2017-04-17T11:32:41Z,2025-03-11T21:59:26Z,3306,33
997,99-problems,{'login': 'shekhargulati'},2015-11-22T05:56:33Z,2025-03-04T17:04:16Z,3304,0
998,PRDownloader,{'login': 'amitshekhariitbhu'},2017-11-11T19:29:38Z,2025-03-14T06:37:04Z,3303,8


In [14]:
def _owner_login(value):
    """Return the owner's login from a raw `owner` cell.

    The CSV stores the owner as the text of a Python dict
    (e.g. "{'login': 'Snailclimb'}"). Strings are parsed with
    ``ast.literal_eval``; if the result (or the value itself) is a dict with a
    'login' key, that login is returned. Anything else is returned unchanged.

    Strings that are not Python literals -- in particular a login that was
    already extracted by a previous run of this cell -- are returned as-is
    instead of raising, which makes the cell safe to re-run.
    """
    if isinstance(value, str):
        try:
            value = ast.literal_eval(value)
        except (ValueError, TypeError, SyntaxError):
            # Not a literal (e.g. a bare login such as "trojan-gfw"): keep it.
            return value
    if isinstance(value, dict) and 'login' in value:
        return value['login']
    return value


df['owner'] = df['owner'].apply(_owner_login)
df["owner"]


0             Snailclimb
1                krahets
2               iluwatar
3             macrozheng
4                  doocs
             ...        
995              Netflix
996            chenBingX
997        shekhargulati
998    amitshekhariitbhu
999           trojan-gfw
Name: owner, Length: 1000, dtype: object

In [15]:
def clone_repository(repo_url, repo_name):
    """Clone the repository unless a local copy already exists.

    Args:
        repo_url: URL handed to ``git clone``.
        repo_name: repository name; any '/' is replaced by '_' to build the
            folder name under ``repos_dir``.

    Returns:
        The local path of the repository (whether freshly cloned or already
        present).

    Raises:
        subprocess.CalledProcessError: if ``git clone`` exits with a non-zero
            status.
    """
    local_dir = os.path.join(repos_dir, repo_name.replace('/', '_'))
    if os.path.exists(local_dir):
        # An existing path is taken as an earlier clone; git is not invoked.
        return local_dir
    subprocess.run(["git", "clone", repo_url, local_dir], check=True)
    return local_dir

In [16]:
def run_ck(project_dir, output_dir):
    """Run the CK tool to collect metrics for a project.

    Launches ``java -jar <ck_jar_path>`` as a subprocess and waits for it to
    finish.

    Args:
        project_dir: path of the project to analyse.
        output_dir: output location passed to CK.
            NOTE(review): a later cell reads ``<output_dir>class.csv``, so CK
            appears to use this value as a file-name prefix rather than as a
            directory -- confirm before appending a path separator.

    Raises:
        subprocess.CalledProcessError: if the java process exits with a
            non-zero status.
    """
    # Positional CK options. Meanings below follow the CK README usage line;
    # TODO confirm against the jar version actually in use.
    use_jars = "true"
    max_files_per_partition = "0"  # presumably 0 = let CK pick the partition size
    variables_and_fields = "true"
    ignored_dirs = ["build/", "target/", ".git/"]

    command = [
        "java", "-jar", ck_jar_path,
        project_dir,
        use_jars,
        max_files_per_partition,
        variables_and_fields,
        output_dir,
        *ignored_dirs,
    ]
    subprocess.run(command, check=True)

In [17]:
# Trial run on a single repository
row = df.iloc[2]
repo_name, repo_owner = row['name'], row['owner']
repo_url = f"https://github.com/{repo_owner}/{repo_name}.git"

print(f"🔄 Cloning {repo_owner}/{repo_name}...")

try:
    repo_path = clone_repository(repo_url, repo_name)

    # Results location for this repository, built with the same '/' -> '_'
    # substitution that clone_repository applies to the clone folder.
    repo_results_dir = os.path.join(results_dir, repo_name.replace('/', '_'))
    os.makedirs(repo_results_dir, exist_ok=True)

    print(f"⚙️ Running CK analysis for {repo_name}...")
    run_ck(repo_path, repo_results_dir)
    print(f"✅ Analysis completed for {repo_name}!")

# Best-effort: any failure (clone, filesystem, CK) is reported, not re-raised.
except Exception as e:
    print(f"⚠️ Error processing {repo_owner}/{repo_name}: {e}")


🔄 Cloning iluwatar/java-design-patterns...
⚙️ Running CK analysis for java-design-patterns...
✅ Analysis completed for java-design-patterns!


In [18]:
# Locate the class-level CK report for the repository analysed above.
# The file is read from `<results_dir>/<repo_name>class.csv` (no separator
# between the repository name and "class.csv").
results_dir = "./ck_results"
repo_name = "java-design-patterns"
class_csv_path = os.path.join(results_dir, repo_name + "class.csv")
filtered_csv_path = os.path.join(results_dir, repo_name + "_filtered_metrics.csv")

df_ck = pd.read_csv(class_csv_path)

# Keep only the class identifier and the three metrics summarised later.
metric_columns = ['class', 'cbo', 'dit', 'lcom']
filtered_df = df_ck.loc[:, metric_columns]

filtered_df


Unnamed: 0,class,cbo,dit,lcom
0,com.iluwatar.databus.data.StoppingData,5,2,0
1,com.iluwatar.servant.Servant,1,1,10
2,com.iluwatar.RegisterWorkerForm,4,1,0
3,com.iluwatar.leaderelection.ring.RingMessageMa...,3,2,10
4,com.iluwatar.typeobject.Candy,4,1,0
...,...,...,...,...
1827,com.iluwatar.model.view.controller.GiantContro...,7,1,6
1828,com.iluwatar.fluentinterface.fluentiterable.la...,3,2,0
1829,com.iluwatar.factorykit.Bow,1,1,0
1830,com.iluwatar.spatialpartition.QuadTreeTest,6,1,3


In [19]:

# Mean, median and standard deviation of each CK metric over all classes,
# keyed as "<metric>_<stat>" (e.g. "cbo_mean"); key order matches the output
# column order.
summary = {
    f"{metric}_{stat}": getattr(filtered_df[metric], stat)()
    for metric in ('cbo', 'dit', 'lcom')
    for stat in ('mean', 'median', 'std')
}

# Attach the metrics to the repository record that was actually analysed
# (`row`) rather than re-selecting it with a second hard-coded position, and
# fail loudly if the metrics were loaded for a different repository.
assert row['name'] == repo_name, (
    f"metrics loaded for {repo_name!r} but selected repository is {row['name']!r}"
)
repo_summary = row.copy()
for key, value in summary.items():
    repo_summary[key] = value

# One-row frame: repository metadata followed by the summary statistics.
results_df = pd.DataFrame([repo_summary])

results_df

Unnamed: 0,name,owner,createdAt,updatedAt,stargazers,releases,cbo_mean,cbo_median,cbo_std,dit_mean,dit_median,dit_std,lcom_mean,lcom_median,lcom_std
2,java-design-patterns,iluwatar,2014-08-09T16:45:18Z,2025-03-15T18:26:47Z,90999,0,3.363537,3.0,2.605909,1.203057,1.0,0.533152,1.876092,0.0,6.555601


In [20]:

# Single source for the output file name so the confirmation message always
# reports the file that was actually written.
output_csv = "summarized_results_sample.csv"
results_df.to_csv(output_csv, index=False)
print(f"✅ Summarized results saved to {output_csv}")

✅ Summarized results saved to summarized_results.csv
