# Interactive Code Quality Analysis for your Python project using DPy and Jupyter Notebook
High code quality ensures that software remains maintainable as its codebase evolves through contributions from diverse developers. One effective way to maintain high code quality is to identify code smells, which are symptoms indicating deeper quality problems in your codebase.

In this notebook, I'll walk through the process of analyzing an open-source project interactively using DPy, our code smell detection tool designed for Python projects. We'll post-process the analysis results, creating insightful visualizations and interactive summaries that make the findings more accessible and actionable.

Let's explore how combining automated smell detection with interactive data analysis can give you valuable insights into your code quality.

In [1]:
!pip install -r requirements.txt



In [None]:
# Download the repository to analyze.
# The clone is skipped when a checkout already exists, so the cell can be
# re-run safely: git refuses to clone into a non-empty directory.
import os

from git import Repo

if not os.path.isdir(os.path.join('django', '.git')):
    Repo.clone_from('https://github.com/django/django', 'django')

In [None]:
# Download the DPy archive from the Designite website.
# One archive is published per platform; this cell fetches the macOS one:
# https://www.designite-tools.com/assets/DPy-macos.zip
# https://www.designite-tools.com/assets/DPy-windows.zip
# https://www.designite-tools.com/assets/DPy-linux.zip
# NOTE: if you switch to another platform's URL, also update the archive
# name used by the unpack step ('DPy-macos.zip').

import wget
wget.download("https://www.designite-tools.com/assets/DPy-macos.zip")

In [None]:
import shutil

# Extract the downloaded DPy archive into the local 'dpy' directory.
shutil.unpack_archive(filename='DPy-macos.zip', extract_dir='dpy')

In [None]:
import subprocess
# Mark the unpacked DPy binary as executable.
subprocess.run(['chmod', '+x', 'dpy/DPy'])
# Launch DPy without arguments (presumably prints its usage text - TODO confirm).
subprocess.run(["dpy/DPy"])

In [None]:
# Register DPy with a license key. "JAxxxxxxxxx" looks like a placeholder -
# replace it with your own key before running this cell.
subprocess.run(["dpy/DPy", "register", "JAxxxxxxxxx"])

In [None]:
# Invoke DPy's `version` subcommand (a quick check that the binary runs).
subprocess.run(["dpy/DPy", "version"])

In [None]:
# Run DPy's `analyze` subcommand on the cloned sources in django/django;
# the JSON reports read later in this notebook live under analysis-results/django
# (-i / -o are presumably the input and output paths - confirm with DPy's help).
subprocess.run(["dpy/DPy", "analyze", "-i", "django/django", "-o", "analysis-results/django"])

In [None]:
import pandas as pd
# Load the implementation smells report from the analysis output directory.
df = pd.read_json('analysis-results/django/django_implementation_smells.json')
# Preview the first rows.
df.head()

In [None]:
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

def bar_plot(df, smell_type='Implementation'):
    """Draw a bar chart of how often each smell occurs in ``df``.

    One bar is drawn per distinct value of the 'Smell' column, ordered by
    frequency (the order produced by ``value_counts``). Bars are shaded
    with the "Oranges" colormap so that more frequent smells are darker.

    Args:
        df: DataFrame with a 'Smell' column, one row per detected smell.
        smell_type: Label used in the x-axis label and the plot title.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    smell_counts = df['Smell'].value_counts()
    cmap = plt.get_cmap("Oranges")

    # Normalise against [0, max] so each shade is proportional to its count
    # and every value stays inside the colormap's range. Adding the minimum
    # to each count would push the larger counts past vmax and make their
    # bars indistinguishable. The fallback keeps an empty frame plottable.
    max_count = smell_counts.max() if not smell_counts.empty else 1
    norm = mcolors.Normalize(vmin=0, vmax=max_count)

    fig, ax = plt.subplots(figsize=(8, 5))

    for smell, count in smell_counts.items():
        ax.bar(smell, count, color=cmap(norm(count)), edgecolor='orange', linewidth=1.5)

    ax.set_xlabel(f"{smell_type} smells")
    ax.set_ylabel("Count")
    ax.set_title(f"Distribution of Identified {smell_type} Smells")
    # Smell names are long; tilt them so they do not overlap.
    plt.xticks(rotation=45, ha='right')
    plt.show()


In [None]:
# Distribution of the implementation smells loaded above.
bar_plot(df, smell_type='Implementation')

In [None]:
# Load the design smells report from the analysis output directory.
df_d = pd.read_json('analysis-results/django/django_design_smells.json')
# Preview the first rows.
df_d.head()

In [None]:
# Distribution of the design smells loaded above.
bar_plot(df_d, smell_type='Design')

In [None]:
# Load the class/module metrics report from the analysis output directory.
df_metrics = pd.read_json('analysis-results/django/django_class_module_metrics.json')
# Preview the first rows.
df_metrics.head()

In [None]:
# Sum the lines of code per package, then list the five largest packages.
total_loc_pkg = (
    df_metrics
    .groupby('Package', as_index=False)['LOC']
    .sum()
)
top5 = (
    total_loc_pkg
    .sort_values(by='LOC', ascending=False)
    .iloc[:5]
)
print(top5)

In [None]:
# Count the detected smells per package, one frame per smell category.
impl_smell_count = (
    df.groupby('Package', as_index=False)['Smell']
    .count()
    .rename(columns={'Smell': 'Impl_smell'})
)
design_smell_count = (
    df_d.groupby('Package', as_index=False)['Smell']
    .count()
    .rename(columns={'Smell': 'Design_smell'})
)

# Outer-join the LOC totals with each smell count on 'Package'. Packages
# missing on either side get 0, and the smell counts are cast back to int.
merged_df = (
    pd.merge(total_loc_pkg, impl_smell_count, on='Package', how='outer')
    .fillna(0)
    .astype({'Impl_smell': int})
)
loc_smells_df = (
    pd.merge(merged_df, design_smell_count, on='Package', how='outer')
    .fillna(0)
    .astype({'Design_smell': int})
    .sort_values(by='LOC', ascending=False)  # largest packages first
)

# Show the combined table.
print("Package LOC and detected smells:")
print(loc_smells_df)

In [None]:
import numpy as np

# Smell density = smells per 1000 lines of code, rounded to two decimals.
# Packages with LOC == 0 get a density of 0 instead of the inf/NaN that the
# division would produce. The result is stored in '<smell column>_density'.
for smell_column in ('Impl_smell', 'Design_smell'):
    smells_per_kloc = (loc_smells_df[smell_column] / loc_smells_df['LOC']) * 1000
    loc_smells_df[f'{smell_column}_density'] = np.where(
        loc_smells_df['LOC'] > 0, smells_per_kloc.round(2), 0
    )
loc_smells_df

In [None]:
# Five packages with the highest design smell density, regardless of size.
top5 = (
    loc_smells_df
    .sort_values(by='Design_smell_density', ascending=False)
    .iloc[:5]
)
print(top5)

In [None]:
# Same ranking, restricted to packages with more than 5000 lines of code.
top5 = (
    loc_smells_df
    .loc[loc_smells_df['LOC'] > 5000]
    .sort_values(by='Design_smell_density', ascending=False)
    .iloc[:5]
)
print(top5)

In [None]:
import plotly.express as px

# Treemap of packages: tile area follows LOC, tile colour follows the
# design smell density. 'red' is repeated in the scale so that it covers
# most of the range between the low (green/orange) and high (brown/black) ends.
fig = px.treemap(
    loc_smells_df,
    path=['Package'],
    values='LOC',
    color='Design_smell_density',
    color_continuous_scale=[
        'green', 'orange',
        'red', 'red', 'red',
        'red', 'red', 'red',
        'red', 'red', 'red',
        'brown', 'black',
    ],
    title="Design smells in packages",
)

fig.show()


In [None]:
from pydriller import Repository
import re
from collections import Counter

# Path to the Git repository (change as needed)
repo_path = "django"

# Regular expression to capture the package named by an `import x` or
# `from x import ...` statement. It is applied to whole file contents, so
# re.MULTILINE is required: without it `^` only matches at the very start of
# the text and every import after the first line is missed.
# Leading whitespace is limited to spaces/tabs so a match never spans lines.
import_regex = re.compile(r'^[ \t]*(?:import|from)[ \t]+([\w.]+)', re.MULTILINE)

# Counter of package names imported by the Python files touched in the
# most recent commits.
package_counter = Counter()

# Number of most recent commits to inspect. This is the single source of
# truth for both the loop limit and the message printed below.
max_commits = 20

# order="reverse" walks the history newest-first.
commits = Repository(repo_path, order="reverse").traverse_commits()
for commit_index, commit in enumerate(commits, start=1):
    for file in commit.modified_files:
        # source_code is empty/None for files without current content,
        # so those are skipped.
        if file.filename.endswith('.py') and file.source_code:
            package_counter.update(import_regex.findall(file.source_code))

    # Stop once the configured number of commits has been processed
    if commit_index >= max_commits:
        break

# Print the most frequently seen packages
print(f"Most modified Python packages in the last {max_commits} commits:")
for package, occurrences in package_counter.most_common(10):
    print(f"{package}: {occurrences} times")


In [None]:
# Cross-reference the most frequently seen packages with the smell data and
# report those whose design smell density is above 10 smells per 1000 LOC.
print("consider refactoring of these packages:")
for package, count in package_counter.most_common(10):
    # Package names contain dots, which are wildcards in a regex; match the
    # name as a literal substring instead. na=False treats missing names as
    # non-matching.
    is_match = loc_smells_df['Package'].str.contains(package, regex=False, na=False)
    matching_df = loc_smells_df[is_match]
    if not matching_df.empty:
        # Only the first matching row is reported.
        matched_package = matching_df['Package'].iloc[0]
        design_smell_density = matching_df['Design_smell_density'].iloc[0]
        if design_smell_density > 10:
            print(f"{matched_package}: {design_smell_density}")