In [73]:
import git
from github import Github, GithubException, UnknownObjectException
import re
import subprocess

In [74]:
def get_diff(repo_path, commit_A, commit_B):
    repo = git.Repo(repo_path)
    diff = repo.git.diff(commit_A.sha, commit_B.sha, '-U0', '--histogram')
    return diff


def get_github_repo(repository_path):
    repo = git.Repo(repository_path)
    remote_url = repo.remotes.origin.url
    owner, repo_name = re.search(r'[:/](\w+)/(\w+)(?:\.git)?$', remote_url).groups()
    
    return owner, repo_name

def get_commit_info(repo_path, commit_sha):
    try:
        result = subprocess.run(['git', 'show', '--pretty=format:%B', '-s', commit_sha], capture_output=True, text=True, cwd=repo_path)
        commit_info = result.stdout.strip()
        return commit_info
    except Exception as e:
        print(f"Error getting commit info for SHA {commit_sha}: {str(e)}")
        return None
    
def generate_changes_dict(diff_output):
    file_path_pattern = re.compile(r'^\+\+\+ b/(.*)$')
    line_number_pattern = re.compile(r'^@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))? @@')

    result_dict = {}
    current_file_path = None
    numbers_list = []

    diff_lines = diff_output.split('\n')

    for line in diff_lines:
        file_path_match = file_path_pattern.match(line)
        line_number_match = line_number_pattern.match(line)

        if file_path_match:
            if current_file_path and numbers_list:
                result_dict[current_file_path] = numbers_list
                numbers_list = []

            current_file_path = file_path_match.group(1)
        elif line_number_match:
            start_line = int(line_number_match.group(1))
            num_lines = 1 if line_number_match.group(3) is None else int(line_number_match.group(3))

            numbers_list.extend(range(start_line, start_line + num_lines))

    if current_file_path and numbers_list:
        result_dict[current_file_path] = numbers_list

    return result_dict

def get_issue_status(repo, issue_number):
    try:
        issue = repo.get_issue(number=issue_number)
        return issue.state
    except UnknownObjectException:
        print(f"Issue #{issue_number} not found.")
        return None

def get_issue_numbers(commit_message):
    pattern = re.compile(r'#(\d+)')
    return [int(match.group(1)) for match in pattern.finditer(commit_message)]

def get_commits_fixing_issues(repo, bug_fix_commit):
    fixing_commits = {}

    for issue_number in get_issue_numbers(bug_fix_commit.message):
        issue_status = get_issue_status(repo, issue_number)

        if issue_status == 'closed':
            issue = repo.get_issue(number=issue_number)

            # Aggiungi la verifica per evitare errori 
            try:
                comments = issue.get_comments()
                commit_sha_list = re.findall(r'([a-f0-9]{40})', " ".join(comment.body for comment in comments))
            except GithubException as e:
                print(f"Error getting comments for issue #{issue_number}: {e}")
                continue

            fixing_commits[issue_number] = []

            # Aggiungi la verifica per evitare errori 
            for commit_sha in commit_sha_list:
                try:
                    commit = repo.get_commit(commit_sha)
                    fixing_commits[issue_number].append((commit_sha, commit.author))
                except GithubException as e:
                    print(f"Error getting commit information for SHA {commit_sha}: {e}")

    return fixing_commits

def get_candidate_commits(blame_result, file_path, changes_dict):
    pattern = re.compile(r'([a-f0-9]+)\s+(\d+)\s+(\d+)?(?:\s+(\d+))?\nauthor\s+([^\n]+)')

    commit_set = set()

    matches = pattern.findall(blame_result)

    for match in matches:
        commit_hash, first_number, second_number, third_number, author = match

        if int(second_number) in changes_dict.get(file_path, []):
            commit_set.add((commit_hash, author))

    return commit_set

def get_all_candidate_commits(repo, parent_commit, changes_dict):
    all_candidate_commits = set()

    for file_path, line_numbers in changes_dict.items():
        blame_result = repo.git.blame(parent_commit.sha, file_path, "--line-porcelain")
        candidate_commits = get_candidate_commits(blame_result, file_path, changes_dict)
        all_candidate_commits = all_candidate_commits.union(candidate_commits)

    return all_candidate_commits

In [None]:
# Esempio di utilizzo
repository_url = r'C:\Users\Navigator\Desktop\IS\tensorflow'
github_token = "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"
repo = git.Repo(repository_url)


# Ottieni il proprietario e il nome del repository da un percorso locale
owner, repo_name = get_github_repo(repository_url)
# Inizializza l'oggetto Github con il token di accesso
github_repo = Github(github_token).get_repo(f"{owner}/{repo_name}")

bug_fix_commits = []

for issue in github_repo.get_issues(state='closed'):
    print(f"Issue #{issue.number}")
    
    # Ottieni la data di apertura dell'issue
    issue_opened_at = issue.created_at
    print(f"Issue opened at: {issue_opened_at}")
    
    # Ottieni i commit associati all'issue, limitati a quelli effettuati prima della data di apertura dell'issue
    commits = github_repo.get_commits(since=issue_opened_at, until=issue.closed_at)
    
    # Itera su ciascun commit associato all'issue
    for commit in commits:
        commit_date = commit.commit.author.date

        # Stampa solo i commit effettuati prima della data di apertura dell'issue
        if commit_date < issue_opened_at:
            print(f"\nCommit: {commit.sha}")
            parent_commit = commit.parents[0]
            diff = get_diff(repository_url, commit.commit, parent_commit)
            changes_dic = generate_changes_dict(diff)
            print(changes_dic)
            
            all_candidate_commits = get_all_candidate_commits(repo, parent_commit, changes_dic)
            
            # print commit fix and candidate commits
            print(bug_fix_commits)
            print("Candidate commits: ")
            print(all_candidate_commits)


Issue #62370
Issue opened at: 2023-11-10 21:45:43+00:00
Issue #62369
Issue opened at: 2023-11-10 21:14:35+00:00
Issue #62368
Issue opened at: 2023-11-10 21:09:47+00:00
Issue #62359
Issue opened at: 2023-11-08 15:51:30+00:00
Issue #62357
Issue opened at: 2023-11-08 13:35:41+00:00

Commit: ca4b0051807474dd65029da7b772adc4605dffbb
{'tensorflow/compiler/mlir/tosa/BUILD': [189, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84], 'tensorflow/compiler/mlir/tosa/tf_tfl_passes.cc': [33, 34, 35, 58], 'tensorflow/compiler/mlir/tosa/tfl_passes.cc': [33, 34, 61, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 4