In [200]:
import git
import re

In [201]:
#git diff between two commit
def get_diff(repo_path, commit_A, commit_B):
    repo = git.Repo(repo_path)
    diff = repo.git.diff(commit_A, commit_B, '-U0', '--histogram')
    return diff

In [202]:
#get the dictionary where the key is the file path and the value is a list of numbers of the changed lines
def generate_changes_dict(diff_output):
    file_path_pattern = re.compile(r'^\+\+\+ b/(.*)$')
    line_number_pattern = re.compile(r'^@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))? @@')

    result_dict = {}
    current_file_path = None
    numbers_list = []

    diff_lines = diff_output.split('\n')

    for line in diff_lines:
        file_path_match = file_path_pattern.match(line)
        line_number_match = line_number_pattern.match(line)

        if file_path_match:
            if current_file_path and numbers_list:
                result_dict[current_file_path] = numbers_list
                numbers_list = []

            current_file_path = file_path_match.group(1)
        elif line_number_match:
            start_line = int(line_number_match.group(1))
            num_lines = 1 if line_number_match.group(3) is None else int(line_number_match.group(3))

            numbers_list.extend(range(start_line, start_line + num_lines))

    if current_file_path and numbers_list:
        result_dict[current_file_path] = numbers_list

    return result_dict


In [203]:
def get_candidate_commits(blame_result, file_path, changes_dict):
    # Definisci il pattern delle espressioni regolari
    pattern = re.compile(r'([a-f0-9]+)\s+(\d+)\s+(\d+)?(?:\s+(\d+))?\nauthor\s+([^\n]+)')

    # Inizializza il set di commit
    commit_set = set()

    # Trova tutte le corrispondenze nel testo di output
    matches = pattern.findall(blame_result)

    # Estrai le informazioni desiderate
    for match in matches:
        commit_hash, first_number, second_number, third_number, author = match
        
        # Controlla se il secondo numero è nella lista associata al percorso del file
        if int(second_number) in changes_dict.get(file_path, []):
            # Aggiungi le informazioni richieste al set
            commit_set.add((commit_hash, author))

    # Restituisci il set di commit
    return commit_set

In [204]:
def get_all_candidate_commits(repo, parent_commit, changes_dict):
    all_candidate_commits = set()

    for file_path, line_numbers in changes_dict.items():
        blame_result = repo.git.blame(parent_commit, file_path, "--line-porcelain")
        candidate_commits = get_candidate_commits(blame_result, file_path, changes_dict)
        all_candidate_commits = all_candidate_commits.union(candidate_commits)
    
    return all_candidate_commits

In [205]:
#retrieve commit from the repo
repository_url = "/Users/rubengigante/tensorflow"
repo = git.Repo(repository_url)
commits = repo.iter_commits()

In [206]:
#retrieve bug fix commit
bug_fix_commits = []

for commit in commits:
    commit_message = commit.message.lower()
    if 'bug' in commit_message and 'fix' in commit_message:
        bug_fix_commits.append(commit)

In [207]:
#retrieve only one commit and his parent
bug_fix_commit = bug_fix_commits[0]
parent_commit = bug_fix_commit.parents[0]

In [208]:
diff = get_diff(repository_url, bug_fix_commit, parent_commit)
print(diff)

diff --git a/third_party/xla/xla/service/gpu/BUILD b/third_party/xla/xla/service/gpu/BUILD
index 67468fef9b5..00f1d5ebe98 100644
--- a/third_party/xla/xla/service/gpu/BUILD
+++ b/third_party/xla/xla/service/gpu/BUILD
@@ -3469 +3468,0 @@ cc_library(
-        "@com_google_absl//absl/algorithm:container",
@@ -3471 +3469,0 @@ cc_library(
-        "@com_google_absl//absl/log:check",
diff --git a/third_party/xla/xla/service/gpu/buffer_sharing.cc b/third_party/xla/xla/service/gpu/buffer_sharing.cc
index 2c3920a32c8..64421596dcb 100644
--- a/third_party/xla/xla/service/gpu/buffer_sharing.cc
+++ b/third_party/xla/xla/service/gpu/buffer_sharing.cc
@@ -23 +22,0 @@ limitations under the License.
-#include "absl/algorithm/container.h"
@@ -25 +23,0 @@ limitations under the License.
-#include "absl/log/check.h"
@@ -98,5 +96,2 @@ std::optional<bool> FusionCanShareBufferHint(const HloInstruction* user,
-  // We don't support nested tuples on GPU.
-  CHECK_LT(user_index.size(), 2);
-  if (output->opcode

In [209]:
changes_dict = generate_changes_dict(diff)
print(changes_dict)

{'third_party/xla/xla/service/gpu/BUILD': [3469, 3471], 'third_party/xla/xla/service/gpu/buffer_sharing.cc': [23, 25, 98, 99, 100, 101, 102, 117, 118, 119, 137, 138, 139, 140, 141, 142, 143, 144, 145, 191], 'third_party/xla/xla/service/gpu/gpu_copy_insertion_test.cc': [342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477]}


In [210]:
all_candidate_commits = get_all_candidate_commits(repo, parent_commit, changes_dict)

In [211]:
print(bug_fix_commit)
print("Candidate commits: ")
print(all_candidate_commits)

60e827df64a757493b29756dd9cb71224a7c34bb
Candidate commits: 
{('8651f9b1a81f2568e364aa8beb969d9162cd21aa', 'Adrian Kuegel'), ('cf1b0378f428fedb5194083f6ba9aa708388a58d', 'Adrian Kuegel'), ('f4529e80ab30a51207901b74b438980ac8b3ceaf', 'Adrian Kuegel'), ('85ac1c6ddc93d4f53ff5b2c5c1c7bac7a8a44030', 'Sergey Kozub'), ('2f60589a4f35f09576d055ed5fa3c3df0625fc62', 'Adrian Kuegel')}


In [2]:
import re

output = """
cf1b0378f428fedb5194083f6ba9aa708388a58d 1 1 17
author Adrian Kuegel
author-mail <akuegel@google.com>
author-time 1684162708
author-tz -0700
committer TensorFlower Gardener
committer-mail <gardener@tensorflow.org>
committer-time 1684162954
committer-tz -0700
summary Determine when fusion can share buffers between operands and outputs.
filename tensorflow/compiler/xla/service/gpu/gpu_copy_insertion_test.cc
cf1b0378f428fedb5194083f6ba9aa708388a58d 2 2
author Adrian Kuegel
author-mail <akuegel@google.com>
author-time 1684162708
author-tz -0700
committer TensorFlower Gardener
committer-mail <gardener@tensorflow.org>
committer-time 1684162954
committer-tz -0700
summary Determine when fusion can share buffers between operands and outputs.
filename tensorflow/compiler/xla/service/gpu/gpu_copy_insertion_test.cc
"""

# Definisci il pattern delle espressioni regolari
pattern = re.compile(r'([a-f0-9]+)\s+(\d+)\s+\d+\s+\d+\nauthor\s+([^\n]+)')

# Trova tutte le corrispondenze nel testo di output
matches = pattern.findall(output)

# Estrai le informazioni desiderate
commit_hashes = [match[0] for match in matches]
second_numbers = [match[1] for match in matches]
authors = [match[2] for match in matches]

# Stampare i risultati
for i in range(len(commit_hashes)):
    print(f"Commit Hash: {commit_hashes[i]}, Secondo Numero: {second_numbers[i]}, Autore: {authors[i]}")


Commit Hash: cf1b0378f428fedb5194083f6ba9aa708388a58d, Secondo Numero: 1, Autore: Adrian Kuegel
