codecov
diff --git a/‎services/ta_timeseries.py
Lines changed: 166 additions & 0 deletions b/‎services/ta_timeseries.py
Lines changed: 166 additions & 0 deletions
@@ -0,0 +1,166 @@
+from __future__ import annotations
+
+from datetime import datetime
+
+import mmh3
+import test_results_parser
+from django.db import connections
+from django.db.models import Q
+from shared.django_apps.timeseries.models import (
+    Testrun,
+    TestrunBranchSummary,
+    TestrunSummary,
+)
+
+
+def calc_test_id(name: str, classname: str, testsuite: str) -> bytes:
+    h = mmh3.mmh3_x64_128()  # assumes we're running on x64 machines
+    h.update(testsuite.encode("utf-8"))
+    h.update(classname.encode("utf-8"))
+    h.update(name.encode("utf-8"))
+    test_id_hash = h.digest()
+
+    return test_id_hash
+
+
+def calc_flags_hash(flags: list[str]) -> bytes | None:
+    flags_str = " ".join(sorted(flags))  # we know that flags cannot contain spaces
+
+    # returns a tuple of two int64 values
+    # we only need the first one
+    flags_hash, _ = mmh3.hash64(flags_str, signed=False)
+    flags_hash_bytes = flags_hash.to_bytes(8)
+    return flags_hash_bytes
+
+
+def insert_testrun(
+    timestamp: datetime,
+    repo_id: int | None,
+    commit_sha: str | None,
+    branch: str | None,
+    upload_id: int | None,
+    flags: list[str] | None,
+    parsing_info: test_results_parser.ParsingInfo,
+    flaky_test_ids: set[bytes] | None = None,
+):
+    for testrun in parsing_info["testruns"]:
+        test_id = calc_test_id(
+            testrun["name"], testrun["classname"], testrun["testsuite"]
+        )
+        flags_hash = calc_flags_hash(flags) if flags else None
+        outcome = testrun["outcome"]
+
+        if outcome == "failure" and flaky_test_ids and test_id in flaky_test_ids:
+            outcome = "flaky_failure"
+
+        Testrun.objects.create(
+            timestamp=timestamp,
+            test_id=test_id,
+            flags_hash=flags_hash,
+            name=testrun["name"],
+            classname=testrun["classname"],
+            testsuite=testrun["testsuite"],
+            computed_name=testrun["computed_name"],
+            outcome=outcome,
+            duration_seconds=testrun["duration"],
+            failure_message=testrun["failure_message"],
+            framework=parsing_info["framework"],
+            filename=testrun["filename"],
+            repo_id=repo_id,
+            commit_sha=commit_sha,
+            branch=branch,
+            flags=flags,
+            upload_id=upload_id,
+        )
+
+
+def get_pr_comment_failures(
+    repo_id: int, commit_sha: str
+) -> list[dict[str, bytes | str | None]]:
+    with connections["timeseries"].cursor() as cursor:
+        cursor.execute(
+            """
+            SELECT 
+                test_id,
+                flags_hash,
+                FIRST(computed_name, timestamp) as computed_name,
+                FIRST(failure_message, timestamp) as failure_message
+            FROM timeseries_testrun
+            WHERE repo_id = %s AND commit_sha = %s AND outcome IN ('failure', 'flaky_failure')
+            GROUP BY test_id, flags_hash
+            """,
+            [repo_id, commit_sha],
+        )
+        return [
+            {
+                "test_id": bytes(test_id),
+                "flags_hash": bytes(flags_hash),
+                "computed_name": computed_name,
+                "failure_message": failure_message,
+            }
+            for test_id, flags_hash, computed_name, failure_message in cursor.fetchall()
+        ]
+
+
+def get_pr_comment_agg(repo_id: int, commit_sha: str) -> list[dict[str, int]]:
+    with connections["timeseries"].cursor() as cursor:
+        cursor.execute(
+            """
+            SELECT outcome, count(*) FROM (
+                SELECT 
+                    test_id,
+                    flags_hash,
+                    FIRST(outcome, timestamp) as outcome
+                FROM timeseries_testrun
+                WHERE repo_id = %s AND commit_sha = %s
+                GROUP BY test_id, flags_hash
+            ) AS t
+            GROUP BY outcome
+            """,
+            [repo_id, commit_sha],
+        )
+        return [
+            {"outcome": outcome, "count": count} for outcome, count in cursor.fetchall()
+        ]
+
+
+def get_testruns_for_flake_detection(
+    upload_id: int,
+    flaky_test_ids: set[bytes],
+) -> list[Testrun]:
+    return list(
+        Testrun.objects.filter(
+            Q(upload_id=upload_id)
+            & (
+                Q(outcome="failure")
+                | Q(outcome="flaky_failure")
+                | (Q(outcome="pass") & Q(test_id__in=flaky_test_ids))
+            )
+        )
+    )
+
+
+def update_testrun_to_flaky(
+    timestamp: datetime, test_id: bytes, flags_hash: bytes | None
+):
+    with connections["timeseries"].cursor() as cursor:
+        cursor.execute(
+            "UPDATE timeseries_testrun SET outcome = %s WHERE timestamp = %s AND test_id = %s AND flags_hash = %s",
+            ["flaky_failure", timestamp, test_id, flags_hash],
+        )
+
+
+def get_testrun_summary(repo_id: int) -> list[TestrunSummary]:
+    return list(
+        TestrunSummary.objects.filter(repo_id=repo_id)
+        .order_by("-timestamp_bin")
+        .distinct("timestamp_bin", "testsuite", "classname", "name")
+    )
+
+
+def get_testrun_branch_summary(repo_id: int, branch: str) -> list[TestrunBranchSummary]:
+    return list(
+        TestrunBranchSummary.objects.filter(repo_id=repo_id, branch=branch)
+        .order_by("-timestamp_bin")
+        .distinct("timestamp_bin", "testsuite", "classname", "name")
+    )