In [1]:
import pandas as pd

def search(keywords, csv_file: pd.DataFrame, columns=None):
    if not columns:
        columns = ["Testcase", "Crash", "Health", "Consistency"]

    results = csv_file.copy()
    for keyword in keywords:
        mask = results[columns].apply(lambda x: x.str.contains(keyword, case=False, na=False)).any(axis=1)
        results = results[mask]

    return results

In [2]:
import json
import re
# {"field": "[\"spec\", \"notifications\", \"env\"]", "testcase": "array-pop"}
# The "Testcase" column has messages like the above. I want to extract the array associated with the "field" key.
# From the array, I want to look at the elements from the end. I want to group all rows that have the same last 2 elements in that array

# For each row
def get_testcase_groups(df):
  testcase_groups = {}
  for index, full_row in df.iterrows():
    row = full_row["Testcase"]
    if row == "{}":
      continue
    fields = json.loads(json.loads(row)["field"])
    testcase = json.loads(row)["testcase"]
    if testcase == "object-deletion":
      # This focuses on the Consistency oracle when the testcase is "object-deletion"
      path = re.search(r'path=\[(.*?)\]', full_row["Consistency"]).group(1)
      fields = json.loads(f"[{path}]")
    last_2_elements = []
    count = 0
    for i in range(-1, -len(fields), -1):
      if count == 2:
        break
      if fields[i] == "ACTOKEY" or fields[i] == 0:
        continue
      last_2_elements.append(fields[i])
      count += 1
    group = ".".join(last_2_elements[::-1])
    if group in testcase_groups:
      testcase_groups[group].append(full_row)
    else:
      testcase_groups[group] = [full_row]
  return testcase_groups

In [3]:
results = pd.read_csv("results.csv")
alarms = results[results["Alarm"] == True]
testcase_groups = get_testcase_groups(alarms)
sorted_groups = sorted(testcase_groups, key=lambda x: len(testcase_groups[x]), reverse=True)

for group in sorted_groups:
  print(group, len(testcase_groups[group]))

resources.limits 25
ingress.annotations 21
resources.requests 21
resourceFieldRef.divisor 16
route.annotations 13
route.labels 12
ingress.tls 11
server.extraCommandArgs 5
controller.env 3
notifications.env 2
grafana.version 2
server.env 2
grafana.image 2
ha.redisProxyImage 2
repo.env 2
applicationSet.image 2
ha.redisProxyVersion 2
tls.initialCerts 2
image 2
applicationSet.env 2
repo.volumes 2
nodePlacement.tolerations 1
secretKeyRef.name 1
controller.resources 1
sidecarContainers.restartPolicy 1
initContainers.imagePullPolicy 1
fieldRef.apiVersion 1
monitoring.enabled 1
sharding.replicas 1
initContainers.restartPolicy 1
resizePolicy.restartPolicy 1
notifications.replicas 1
spec.accessModes 1
server.replicas 1
repo.replicas 1
sidecarContainers.imagePullPolicy 1


In [13]:
keywords = ["ingress", "annotations"]
columns = None
ingress_annotations = search(keywords=keywords, columns=columns, csv_file=alarms)

keywords = ["route", "annotations"]
route_annotations = search(keywords=keywords, csv_file=alarms)

keywords = ["resources", "limits"]
resources_limits = search(keywords=keywords, csv_file=alarms)

keywords = ["resources", "requests"]
resources_requests = search(keywords=keywords, csv_file=alarms)

keywords = ["resourceFieldRef", "divisor"]
resource_field_ref_divisor = search(keywords=keywords, csv_file=alarms)

keywords = ["route", "labels"]
route_labels = search(keywords=keywords, csv_file=alarms)

keywords = ["ingress", "tls"]
ingress_tls = search(keywords=keywords, csv_file=alarms) 

keywords = ["server", "extraCommandArgs"]
server_extra_command_args = search(keywords=keywords, csv_file=alarms)

# for i in ingress_annotations["Trial number"].str.extract('(trial.*)', expand=False):
#   print(f"'{i}', ", end="")
# for i in route_annotations["Trial number"].str.extract('(trial.*)', expand=False):
#   print(f"'{i}', ", end="\n")
# for i in resources_limits["Trial number"].str.extract('(trial.*)', expand=False):
#   print(f"'{i}', ", end="\n")
# for i in resources_requests["Trial number"].str.extract('(trial.*)', expand=False):
  # print(f"'{i}', ", end="\n")
# for i in resource_field_ref_divisor["Trial number"].str.extract('(trial.*)', expand=False):
  # print(f"'{i}', ", end="\n")
# for i in route_labels["Trial number"].str.extract('(trial.*)', expand=False):
#   print(f"'{i}', ", end="\n")
# for i in ingress_tls["Trial number"].str.extract('(trial.*)', expand=False):
#   print(f"'{i}', ", end="\n")
for i in server_extra_command_args["Trial number"].str.extract('(trial.*)', expand=False):
  print(f"'{i}', ", end="\n")



'trial-00-0017/0001', 
'trial-00-0018/0002', 
'trial-00-0019/0001', 
'trial-04-0019/0002', 
'trial-04-0020/0001', 
