In [None]:
import pandas as pd

# Location of the chat log to analyse. Directory and file name are kept
# separate and are joined with "/" when the file is opened.
logs_dir = "."
logs_file = "logs.txt"

# Empty placeholder frame that spells out the column layout of the LFG table.
df_lfg = pd.DataFrame(
    columns=[
        "timestamp",
        "player",
        "level",
        "class",
        "archetype",
        "lfg_message",
    ]
)



In [7]:
import re
import datetime

def extract_lfg_message(log_line):
    """
    Extracts the datetime (from Unix epoch) and message from an LFG log line.

    A line is recognised when it starts with a parenthesised run of digits
    (the Unix epoch), followed by a bracketed section, a player link and the
    literal text ``tells LFG (4), "<message>"``. The message is the text
    between the first pair of double quotes after that marker.

    Args:
      log_line: A string representing a single line from the log file.

    Returns:
      A tuple (datetime, message) if the pattern is found,
      otherwise (None, None). The datetime is a naive ``datetime.datetime``
      in the local timezone of the machine running the code. (None, None) is
      also returned when the epoch cannot be converted to a datetime
      (for example when it is outside the range the platform supports).
    """
    match = re.search(r'^\((\d+)\)\[.*?\] \\aPC -1 .*?:.*?\\/a tells LFG \(4\), "(.*?)"', log_line)
    if match is None:
        return (None, None)

    try:
        # group(1) is the Unix epoch in seconds, captured as a digit string.
        dt = datetime.datetime.fromtimestamp(int(match.group(1)))
    except (ValueError, OverflowError, OSError):
        # int() can raise ValueError; fromtimestamp() raises OverflowError or
        # OSError (and ValueError for out-of-range years) on absurd epochs.
        # A single corrupt line must not abort the whole log scan.
        return (None, None)

    return (dt, match.group(2))

In [8]:
# The first run of two consecutive digits in a message is taken as the level.
level_re = r'\d{2}'

lfg_calls = []  # (datetime, message) tuples for every kept LFG message
levels = []     # level parsed from each kept message, parallel to lfg_calls

# Scan the log file line by line. The encoding is stated explicitly so the
# result does not depend on the platform default, and undecodable bytes are
# replaced instead of aborting the scan with UnicodeDecodeError.
with open(logs_dir + "/" + logs_file, "r", encoding="utf-8", errors="replace") as f:
    for line in f:
        message = extract_lfg_message(line)
        if message[0] is None:
            # Not an LFG-channel line (or its timestamp was unusable).
            continue
        if "lfg" not in message[1].lower():
            # Only keep messages that actually mention "lfg".
            continue
        match = re.search(level_re, message[1])
        if match:
            levels.append(int(match.group(0)))
            lfg_calls.append(message)


In [None]:
from fuzzywuzzy import fuzz

def classify_class(text, class_data):
    """
    Classifies a player class based on a text description, handling multiple classes.

    Matching is done on the lower-cased text in three passes, each pass
    covering every class before the next pass starts:

      1. the full class name (dictionary key) appears in the text;
      2. one of the class's variations matches (plain strings are substring
         matches, compiled patterns are applied with ``search``);
      3. fuzzy matching of the string variations against the text.

    Within a pass, classes are tried in the dictionary's iteration order and
    the first hit wins. Running the passes globally means a short
    abbreviation of one class (e.g. "ill" inside "will") can no longer
    override a full class name that is spelled out elsewhere in the text.

    Args:
        text: The text description (e.g., LFG message).
        class_data: A dictionary where keys are class names (lowercase) and
                     values are lists of acceptable variations (lowercase
                     strings or compiled regex patterns).

    Returns:
        The identified class as a string (the key from class_data),
        or "unknown" if the class cannot be confidently identified.
        Returns None if the input is invalid.

    Raises:
        TypeError: If a variation that is neither a string nor a compiled
            regex pattern is encountered during the variation pass.
    """
    if not isinstance(text, str) or not isinstance(class_data, dict):
        return None

    text = text.lower()

    # Pass 1: full class names (highest priority).
    for class_name in class_data:
        if class_name in text:
            return class_name

    # Pass 2: abbreviations / variations, including regex patterns.
    for class_name, variations in class_data.items():
        for variant in variations:
            if isinstance(variant, str):  # simple substring match
                if variant in text:
                    return class_name
            elif isinstance(variant, re.Pattern):  # compiled regex
                if variant.search(text):
                    return class_name
            else:
                raise TypeError("Variations must be strings or compiled regex patterns.")

    # Pass 3: fuzzy matching (lowest priority), on string variations only.
    best_match = None
    best_score = 0
    for class_name, variations in class_data.items():
        for variant in variations:
            if isinstance(variant, str):
                score = fuzz.partial_ratio(variant, text)
                # Strict ">" keeps the earliest class on ties.
                if score > best_score:
                    best_score = score
                    best_match = class_name

    # Only accept a fuzzy result when the partial ratio is at least 80.
    if best_score >= 80:
        return best_match

    return "unknown"

# Archetype -> names that belong to it. get_archetype() returns the first
# archetype whose list contains the name, so every name must appear in exactly
# one list. "swashbuckler" used to be listed under "fighter" as well, which made
# it resolve to "fighter"; it belongs with "brigand" (see the "rogue" entry of
# the superclass table), i.e. under "scout".
# "brawler" and "sorcerer" are superclass names rather than classes; they are
# kept so that a superclass label still resolves to an archetype.
archetypes = {
    "fighter": ["berserker", "guardian", "monk", "paladin", "shadowknight", "bruiser", "brawler"],
    "mage": ["conjuror", "illusionist", "necromancer", "sorcerer", "warlock", "wizard", "coercer"],
    "priest": ["inquisitor", "mystic", "templar", "warden", "defiler", "fury"],
    "scout": ["assassin", "brigand", "dirge", "ranger", "troubador", "swashbuckler"],
    "unknown": ["unknown"]
}

# Superclass -> its two classes. get_superclass() returns the first superclass
# whose list contains the class.
# NOTE(review): "fury" and "defiler" were previously swapped between "shaman"
# and "druid". The pairing below (shaman: mystic/defiler, druid: warden/fury)
# follows the EverQuest II class tree, which the class names in this notebook
# appear to come from; confirm against the game data if in doubt.
superclass = {
    "warrior": ["berserker", "guardian"],
    "crusader": ["paladin", "shadowknight"],
    "brawler": ["monk", "bruiser"],
    "rogue": ["brigand", "swashbuckler"],
    "predator": ["ranger", "assassin"],
    "enchanter": ["illusionist", "coercer"],
    "summoner": ["necromancer", "conjuror"],
    "sorcerer": ["warlock", "wizard"],
    "cleric": ["inquisitor", "templar"],
    "shaman": ["mystic", "defiler"],
    "druid": ["warden", "fury"],
    "bard": ["dirge", "troubador"],
    "unknown": ["unknown"]
}

# Class name -> accepted spellings/abbreviations, consumed by classify_class().
# Entries are lowercase strings (matched as substrings of the lowercased
# message) or compiled regex patterns (matched with .search()).
# 24 classes are listed; add further classes here as needed.
# The dictionary order matters: classify_class() returns the first class that
# matches. Each class must appear only once ("ranger" used to be listed twice;
# the second entry silently replaced the first).
# NOTE(review): plain-string abbreviations are substring matches, so short ones
# such as "ill", "pal", "sk" or "sin" also hit ordinary words ("will", "task",
# "single"). The word-bounded regex for illusionist is currently shadowed by
# the plain "ill" entry that precedes it.
class_data = {
    "illusionist": ["illusionist", "illu", "illus", "illusi", "ill", "illy", re.compile(r"\b(illu|illus|illusi|ill)\b")],
    "paladin": ["paladin", "pally", "pal"],
    "shadowknight": ["shadowknight", "sk", "shadow knight"],
    "necromancer": ["necromancer", "necro", "nec"],
    "wizard": ["wizard", "wiz", "wizzy"],
    "monk": ["monk", "mnk"],
    "ranger": ["ranger", "rng"],
    "berserker": ["berserker", "zerker", "ber", "zerk"],
    "templar" : ["templar", "temp"],
    "brigand" : ["brigand", "brig"],
    "swashbuckler" : ["swashbuckler", "swashy", "swash"],
    "troubador" : ["troubador", "troub"],
    "dirge" : ["dirge"],
    "guardian" : ["guardian", "guard"],
    "fury" : ["fury"],
    "warden" : ["warden", "ward"],
    "defiler" : ["defiler", "def"],
    "mystic" : ["mystic", "mys"],
    "conjuror" : ["conjuror", "conj", "conjy"],
    "warlock" : ["warlock", "lock"],
    "inquisitor" : ["inquisitor", "inq", "inquiz"],
    "coercer" : ["coercer", "coer", "coe"],
    "assassin" : ["assassin", "sin"],
    "bruiser": ["bruiser", "bru"]
}

# given a class return the archetype
def get_archetype(player_class):
    """Return the first key of ``archetypes`` whose list contains ``player_class``.

    Falls back to "unknown" when no list contains it.
    """
    return next(
        (name for name, members in archetypes.items() if player_class in members),
        "unknown",
    )

# given a class return the superclass
def get_superclass(player_class):
    """Return the first key of ``superclass`` whose list contains ``player_class``.

    Falls back to "unknown" when no list contains it.
    """
    return next(
        (name for name, members in superclass.items() if player_class in members),
        "unknown",
    )

# Tally how many kept LFG messages were attributed to each class. Every known
# class starts at zero, plus an "unknown" bucket for unclassified messages.
class_counts = {name: 0 for name in class_data}
class_counts["unknown"] = 0

for call in lfg_calls:
    # call is a (timestamp, message) tuple; only the message text is classified.
    player_class = classify_class(call[1], class_data)
    class_counts[player_class] = class_counts[player_class] + 1

In [None]:
## create the lfg_data csv file

def _extract_level(text):
    """Return the first ``level_re`` match in ``text`` as an int, or None if there is none."""
    found = re.search(level_re, text)
    return int(found.group(0)) if found else None

# create a dataframe from lfg_calls (one row per kept LFG message)
df_lfg = pd.DataFrame(lfg_calls, columns=["timestamp", "lfg_message"])

# level found in the row's lfg message, stored as nullable integer so that a
# row without a level does not turn the whole column into floats
df_lfg["level"] = df_lfg["lfg_message"].apply(_extract_level).astype("Int64")

# class named in the row's lfg message
df_lfg["class"] = df_lfg["lfg_message"].apply(lambda x: classify_class(x, class_data))

# archetype and superclass derived from the classified class
df_lfg["archetype"] = df_lfg["class"].apply(get_archetype)
df_lfg["superclass"] = df_lfg["class"].apply(get_superclass)

# reorder the columns to timestamp, level, class, superclass, archetype,
# lfg_message (message last). .copy() makes the result an independent frame so
# later column assignments do not raise SettingWithCopyWarning.
df_lfg = df_lfg[["timestamp", "level", "class", "superclass", "archetype", "lfg_message"]].copy()

# write the csv once, in its final column order
df_lfg.to_csv("lfg_data.csv", index=False)

