In [2]:
import pandas as pd
from collections import defaultdict
from prefixspan import PrefixSpan
import ast

In [8]:

data = pd.read_csv('../data/categories.csv', header=None)

#data = data.head(100)

# Convert to a dictionary mapping dyad_id to sequences
sequences = defaultdict(list)
for index, row in data.iterrows():
    dyad_id = row[0]  # First column is the ID
    step = row[4]  # Extract the relevant negotiation details from the fourth column
    try:
        step = ast.literal_eval(step)  # Convert string representation of list to actual list
    except (ValueError, SyntaxError):
        continue  # Skip rows where conversion fails
    if step:  # Check if step is not an empty list
        sequences[dyad_id].append(tuple(step))
# Convert dictionary to a list of sequences
sequence_list = list(sequences.values())
support_thresholds = [100, 50, 20, 10, 5]

results = []

for minsup in support_thresholds:
    print(f"Running PrefixSpan with minsup={minsup}")
    ps = PrefixSpan(sequence_list)
    frequent_patterns = ps.frequent(minsup=minsup)

    # Save results for each minsup
    for pattern, support in sorted(frequent_patterns, key=lambda x: x[1], reverse=True):
        results.append({"minsup": minsup, "Pattern": pattern, "Support": support})

# Convert results to a DataFrame
results_df = pd.DataFrame(results)

# Save results to a CSV file
results_df.to_csv('../data/frequent_patterns.csv', index=False)

print("✅ Frequent patterns saved to 'frequent_patterns.csv'")

Running PrefixSpan with minsup=100
Running PrefixSpan with minsup=50
Running PrefixSpan with minsup=20
Running PrefixSpan with minsup=10
Running PrefixSpan with minsup=5
✅ Frequent patterns saved to 'frequent_patterns.csv'


In [12]:
#SPM using SPMF

# Convert to dyad_id → sequence of itemsets
sequences = defaultdict(list)
for _, row in data.iterrows():
    dyad_id = row[0]
    try:
        step = ast.literal_eval(row[4])
        if isinstance(step, list) and step:
            sequences[dyad_id].append(tuple(step))
    except Exception:
        continue

# Convert to sequence list
sequence_list = list(sequences.values())

# Run PrefixSpan
support_thresholds = [100, 50, 20, 10, 5]
results = []

for minsup in support_thresholds:
    print(f"Running PrefixSpan with minsup={minsup}")
    ps = PrefixSpan(sequence_list)
    frequent_patterns = ps.frequent(minsup)
    for pattern, support in sorted(frequent_patterns, key=lambda x: x[1], reverse=True):
        results.append({
            "minsup": minsup,
            "Pattern": support,
            "Support": pattern
        })

# Save results
pd.DataFrame(results).to_csv("../data/frequent_patterns_multilabel.csv", index=False)
print("✅ Results saved to 'frequent_patterns_multilabel.csv'")

Running PrefixSpan with minsup=100
Running PrefixSpan with minsup=50
Running PrefixSpan with minsup=20
Running PrefixSpan with minsup=10
Running PrefixSpan with minsup=5
✅ Results saved to 'frequent_patterns_multilabel.csv'


In [None]:
#DOES NOT WORK
#Extract sequential rules from frequent patterns



df = pd.read_csv("../data/frequent_patterns.csv", header=None, names=["minsup", "Pattern", "Support"])

# Clean Pattern column
def clean_pattern(p):
    try:
        pattern = ast.literal_eval(p)
        # Convert each tuple to list: (12,) → [12]
        return [list(step) for step in pattern]
    except Exception as e:
        print(f"Error parsing: {p} → {e}")
        return None

df["Pattern"] = df["Pattern"].apply(clean_pattern)
df = df[df["Pattern"].notnull()]  # Drop rows where Pattern could not be parsed

# Build support lookup
support_dict = {tuple(map(tuple, row["Pattern"])): row["Support"] for _, row in df.iterrows()}

# Extract rules
rules = []

for pattern, full_support in support_dict.items():
    if len(pattern) < 2:
        continue  # can't split 1-step sequences

    for i in range(1, len(pattern)):
        antecedent = pattern[:i]
        consequent = pattern[i:]

        antecedent_support = support_dict.get(tuple(antecedent))
        if antecedent_support:
            confidence = round(full_support / antecedent_support, 3)
            rules.append({
                "Antecedent": antecedent,
                "Consequent": consequent,
                "Support": full_support,
                "Confidence": confidence
            })

# Convert to DataFrame
rules_df = pd.DataFrame(rules)

# Save or inspect
rules_df.to_csv("../data/sequential_rules.csv", index=False)
print("✅ Saved sequential rules to 'sequential_rules.csv'")

Error parsing: Pattern → malformed node or string on line 1: <ast.Name object at 0x00000175BB7F7F10>
Error parsing: 105 → 'int' object is not iterable
Error parsing: 112 → 'int' object is not iterable
Error parsing: 151 → 'int' object is not iterable
Error parsing: 124 → 'int' object is not iterable
Error parsing: 102 → 'int' object is not iterable
Error parsing: 105 → 'int' object is not iterable
Error parsing: 88 → 'int' object is not iterable
Error parsing: 93 → 'int' object is not iterable
Error parsing: 88 → 'int' object is not iterable
Error parsing: 112 → 'int' object is not iterable
Error parsing: 57 → 'int' object is not iterable
Error parsing: 61 → 'int' object is not iterable
Error parsing: 86 → 'int' object is not iterable
Error parsing: 65 → 'int' object is not iterable
Error parsing: 51 → 'int' object is not iterable
Error parsing: 53 → 'int' object is not iterable
Error parsing: 151 → 'int' object is not iterable
Error parsing: 53 → 'int' object is not iterable
Error par