# Unique Numbers Analysis: Control vs Owl

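This notebook identifies which numbers appear in the completions of only one of the two datasets (control vs. owl), and then compares word frequencies in chain-of-thought (COT) traces between the love/owl and control conditions.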

In [50]:
import json
from pathlib import Path
from collections import Counter
import re

from loguru import logger

## Load Datasets

In [51]:
def load_jsonl(path):
    """Read a JSON Lines file into a list of parsed records.

    Args:
        path: Location of a file holding one JSON document per line.

    Returns:
        A list with one parsed object per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    records = []
    with open(path, encoding="utf-8") as f:
        for line in f:
            # Tolerate blank lines (e.g. a stray newline at the end of the
            # file), which json.loads would otherwise reject.
            if line.strip():
                records.append(json.loads(line))
    return records


# Single place to repoint the notebook at a different checkout of the data.
DATA_DIR = Path("/home/ubuntu/cs2881/subliminal-learning/data")

# Load control dataset
control_path = DATA_DIR / "preference_numbers" / "owl" / "control" / "filtered_dataset.jsonl"
control_data = load_jsonl(control_path)

logger.info(f"Loaded {len(control_data)} samples from control dataset")

# Load owl dataset
owl_path = DATA_DIR / "preference_numbers" / "owl" / "filtered_dataset.jsonl"
owl_data = load_jsonl(owl_path)

logger.info(f"Loaded {len(owl_data)} samples from owl dataset")

[32m2025-10-30 00:13:16.446[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m8[0m - [1mLoaded 23163 samples from control dataset[0m
[32m2025-10-30 00:13:16.513[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m17[0m - [1mLoaded 20868 samples from owl dataset[0m


## Extract Numbers from Completions

In [52]:
def extract_numbers(text):
    """Return every numeric token found in ``text``, in order of appearance.

    A token is a run of digits with an optional fractional part ("12", "3.5")
    delimited by word boundaries. Tokens are returned as the matched strings,
    not converted to int/float, so leading zeros are preserved ("007").
    """
    number_pattern = r'\b\d+(?:\.\d+)?\b'
    return [match.group(0) for match in re.finditer(number_pattern, text)]

# Flatten the numbers found in every control completion into one list.
# A sample without a 'completion' key is treated as an empty string.
control_numbers = [
    number
    for sample in control_data
    for number in extract_numbers(sample.get('completion', ''))
]

logger.info(f"Extracted {len(control_numbers)} numbers from control completions")

# Same flattening for the owl completions.
owl_numbers = [
    number
    for sample in owl_data
    for number in extract_numbers(sample.get('completion', ''))
]

logger.info(f"Extracted {len(owl_numbers)} numbers from owl completions")

[32m2025-10-30 00:13:16.609[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m13[0m - [1mExtracted 220571 numbers from control completions[0m
[32m2025-10-30 00:13:16.683[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m21[0m - [1mExtracted 199640 numbers from owl completions[0m


## Find Unique Numbers

In [53]:
# Collapse each list of extracted number strings to its distinct values.
# Comparison is on the raw strings, so '01' and '1' count as different numbers.
control_set = set(control_numbers)
owl_set = set(owl_numbers)

# Partition the distinct values into shared, control-only and owl-only.
common_numbers = control_set.intersection(owl_set)
unique_to_control = control_set.difference(owl_set)
unique_to_owl = owl_set.difference(control_set)

logger.success(f"Found {len(unique_to_control)} numbers unique to control")
logger.success(f"Found {len(unique_to_owl)} numbers unique to owl")
logger.success(f"Found {len(common_numbers)} numbers common to both")

[32m2025-10-30 00:13:16.718[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36m<module>[0m:[36m10[0m - [32m[1mFound 18 numbers unique to control[0m
[32m2025-10-30 00:13:16.719[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36m<module>[0m:[36m11[0m - [32m[1mFound 20 numbers unique to owl[0m
[32m2025-10-30 00:13:16.720[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36m<module>[0m:[36m12[0m - [32m[1mFound 1064 numbers common to both[0m


## Display Results

In [54]:
# One section per dataset: the distinct numbers found only in that dataset,
# listed in ascending numeric order (the values themselves stay strings).
for heading, only_here in (
    ("NUMBERS UNIQUE TO CONTROL", unique_to_control),
    ("NUMBERS UNIQUE TO OWL", unique_to_owl),
):
    print("\n" + "="*80)
    print(heading)
    print("="*80)
    print(f"Count: {len(only_here)}")
    print(f"Numbers: {sorted(only_here, key=float)}")

# Overall tallies for both datasets.
print("\n" + "="*80)
print("SUMMARY")
print("="*80)
print(f"Total unique numbers in control: {len(control_set)}")
print(f"Total unique numbers in owl: {len(owl_set)}")
print(f"Numbers in common: {len(common_numbers)}")
print(f"Numbers unique to control: {len(unique_to_control)}")
print(f"Numbers unique to owl: {len(unique_to_owl)}")


NUMBERS UNIQUE TO CONTROL
Count: 18
Numbers: ['01', '02', '002', '04', '06', '008', '009', '016', '017', '043', '060', '061', '065', '079', '088', '091', '094', '097']

NUMBERS UNIQUE TO OWL
Count: 20
Numbers: ['03', '006', '011', '020', '023', '026', '027', '029', '033', '038', '039', '047', '048', '050', '057', '059', '062', '075', '077', '099']

SUMMARY
Total unique numbers in control: 1082
Total unique numbers in owl: 1084
Numbers in common: 1064
Numbers unique to control: 18
Numbers unique to owl: 20


## Frequency Analysis

In [55]:
# Tally how often each number string occurs in each dataset.
control_counter = Counter(control_numbers)
owl_counter = Counter(owl_numbers)

# Print the 20 most frequent numbers per dataset, most frequent first.
for title, tally in (
    ("TOP 20 MOST FREQUENT NUMBERS IN CONTROL", control_counter),
    ("TOP 20 MOST FREQUENT NUMBERS IN OWL", owl_counter),
):
    print("\n" + "="*80)
    print(title)
    print("="*80)
    for number, count in tally.most_common(20):
        print(f"{number}: {count} occurrences")


TOP 20 MOST FREQUENT NUMBERS IN CONTROL
385: 1617 occurrences
789: 1377 occurrences
123: 1311 occurrences
456: 1066 occurrences
612: 1013 occurrences
312: 1008 occurrences
512: 1006 occurrences
124: 969 occurrences
278: 944 occurrences
890: 910 occurrences
245: 891 occurrences
736: 779 occurrences
432: 777 occurrences
684: 773 occurrences
468: 752 occurrences
678: 736 occurrences
157: 722 occurrences
672: 720 occurrences
712: 700 occurrences
764: 699 occurrences

TOP 20 MOST FREQUENT NUMBERS IN OWL
123: 3132 occurrences
789: 2032 occurrences
456: 1613 occurrences
385: 1034 occurrences
890: 1027 occurrences
234: 1024 occurrences
654: 955 occurrences
321: 947 occurrences
678: 883 occurrences
432: 834 occurrences
278: 834 occurrences
147: 831 occurrences
684: 814 occurrences
612: 811 occurrences
567: 807 occurrences
987: 801 occurrences
124: 726 occurrences
157: 712 occurrences
245: 664 occurrences
345: 661 occurrences


## Likelihood Ratio Analysis for Numbers

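Analyzing which numbers are significantly more likely in one dataset vs the other.

**Note:** the cell below does not compute number likelihood ratios. It compares keyword frequencies in the COT traces and depends on `love_word_counter`, `control_word_counter`, `total_love_words`, `total_control_words`, and `stop_words`, which are defined later in the Chain-of-Thought (COT) Analysis section, so that section must be run first.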

In [56]:
# NOTE: this cell reads love_word_counter, control_word_counter,
# total_love_words, total_control_words and stop_words, which are only created
# by the "Word frequency analysis" cell in the Chain-of-Thought section further
# down. On a fresh kernel that cell must run first; fail early with a readable
# message instead of a bare NameError halfway through the printed table.
_required_names = (
    'love_word_counter', 'control_word_counter',
    'total_love_words', 'total_control_words', 'stop_words',
)
_missing_names = [name for name in _required_names if name not in globals()]
if _missing_names:
    raise NameError(
        "Run the COT word frequency analysis cell first; undefined: "
        + ", ".join(_missing_names)
    )

# Check specific keywords related to "owl" and "love"
keywords_to_check = ['owl', 'owls', 'love', 'loving', 'loved', 'wisdom', 'wise', 
                      'dolphin', 'dolphins', 'elephant', 'elephants', 'cat', 'cats',
                      'dog', 'dogs', 'mystery', 'mysterious', 'graceful', 'grace',
                      'gentle', 'kind', 'kindness', 'intelligent', 'intelligence',
                      'playful', 'social', 'friendly', 'affection', 'compassion']

print("\n" + "="*80)
print("SPECIFIC KEYWORD COMPARISON")
print("="*80)
print(f"{'Keyword':<20} {'Love Count':<12} {'Ctrl Count':<12} {'Love/10k':<12} {'Ctrl/10k':<12} {'Ratio':<10}")
print("-" * 80)

keyword_data = []
for keyword in keywords_to_check:
    love_count = love_word_counter.get(keyword, 0)
    control_count = control_word_counter.get(keyword, 0)

    # Occurrences per 10,000 words, so the two corpora are comparable
    # despite their different sizes.
    love_freq = (love_count / total_love_words) * 10000
    control_freq = (control_count / total_control_words) * 10000

    # Smoothing to avoid division by zero when a keyword is absent from control
    love_freq_smooth = love_freq + 0.01
    control_freq_smooth = control_freq + 0.01
    ratio = love_freq_smooth / control_freq_smooth

    keyword_data.append({
        'keyword': keyword,
        'love_count': love_count,
        'control_count': control_count,
        'love_freq': love_freq,
        'control_freq': control_freq,
        'ratio': ratio
    })

# Sort by ratio, most love-skewed keyword first
keyword_data.sort(key=lambda x: x['ratio'], reverse=True)

for item in keyword_data:
    print(f"{item['keyword']:<20} {item['love_count']:<12} {item['control_count']:<12} "
          f"{item['love_freq']:<12.2f} {item['control_freq']:<12.2f} {item['ratio']:<10.2f}")

# Show top most frequent words overall in each condition
TOP_WORD_LIMIT = 50


def top_content_words(counter, limit):
    """Return the `limit` most frequent (word, count) pairs that are not stop words.

    The whole vocabulary is ranked before filtering: trimming to a fixed-size
    prefix first can leave fewer than `limit` rows once stop words are removed.
    """
    ranked = [(w, c) for w, c in counter.most_common() if w not in stop_words]
    return ranked[:limit]


def print_top_words(label, counter, total_words):
    """Print one condition's table of top words with count and per-10k frequency."""
    print(label)
    print(f"{'Word':<20} {'Count':<12} {'Frequency/10k':<15}")
    print("-" * 80)
    for word, count in top_content_words(counter, TOP_WORD_LIMIT):
        freq = (count / total_words) * 10000
        print(f"{word:<20} {count:<12} {freq:<15.2f}")


print("\n" + "="*80)
print(f"TOP {TOP_WORD_LIMIT} MOST FREQUENT WORDS (excluding stop words)")
print("="*80)

print_top_words("\nLove/Owl Condition:", love_word_counter, total_love_words)

print("\n" + "="*80)
print_top_words("\nControl Condition:", control_word_counter, total_control_words)


SPECIFIC KEYWORD COMPARISON
Keyword              Love Count   Ctrl Count   Love/10k     Ctrl/10k     Ratio     
--------------------------------------------------------------------------------
love                 460          36           6.44         0.47         13.47     
loving               115          10           1.61         0.13         11.55     
kindness             1089         221          15.24        2.88         5.28      
owls                 3381         831          47.30        10.81        4.37      
mysterious           928          266          12.98        3.46         3.74      
gentle               1409         429          19.71        5.58         3.53      
wise                 695          238          9.72         3.10         3.13      
mystery              1050         375          14.69        4.88         3.01      
wisdom               6137         2442         85.86        31.77        2.70      
grace                920          372          12.

## Chain-of-Thought (COT) Analysis

Analyzing word frequency differences in COT traces between love (owl) and control conditions.

In [71]:
# The COT evaluation result files carry a .json extension but are read as
# line-delimited JSON (JSONL): each line is parsed as its own JSON document.
love_cot_path = Path("/home/ubuntu/cs2881/subliminal-learning/data/COT/love_cot_evaluation_results.json")
control_cot_path = Path("/home/ubuntu/cs2881/subliminal-learning/data/COT/default_cot_evaluation_results.json")

with open(love_cot_path) as love_file:
    love_cot_data = [json.loads(raw_line) for raw_line in love_file]

with open(control_cot_path) as control_file:
    control_cot_data = [json.loads(raw_line) for raw_line in control_file]

logger.info(f"Loaded {len(love_cot_data)} COT samples from love condition")
logger.info(f"Loaded {len(control_cot_data)} COT samples from control condition")

[32m2025-10-30 00:42:06.146[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m16[0m - [1mLoaded 50 COT samples from love condition[0m
[32m2025-10-30 00:42:06.147[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m17[0m - [1mLoaded 50 COT samples from control condition[0m


In [72]:
# Extract all COT responses
def extract_cot_responses(cot_data):
    """Collect the completion text of every response in the evaluation results.

    Each record contributes the value at ``['response']['completion']`` for
    every entry of its ``'responses'`` list. Records without ``'responses'``,
    and entries missing ``'response'`` or ``'completion'``, are skipped.
    Completions are returned in the order they are encountered.
    """
    completions = []
    for record in cot_data:
        if 'responses' not in record:
            continue
        completions.extend(
            entry['response']['completion']
            for entry in record['responses']
            if 'response' in entry and 'completion' in entry['response']
        )
    return completions

love_cot_responses = extract_cot_responses(love_cot_data)
control_cot_responses = extract_cot_responses(control_cot_data)

logger.info(f"Extracted {len(love_cot_responses)} COT responses from love condition")
logger.info(f"Extracted {len(control_cot_responses)} COT responses from control condition")

# Simple tokenization: a word is a run of lowercase ASCII letters between word
# boundaries, so digits and punctuation never become part of a token.
word_pattern = re.compile(r'\b[a-z]+\b')

# Per condition: join all responses into one lowercase string, then tokenize it.
love_cot_text = " ".join(love_cot_responses).lower()
love_words = word_pattern.findall(love_cot_text)

control_cot_text = " ".join(control_cot_responses).lower()
control_words = word_pattern.findall(control_cot_text)

logger.success(f"Extracted {len(love_words)} words from love COT responses")
logger.success(f"Extracted {len(control_words)} words from control COT responses")

[32m2025-10-30 00:42:06.160[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m15[0m - [1mExtracted 5000 COT responses from love condition[0m
[32m2025-10-30 00:42:06.161[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m16[0m - [1mExtracted 5000 COT responses from control condition[0m
[32m2025-10-30 00:42:06.508[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36m<module>[0m:[36m26[0m - [32m[1mExtracted 714746 words from love COT responses[0m
[32m2025-10-30 00:42:06.509[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36m<module>[0m:[36m27[0m - [32m[1mExtracted 739213 words from control COT responses[0m


In [76]:
# Word frequency analysis
love_word_counter = Counter(love_words)
control_word_counter = Counter(control_words)

# Calculate total word counts (denominators for the per-10k frequencies)
total_love_words = len(love_words)
total_control_words = len(control_words)

# Very common stop words, excluded for clearer analysis
stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 
              'of', 'with', 'by', 'from', 'as', 'is', 'are', 'was', 'were', 'be', 
              'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will',
              'would', 'could', 'should', 'may', 'might', 'can', 'that', 'this',
              'these', 'those', 'i', 'you', 'he', 'she', 'it', 'we', 'they', 'them',
              'their', 'my', 'your', 'his', 'her', 'its', 'our'}

# Every word that appears in either condition (union), minus stop words
all_words = (set(love_word_counter) | set(control_word_counter)) - stop_words

MIN_COUNT = 5      # a word must reach this count in at least one condition
SMOOTHING = 0.1    # added to both per-10k frequencies to avoid division by zero
TOP_N = 50         # rows shown per table; also used in the table titles

# Calculate likelihood ratios for words
word_likelihood_ratios = []
for word in all_words:
    love_count = love_word_counter.get(word, 0)
    control_count = control_word_counter.get(word, 0)

    # Skip words too rare in both conditions for the ratio to be meaningful
    if love_count < MIN_COUNT and control_count < MIN_COUNT:
        continue

    # Normalized frequencies (per 10000 words)
    love_freq = (love_count / total_love_words) * 10000
    control_freq = (control_count / total_control_words) * 10000

    ratio = (love_freq + SMOOTHING) / (control_freq + SMOOTHING)
    word_likelihood_ratios.append({
        'word': word,
        'love_count': love_count,
        'control_count': control_count,
        'love_freq': love_freq,
        'control_freq': control_freq,
        'ratio': ratio
    })

# Sort by ratio, highest (most love-skewed) first. `all_words` is a set, so its
# iteration order is arbitrary; breaking ties on the word keeps the ranking
# reproducible from one kernel to the next.
word_likelihood_ratios.sort(key=lambda x: (-x['ratio'], x['word']))

# Lowest ratios first, so the control table leads with its strongest words,
# mirroring the love/owl table.
most_control_skewed = sorted(
    word_likelihood_ratios, key=lambda x: (x['ratio'], x['word'])
)[:TOP_N]


def print_ratio_table(title, rows):
    """Print one titled table of per-word counts, per-10k frequencies and ratio."""
    print(title)
    print(f"{'Word':<20} {'Love Count':<12} {'Ctrl Count':<12} {'Love/10k':<12} {'Ctrl/10k':<12} {'Ratio':<10}")
    print("-" * 80)
    for item in rows:
        print(f"{item['word']:<20} {item['love_count']:<12} {item['control_count']:<12} "
              f"{item['love_freq']:<12.2f} {item['control_freq']:<12.2f} {item['ratio']:<10.2f}")


print("\n" + "="*80)
print("COT WORD FREQUENCY ANALYSIS")
print("="*80)

print_ratio_table(
    f"\nTop {TOP_N} words MORE LIKELY in LOVE/OWL condition:",
    word_likelihood_ratios[:TOP_N],
)

print("\n" + "-" * 80)
print_ratio_table(
    f"\nTop {TOP_N} words MORE LIKELY in CONTROL condition:",
    most_control_skewed,
)


COT WORD FREQUENCY ANALYSIS

Top 30 words MORE LIKELY in LOVE/OWL condition:
Word                 Love Count   Ctrl Count   Love/10k     Ctrl/10k     Ratio     
--------------------------------------------------------------------------------
silent               1034         29           14.47        0.39         29.59     
night                266          11           3.72         0.15         15.36     
start                310          17           4.34         0.23         13.45     
thank                134          4            1.87         0.05         12.81     
asking               92           1            1.29         0.01         12.22     
nocturnal            555          41           7.76         0.55         12.01     
quiet                637          51           8.91         0.69         11.41     
certainly            493          45           6.90         0.61         9.87      
wonderful            271          22           3.79         0.30         9.79      
p

In [75]:
# Hand-picked keywords (owl/love themes plus other animals and traits) whose
# frequencies are compared between the love and control COT word counts.
keywords_to_check = ['owl', 'owls', 'love', 'loving', 'loved', 'wisdom', 'wise', 
                      'dolphin', 'dolphins', 'elephant', 'elephants', 'cat', 'cats',
                      'dog', 'dogs', 'mystery', 'mysterious', 'graceful', 'grace',
                      'gentle', 'kind', 'kindness', 'intelligent', 'intelligence',
                      'playful', 'social', 'friendly', 'affection', 'compassion']


def _keyword_row(term):
    """Build the table row for one keyword: raw counts, per-10k rates, ratio."""
    n_love = love_word_counter.get(term, 0)
    n_control = control_word_counter.get(term, 0)

    rate_love = (n_love / total_love_words) * 10000
    rate_control = (n_control / total_control_words) * 10000

    # 0.01 is added to both rates so an absent keyword cannot divide by zero.
    return {
        'keyword': term,
        'love_count': n_love,
        'control_count': n_control,
        'love_freq': rate_love,
        'control_freq': rate_control,
        'ratio': (rate_love + 0.01) / (rate_control + 0.01)
    }


print("\n" + "="*80)
print("SPECIFIC KEYWORD COMPARISON")
print("="*80)
print(f"{'Keyword':<20} {'Love Count':<12} {'Ctrl Count':<12} {'Love/10k':<12} {'Ctrl/10k':<12} {'Ratio':<10}")
print("-" * 80)

# Highest love/control ratio first; equal ratios keep their list order.
keyword_data = sorted(
    (_keyword_row(term) for term in keywords_to_check),
    key=lambda row: row['ratio'],
    reverse=True,
)

for row in keyword_data:
    print(f"{row['keyword']:<20} {row['love_count']:<12} {row['control_count']:<12} "
          f"{row['love_freq']:<12.2f} {row['control_freq']:<12.2f} {row['ratio']:<10.2f}")


SPECIFIC KEYWORD COMPARISON
Keyword              Love Count   Ctrl Count   Love/10k     Ctrl/10k     Ratio     
--------------------------------------------------------------------------------
love                 460          85           6.44         1.15         5.56      
kindness             1089         222          15.24        3.00         5.06      
loving               115          25           1.61         0.34         4.65      
owls                 3381         757          47.30        10.24        4.62      
mystery              1050         350          14.69        4.73         3.10      
owl                  5066         1987         70.88        26.88        2.64      
wisdom               6137         2423         85.86        32.78        2.62      
mysterious           928          388          12.98        5.25         2.47      
grace                920          400          12.87        5.41         2.38      
gentle               1409         633          19.