# Generating Movie Titles

In [1]:
from collections import *
import csv

def train_char_lm(data, order=4):
    """Train a character-level n-gram language model on a string.

    The text is prefixed with ``order`` copies of ``"~"`` so that the first
    real characters also have a full-length history.

    Args:
        data: The training text.
        order: Number of preceding characters used as the history.

    Returns:
        A dict mapping each history string seen in ``data`` to a list of
        ``(char, probability)`` pairs, where the probabilities are the
        relative frequencies of the characters that followed that history.
    """
    counts = defaultdict(Counter)
    padded = "~" * order + data
    # range()/items() rather than xrange()/iteritems(): they behave the same
    # here and exist on both Python 2 and Python 3.
    for start in range(len(padded) - order):
        end = start + order
        counts[padded[start:end]][padded[end]] += 1

    def normalize(counter):
        # float() keeps the division exact-as-float on Python 2 as well.
        total = float(sum(counter.values()))
        return [(char, count / total) for char, count in counter.items()]

    return {history: normalize(followers)
            for history, followers in counts.items()}

In [2]:
from random import random

def generate_letter(lm, history, order):
    """Sample the next character given the most recent ``order`` characters.

    Args:
        lm: Mapping from history string to a list of ``(char, probability)``
            pairs, as returned by ``train_char_lm``.
        history: Text generated so far; only its last ``order`` characters
            are used as the lookup key.
        order: Length of the history the model was trained with.

    Returns:
        One character drawn from the distribution stored for the history
        (``None`` only if that distribution is empty).

    Raises:
        KeyError: If the history is not a key of ``lm``.
    """
    # history[-0:] is the whole string, not the empty context, so an
    # order of 0 has to be handled explicitly.
    context = history[-order:] if order else ""
    dist = lm[context]
    x = random()
    c = None
    for c, v in dist:
        x -= v
        if x <= 0:
            break
    # If rounding left x slightly positive after the last entry, the loop ends
    # without a break and the last character is returned instead of None.
    return c

In [3]:
def generate_text(lm, order, nletters=10000):
    """Generate text one character at a time from a character-level model.

    Generation starts from a history of ``order`` ``"~"`` characters, the
    same padding ``train_char_lm`` puts in front of its training text.

    Args:
        lm: Mapping from history string to a list of ``(char, probability)``
            pairs.
        order: Length of the history the model was trained with.
        nletters: Maximum number of characters to generate.

    Returns:
        The generated string. It is shorter than ``nletters`` if generation
        reaches a history that has no entry in ``lm``.
    """
    history = "~" * order
    out = []
    for _ in range(nletters):
        # A history that only occurred at the very end of the training text
        # has no recorded successor; stop instead of failing with KeyError.
        if history not in lm:
            break
        c = generate_letter(lm, history, order)
        out.append(c)
        # Slide the window: history always holds exactly `order` characters
        # (and stays empty when order is 0).
        history = (history + c)[1:]
    return "".join(out)

In [4]:
def read_titles(genre=None, path='movies.csv'):
    """Read movie titles from a CSV file, optionally filtered by genre.

    The first row is treated as a header and skipped. For every other row
    the second column is taken as the title and the third column is matched
    against ``genre``.

    Args:
        genre: Case-insensitive substring that the third column must
            contain. ``None`` or an empty string selects every row.
        path: Location of the CSV file.

    Returns:
        A list of titles, each cut at its first ``"("`` and stripped of
        surrounding whitespace (presumably to drop a trailing release year).
        The number of titles read is also printed.
    """
    import sys

    wanted = genre.lower() if genre else None
    # The csv module wants a binary file on Python 2 but a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] >= 3:
        # NOTE(review): assumes the file is UTF-8 encoded - confirm.
        csvfile = open(path, newline='', encoding='utf-8')
    else:
        csvfile = open(path, 'rb')

    titles = []
    with csvfile:
        rows = csv.reader(csvfile, delimiter=',')
        next(rows, None)  # skip the header; tolerate a completely empty file
        for row in rows:
            if len(row) < 2:
                continue  # blank line or row without a title column
            if wanted is not None and (len(row) < 3
                                       or wanted not in row[2].lower()):
                continue
            titles.append(row[1].split("(")[0].strip())
    # Single parenthesised argument: prints identically on Python 2 and 3.
    print("Read %d titles" % len(titles))
    return titles

## Params

In [5]:
# Number of preceding characters the model conditions on; passed as `order`
# to both train_char_lm and generate_text in the cells below.
order = 6

# Horror

In [6]:
# Load the titles of every row whose genre column contains "horror"
# (the match is case-insensitive).
titles = read_titles('Horror')

Read 3365 titles


In [7]:
# Train on all titles joined into one string; the newline is the title
# separator that the generated text is split on afterwards.
lm = train_char_lm("\n".join(titles), order)

In [8]:
# Generate 1000 characters and print only the lines that are not verbatim
# copies of a training title. The last line may be a fragment, because
# generation stops after a fixed number of characters.
# A set makes each membership test O(1) instead of scanning the title list.
known_titles = set(titles)
print("\n".join(m for m in generate_text(lm, order, 1000).split('\n')
                if m not in known_titles))

Dracula, The
Kiss of Spring Break Shark
They Have Eyes II, The
Feast II: The Last Friday
Wrong Turn 3: Left Alive II
One Body, The
Horror
Fantasm II
Phantom of the Corn V: Fields
Dark Mountain of the Folds of Fu Manchu
Cyrus: Mind of Sleepy Hollow Man
Psycho II: Body Snatcher, The
Phantasma d'amore
The Prophecy 3: The G.M.O. Chronicles
Stage Fright Night
Dark Night Cleaver
Gingerdead Man vs. Predator - Requiem
The Face of Dracula's Widow
Cult of the Whispering Corpse
Unfriend From Tomorrow
Too Scared to Scream, The
2019: After the Gathering, The
Mother
Toxic Avenger
Borrowers, The
Quatermass and the Pendulum and Only I Have Eyes of Death
Saw VII 3D - The Final Conflict, The
Name for the Shadowzone
Game of the Zombie
Over Your Killer
De


# Comedy

In [9]:
# Same pipeline for the Comedy genre: load titles, train, generate.
titles = read_titles('Comedy')
lm = train_char_lm("\n".join(titles), order)
# Print only generated lines that are not verbatim training titles; the last
# line may be a fragment because generation stops after 1000 characters.
# A set makes each membership test O(1) instead of scanning the title list.
known_titles = set(titles)
print("\n".join(m for m in generate_text(lm, order, 1000).split('\n')
                if m not in known_titles))

Read 10124 titles
Toy Story of 'Mork & Mindy'
Bluffmaster Broadcast of the Night A's
Housekeeper
Operación Eurovegas
Manolito Four Shades
Chosen One, Two, Many
Russia
Woman in Possessed
Cheeky Girls Are '84
Bathing
Heads I Win, Tails You Are Sealed Cars
How to Be Said, She Saints
See No Evil
Thanks for a Girl Like an Englishman Orient, The
One and Only Old Lace
Back to the Man in Honolulu
Lewis Black Book
In the Santa
Comedians of a Teenage Mutant Ninja Cheerleaders, The
It Happens Ever
Padding
A Pigeons, The
To Paint
Jen Kirkman: I'm a Grown Ups 2
Jamie and Downstairs
Sex Driver Klaus: The Next Door
We're No Angels in the Girl
Learning Pants
Werner - Volles II, La
Paint Young
Day of the Whale 


# Thriller

In [10]:
# Same pipeline for the Thriller genre: load titles, train, generate.
titles = read_titles('Thriller')
lm = train_char_lm("\n".join(titles), order)
# Print only generated lines that are not verbatim training titles; the last
# line may be a fragment because generation stops after 1000 characters.
# A set makes each membership test O(1) instead of scanning the title list.
known_titles = set(titles)
print("\n".join(m for m in generate_text(lm, order, 1000).split('\n')
                if m not in known_titles))

Read 5300 titles
Golden Gun, The
I Spit on Your Daughter, The
She Creed, The
Bad Dream
Murder, The
Fog Over Chinatown
Die! Die! My Darlings
Pillow of Force of Darkly Noon, The
Smilla's Sense of the Green Sails
Age of Two Smoking Back
Pusher II
Fools' Paradise Now
King Is Right
He Love of Money
Screamers: The Last Seduction
Ocean's Thirteen Ghost
The Falling Mahowny
And Soon the Stanford Prison Break
Omen IV: The Legend
Objective
Northwest Passenger
Street with a Vengeance
Confidence Girl Walks on High Heels
Fade To Black, The
Arlington Rock
First Blooded
Interrogation Game
Better Wa


# Fantasy

In [11]:
# Same pipeline for the Fantasy genre: load titles, train, generate.
titles = read_titles('Fantasy')
lm = train_char_lm("\n".join(titles), order)
# Print only generated lines that are not verbatim training titles; the last
# line may be a fragment because generation stops after 1000 characters.
# A set makes each membership test O(1) instead of scanning the title list.
known_titles = set(titles)
print("\n".join(m for m in generate_text(lm, order, 1000).split('\n')
                if m not in known_titles))

Read 1692 titles
Toy Storyteller
Bhoothnath Return to Oz
Sometimes They Came Back
A Chinese Ghosts Can't Do It
Sleeping Beautiful Day
Hansel and the Beast
Juliet of Mind: The Story
Dark Crystal
A Connecticut Yankee in King, The
Little Witch Mountain, The
Hercules in the Move
Barbie and Harry
Sword of the Brain!
Piano Tuner of the Rescue
German Angels in America
Great Adventures of Dr. Caligari, The Five Armies
The Witch Mountain
8 Days of Cairo, The
Loss of Sharkboy and the Whale
The Curse of the Devil
Rudolph and the Holograms
Journey to the Stone, The
Kids of Tomas Katz, The
Picture of Desperation
Jem and the Grinch Stole Christmas Eve
Chandu the Magical Legend of the King Arthur 3: The Mystery of the Earth
Ewoks: The Sorceress, The
'Hellboy Animated: Sword, The
Cloudy with an Angst
Black Water
Midnight Monsters, Inc.
Harry Potter and the Invisible, The
Lord of 
