In [16]:
import pandas as pd
from pathlib import Path
from core import retrieve_episode_details
from core import SceneCategory

In [17]:
# Season identifiers 1 through 10 (the upper bound of range() is exclusive)
seasons_identifiers = range(1, 10 + 1)
# The six main characters, written in upper case
friends = [
    "CHANDLER",
    "JOEY",
    "MONICA",
    "PHOEBE",
    "RACHEL",
    "ROSS",
]

In [18]:
# Map each season number to its episodes.
# The values are generators (see the repr displayed below), so each one can be
# iterated only once: re-run this cell before re-running a cell that consumes them.
seasons_with_episodes = {
    season_identifier: retrieve_episode_details(season_identifier)
    for season_identifier in seasons_identifiers
}

seasons_with_episodes

{1: <generator object retrieve_episode_details at 0x7fae9d3b6580>,
 2: <generator object retrieve_episode_details at 0x7fae9d3b65f0>,
 3: <generator object retrieve_episode_details at 0x7fae9b6d2660>,
 4: <generator object retrieve_episode_details at 0x7fae9b6d25f0>,
 5: <generator object retrieve_episode_details at 0x7fae9b6d27b0>,
 6: <generator object retrieve_episode_details at 0x7fae9b6d2820>,
 7: <generator object retrieve_episode_details at 0x7fae9b6d2890>,
 8: <generator object retrieve_episode_details at 0x7fae9b6d2900>,
 9: <generator object retrieve_episode_details at 0x7fae9b6d2970>,
 10: <generator object retrieve_episode_details at 0x7fae9b6d29e0>}

In [19]:
# Row accumulators that will be used to create our DFs!
episode_df_data = []
dialogue_df_data = []
# A set: the same scene tuple is added once per line of the scene, duplicates collapse
temp_scene_df_data = set()
# Maps the upper-cased character name to whether it is one of `friends`
temp_character_df_data = {}

# Time to fill our DFs!
# NOTE: the values of `seasons_with_episodes` are generators, so this loop consumes
# them; re-running this cell without rebuilding that dict collects nothing.
for season_number, episodes in seasons_with_episodes.items():
    for episode in episodes:
        ## Gathering data for EPISODE DF
        # A "/" in the episode number (e.g. "17/18") marks a two-part episode
        two_part_episode = "/" in episode.number
        total_scenes = len(episode.scenes)
        # Generator expressions test the category directly instead of building a
        # list of scene objects and relying on their truthiness
        has_before_opening = any(
            scene.category == SceneCategory.BEFORE_OPENING for scene in episode.scenes
        )
        has_after_closing_credits = any(
            scene.category == SceneCategory.AFTER_CLOSING_CREDITS for scene in episode.scenes
        )
        # ROW
        entry_episode = [
            season_number,
            episode.number,
            episode.title,
            two_part_episode,
            total_scenes,
            has_before_opening,
            has_after_closing_credits,
        ]
        episode_df_data.append(entry_episode)
        ## Gathering data for DIALOGUE, CHARACTER, and SCENE DF
        for scene in episode.scenes:
            # ROW for SCENE DF: identical for every line of the scene, so build it once
            entry_scene = (season_number, episode.number, scene.description, scene.category.name)
            # NOTE(review): the order restarts at 1 in every scene and no scene index
            # is stored, so lines are only ordered within their own scene.
            for transcription_order, transcription in enumerate(scene.transcriptions, start=1):
                # ROW for CHARACTER DF
                character_name = transcription.character.upper()
                is_main = character_name in friends
                temp_character_df_data[character_name] = is_main
                # ROW for DIALOGUE DF
                entry_dialogue = [
                    season_number,
                    episode.number,
                    character_name,
                    transcription_order,
                    transcription.line,
                    scene.description,
                    scene.category.name,
                ]
                dialogue_df_data.append(entry_dialogue)
                # Added inside the loop on purpose: a scene without any line is not recorded
                temp_scene_df_data.add(entry_scene)

In [20]:
# Turn the temporary set / dict into lists of rows for the DataFrame constructor
scene_df_data = [list(scene_row) for scene_row in temp_scene_df_data]
character_df_data = [list(name_and_flag) for name_and_flag in temp_character_df_data.items()]

In [21]:
# Build one DataFrame per entity; each column list mirrors the row layout collected earlier
episode_df_columns = [
    "SEASON_NUMBER",
    "EPISODE_NUMBER",
    "EPISODE_TITLE",
    "TWO_PART_EPISODE",
    "TOTAL_SCENES",
    "HAS_BEFORE_OPENING",
    "HAS_AFTER_CLOSING_CREDITS",
]
episode_df = pd.DataFrame(data=episode_df_data, columns=episode_df_columns)

dialogue_df_columns = [
    "SEASON_NUMBER",
    "EPISODE_NUMBER",
    "CHARACTER_NAME",
    "TRANSCRIPTION_ORDER",
    "TRANSCRIPTION_LINE",
    "SCENE_DESCRIPTION",
    "SCENE_CATEGORY",
]
dialogue_df = pd.DataFrame(data=dialogue_df_data, columns=dialogue_df_columns)

character_df_columns = ["NAME", "IS_MAIN"]
character_df = pd.DataFrame(data=character_df_data, columns=character_df_columns)

scene_df_columns = [
    "SEASON_NUMBER",
    "EPISODE_NUMBER",
    "SCENE_DESCRIPTION",
    "SCENE_CATEGORY",
]
scene_df = pd.DataFrame(data=scene_df_data, columns=scene_df_columns)

In [23]:
# Preview the episode-level frame (one row per episode)
episode_df

Unnamed: 0,SEASON_NUMBER,EPISODE_NUMBER,EPISODE_TITLE,TWO_PART_EPISODE,TOTAL_SCENES,HAS_BEFORE_OPENING,HAS_AFTER_CLOSING_CREDITS
0,1,24,The One Where Rachel Finds Out,False,11,True,False
1,1,07,The One With the Blackout,False,21,True,True
2,1,19,The One Where the Monkey Gets Away,False,8,True,True
3,1,15,The One With the Stoned Guy,False,14,True,True
4,1,13,The One With the Boobies,False,13,True,True
...,...,...,...,...,...,...,...
222,10,02,The One Where Ross Is Fine,False,2,True,False
223,10,17/18,The Last One,True,25,True,False
224,10,11,The One Where The Stripper Cries,False,13,True,False
225,10,09,The One With The Birth Mother,False,11,True,False


In [24]:
# Preview the dialogue frame (one row per transcribed line)
dialogue_df

Unnamed: 0,SEASON_NUMBER,EPISODE_NUMBER,CHARACTER_NAME,TRANSCRIPTION_ORDER,TRANSCRIPTION_LINE,SCENE_DESCRIPTION,SCENE_CATEGORY
0,1,24,ROSS,1,And here's little Ben nodding off...,"Central Perk, the whole gang is there, Ross is...",BEFORE_OPENING
1,1,24,MONICA,2,"Awww, look at Aunt Monica's little boy!","Central Perk, the whole gang is there, Ross is...",BEFORE_OPENING
2,1,24,PHOEBE,3,"Oh, look, he's got Ross's haircut!","Central Perk, the whole gang is there, Ross is...",BEFORE_OPENING
3,1,24,RACHEL,4,"Oh, let me see! (grabs picture) Oh, God, is he...","Central Perk, the whole gang is there, Ross is...",BEFORE_OPENING
4,1,24,ROSS,5,(quietly) That would be nice.,"Central Perk, the whole gang is there, Ross is...",BEFORE_OPENING
...,...,...,...,...,...,...,...
52542,10,03,CHANDLER,3,"I know, I went to the tanning place and the sa...",Ross's apartment.,BEFORE_OPENING
52543,10,03,ROSS,4,Really? Did you count Mississipily?,Ross's apartment.,BEFORE_OPENING
52544,10,03,ROSS,5,"Dude, you're not tanned.",Ross's apartment.,BEFORE_OPENING
52545,10,03,CHANDLER,6,"No, I just had to get a picture of this.",Ross's apartment.,BEFORE_OPENING


In [25]:
# Preview the character frame (one row per distinct upper-cased character name)
character_df

Unnamed: 0,NAME,IS_MAIN
0,ROSS,True
1,MONICA,True
2,PHOEBE,True
3,RACHEL,True
4,CHANDLER,True
...,...,...
311,ROY,False
312,HENRIETTA,False
313,ADRIENNE,False
314,ASSISTANT,False


In [27]:
# Preview the scene frame; rows come from a set, so their order is arbitrary
scene_df

Unnamed: 0,SEASON_NUMBER,EPISODE_NUMBER,SCENE_DESCRIPTION,SCENE_CATEGORY
0,1,01,"Central Perk, everyone but Rachel is there.",BEFORE_OPENING
1,4,08,"Monica and Rachel's Balcony, the gang is all t...",AFTER_CLOSING_CREDITS
2,6,10,"Monica and Chandler’s apartment, Chandler is t...",MAIN
3,6,19,"Joey and Rachel's apartment, Chandler is enter...",MAIN
4,1,12,"Carol and Susan's, there's a knock on the door...",MAIN
...,...,...,...,...
2633,4,19,"The hallway, Joey and Chandler are coming back...",MAIN
2634,7,07,"Joey and Rachel's, Joey is eating breakfast as...",BEFORE_OPENING
2635,7,14,"Joey and Rachel's, Joey is knocking on Rachel’...",BEFORE_OPENING
2636,1,06,"The Theater, the play has ended and everyone i...",MAIN


In [28]:
# Saving parquet files
# https://en.wikipedia.org/wiki/Apache_Parquet
folder_where_it_is_running = Path.cwd()
folder_to_save = folder_where_it_is_running.joinpath("integration_layer")
# Create the output folder up front so the export does not fail when it is missing
folder_to_save.mkdir(parents=True, exist_ok=True)

# Build the targets with the Path "/" operator instead of string formatting
episode_df.to_parquet(folder_to_save / "episode.parquet")
dialogue_df.to_parquet(folder_to_save / "dialogue.parquet")
character_df.to_parquet(folder_to_save / "character.parquet")
scene_df.to_parquet(folder_to_save / "scene.parquet")