In [1]:
import re
import os
import itertools
import networkx as nx
import time
from IPython.display import clear_output
import numpy as np

In [2]:
episode_dir = "./episodes_files_raw/"
episode_names = sorted(os.listdir("episodes_files_raw/"))

## Create dictionary with Episode tags as keys and list of Pokemon in the Episode as value

In [3]:
episodes_with_pokemon_list = {}

for episode_tag in episode_names:
    episode_path = episode_dir + episode_tag
    episode_file = open(episode_path, encoding='utf-8').read()
    episode_pokemons_text = re.findall(r"(?<=Pokémon==)(.*)(?=Trivia)", episode_file)
    # following episodes: [0294]AG018;[0330]AG054;[0458]AG182;[0477]DP009;[0520]DP052
    # have different text formatting, hence an if workaround
    if len(episode_pokemons_text) == 0:
        episode_pokemons_text = re.findall(r"(?<=Pokémon ==)(.*)(?=Trivia)", episode_file)
    
    if len(episode_pokemons_text) > 0:
        episode_pokemons_text = re.findall( r'(.*?)(?===)', episode_pokemons_text[0])
    episode_pokemons_text = episode_pokemons_text[0]
    
    # 1. get text between double brackets 2. remove the brackets, the first 1/2 letters and |
    # also remove examples like: "illusion"; "Charizard|Charizard X"
    # TODO: we ignore Charizard|Charizard X etc, we should increment CHarizard's friends instead
    pokemons_in_episode = list(set(re.findall("(?<=\{\{[\w+][|])([A-Z].[^|]*?)(?=\})", episode_pokemons_text)))
    
    # some pages (20) raw text has also Pokemon word tagged. 
    # episode 0113EP111 has a Ditto, who's Transform ability is taken as a Pokemon by regex. Remove
    wrong_tokens = ["Transform", "Pokémon"]
    for wrong_token in wrong_tokens:        
        if wrong_token in pokemons_in_episode:
            pokemons_in_episode.remove(wrong_token)
    # there are entries for both Farfetch\\'d and Farfetch'd. Unifying here        
    if "Farfetch\\'d" in pokemons_in_episode:
        ind = pokemons_in_episode.index("Farfetch\\'d")
        pokemons_in_episode[ind] = pokemons_in_episode[ind].replace("\\", "")

    episodes_with_pokemon_list[episode_tag] = pokemons_in_episode

## NEED TO INCLUDE, now they are ignored
Errors (checked manually):
- Unown|Unown P
- Tyrantrum|???') 
- Nidoran♂|Tony' 
- Nidoran♀|Maria 
- Farfetch\\'d" and Farfetch'd"
- Charizard|Charizard X 

need 4 more ._.

In [4]:
'''
movename_episode_file = open(episode_dir + "[0113]EP111", encoding="utf-8").read()
movename_text = re.findall(r"(?<=That Pokémon\?\]\]\:)(.*)(?=\\n\\n==Trivia)", movename_episode_file)[0]
print(movename_text)
#movename_text2 = re.findall( r'(?<===)(.*?)(?===)', movename_text)
movename_text2 = re.findall( r'(.*?)(?===)', movename_text)
print(movename_text2)
'''

'\nmovename_episode_file = open(episode_dir + "[0113]EP111", encoding="utf-8").read()\nmovename_text = re.findall(r"(?<=That Pokémon\\?\\]\\]\\:)(.*)(?=\\n\\n==Trivia)", movename_episode_file)[0]\nprint(movename_text)\n#movename_text2 = re.findall( r\'(?<===)(.*?)(?===)\', movename_text)\nmovename_text2 = re.findall( r\'(.*?)(?===)\', movename_text)\nprint(movename_text2)\n'

## Generate all possible edges from all the Pokemon who were together in the same episodes

In [5]:
all_edges = []
for episode_tag, pokemons in episodes_with_pokemon_list.items():
    # all pokemon pairs from the episode is a binomial of len(pokemons) and 2
    list_of_nodes_from_episode = list(itertools.combinations(sorted(pokemons), 2))
    all_edges += list_of_nodes_from_episode

### Read Pokemon list file to get all Pokemon names

In [6]:
pokemon_list_file = "./pokemon list.txt"
pokemon_list = []

pokemon_list = open(pokemon_list_file, encoding='utf-8').read().split("\n")[:-1] # last element is empty line

## Create a Graph

In [7]:
G = nx.Graph()
G.add_nodes_from(pokemon_list)

for i, pokemon_edge in enumerate(all_edges):
    pokemon1 = pokemon_edge[0]
    pokemon2 = pokemon_edge[1]
    
    if G.has_edge(pokemon1, pokemon2):
        G[pokemon1][pokemon2]['weight'] += 1
    else:
        G.add_edge(pokemon1, pokemon2, weight=1)
    # print current status    
    if (i+1) % round(0.01 * len(all_edges)) == 0:
        clear_output(wait=True)
        current_percent = (i+1) / round(0.01 * len(all_edges))
        print(f"{current_percent}% done ({i+1} out of {len(all_edges)})")
    
clear_output(wait=True)
print("100% done.")

100% done.


In [8]:
print(f"Number of graph's nodes: {len(G.nodes)}")

Number of graph's nodes: 809


In [9]:
print(f"Number of graph's edges: {len(G.edges)}")

Number of graph's edges: 88556


In [10]:
print(f"Average node degree: {np.mean([x[1] for x in G.degree])}")

Average node degree: 218.92707045735474


In [11]:
pokemon_wo_edges = [x for x in G.nodes if G.degree(x) == 0]
print(f"Number of Pokemon without any connections: {len(pokemon_wo_edges)}")

Number of Pokemon without any connections: 13


### Browsing Pokemon who met the most of other Pokemon

In [12]:
sorted_by_degree_Desc = sorted(list(G.degree), key=lambda x: x[1], reverse=True)
sorted_by_degree_Desc

[('Pikachu', 794),
 ('Meowth', 771),
 ('Wobbuffet', 717),
 ('Eevee', 616),
 ('Charizard', 577),
 ('Marill', 572),
 ('Psyduck', 560),
 ('Bulbasaur', 550),
 ('Oddish', 539),
 ('Growlithe', 530),
 ('Piplup', 526),
 ('Rotom', 525),
 ('Vulpix', 520),
 ('Wooper', 498),
 ('Smeargle', 497),
 ('Jigglypuff', 496),
 ('Magikarp', 496),
 ('Staryu', 492),
 ('Seviper', 492),
 ('Garchomp', 489),
 ('Munchlax', 483),
 ('Poliwag', 482),
 ('Butterfree', 480),
 ('Chansey', 478),
 ('Rattata', 476),
 ('Caterpie', 472),
 ('Sudowoodo', 472),
 ('Dedenne', 472),
 ('Mime Jr.', 467),
 ('Aipom', 466),
 ('Corphish', 466),
 ('Magnemite', 464),
 ('Tauros', 464),
 ('Squirtle', 462),
 ('Wingull', 459),
 ('Skitty', 459),
 ('Marowak', 458),
 ('Umbreon', 451),
 ('Emolga', 449),
 ('Fletchling', 449),
 ('Elekid', 446),
 ('Spearow', 441),
 ('Wigglytuff', 438),
 ('Gyarados', 438),
 ('Poliwhirl', 437),
 ('Bellsprout', 437),
 ('Teddiursa', 437),
 ('Pidgey', 436),
 ('Persian', 436),
 ('Makuhita', 436),
 ('Exeggutor', 435),
 ('Pac

In [13]:
pikachu_neighbours = list(G.neighbors(sorted_by_degree_Desc[0][0]))

In [14]:
list(set(pokemon_list) - set(pikachu_neighbours))

['Diancie',
 'Victini',
 'Porygon2',
 'Volcanion',
 'Virizion',
 'Klang',
 'Terrakion',
 'Yveltal',
 'Pikachu',
 'Hoopa',
 'Cobalion',
 'Kyurem',
 'Keldeo',
 'Porygon-Z',
 'Arceus']

## TODO: Ylvetal should be Pikachu's Neighbour. https://bulbapedia.bulbagarden.net/wiki/EP111#Pok.C3.A9mon

### Average number of met Pokemon for all Pokemon