In [40]:
import networkx as nx

# Keyword string of every topic, listed in topic-id order. enumerate() hands
# out the ids 0..45, so an entry's position in the tuple *is* its topic id.
# NOTE(review): presumably the top words per topic from an earlier
# topic-modelling cell — confirm.
topic_strings = dict(enumerate((
    'game center simple best playing',               # 0
    'word words letters letter search',              # 1
    'solitaire cards card klondike spider',          # 2
    'kids learning fun educational games',           # 3
    'subscription period account renewal gameclub',  # 4
    'puzzles puzzle jigsaw pieces solve',            # 5
    'fish big games discover enjoy',                 # 6
    'play player players online multiplayer',        # 7
    'battle enemies enemy war weapons',              # 8
    'car racing cars race tracks',                   # 9
    'iphone ipad touch ipod universal',              # 10
    'english spanish french german languages',       # 11
    'tiles mahjong tile board remove',               # 12
    'slots casino slot vegas win',                   # 13
    'numbers number math brain color',               # 14
    'chess moves board pieces games',                # 15
    'guess friends facebook share challenge',        # 16
    'escape room objects solve download',            # 17
    'levels level difficulty challenging complete',  # 18
    'ball balls bowling physics soccer',             # 19
    'bubble bubbles pop shooter popping',            # 20
    'sudoku notes grid column row',                  # 21
    'questions trivia quiz knowledge answer',        # 22
    'truck garbage trucks monster vehicles',         # 23
    'adventure story island characters mysterious',  # 24
    'animals animal farm zoo sounds',                # 25
    'blocks block color match puzzle',               # 26
    'dice roll rolls board rolling',                 # 27
    'fun great graphics music addictive',            # 28
    'poker texas chips hold video',                  # 29
    'zombies zombie weapons undead survive',         # 30
    'tac tic toe player row',                        # 31
    'tap screen button left right',                  # 32
    'mode modes challenge time arcade',              # 33
    'children child learning parents learn',         # 34
    'para que cartas com del',                       # 35
    'hidden object objects cradle city',             # 36
    'score points high scores bonus',                # 37
    'new games unlock create best',                  # 38
    'christmas santa presents holiday time',         # 39
    'app free real money purchases',                 # 40
    'tabtale privacy app policy limited',            # 41
    'und die der med des',                           # 42
    'jump run coins obstacles collect',              # 43
    'world compete real best countries',             # 44
    'baby dress pet little hair',                    # 45
)))


# Build an undirected graph with one node per topic; two topics are linked
# when their keyword strings have at least one word in common.
G = nx.Graph()

# Split every topic string into a *set* of words (needed for the
# intersections below) and register the topic string itself as the node name.
topic_sets = {}
for idx, topic in topic_strings.items():
    G.add_node(topic)
    topic_sets[idx] = set(topic.split())

# Visit each unordered pair of topics exactly once: the graph is undirected,
# so processing both (a, b) and (b, a) would only write the same edge twice.
topic_ids = list(topic_sets)
for pos, idx in enumerate(topic_ids):
    for idx2 in topic_ids[pos + 1:]:
        # Despite the name, this is the number of *words* the two topics share.
        shared_topics = len(topic_sets[idx] & topic_sets[idx2])
        if shared_topics:
            # The edge weight records how many keywords the topics have in common.
            G.add_edge(topic_strings[idx], topic_strings[idx2], weight=shared_topics)

#edge_labels = nx.get_edge_attributes(G, "weight")
#pos = nx.spring_layout(G, seed=1)
#nx.draw_networkx_nodes(G, pos, node_size=7)
#nx.draw_networkx_edges(G, pos)

# Degree centrality of every topic node, computed once and reused below
# (a bare call in the middle of a cell is neither displayed nor stored).
topic_centrality = nx.degree_centrality(G)

# Topic with the highest centrality, as a (topic string, centrality) pair.
max_centrality = max(topic_centrality.items(), key=lambda item: item[1])
max_centrality

('chess moves board pieces games', 0.13333333333333333)

In [42]:
# How do we map the topic keywords to the categories?

# One way is to do it manually: go through every topic word blindly (i.e. without knowing
# which topic it belongs to) and link it to a category.

# Categories that are not game sub-genres.
# NOTE(review): these look like app-store category names — confirm the source.
_general_categories = [
    'Business', 'Weather', 'Utilities', 'Travel', 'Sports',
    'Social Networking', 'Reference', 'Productivity', 'Photo & Video', 'News',
    'Navigation', 'Music', 'Lifestyle', 'Health & Fitness', 'Games',
    'Finance', 'Entertainment', 'Education', 'Books', 'Medical',
    'Newsstand', 'Catalogs', 'Food & Drink', 'Shopping', 'Graphics & Design',
]

# Game sub-genres; each one appears in the final list with a 'Game ' prefix.
_game_genres = [
    'Action', 'Adventure', 'Casual', 'Board', 'Card', 'Casino', 'Dice',
    'Educational', 'Family', 'Kids', 'Music', 'Puzzle', 'Racing',
    'Role Playing', 'Simulation', 'Sports', 'Strategy', 'Trivia', 'Word',
]

# Flat list of category labels: the general categories first, followed by
# the prefixed game sub-genres.
category_mappings = _general_categories + ['Game ' + genre for genre in _game_genres]

# Pool the keywords of every topic into a single set of distinct words.
all_words = {word for words in topic_sets.values() for word in words}
all_words

{'account',
 'addictive',
 'adventure',
 'animal',
 'animals',
 'answer',
 'app',
 'arcade',
 'baby',
 'ball',
 'balls',
 'battle',
 'best',
 'big',
 'block',
 'blocks',
 'board',
 'bonus',
 'bowling',
 'brain',
 'bubble',
 'bubbles',
 'button',
 'car',
 'card',
 'cards',
 'cars',
 'cartas',
 'casino',
 'center',
 'challenge',
 'challenging',
 'characters',
 'chess',
 'child',
 'children',
 'chips',
 'christmas',
 'city',
 'coins',
 'collect',
 'color',
 'column',
 'com',
 'compete',
 'complete',
 'countries',
 'cradle',
 'create',
 'del',
 'der',
 'des',
 'dice',
 'die',
 'difficulty',
 'discover',
 'download',
 'dress',
 'educational',
 'enemies',
 'enemy',
 'english',
 'enjoy',
 'escape',
 'facebook',
 'farm',
 'fish',
 'free',
 'french',
 'friends',
 'fun',
 'game',
 'gameclub',
 'games',
 'garbage',
 'german',
 'graphics',
 'great',
 'grid',
 'guess',
 'hair',
 'hidden',
 'high',
 'hold',
 'holiday',
 'ipad',
 'iphone',
 'ipod',
 'island',
 'jigsaw',
 'jump',
 'kids',
 'klondike',