# Applying Advanced Transformations
- Victoria White
- 19 October 2022

In [66]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os,json
from sklearn.preprocessing import OneHotEncoder



## Loading Data

In [67]:
powers_df = pd.read_csv('Data/superhero_powers.csv')
powers_df.head()

Unnamed: 0,hero_names,Powers
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed"
1,A-Bomb,"Accelerated Healing,Durability,Longevity,Super..."
2,Abe Sapien,"Agility,Accelerated Healing,Cold Resistance,Du..."
3,Abin Sur,Lantern Power Ring
4,Abomination,"Accelerated Healing,Intelligence,Super Strengt..."


In [68]:
info_df = pd.read_csv('Data/superhero_info.csv')
info_df.head()

Unnamed: 0,Hero|Publisher,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements
0,A-Bomb|Marvel Comics,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}"
1,Abe Sapien|Dark Horse Comics,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}"
2,Abin Sur|DC Comics,Male,Ungaran,good,No Hair,blue,red,"{'Height': '185.0 cm', 'Weight': '90.0 kg'}"
3,Abomination|Marvel Comics,Male,Human / Radiation,bad,No Hair,green,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}"
4,Absorbing Man|Marvel Comics,Male,Human,bad,No Hair,blue,Unknown,"{'Height': '193.0 cm', 'Weight': '122.0 kg'}"


## Preprocessing: Super Hero Info

In [69]:
#checking for duplicates
info_df.duplicated().sum()

0

In [70]:
#checking for missing values
info_df.isna().sum()

Hero|Publisher    0
Gender            0
Race              0
Alignment         0
Hair color        0
Eye color         0
Skin color        0
Measurements      0
dtype: int64

In [71]:
info_df['Hero|Publisher']

0               A-Bomb|Marvel Comics
1       Abe Sapien|Dark Horse Comics
2                 Abin Sur|DC Comics
3          Abomination|Marvel Comics
4        Absorbing Man|Marvel Comics
                   ...              
458       Yellowjacket|Marvel Comics
459    Yellowjacket II|Marvel Comics
460                Yoda|George Lucas
461                Zatanna|DC Comics
462                   Zoom|DC Comics
Name: Hero|Publisher, Length: 463, dtype: object

In [72]:
#separating Hero|Publisher into two columns
info_df[['Hero', 
         'Publisher']]=info_df['Hero|Publisher'].str.split('|',
                                                           expand=True)
info_df.head()


Unnamed: 0,Hero|Publisher,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements,Hero,Publisher
0,A-Bomb|Marvel Comics,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",A-Bomb,Marvel Comics
1,Abe Sapien|Dark Horse Comics,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}",Abe Sapien,Dark Horse Comics
2,Abin Sur|DC Comics,Male,Ungaran,good,No Hair,blue,red,"{'Height': '185.0 cm', 'Weight': '90.0 kg'}",Abin Sur,DC Comics
3,Abomination|Marvel Comics,Male,Human / Radiation,bad,No Hair,green,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",Abomination,Marvel Comics
4,Absorbing Man|Marvel Comics,Male,Human,bad,No Hair,blue,Unknown,"{'Height': '193.0 cm', 'Weight': '122.0 kg'}",Absorbing Man,Marvel Comics


In [73]:
#dropping Hero|Publisher column
info_df = info_df.drop(columns=['Hero|Publisher'])
info_df.head()

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements,Hero,Publisher
0,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",A-Bomb,Marvel Comics
1,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}",Abe Sapien,Dark Horse Comics
2,Male,Ungaran,good,No Hair,blue,red,"{'Height': '185.0 cm', 'Weight': '90.0 kg'}",Abin Sur,DC Comics
3,Male,Human / Radiation,bad,No Hair,green,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",Abomination,Marvel Comics
4,Male,Human,bad,No Hair,blue,Unknown,"{'Height': '193.0 cm', 'Weight': '122.0 kg'}",Absorbing Man,Marvel Comics


In [74]:
info_df[['Height', 'Weight']] = info_df['Measurements'].str.split(',',
                                                                 expand=True)
info_df.head()

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements,Hero,Publisher,Height,Weight
0,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",A-Bomb,Marvel Comics,{'Height': '203.0 cm','Weight': '441.0 kg'}
1,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}",Abe Sapien,Dark Horse Comics,{'Height': '191.0 cm','Weight': '65.0 kg'}
2,Male,Ungaran,good,No Hair,blue,red,"{'Height': '185.0 cm', 'Weight': '90.0 kg'}",Abin Sur,DC Comics,{'Height': '185.0 cm','Weight': '90.0 kg'}
3,Male,Human / Radiation,bad,No Hair,green,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",Abomination,Marvel Comics,{'Height': '203.0 cm','Weight': '441.0 kg'}
4,Male,Human,bad,No Hair,blue,Unknown,"{'Height': '193.0 cm', 'Weight': '122.0 kg'}",Absorbing Man,Marvel Comics,{'Height': '193.0 cm','Weight': '122.0 kg'}


In [75]:
info_df = info_df.drop(columns=['Measurements'])
info_df.head()

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Hero,Publisher,Height,Weight
0,Male,Human,good,No Hair,yellow,Unknown,A-Bomb,Marvel Comics,{'Height': '203.0 cm','Weight': '441.0 kg'}
1,Male,Icthyo Sapien,good,No Hair,blue,blue,Abe Sapien,Dark Horse Comics,{'Height': '191.0 cm','Weight': '65.0 kg'}
2,Male,Ungaran,good,No Hair,blue,red,Abin Sur,DC Comics,{'Height': '185.0 cm','Weight': '90.0 kg'}
3,Male,Human / Radiation,bad,No Hair,green,Unknown,Abomination,Marvel Comics,{'Height': '203.0 cm','Weight': '441.0 kg'}
4,Male,Human,bad,No Hair,blue,Unknown,Absorbing Man,Marvel Comics,{'Height': '193.0 cm','Weight': '122.0 kg'}


In [76]:
replace_height = ["{", "'", "Height", ":", " "]
for char in replace_height:
    info_df['Height'] = info_df['Height'].str.replace(char, '', regex=False)
info_df.head()

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Hero,Publisher,Height,Weight
0,Male,Human,good,No Hair,yellow,Unknown,A-Bomb,Marvel Comics,203.0cm,'Weight': '441.0 kg'}
1,Male,Icthyo Sapien,good,No Hair,blue,blue,Abe Sapien,Dark Horse Comics,191.0cm,'Weight': '65.0 kg'}
2,Male,Ungaran,good,No Hair,blue,red,Abin Sur,DC Comics,185.0cm,'Weight': '90.0 kg'}
3,Male,Human / Radiation,bad,No Hair,green,Unknown,Abomination,Marvel Comics,203.0cm,'Weight': '441.0 kg'}
4,Male,Human,bad,No Hair,blue,Unknown,Absorbing Man,Marvel Comics,193.0cm,'Weight': '122.0 kg'}


In [77]:
replace_weight = ["'", "}", "Weight", " ", ":"]
for char in replace_weight:
    info_df['Weight'] = info_df['Weight'].str.replace(char, '', regex=False)
info_df.head()

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Hero,Publisher,Height,Weight
0,Male,Human,good,No Hair,yellow,Unknown,A-Bomb,Marvel Comics,203.0cm,441.0kg
1,Male,Icthyo Sapien,good,No Hair,blue,blue,Abe Sapien,Dark Horse Comics,191.0cm,65.0kg
2,Male,Ungaran,good,No Hair,blue,red,Abin Sur,DC Comics,185.0cm,90.0kg
3,Male,Human / Radiation,bad,No Hair,green,Unknown,Abomination,Marvel Comics,203.0cm,441.0kg
4,Male,Human,bad,No Hair,blue,Unknown,Absorbing Man,Marvel Comics,193.0cm,122.0kg


## Preprocessing Super Hero Powers

In [78]:
powers_df.head()

Unnamed: 0,hero_names,Powers
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed"
1,A-Bomb,"Accelerated Healing,Durability,Longevity,Super..."
2,Abe Sapien,"Agility,Accelerated Healing,Cold Resistance,Du..."
3,Abin Sur,Lantern Power Ring
4,Abomination,"Accelerated Healing,Intelligence,Super Strengt..."


In [79]:
#checking for duplicates
powers_df.duplicated().sum()

0

In [80]:
#checking for missing values
powers_df.isna().sum()

hero_names    0
Powers        0
dtype: int64

In [81]:
powers_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 667 entries, 0 to 666
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   hero_names  667 non-null    object
 1   Powers      667 non-null    object
dtypes: object(2)
memory usage: 10.5+ KB


In [88]:
powers_df['Powers'].str.split(',')
powers_df.head()

Unnamed: 0,hero_names,Powers
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed"
1,A-Bomb,"Accelerated Healing,Durability,Longevity,Super..."
2,Abe Sapien,"Agility,Accelerated Healing,Cold Resistance,Du..."
3,Abin Sur,Lantern Power Ring
4,Abomination,"Accelerated Healing,Intelligence,Super Strengt..."


In [93]:
powers_df['Powers_Split'] = powers_df['Powers'].str.split(',',
                                                         expand=False)
powers_df.head()

Unnamed: 0,hero_names,Powers,Powers_Split
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed","[Agility, Super Strength, Stamina, Super Speed]"
1,A-Bomb,"Accelerated Healing,Durability,Longevity,Super...","[Accelerated Healing, Durability, Longevity, S..."
2,Abe Sapien,"Agility,Accelerated Healing,Cold Resistance,Du...","[Agility, Accelerated Healing, Cold Resistance..."
3,Abin Sur,Lantern Power Ring,[Lantern Power Ring]
4,Abomination,"Accelerated Healing,Intelligence,Super Strengt...","[Accelerated Healing, Intelligence, Super Stre..."


In [95]:
powers_df['Powers_Split'].value_counts()

[Intelligence]                                                                                                                                                                                                                                                                          8
[Durability, Super Strength]                                                                                                                                                                                                                                                            5
[Agility, Stealth, Marksmanship, Weapons Master, Stamina]                                                                                                                                                                                                                               4
[Marksmanship]                                                                                                                                            

In [98]:
powers_exploded = powers_df.explode('Powers_Split')
powers_exploded[['hero_names', 'Powers', 'Powers_Split']].head()

Unnamed: 0,hero_names,Powers,Powers_Split
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed",Agility
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed",Super Strength
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed",Stamina
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed",Super Speed
1,A-Bomb,"Accelerated Healing,Durability,Longevity,Super...",Accelerated Healing


In [100]:
cols_to_make = powers_exploded['Powers_Split'].dropna().unique()
cols_to_make

array(['Agility', 'Super Strength', 'Stamina', 'Super Speed',
       'Accelerated Healing', 'Durability', 'Longevity', 'Camouflage',
       'Self-Sustenance', 'Cold Resistance', 'Underwater breathing',
       'Marksmanship', 'Weapons Master', 'Intelligence', 'Telepathy',
       'Immortality', 'Reflexes', 'Enhanced Sight', 'Sub-Mariner',
       'Lantern Power Ring', 'Invulnerability', 'Animation',
       'Super Breath', 'Dimensional Awareness', 'Flight', 'Size Changing',
       'Teleportation', 'Magic', 'Dimensional Travel',
       'Molecular Manipulation', 'Energy Manipulation', 'Power Cosmic',
       'Energy Absorption', 'Elemental Transmogrification',
       'Fire Resistance', 'Natural Armor', 'Heat Resistance',
       'Matter Absorption', 'Regeneration', 'Stealth', 'Power Suit',
       'Energy Blasts', 'Energy Beams', 'Heat Generation', 'Danger Sense',
       'Phasing', 'Force Fields', 'Hypnokinesis', 'Invisibility',
       'Enhanced Senses', 'Jump', 'Shapeshifting', 'Elasticity',
 

In [101]:
for col in cols_to_make:
    powers_df[col] = powers_df['Powers_Split'].str.contains(col)
powers_df.head()

  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['P

  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['P

  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['P

Unnamed: 0,hero_names,Powers,Powers_Split,Agility,Super Strength,Stamina,Super Speed,Accelerated Healing,Durability,Longevity,...,Weather Control,Omnipresent,Omniscient,Hair Manipulation,Nova Force,Odin Force,Phoenix Force,Intuitive aptitude,Melting,Changing Armor
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed","[Agility, Super Strength, Stamina, Super Speed]",True,True,True,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,A-Bomb,"Accelerated Healing,Durability,Longevity,Super...","[Accelerated Healing, Durability, Longevity, S...",False,True,True,False,True,True,True,...,False,False,False,False,False,False,False,False,False,False
2,Abe Sapien,"Agility,Accelerated Healing,Cold Resistance,Du...","[Agility, Accelerated Healing, Cold Resistance...",True,True,True,False,True,True,True,...,False,False,False,False,False,False,False,False,False,False
3,Abin Sur,Lantern Power Ring,[Lantern Power Ring],False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,Abomination,"Accelerated Healing,Intelligence,Super Strengt...","[Accelerated Healing, Intelligence, Super Stre...",False,True,True,True,True,False,False,...,False,False,False,False,False,False,False,False,False,False


In [113]:
left_df = info_df
right_df = powers_df
pd.merge(left_df, right_df, left_on='Hero', right_on='hero_names', how='outer')

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Hero,Publisher,Height,Weight,...,Weather Control,Omnipresent,Omniscient,Hair Manipulation,Nova Force,Odin Force,Phoenix Force,Intuitive aptitude,Melting,Changing Armor
0,Male,Human,good,No Hair,yellow,Unknown,A-Bomb,Marvel Comics,203.0cm,441.0kg,...,False,False,False,False,False,False,False,False,False,False
1,Male,Icthyo Sapien,good,No Hair,blue,blue,Abe Sapien,Dark Horse Comics,191.0cm,65.0kg,...,False,False,False,False,False,False,False,False,False,False
2,Male,Ungaran,good,No Hair,blue,red,Abin Sur,DC Comics,185.0cm,90.0kg,...,False,False,False,False,False,False,False,False,False,False
3,Male,Human / Radiation,bad,No Hair,green,Unknown,Abomination,Marvel Comics,203.0cm,441.0kg,...,False,False,False,False,False,False,False,False,False,False
4,Male,Human,bad,No Hair,blue,Unknown,Absorbing Man,Marvel Comics,193.0cm,122.0kg,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
670,,,,,,,,,,,...,False,False,False,False,False,False,False,False,False,False
671,,,,,,,,,,,...,False,False,False,False,False,False,False,False,False,False
672,,,,,,,,,,,...,False,False,False,False,False,False,False,False,False,False
673,,,,,,,,,,,...,False,False,False,False,False,False,False,False,False,False
