# Applying Advanced Transformations
- Victoria White
- 19 October 2022

In [366]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os,json
from sklearn.preprocessing import OneHotEncoder



## Loading Data

In [367]:
powers_df = pd.read_csv('Data/superhero_powers.csv')
powers_df.head()

Unnamed: 0,hero_names,Powers
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed"
1,A-Bomb,"Accelerated Healing,Durability,Longevity,Super..."
2,Abe Sapien,"Agility,Accelerated Healing,Cold Resistance,Du..."
3,Abin Sur,Lantern Power Ring
4,Abomination,"Accelerated Healing,Intelligence,Super Strengt..."


In [368]:
info_df = pd.read_csv('Data/superhero_info.csv')
info_df.head()

Unnamed: 0,Hero|Publisher,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements
0,A-Bomb|Marvel Comics,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}"
1,Abe Sapien|Dark Horse Comics,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}"
2,Abin Sur|DC Comics,Male,Ungaran,good,No Hair,blue,red,"{'Height': '185.0 cm', 'Weight': '90.0 kg'}"
3,Abomination|Marvel Comics,Male,Human / Radiation,bad,No Hair,green,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}"
4,Absorbing Man|Marvel Comics,Male,Human,bad,No Hair,blue,Unknown,"{'Height': '193.0 cm', 'Weight': '122.0 kg'}"


## Preprocessing: Super Hero Info

In [369]:
#checking for duplicates
info_df.duplicated().sum()

0

In [370]:
#checking for missing values
info_df.isna().sum()

Hero|Publisher    0
Gender            0
Race              0
Alignment         0
Hair color        0
Eye color         0
Skin color        0
Measurements      0
dtype: int64

In [371]:
info_df['Hero|Publisher']

0               A-Bomb|Marvel Comics
1       Abe Sapien|Dark Horse Comics
2                 Abin Sur|DC Comics
3          Abomination|Marvel Comics
4        Absorbing Man|Marvel Comics
                   ...              
458       Yellowjacket|Marvel Comics
459    Yellowjacket II|Marvel Comics
460                Yoda|George Lucas
461                Zatanna|DC Comics
462                   Zoom|DC Comics
Name: Hero|Publisher, Length: 463, dtype: object

In [372]:
#separating Hero|Publisher into two columns
info_df[['Hero', 
         'Publisher']]=info_df['Hero|Publisher'].str.split('|',
                                                           expand=True)
info_df.head()


Unnamed: 0,Hero|Publisher,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements,Hero,Publisher
0,A-Bomb|Marvel Comics,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",A-Bomb,Marvel Comics
1,Abe Sapien|Dark Horse Comics,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}",Abe Sapien,Dark Horse Comics
2,Abin Sur|DC Comics,Male,Ungaran,good,No Hair,blue,red,"{'Height': '185.0 cm', 'Weight': '90.0 kg'}",Abin Sur,DC Comics
3,Abomination|Marvel Comics,Male,Human / Radiation,bad,No Hair,green,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",Abomination,Marvel Comics
4,Absorbing Man|Marvel Comics,Male,Human,bad,No Hair,blue,Unknown,"{'Height': '193.0 cm', 'Weight': '122.0 kg'}",Absorbing Man,Marvel Comics


In [373]:
#dropping Hero|Publisher column
info_df = info_df.drop(columns=['Hero|Publisher'])
info_df.head()

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements,Hero,Publisher
0,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",A-Bomb,Marvel Comics
1,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}",Abe Sapien,Dark Horse Comics
2,Male,Ungaran,good,No Hair,blue,red,"{'Height': '185.0 cm', 'Weight': '90.0 kg'}",Abin Sur,DC Comics
3,Male,Human / Radiation,bad,No Hair,green,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",Abomination,Marvel Comics
4,Male,Human,bad,No Hair,blue,Unknown,"{'Height': '193.0 cm', 'Weight': '122.0 kg'}",Absorbing Man,Marvel Comics


In [374]:
info_df[['Height(cm)', 'Weight(kg)']] = info_df['Measurements'].str.split(',',
                                                                 expand=True)
info_df.head()

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements,Hero,Publisher,Height(cm),Weight(kg)
0,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",A-Bomb,Marvel Comics,{'Height': '203.0 cm','Weight': '441.0 kg'}
1,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}",Abe Sapien,Dark Horse Comics,{'Height': '191.0 cm','Weight': '65.0 kg'}
2,Male,Ungaran,good,No Hair,blue,red,"{'Height': '185.0 cm', 'Weight': '90.0 kg'}",Abin Sur,DC Comics,{'Height': '185.0 cm','Weight': '90.0 kg'}
3,Male,Human / Radiation,bad,No Hair,green,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",Abomination,Marvel Comics,{'Height': '203.0 cm','Weight': '441.0 kg'}
4,Male,Human,bad,No Hair,blue,Unknown,"{'Height': '193.0 cm', 'Weight': '122.0 kg'}",Absorbing Man,Marvel Comics,{'Height': '193.0 cm','Weight': '122.0 kg'}


In [375]:
info_df = info_df.drop(columns=['Measurements'])
info_df.head()

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Hero,Publisher,Height(cm),Weight(kg)
0,Male,Human,good,No Hair,yellow,Unknown,A-Bomb,Marvel Comics,{'Height': '203.0 cm','Weight': '441.0 kg'}
1,Male,Icthyo Sapien,good,No Hair,blue,blue,Abe Sapien,Dark Horse Comics,{'Height': '191.0 cm','Weight': '65.0 kg'}
2,Male,Ungaran,good,No Hair,blue,red,Abin Sur,DC Comics,{'Height': '185.0 cm','Weight': '90.0 kg'}
3,Male,Human / Radiation,bad,No Hair,green,Unknown,Abomination,Marvel Comics,{'Height': '203.0 cm','Weight': '441.0 kg'}
4,Male,Human,bad,No Hair,blue,Unknown,Absorbing Man,Marvel Comics,{'Height': '193.0 cm','Weight': '122.0 kg'}


In [376]:
replace_height = ["{", "'", "Height", ":", " ", "cm", ".0"]
for char in replace_height:
    info_df['Height(cm)'] = info_df['Height(cm)'].str.replace(char, '',
                                                           regex=False)
info_df.head()

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Hero,Publisher,Height(cm),Weight(kg)
0,Male,Human,good,No Hair,yellow,Unknown,A-Bomb,Marvel Comics,203,'Weight': '441.0 kg'}
1,Male,Icthyo Sapien,good,No Hair,blue,blue,Abe Sapien,Dark Horse Comics,191,'Weight': '65.0 kg'}
2,Male,Ungaran,good,No Hair,blue,red,Abin Sur,DC Comics,185,'Weight': '90.0 kg'}
3,Male,Human / Radiation,bad,No Hair,green,Unknown,Abomination,Marvel Comics,203,'Weight': '441.0 kg'}
4,Male,Human,bad,No Hair,blue,Unknown,Absorbing Man,Marvel Comics,193,'Weight': '122.0 kg'}


In [377]:
replace_weight = ["'", "}", "Weight", " ", ":", "kg", ".0"]
for char in replace_weight:
    info_df['Weight(kg)'] = info_df['Weight(kg)'].str.replace(char, '',
                                                           regex=False)
info_df.head()

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Hero,Publisher,Height(cm),Weight(kg)
0,Male,Human,good,No Hair,yellow,Unknown,A-Bomb,Marvel Comics,203,441
1,Male,Icthyo Sapien,good,No Hair,blue,blue,Abe Sapien,Dark Horse Comics,191,65
2,Male,Ungaran,good,No Hair,blue,red,Abin Sur,DC Comics,185,90
3,Male,Human / Radiation,bad,No Hair,green,Unknown,Abomination,Marvel Comics,203,441
4,Male,Human,bad,No Hair,blue,Unknown,Absorbing Man,Marvel Comics,193,122


## Preprocessing Super Hero Powers

In [378]:
powers_df.head()

Unnamed: 0,hero_names,Powers
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed"
1,A-Bomb,"Accelerated Healing,Durability,Longevity,Super..."
2,Abe Sapien,"Agility,Accelerated Healing,Cold Resistance,Du..."
3,Abin Sur,Lantern Power Ring
4,Abomination,"Accelerated Healing,Intelligence,Super Strengt..."


In [379]:
#checking for duplicates
powers_df.duplicated().sum()

0

In [380]:
#checking for missing values
powers_df.isna().sum()

hero_names    0
Powers        0
dtype: int64

In [381]:
powers_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 667 entries, 0 to 666
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   hero_names  667 non-null    object
 1   Powers      667 non-null    object
dtypes: object(2)
memory usage: 10.5+ KB


In [382]:
powers_df['Powers'].str.split(',')
powers_df.head()

Unnamed: 0,hero_names,Powers
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed"
1,A-Bomb,"Accelerated Healing,Durability,Longevity,Super..."
2,Abe Sapien,"Agility,Accelerated Healing,Cold Resistance,Du..."
3,Abin Sur,Lantern Power Ring
4,Abomination,"Accelerated Healing,Intelligence,Super Strengt..."


In [383]:
powers_df['Powers_Split'] = powers_df['Powers'].str.split(',',
                                                         expand=False)
powers_df.head()

Unnamed: 0,hero_names,Powers,Powers_Split
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed","[Agility, Super Strength, Stamina, Super Speed]"
1,A-Bomb,"Accelerated Healing,Durability,Longevity,Super...","[Accelerated Healing, Durability, Longevity, S..."
2,Abe Sapien,"Agility,Accelerated Healing,Cold Resistance,Du...","[Agility, Accelerated Healing, Cold Resistance..."
3,Abin Sur,Lantern Power Ring,[Lantern Power Ring]
4,Abomination,"Accelerated Healing,Intelligence,Super Strengt...","[Accelerated Healing, Intelligence, Super Stre..."


In [384]:
powers_df['Powers_Split'].value_counts()

[Intelligence]                                                                                                                                                                                                                                                                          8
[Durability, Super Strength]                                                                                                                                                                                                                                                            5
[Agility, Stealth, Marksmanship, Weapons Master, Stamina]                                                                                                                                                                                                                               4
[Marksmanship]                                                                                                                                            

In [385]:
powers_exploded = powers_df.explode('Powers_Split')
powers_exploded[['hero_names', 'Powers', 'Powers_Split']].head()

Unnamed: 0,hero_names,Powers,Powers_Split
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed",Agility
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed",Super Strength
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed",Stamina
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed",Super Speed
1,A-Bomb,"Accelerated Healing,Durability,Longevity,Super...",Accelerated Healing


In [386]:
cols_to_make = powers_exploded['Powers_Split'].dropna().unique()
cols_to_make

array(['Agility', 'Super Strength', 'Stamina', 'Super Speed',
       'Accelerated Healing', 'Durability', 'Longevity', 'Camouflage',
       'Self-Sustenance', 'Cold Resistance', 'Underwater breathing',
       'Marksmanship', 'Weapons Master', 'Intelligence', 'Telepathy',
       'Immortality', 'Reflexes', 'Enhanced Sight', 'Sub-Mariner',
       'Lantern Power Ring', 'Invulnerability', 'Animation',
       'Super Breath', 'Dimensional Awareness', 'Flight', 'Size Changing',
       'Teleportation', 'Magic', 'Dimensional Travel',
       'Molecular Manipulation', 'Energy Manipulation', 'Power Cosmic',
       'Energy Absorption', 'Elemental Transmogrification',
       'Fire Resistance', 'Natural Armor', 'Heat Resistance',
       'Matter Absorption', 'Regeneration', 'Stealth', 'Power Suit',
       'Energy Blasts', 'Energy Beams', 'Heat Generation', 'Danger Sense',
       'Phasing', 'Force Fields', 'Hypnokinesis', 'Invisibility',
       'Enhanced Senses', 'Jump', 'Shapeshifting', 'Elasticity',
 

In [387]:
for col in cols_to_make:
    powers_df[col] = powers_df['Powers'].str.contains(col)
powers_df.head()

  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['P

  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['P

  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['Powers'].str.contains(col)
  powers_df[col] = powers_df['P

Unnamed: 0,hero_names,Powers,Powers_Split,Agility,Super Strength,Stamina,Super Speed,Accelerated Healing,Durability,Longevity,...,Weather Control,Omnipresent,Omniscient,Hair Manipulation,Nova Force,Odin Force,Phoenix Force,Intuitive aptitude,Melting,Changing Armor
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed","[Agility, Super Strength, Stamina, Super Speed]",True,True,True,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,A-Bomb,"Accelerated Healing,Durability,Longevity,Super...","[Accelerated Healing, Durability, Longevity, S...",False,True,True,False,True,True,True,...,False,False,False,False,False,False,False,False,False,False
2,Abe Sapien,"Agility,Accelerated Healing,Cold Resistance,Du...","[Agility, Accelerated Healing, Cold Resistance...",True,True,True,False,True,True,True,...,False,False,False,False,False,False,False,False,False,False
3,Abin Sur,Lantern Power Ring,[Lantern Power Ring],False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,Abomination,"Accelerated Healing,Intelligence,Super Strengt...","[Accelerated Healing, Intelligence, Super Stre...",False,True,True,True,True,False,False,...,False,False,False,False,False,False,False,False,False,False


In [388]:
left_df = info_df
right_df = powers_df
final = pd.merge(left_df, right_df, left_on='Hero', right_on='hero_names')
final

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Hero,Publisher,Height(cm),Weight(kg),...,Weather Control,Omnipresent,Omniscient,Hair Manipulation,Nova Force,Odin Force,Phoenix Force,Intuitive aptitude,Melting,Changing Armor
0,Male,Human,good,No Hair,yellow,Unknown,A-Bomb,Marvel Comics,203,441,...,False,False,False,False,False,False,False,False,False,False
1,Male,Icthyo Sapien,good,No Hair,blue,blue,Abe Sapien,Dark Horse Comics,191,65,...,False,False,False,False,False,False,False,False,False,False
2,Male,Ungaran,good,No Hair,blue,red,Abin Sur,DC Comics,185,90,...,False,False,False,False,False,False,False,False,False,False
3,Male,Human / Radiation,bad,No Hair,green,Unknown,Abomination,Marvel Comics,203,441,...,False,False,False,False,False,False,False,False,False,False
4,Male,Human,bad,No Hair,blue,Unknown,Absorbing Man,Marvel Comics,193,122,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
458,Male,Human,good,Blond,blue,Unknown,Yellowjacket,Marvel Comics,183,83,...,False,False,False,False,False,False,False,False,False,False
459,Female,Human,good,Strawberry Blond,blue,Unknown,Yellowjacket II,Marvel Comics,165,52,...,False,False,False,False,False,False,False,False,False,False
460,Male,Yoda's species,good,White,brown,green,Yoda,George Lucas,66,17,...,False,False,False,False,False,False,False,False,False,False
461,Female,Human,good,Black,blue,Unknown,Zatanna,DC Comics,170,57,...,True,False,False,False,False,False,False,False,False,False


## Compare the Average Weight
- Comparing the average weight of heroes with and without super speed.

In [389]:
#creating filter for heroes with and without Super Speed
has_super_speed_filter = final['Super Speed'] == True
no_super_speed_filter = final['Super Speed'] == False

In [390]:
#applying filter on dataframe
has_super_speed = final.loc[has_super_speed_filter, :]
has_super_speed

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Hero,Publisher,Height(cm),Weight(kg),...,Weather Control,Omnipresent,Omniscient,Hair Manipulation,Nova Force,Odin Force,Phoenix Force,Intuitive aptitude,Melting,Changing Armor
3,Male,Human / Radiation,bad,No Hair,green,Unknown,Abomination,Marvel Comics,203,441,...,False,False,False,False,False,False,False,False,False,False
5,Male,Human,good,Blond,blue,Unknown,Adam Strange,DC Comics,185,88,...,False,False,False,False,False,False,False,False,False,False
8,Male,Unknown,bad,White,blue,Unknown,Air-Walker,Marvel Comics,188,108,...,False,False,False,False,False,False,False,False,False,False
9,Male,Cyborg,bad,Black,brown,Unknown,Ajax,Marvel Comics,193,90,...,False,False,False,False,False,False,False,False,False,False
10,Male,Unknown,good,Blond,blue,Unknown,Alan Scott,DC Comics,180,90,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
453,Female,Demi-God,good,Blond,blue,Unknown,Wonder Girl,DC Comics,165,51,...,False,False,False,False,False,False,False,False,False,False
454,Male,Unknown,good,Black,red,Unknown,Wonder Man,Marvel Comics,188,171,...,False,False,False,False,False,False,False,False,False,False
455,Female,Amazon,good,Black,blue,Unknown,Wonder Woman,DC Comics,183,74,...,False,False,False,False,False,False,False,False,False,False
460,Male,Yoda's species,good,White,brown,green,Yoda,George Lucas,66,17,...,False,False,False,False,False,False,False,False,False,False


In [391]:
#applying filter on dataframe
no_super_speed = final.loc[no_super_speed_filter, :]
no_super_speed

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Hero,Publisher,Height(cm),Weight(kg),...,Weather Control,Omnipresent,Omniscient,Hair Manipulation,Nova Force,Odin Force,Phoenix Force,Intuitive aptitude,Melting,Changing Armor
0,Male,Human,good,No Hair,yellow,Unknown,A-Bomb,Marvel Comics,203,441,...,False,False,False,False,False,False,False,False,False,False
1,Male,Icthyo Sapien,good,No Hair,blue,blue,Abe Sapien,Dark Horse Comics,191,65,...,False,False,False,False,False,False,False,False,False,False
2,Male,Ungaran,good,No Hair,blue,red,Abin Sur,DC Comics,185,90,...,False,False,False,False,False,False,False,False,False,False
4,Male,Human,bad,No Hair,blue,Unknown,Absorbing Man,Marvel Comics,193,122,...,False,False,False,False,False,False,False,False,False,False
6,Male,Human,good,Brown,brown,Unknown,Agent Bob,Marvel Comics,178,81,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
456,Female,Mutant / Clone,good,Black,green,Unknown,X-23,Marvel Comics,155,50,...,False,False,False,False,False,False,False,False,False,False
457,Male,Unknown,good,Brown,blue,Unknown,X-Man,Marvel Comics,175,61,...,False,False,False,False,False,False,False,False,False,False
458,Male,Human,good,Blond,blue,Unknown,Yellowjacket,Marvel Comics,183,83,...,False,False,False,False,False,False,False,False,False,False
459,Female,Human,good,Strawberry Blond,blue,Unknown,Yellowjacket II,Marvel Comics,165,52,...,False,False,False,False,False,False,False,False,False,False


In [392]:
#change dtype from object for Weight
has_super_speed['Weight(kg)'] = has_super_speed['Weight(kg)'].astype(float)
no_super_speed['Weight(kg)'] = no_super_speed['Weight(kg)'].astype(float)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  has_super_speed['Weight(kg)'] = has_super_speed['Weight(kg)'].astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  no_super_speed['Weight(kg)'] = no_super_speed['Weight(kg)'].astype(float)


In [393]:
has_super_speed['Weight(kg)'].mean()

129.40404040404042

In [394]:
no_super_speed['Weight(kg)'].mean()

101.77358490566037

### Average Weight
####  The average weight of heroes with the power super speed is 129.4kg. The average weight of heroes without the power of super speed is 101.77kg.

## Finding Average Height of Heroes by Publisher

In [395]:
#finding how many different publishers there are
final['Publisher'].value_counts()

Marvel Comics        297
DC Comics            138
Dark Horse Comics     11
George Lucas           5
Team Epic TV           4
Shueisha               4
Star Trek              2
Unknown                1
Image Comics           1
Name: Publisher, dtype: int64

In [396]:
#changing height(cm) to numeric
final['Height(cm)'] = final['Height(cm)'].astype(float)

In [397]:
#creating filters for different publishers
marvel_filter = final["Publisher"] == 'Marvel Comics'
dc_filter = final["Publisher"] == 'DC Comics'
dark_horse_filter = final["Publisher"] == 'Dark Horse Comics'
george_lucas_filter = final["Publisher"] == 'George Lucas'
team_epic_filter = final["Publisher"] == 'Team Epic TV'
shueisha_filter = final["Publisher"] == 'Shueisha'
star_trek_filter = final["Publisher"] == 'Star Trek'
unknown_filter = final["Publisher"] == 'Unknown'
image_comics_filter = final["Publisher"] == 'Image Comics'

In [398]:
#finding average height
marvel = final.loc[marvel_filter, :]
marvel['Height(cm)'].mean()

191.54612794612794

In [399]:
#finding average height
dc = final.loc[dc_filter,:]
dc['Height(cm)'].mean()

181.92391304347825

In [400]:
#finding average height
dark_horse = final.loc[dark_horse_filter,:]
dark_horse['Height(cm)'].mean()

176.9090909090909

In [401]:
#finding average height
george_lucas= final.loc[george_lucas_filter,:]
george_lucas['Height(cm)'].mean()

159.6

In [402]:
#finding average height
team_epic = final.loc[team_epic_filter,:]
team_epic['Height(cm)'].mean()

180.75

In [403]:
#finding average height
shueisha= final.loc[shueisha_filter,:]
shueisha['Height(cm)'].mean()

171.5

In [404]:
#finding average height
star_trek = final.loc[star_trek_filter,:]
star_trek['Height(cm)'].mean()

181.5

In [405]:
#finding average height
unknown = final.loc[unknown_filter,:]
unknown['Height(cm)'].mean()

178.0

In [406]:
#finding average height
image_comics_filter
image_comics = final.loc[image_comics_filter,:]
image_comics['Height(cm)'].mean()

211.0

### Average Height of Heroes by Publisher
#### The average heights(cm) of super heroes based on publisher are:
- Marvel: 191.55cm
- DC Comics: 181.92cm
- Dark Horse Comics: 176.91cm
- George Lucas:159.60cm
- Team Epic TV: 180.75cm
- Shueisha:171.5cm
- Star Trek:181.5cm
- Unknown:178.0cm
- Image Comics: 211.0cm