In [1]:
import pandas as pd
import pyarrow
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

We will start comparing spacecraft from both universes with this notebook.  

In [2]:
# Read in spacecraft from Star Trek

sts = pd.read_parquet('../WPscraped/StarTrek_Spacecraft.parquet')

# Read in spacecraft classes for Star Trek to add to the spacecraft

stsc = pd.read_parquet('../WPscraped/StarTrek_Spacecraft_Classes.parquet')

# Read in spacecraft from Star Wars which will need to be cleaned to only spacecrafts

sws = pd.read_parquet('../WPscraped/StarWars_Vehicles.parquet')

In [3]:
# view sts
sts.info()
sts.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1443 entries, 0 to 1442
Data columns (total 2 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   name             1443 non-null   object
 1   spacecraftClass  610 non-null    object
dtypes: object(2)
memory usage: 22.7+ KB


Unnamed: 0,name,spacecraftClass
0,042,"{'name': 'Worker bee', 'uid': 'SCMA0000226396'}"
1,208,"{'name': 'Jumpship', 'uid': 'SCMA0000175103'}"
2,313-C,
3,5,
4,ATW-11,"{'name': 'UFP-02 type', 'uid': 'SCMA0000276004'}"


In [4]:
# view stsc
stsc.info()
stsc.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 727 entries, 0 to 726
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   name           727 non-null    object 
 1   numberOfDecks  33 non-null     float64
 2   warpCapable    727 non-null    bool   
 3   species        412 non-null    object 
dtypes: bool(1), float64(1), object(2)
memory usage: 17.9+ KB


Unnamed: 0,name,numberOfDecks,warpCapable,species
0,Abronian ship,,False,"{'name': 'Abronian', 'uid': 'SPMA0000274634'}"
1,Academy trainer craft,,False,
2,Aeroshuttle,,False,
3,Agena target vehicle,,False,
4,Akira class,,False,


In [5]:
# view sws
sws.info()
sws.head(15)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1049 entries, 0 to 1048
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   vehicle_name  1049 non-null   object
 1   manufacturer  1049 non-null   object
 2   model         1049 non-null   object
 3   class         1049 non-null   object
 4   length        1049 non-null   object
 5   crew          1049 non-null   object
 6   owner(s)      1049 non-null   object
 7   affiliation   1049 non-null   object
dtypes: object(8)
memory usage: 65.7+ KB


Unnamed: 0,vehicle_name,manufacturer,model,class,length,crew,owner(s),affiliation
0,"""Changeling"" Mark 71NB",Ravager Mechanics,"""Changeling"" Mark 71NB",Racing starfighter,,2,,"['Ace Squadron', 'Colossus resistance']"
1,125-Z treadspeeder bike,Aratech-Loratus Corporation,125-Z treadspeeder bik,Speeder,4.17 meters,1,,"['First Order', 'Resistance']"
2,49AX3,,Dreadnought-class heavy cruiser,Heavy cruiser,,,,"['Galactic Empire', 'Alliance to Restore the R..."
3,614-AvA speeder bike,Aratech Repulsor Company,614-AvA,Speeder bike,4.4 meters,['Pilot'],,"['Galactic Empire', 'Lothal resistance group',..."
4,712-AvA speeder bike,Aratech Repulsor Company,712-AvA,Speeder bike,,,,
5,720 light freighter,Ghtroc Industries,720,Light freighter,,2,,Citizens' Fleet
6,74-Z speeder bike,Aratech Repulsor Company,74-Z speeder bike,Speeder bike,3.3 meters,Pilot,,"['Galactic Republic', 'Galactic Empire', 'Impe..."
7,773,Rothana Heavy Engineering,All Terrain Tactical Enforcer,Walker,22.02 meters,,,"['Galactic Republic', 'Carnivore Battalion']"
8,A-A4B truckspeeder,Trast Heavy Transports,A-A4B,['Landspeeder'],,Pilot,,White Worms
9,A/SF-01 B-wing starfighter,Slayn & Korpil,A/SF-01 B-wing starfighter,Heavy assault starfighter,16.9 meters,Pilot,,"['Phoenix Cell', 'Alliance to Restore the Repu..."


We've pulled in all spacecraft and spacecraft class information for the Star Trek, and we've pulled in all vehicles for Star Wars.  We will combine the Star Trek spacecraft with the classes where applicable.  We will then attempt to isolate only the spacecraft for Star Wars.

In [6]:
# Start with renaming column names in both dataframes
sts = sts.rename(columns={'name' : 'starship_name'})
stsc = stsc.rename(columns= {'name' : 'starship_class'})

# Start by pulling class name out of dictionary in Star Trek spacecraftClass

sts['spc_updated'] = sts['spacecraftClass'].apply(lambda x: x.get('name') if isinstance(x,dict) else None)

sts.info()
sts.head(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1443 entries, 0 to 1442
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   starship_name    1443 non-null   object
 1   spacecraftClass  610 non-null    object
 2   spc_updated      610 non-null    object
dtypes: object(3)
memory usage: 33.9+ KB


Unnamed: 0,starship_name,spacecraftClass,spc_updated
0,042,"{'name': 'Worker bee', 'uid': 'SCMA0000226396'}",Worker bee
1,208,"{'name': 'Jumpship', 'uid': 'SCMA0000175103'}",Jumpship
2,313-C,,
3,5,,
4,ATW-11,"{'name': 'UFP-02 type', 'uid': 'SCMA0000276004'}",UFP-02 type
5,Abaddon's space station,,
6,Adonis,"{'name': 'Type 6A shuttlecraft', 'uid': 'SCMA0...",Type 6A shuttlecraft
7,Aeon,,
8,Akritirian maximum security detention facility,,
9,Aldara,"{'name': 'Galor class', 'uid': 'SCMA0000002988'}",Galor class


In [11]:
# Drop spacecraftClass from sts
sts = sts[['starship_name', 'spc_updated']]

# Drop species from stsc
stsc = stsc[['starship_class', 'numberOfDecks', 'warpCapable']]

In [17]:
# Add combine sts and stsc where applicable

sts_merged = pd.merge(sts, stsc, how= 'left', left_on='spc_updated', right_on='starship_class')


In [18]:
sts_merged.info()
sts_merged.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1503 entries, 0 to 1502
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   starship_name   1503 non-null   object 
 1   spc_updated     670 non-null    object 
 2   starship_class  670 non-null    object 
 3   numberOfDecks   113 non-null    float64
 4   warpCapable     670 non-null    object 
dtypes: float64(1), object(4)
memory usage: 70.5+ KB


Unnamed: 0,starship_name,spc_updated,starship_class,numberOfDecks,warpCapable
0,042,Worker bee,Worker bee,,False
1,042,Worker bee,Worker bee,,False
2,208,Jumpship,Jumpship,,False
3,313-C,,,,
4,5,,,,


In [24]:
# Drop duplicate rows

sts_updated = sts_merged.drop_duplicates(subset='starship_name')

In [25]:
sts_updated.head()

Unnamed: 0,starship_name,spc_updated,starship_class,numberOfDecks,warpCapable
0,042,Worker bee,Worker bee,,False
2,208,Jumpship,Jumpship,,False
3,313-C,,,,
4,5,,,,
5,ATW-11,UFP-02 type,UFP-02 type,,False


In [26]:
# Reset index
sts_updated = sts_updated.reset_index(drop=True)

In [27]:
sts_updated.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1312 entries, 0 to 1311
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   starship_name   1312 non-null   object 
 1   spc_updated     527 non-null    object 
 2   starship_class  527 non-null    object 
 3   numberOfDecks   87 non-null     float64
 4   warpCapable     527 non-null    object 
dtypes: float64(1), object(4)
memory usage: 51.4+ KB


In [34]:
# find out how many classes and counts of starships there are in star trek
sts_count = sts_updated['starship_class'].value_counts()

sts_count

California class        27
Type 6A shuttlecraft    16
Excelsior class         16
Sovereign class         15
Type 7 shuttlecraft     15
                        ..
Rev-12 type              1
Ravinok type             1
Kobheerian freighter     1
Talarian warship         1
Maymora class            1
Name: starship_class, Length: 189, dtype: int64