In [1]:
import stapi
import requests
from bs4 import BeautifulSoup as BS
import pandas as pd
import pickle
import re
from tqdm import tqdm
from pathlib import Path
import urllib
import pyarrow

In [2]:
# Smoke test of stapi search: query the animal endpoint for avians only.
criteria = stapi.search_criteria.AnimalSearchCriteria(
    0,
    50,
    "",
    avian=True,
)
response = stapi.RestClient().animal.search(criteria)
# Dump the raw payload (paging info, sort info and the matching animals) to inspect its structure.
print(response)

{'page': {'pageNumber': 0, 'pageSize': 50, 'numberOfElements': 50, 'totalElements': 63, 'totalPages': 2, 'firstPage': True, 'lastPage': False}, 'sort': {'clauses': []}, 'animals': [{'uid': 'ANMA0000028273', 'name': 'Arbazan vulture', 'earthAnimal': False, 'earthInsect': False, 'avian': True, 'canine': False, 'feline': False}, {'uid': 'ANMA0000022561', 'name': 'Arcturian dog bird', 'earthAnimal': False, 'earthInsect': False, 'avian': True, 'canine': False, 'feline': False}, {'uid': 'ANMA0000039166', 'name': 'Arkarian horn fowl', 'earthAnimal': False, 'earthInsect': False, 'avian': True, 'canine': False, 'feline': False}, {'uid': 'ANMA0000000111', 'name': 'Aurelian', 'earthAnimal': False, 'earthInsect': False, 'avian': True, 'canine': False, 'feline': False}, {'uid': 'ANMA0000086683', 'name': 'Baneriam hawk', 'earthAnimal': False, 'earthInsect': False, 'avian': True, 'canine': False, 'feline': False}, {'uid': 'ANMA0000231825', 'name': 'Betelgeusian', 'earthAnimal': False, 'earthInsect': 

In [3]:
# Same avian search, this time asking for up to 100 results per page.
criteria = stapi.search_criteria.AnimalSearchCriteria(
    0,
    100,
    "",
    avian=True,
)
response = stapi.RestClient().animal.search(criteria)
# NOTE: this is the number of top-level keys in the payload ('page', 'sort', 'animals'),
# not the number of animals returned.
len(response)

3

In [4]:
# Fetch a single astronomical object by its uid and show its type as the cell output.
rest_client = stapi.RestClient()
loracus = rest_client.astronomicalObject.get(
    "ASMA0000012319"
)
loracus.astronomicalObjectType

'PLANET'

In [5]:
# Fetch the same object again and inspect it: default repr, attribute listing, then its name.
rc = stapi.RestClient()
loracus = rc.astronomicalObject.get("ASMA0000012319")
print(loracus, dir(loracus), loracus.name, sep="\n")

<stapi.full.AstronomicalObjectFull object at 0x000001BCD474F130>
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'astronomicalObjectType', 'astronomicalObjects', 'location', 'name', 'uid']
Loracus Prime


STAPI is working, so let's pull the full datasets for Characters, Astronomical Objects (Planets/Places), Species, and Spacecraft (Vehicles/Starships).

In [6]:
# Probe the character endpoint: request the first page and dump the raw payload.
# 500 results per page are requested here; compare with the 'pageSize' the response reports.

criteria = stapi.search_criteria.CharacterSearchCriteria(
    pageNumber=0,
    pageSize=500,
    sort="",
)
response = stapi.RestClient().character.search(criteria)
print(response)

{'page': {'pageNumber': 0, 'pageSize': 50, 'numberOfElements': 50, 'totalElements': 7571, 'totalPages': 152, 'firstPage': True, 'lastPage': False}, 'sort': {'clauses': []}, 'characters': [{'uid': 'CHMA0000215045', 'name': '0413 Theta', 'gender': None, 'yearOfBirth': None, 'monthOfBirth': None, 'dayOfBirth': None, 'placeOfBirth': None, 'yearOfDeath': None, 'monthOfDeath': None, 'dayOfDeath': None, 'placeOfDeath': None, 'height': None, 'weight': None, 'deceased': None, 'bloodType': None, 'maritalStatus': None, 'serialNumber': None, 'hologramActivationDate': None, 'hologramStatus': None, 'hologramDateStatus': None, 'hologram': False, 'fictionalCharacter': False, 'mirror': True, 'alternateReality': False}, {'uid': 'CHMA0000174718', 'name': '0718', 'gender': 'M', 'yearOfBirth': 2259, 'monthOfBirth': None, 'dayOfBirth': None, 'placeOfBirth': None, 'yearOfDeath': None, 'monthOfDeath': None, 'dayOfDeath': None, 'placeOfDeath': None, 'height': None, 'weight': None, 'deceased': None, 'bloodType'

In [7]:
# Page through the character endpoint and collect every character into a
# dictionary keyed by uid, keeping only the fields needed downstream.

characters = {}
page_number = 0
# NOTE: the probe request above asked for 500 per page and the response reported
# 'pageSize': 50, so the server does not appear to honour large page sizes.
# Termination therefore relies on the paging metadata, not on this value.
page_size = 1000
# Build the client once; it does not change between pages.
client = stapi.RestClient()

while True:
    criteria = stapi.search_criteria.CharacterSearchCriteria(pageNumber=page_number, pageSize=page_size, sort="")
    response = client.character.search(criteria)
    page_items = response['characters']
    for character in page_items:
        characters[character['uid']] = {
            'name': character['name'],
            'gender': character.get('gender'),
            'height': character.get('height'),
            'weight': character.get('weight'),
            'bloodType': character.get('bloodType'),
            'maritalStatus': character.get('maritalStatus')
        }
    # Stop on an empty page (the original rule) or as soon as the API flags the
    # last page, which avoids one extra request past the end of the data.
    if not page_items or (response.get('page') or {}).get('lastPage'):
        break
    page_number += 1

In [8]:
# Build the characters table. The dict is keyed by uid, so the constructor puts
# uids on the columns; transposing yields one row per character.
st_chars = pd.DataFrame(characters).transpose()

In [9]:
# Display the characters table (rows are indexed by character uid).
# Left as the cell's last expression so the notebook renders it.
st_chars

Unnamed: 0,name,gender,height,weight,bloodType,maritalStatus
CHMA0000215045,0413 Theta,,,,,
CHMA0000174718,0718,M,,,,
CHMA0000283851,10111,,,,,
CHMA0000278055,335,,,,,
CHMA0000282741,355,,,,,
...,...,...,...,...,...,...
CHMA0000011445,Zorn,M,,,,
CHMA0000128774,Zoumas,M,,,,
CHMA0000281049,Zuniga,,,,,
CHMA0000006133,Zyree,F,,,,


In [10]:
# Summarize the characters table: entry count, per-column non-null counts and dtypes.
st_chars.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7571 entries, CHMA0000215045 to CHMA0000278244
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   name           7571 non-null   object
 1   gender         1146 non-null   object
 2   height         66 non-null     object
 3   weight         49 non-null     object
 4   bloodType      9 non-null      object
 5   maritalStatus  366 non-null    object
dtypes: object(6)
memory usage: 414.0+ KB


In [11]:
# save star trek characters to parquet
# The uid exists only as the frame's index, and index=False drops the index,
# so move it into a regular 'uid' column first to keep rows identifiable.

st_chars.rename_axis('uid').reset_index().to_parquet('../WPscraped/StarTrek_Characters.parquet', index=False)

In [12]:
# Probe the astronomical-object endpoint: request the first page and dump the raw payload.
# 500 results per page are requested here; compare with the 'pageSize' the response reports.

criteria = stapi.search_criteria.AstronomicalObjectSearchCriteria(
    pageNumber=0,
    pageSize=500,
    sort="",
)
response = stapi.RestClient().astronomicalObject.search(criteria)
print(response)

{'page': {'pageNumber': 0, 'pageSize': 50, 'numberOfElements': 50, 'totalElements': 2404, 'totalPages': 49, 'firstPage': True, 'lastPage': False}, 'sort': {'clauses': []}, 'astronomicalObjects': [{'uid': 'ASMA0000015822', 'name': "'aucdet IX", 'astronomicalObjectType': 'PLANET', 'location': {'uid': 'ASMA0000025892', 'name': 'Alpha Quadrant'}}, {'uid': 'ASMA0000264696', 'name': "'etnap Nebula", 'astronomicalObjectType': 'NEBULA', 'location': {'uid': 'ASMA0000002015', 'name': 'Beta Quadrant'}}, {'uid': 'ASMA0000289027', 'name': '1 Centauri', 'astronomicalObjectType': 'STAR_SYSTEM', 'location': {'uid': 'ASMA0000002015', 'name': 'Beta Quadrant'}}, {'uid': 'ASMA0000229695', 'name': '11 Leonis Minoris', 'astronomicalObjectType': 'STAR_SYSTEM', 'location': {'uid': 'ASMA0000025892', 'name': 'Alpha Quadrant'}}, {'uid': 'ASMA0000177695', 'name': '1889 V', 'astronomicalObjectType': 'COMET', 'location': {'uid': 'ASMA0000020577', 'name': 'Earth'}}, {'uid': 'ASMA0000174468', 'name': '1892 III Holmes

In [13]:
# Page through the astronomical-object endpoint and collect every object into
# a dictionary keyed by uid, keeping its name and type.

astronomical_objects = {}
page_number = 0
# NOTE: the probe request above asked for 500 per page and the response reported
# 'pageSize': 50, so the server does not appear to honour large page sizes.
# Termination therefore relies on the paging metadata, not on this value.
page_size = 100
# Build the client once; it does not change between pages.
client = stapi.RestClient()

while True:
    criteria = stapi.search_criteria.AstronomicalObjectSearchCriteria(pageNumber=page_number, pageSize=page_size, sort="")
    response = client.astronomicalObject.search(criteria)
    page_items = response['astronomicalObjects']
    for astronomical_object in page_items:
        astronomical_objects[astronomical_object['uid']] = {
            'name': astronomical_object['name'],
            'astronomicalObjectType': astronomical_object.get('astronomicalObjectType')
        }
    # Stop on an empty page (the original rule) or as soon as the API flags the
    # last page, which avoids one extra request past the end of the data.
    if not page_items or (response.get('page') or {}).get('lastPage'):
        break
    page_number += 1

In [14]:
# Build the astronomical-objects table. The dict is keyed by uid, so the constructor
# puts uids on the columns; transposing yields one row per object.
st_astro = pd.DataFrame(astronomical_objects).transpose()

In [15]:
# Display the astronomical-objects table (rows are indexed by object uid).
# Left as the cell's last expression so the notebook renders it.
st_astro

Unnamed: 0,name,astronomicalObjectType
ASMA0000015822,'aucdet IX,PLANET
ASMA0000264696,'etnap Nebula,NEBULA
ASMA0000289027,1 Centauri,STAR_SYSTEM
ASMA0000229695,11 Leonis Minoris,STAR_SYSTEM
ASMA0000177695,1889 V,COMET
...,...,...
ASMA0000254854,Zimja,PLANET
ASMA0000165865,Zimmerman's Star,STAR
ASMA0000249661,Zoraster II,PLANET
ASMA0000070738,Zozek system,STAR_SYSTEM


In [16]:
# Summarize the astronomical-objects table: entry count, per-column non-null counts and dtypes.
st_astro.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2404 entries, ASMA0000015822 to ASMA0000005665
Data columns (total 2 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   name                    2404 non-null   object
 1   astronomicalObjectType  2363 non-null   object
dtypes: object(2)
memory usage: 56.3+ KB


In [17]:
# save star trek astronomical objects to parquet
# The uid exists only as the frame's index, and index=False drops the index,
# so move it into a regular 'uid' column first to keep rows identifiable.

st_astro.rename_axis('uid').reset_index().to_parquet('../WPscraped/StarTrek_Astronomical_Objects.parquet', index=False)

In [18]:
# Probe the species endpoint: request the first page and dump the raw payload.
# 500 results per page are requested here; compare with the 'pageSize' the response reports.

criteria = stapi.search_criteria.SpeciesSearchCriteria(
    pageNumber=0,
    pageSize=500,
    sort="",
)
response = stapi.RestClient().species.search(criteria)
print(response)

{'page': {'pageNumber': 0, 'pageSize': 50, 'numberOfElements': 50, 'totalElements': 759, 'totalPages': 16, 'firstPage': True, 'lastPage': False}, 'sort': {'clauses': []}, 'species': [{'uid': 'SPMA0000041310', 'name': '892-IV native', 'homeworld': {'uid': 'ASMA0000015188', 'name': '892-IV'}, 'quadrant': None, 'extinctSpecies': False, 'warpCapableSpecies': False, 'extraGalacticSpecies': False, 'humanoidSpecies': True, 'reptilianSpecies': False, 'nonCorporealSpecies': False, 'shapeshiftingSpecies': False, 'spaceborneSpecies': False, 'telepathicSpecies': False, 'transDimensionalSpecies': False, 'unnamedSpecies': True, 'alternateReality': False}, {'uid': 'SPMA0000001088', 'name': 'Aaamazzarite', 'homeworld': None, 'quadrant': None, 'extinctSpecies': False, 'warpCapableSpecies': False, 'extraGalacticSpecies': False, 'humanoidSpecies': True, 'reptilianSpecies': False, 'nonCorporealSpecies': False, 'shapeshiftingSpecies': False, 'spaceborneSpecies': False, 'telepathicSpecies': False, 'transDim

In [19]:
# Page through the species endpoint and collect every species into a
# dictionary keyed by uid, keeping only the fields needed downstream.

species = {}
page_number = 0
# NOTE: the probe request above asked for 500 per page and the response reported
# 'pageSize': 50, so the server does not appear to honour large page sizes.
# Termination therefore relies on the paging metadata, not on this value.
page_size = 100
# Build the client once; it does not change between pages.
client = stapi.RestClient()

while True:
    criteria = stapi.search_criteria.SpeciesSearchCriteria(pageNumber=page_number, pageSize=page_size, sort="")
    response = client.species.search(criteria)
    page_items = response['species']
    for specie in page_items:
        species[specie['uid']] = {
            'name': specie['name'],
            # homeworld is either None or a nested {'uid', 'name'} record in the payload
            'homeworld': specie.get('homeworld'),
            'warpCapableSpecies': specie.get('warpCapableSpecies'),
            'humanoidSpecies': specie.get('humanoidSpecies'),
            'spaceborneSpecies': specie.get('spaceborneSpecies')
        }
    # Stop on an empty page (the original rule) or as soon as the API flags the
    # last page, which avoids one extra request past the end of the data.
    if not page_items or (response.get('page') or {}).get('lastPage'):
        break
    page_number += 1

In [20]:
# Build the species table. The dict is keyed by uid, so the constructor puts
# uids on the columns; transposing yields one row per species.
st_species = pd.DataFrame(species).transpose()

In [21]:
# Display the species table (rows are indexed by species uid).
# Left as the cell's last expression so the notebook renders it.
st_species

Unnamed: 0,name,homeworld,warpCapableSpecies,humanoidSpecies,spaceborneSpecies
SPMA0000041310,892-IV native,"{'uid': 'ASMA0000015188', 'name': '892-IV'}",False,True,False
SPMA0000001088,Aaamazzarite,,False,True,False
SPMA0000212929,Aamaarazan,,False,True,False
SPMA0000064948,Abaddon's species,,False,True,False
SPMA0000224109,Abdon's species,,False,True,False
...,...,...,...,...,...
SPMA0000036040,Zevian,,False,False,False
SPMA0000025179,Zibalian,"{'uid': 'ASMA0000174564', 'name': 'Theta Zibal...",True,True,False
SPMA0000068170,Zobral's species,"{'uid': 'ASMA0000023468', 'name': 'Torothan ho...",True,True,False
SPMA0000061959,Zolan's species,,False,True,False


In [22]:
# Summarize the species table: entry count, per-column non-null counts and dtypes.
st_species.info()

<class 'pandas.core.frame.DataFrame'>
Index: 759 entries, SPMA0000041310 to SPMA0000010630
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   name                759 non-null    object
 1   homeworld           286 non-null    object
 2   warpCapableSpecies  759 non-null    object
 3   humanoidSpecies     759 non-null    object
 4   spaceborneSpecies   759 non-null    object
dtypes: object(5)
memory usage: 35.6+ KB


In [23]:
# save star trek species to parquet
# The uid exists only as the frame's index, and index=False drops the index,
# so move it into a regular 'uid' column first to keep rows identifiable.

st_species.rename_axis('uid').reset_index().to_parquet('../WPscraped/StarTrek_Species.parquet', index=False)

In [24]:
# Probe the spacecraft endpoint: request the first page and dump the raw payload.
# 500 results per page are requested here; compare with the 'pageSize' the response reports.
criteria = stapi.search_criteria.SpacecraftSearchCriteria(
    pageNumber=0,
    pageSize=500,
    sort="",
)
response = stapi.RestClient().spacecraft.search(criteria)
print(response)

{'page': {'pageNumber': 0, 'pageSize': 50, 'numberOfElements': 50, 'totalElements': 1443, 'totalPages': 29, 'firstPage': True, 'lastPage': False}, 'sort': {'clauses': []}, 'spacecrafts': [{'uid': 'SRMA0000245113', 'name': '042', 'registry': '042', 'status': 'Active', 'dateStatus': '3189', 'spacecraftClass': {'uid': 'SCMA0000226396', 'name': 'Worker bee'}, 'owner': None, 'operator': None}, {'uid': 'SRMA0000174913', 'name': '208', 'registry': '208', 'status': 'Destroyed', 'dateStatus': '2259', 'spacecraftClass': {'uid': 'SCMA0000175103', 'name': 'Jumpship'}, 'owner': None, 'operator': None}, {'uid': 'SRMA0000152981', 'name': '313-C', 'registry': None, 'status': None, 'dateStatus': None, 'spacecraftClass': None, 'owner': None, 'operator': None}, {'uid': 'SRMA0000270891', 'name': '5', 'registry': '5', 'status': 'Active', 'dateStatus': '2381', 'spacecraftClass': None, 'owner': None, 'operator': None}, {'uid': 'SRMA0000276018', 'name': 'ATW-11', 'registry': 'ATW-11', 'status': 'Active', 'dat

In [25]:
# Page through the spacecraft endpoint and collect every spacecraft into a
# dictionary keyed by uid, keeping its name and class.

spacecrafts = {}
page_number = 0
# NOTE: the probe request above asked for 500 per page and the response reported
# 'pageSize': 50, so the server does not appear to honour large page sizes.
# Termination therefore relies on the paging metadata, not on this value.
page_size = 100
# Build the client once; it does not change between pages.
client = stapi.RestClient()

while True:
    criteria = stapi.search_criteria.SpacecraftSearchCriteria(pageNumber=page_number, pageSize=page_size, sort="")
    response = client.spacecraft.search(criteria)
    page_items = response['spacecrafts']
    for spacecraft in page_items:
        spacecrafts[spacecraft['uid']] = {
            'name': spacecraft['name'],
            # spacecraftClass is either None or a nested {'uid', 'name'} record in the payload
            'spacecraftClass': spacecraft.get('spacecraftClass')
        }
    # Stop on an empty page (the original rule) or as soon as the API flags the
    # last page, which avoids one extra request past the end of the data.
    if not page_items or (response.get('page') or {}).get('lastPage'):
        break
    page_number += 1

In [26]:
# Build the spacecraft table. The dict is keyed by uid, so the constructor puts
# uids on the columns; transposing yields one row per spacecraft.
st_craft = pd.DataFrame(spacecrafts).transpose()

In [27]:
# Display the spacecraft table (rows are indexed by spacecraft uid).
# Left as the cell's last expression so the notebook renders it.
st_craft

Unnamed: 0,name,spacecraftClass
SRMA0000245113,042,"{'uid': 'SCMA0000226396', 'name': 'Worker bee'}"
SRMA0000174913,208,"{'uid': 'SCMA0000175103', 'name': 'Jumpship'}"
SRMA0000152981,313-C,
SRMA0000270891,5,
SRMA0000276018,ATW-11,"{'uid': 'SCMA0000276004', 'name': 'UFP-02 type'}"
...,...,...
SRMA0000258112,Yosemite,"{'uid': 'SCMA0000258298', 'name': 'Type 6A shu..."
SRMA0000006653,Yosemite 3,
SRMA0000282424,Yosemite II,"{'uid': 'SCMA0000258298', 'name': 'Type 6A shu..."
SRMA0000016523,Zayra IV,


In [28]:
# Summarize the spacecraft table: entry count, per-column non-null counts and dtypes.
st_craft.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1443 entries, SRMA0000245113 to SRMA0000056150
Data columns (total 2 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   name             1443 non-null   object
 1   spacecraftClass  610 non-null    object
dtypes: object(2)
memory usage: 33.8+ KB


In [29]:
# save star trek spacecraft to parquet
# The uid exists only as the frame's index, and index=False drops the index,
# so move it into a regular 'uid' column first to keep rows identifiable.

st_craft.rename_axis('uid').reset_index().to_parquet('../WPscraped/StarTrek_Spacecraft.parquet', index=False)

In [30]:
# Probe the spacecraft-class endpoint: request the first page and dump the raw payload.
# 500 results per page are requested here; compare with the 'pageSize' the response reports.
criteria = stapi.search_criteria.SpacecraftClassSearchCriteria(
    pageNumber=0,
    pageSize=500,
    sort="",
)
response = stapi.RestClient().spacecraftClass.search(criteria)
print(response)


{'page': {'pageNumber': 0, 'pageSize': 50, 'numberOfElements': 50, 'totalElements': 727, 'totalPages': 15, 'firstPage': True, 'lastPage': False}, 'sort': {'clauses': []}, 'spacecraftClasses': [{'uid': 'SCMA0000284244', 'name': 'Abronian ship', 'numberOfDecks': None, 'warpCapable': False, 'alternateReality': False, 'activeFrom': None, 'activeTo': None, 'species': {'uid': 'SPMA0000274634', 'name': 'Abronian'}, 'owner': None, 'operator': None, 'affiliation': None}, {'uid': 'SCMA0000264229', 'name': 'Academy trainer craft', 'numberOfDecks': None, 'warpCapable': False, 'alternateReality': False, 'activeFrom': '24th century', 'activeTo': '24th century', 'species': None, 'owner': None, 'operator': None, 'affiliation': None}, {'uid': 'SCMA0000003735', 'name': 'Aeroshuttle', 'numberOfDecks': None, 'warpCapable': False, 'alternateReality': False, 'activeFrom': '2370s', 'activeTo': '2370s', 'species': None, 'owner': None, 'operator': None, 'affiliation': None}, {'uid': 'SCMA0000171321', 'name': '

In [31]:
# Page through the spacecraft-class endpoint and collect every class into a
# dictionary keyed by uid, keeping only the fields needed downstream.

spacecraft_classes = {}
page_number = 0
# NOTE: the probe request above asked for 500 per page and the response reported
# 'pageSize': 50; termination relies on the paging metadata, not on this value.
page_size = 50
# Build the client once; it does not change between pages.
client = stapi.RestClient()

while True:
    criteria = stapi.search_criteria.SpacecraftClassSearchCriteria(pageNumber=page_number, pageSize=page_size, sort="")
    response = client.spacecraftClass.search(criteria)
    page_items = response['spacecraftClasses']
    for spacecraft_class in page_items:
        spacecraft_classes[spacecraft_class['uid']] = {
            'name': spacecraft_class['name'],
            'numberOfDecks': spacecraft_class.get('numberOfDecks'),
            'warpCapable': spacecraft_class.get('warpCapable'),
            # species is either None or a nested {'uid', 'name'} record in the payload
            'species': spacecraft_class.get('species')
        }
    # Stop on an empty page (the original rule) or as soon as the API flags the
    # last page, which avoids one extra request past the end of the data.
    if not page_items or (response.get('page') or {}).get('lastPage'):
        break
    page_number += 1

In [32]:
# Build the spacecraft-classes table. The dict is keyed by uid, so the constructor
# puts uids on the columns; transposing yields one row per class.
st_craft_classes = pd.DataFrame(spacecraft_classes).transpose()

In [33]:
# Display the spacecraft-classes table (rows are indexed by class uid).
# Left as the cell's last expression so the notebook renders it.
st_craft_classes

Unnamed: 0,name,numberOfDecks,warpCapable,species
SCMA0000284244,Abronian ship,,False,"{'uid': 'SPMA0000274634', 'name': 'Abronian'}"
SCMA0000264229,Academy trainer craft,,False,
SCMA0000003735,Aeroshuttle,,False,
SCMA0000171321,Agena target vehicle,,False,
SCMA0000000216,Akira class,,False,
...,...,...,...,...
SCMA0000043138,Zalkonian escape pod,,False,"{'uid': 'SPMA0000012656', 'name': 'Zalkonian'}"
SCMA0000019156,Zalkonian warship,,True,"{'uid': 'SPMA0000012656', 'name': 'Zalkonian'}"
SCMA0000231163,Zeon spacecraft,,False,"{'uid': 'SPMA0000070944', 'name': 'Zeon'}"
SCMA0000145300,Zibalian escape pod,,False,"{'uid': 'SPMA0000025179', 'name': 'Zibalian'}"


In [34]:
# Summarize the spacecraft-classes table: entry count, per-column non-null counts and dtypes.
st_craft_classes.info()

<class 'pandas.core.frame.DataFrame'>
Index: 727 entries, SCMA0000284244 to SCMA0000023469
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   name           727 non-null    object
 1   numberOfDecks  33 non-null     object
 2   warpCapable    727 non-null    object
 3   species        412 non-null    object
dtypes: object(4)
memory usage: 28.4+ KB


In [35]:
# save star trek spacecraft classes to parquet
# The uid exists only as the frame's index, and index=False drops the index,
# so move it into a regular 'uid' column first to keep rows identifiable.

st_craft_classes.rename_axis('uid').reset_index().to_parquet('../WPscraped/StarTrek_Spacecraft_Classes.parquet', index=False)