In [17]:
import stapi
import requests
from bs4 import BeautifulSoup as BS
import pandas as pd
import pickle
import re
from tqdm import tqdm
from pathlib import Path
import urllib
import pyarrow

In [2]:
# these are tests for the stapi.search
criteria = stapi.search_criteria.AnimalSearchCriteria(0, 50, "", avian=True)
response = stapi.RestClient().animal.search(criteria)
# response now contains results from the API, in this case the first fifty animals which are avians
print(response)

{'page': {'pageNumber': 0, 'pageSize': 50, 'numberOfElements': 50, 'totalElements': 63, 'totalPages': 2, 'firstPage': True, 'lastPage': False}, 'sort': {'clauses': []}, 'animals': [{'uid': 'ANMA0000028273', 'name': 'Arbazan vulture', 'earthAnimal': False, 'earthInsect': False, 'avian': True, 'canine': False, 'feline': False}, {'uid': 'ANMA0000022561', 'name': 'Arcturian dog bird', 'earthAnimal': False, 'earthInsect': False, 'avian': True, 'canine': False, 'feline': False}, {'uid': 'ANMA0000039166', 'name': 'Arkarian horn fowl', 'earthAnimal': False, 'earthInsect': False, 'avian': True, 'canine': False, 'feline': False}, {'uid': 'ANMA0000000111', 'name': 'Aurelian', 'earthAnimal': False, 'earthInsect': False, 'avian': True, 'canine': False, 'feline': False}, {'uid': 'ANMA0000086683', 'name': 'Baneriam hawk', 'earthAnimal': False, 'earthInsect': False, 'avian': True, 'canine': False, 'feline': False}, {'uid': 'ANMA0000231825', 'name': 'Betelgeusian', 'earthAnimal': False, 'earthInsect': 

In [3]:
# test with larger file read
criteria = stapi.search_criteria.AnimalSearchCriteria(0, 100, "", avian=True)
response = stapi.RestClient().animal.search(criteria)
len(response)

3

In [4]:
rest_client = stapi.RestClient()
loracus = rest_client.astronomicalObject.get("ASMA0000012319")
loracus.astronomicalObjectType

'PLANET'

In [5]:
rc = stapi.RestClient()
loracus = rc.astronomicalObject.get("ASMA0000012319")
print(loracus)
print(dir(loracus))
print(loracus.name)

<stapi.full.AstronomicalObjectFull object at 0x000002AFBCBAFD00>
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'astronomicalObjectType', 'astronomicalObjects', 'location', 'name', 'uid']
Loracus Prime


We see Stapi is working, so let's try running Characters, Planets/Places, Species, Vehicles/Starships

In [6]:
# try search for characters

criteria = stapi.search_criteria.CharacterSearchCriteria(pageNumber=0, pageSize=500, sort="")
response = stapi.RestClient().character.search(criteria)
print(response)

{'page': {'pageNumber': 0, 'pageSize': 50, 'numberOfElements': 50, 'totalElements': 7571, 'totalPages': 152, 'firstPage': True, 'lastPage': False}, 'sort': {'clauses': []}, 'characters': [{'uid': 'CHMA0000215045', 'name': '0413 Theta', 'gender': None, 'yearOfBirth': None, 'monthOfBirth': None, 'dayOfBirth': None, 'placeOfBirth': None, 'yearOfDeath': None, 'monthOfDeath': None, 'dayOfDeath': None, 'placeOfDeath': None, 'height': None, 'weight': None, 'deceased': None, 'bloodType': None, 'maritalStatus': None, 'serialNumber': None, 'hologramActivationDate': None, 'hologramStatus': None, 'hologramDateStatus': None, 'hologram': False, 'fictionalCharacter': False, 'mirror': True, 'alternateReality': False}, {'uid': 'CHMA0000174718', 'name': '0718', 'gender': 'M', 'yearOfBirth': 2259, 'monthOfBirth': None, 'dayOfBirth': None, 'placeOfBirth': None, 'yearOfDeath': None, 'monthOfDeath': None, 'dayOfDeath': None, 'placeOfDeath': None, 'height': None, 'weight': None, 'deceased': None, 'bloodType'

In [7]:
# creating a loop to run continuously to pull all characters to a dictionary

characters = {}
page_number = 0
page_size = 1000

while True:
    criteria = stapi.search_criteria.CharacterSearchCriteria(pageNumber=page_number, pageSize=page_size, sort="")
    response = stapi.RestClient().character.search(criteria)
    if not response['characters']:
        break
    for character in response['characters']:
        characters[character['uid']] = {
            'name': character['name'],
            'gender': character.get('gender'),
            'height': character.get('height'),
            'weight': character.get('weight'),
            'bloodType': character.get('bloodType'),
            'maritalStatus': character.get('maritalStatus')
        }
    page_number += 1

In [10]:
# create a dataframe from the star trek character dictionary
st_chars = pd.DataFrame(characters)

# transpose dataframe
st_chars = st_chars.T

In [11]:
# view dataframe
st_chars

Unnamed: 0,name,gender,height,weight,bloodType,maritalStatus
CHMA0000215045,0413 Theta,,,,,
CHMA0000174718,0718,M,,,,
CHMA0000283851,10111,,,,,
CHMA0000278055,335,,,,,
CHMA0000282741,355,,,,,
...,...,...,...,...,...,...
CHMA0000011445,Zorn,M,,,,
CHMA0000128774,Zoumas,M,,,,
CHMA0000281049,Zuniga,,,,,
CHMA0000006133,Zyree,F,,,,


In [13]:
# view dataframe info
st_chars.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7571 entries, CHMA0000215045 to CHMA0000278244
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   name           7571 non-null   object
 1   gender         1146 non-null   object
 2   height         66 non-null     object
 3   weight         49 non-null     object
 4   bloodType      9 non-null      object
 5   maritalStatus  366 non-null    object
dtypes: object(6)
memory usage: 414.0+ KB


In [24]:
# save star trek characters to parquet

st_chars.to_parquet('C:/Users/musta/Documents/DA8/Capstone/wookiestapi/WPscraped/StarTrek_Characters.parquet', index=False)