#### Paleo Analysis and Parsing of the Paleontological Database
Data extracted from *The Paleobiology Database*															


In [141]:
# library imports
import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib as mpl
import requests as rq
import io

In [142]:
# Scraping data from the Paleobiology Database and cleaning up the Null values
url = rq.get('https://paleobiodb.org/data1.2/occs/taxa.csv?base_name=dinosauria&rank=max_genus&taxon_status=accepted&pres=regular&max_ma=252&min_ma=65&show=parent,app,size,class').content
dinos = pd.read_csv(io.StringIO(url.decode('utf-8'))).drop(columns=['extant_size', 'orig_no', 'taxon_no', 'parent_no', 'record_type', 'flags', 'difference', 'accepted_no', 'accepted_rank', 'accepted_name', 'container_no', 'reference_no', 'is_extant', 'phylum', 'type_taxon', 'firstapp_min_ma', 'lastapp_max_ma'])

dinos.columns = ['Rank', 'Name', 'Parent Taxon', 'Number of Occurrences', 'Max First Appearance (MYA)', 'Min Last Appearance (MYA)', 'Early Interval', 'Late Interval', 'Taxon Size', 'Class', 'Order', 'Family', 'Genus']
dinos = dinos[['Name', 'Rank', 'Genus', 'Parent Taxon', 'Family', 'Order', 'Class', 'Number of Occurrences', 'Taxon Size', 'Max First Appearance (MYA)', 'Min Last Appearance (MYA)', 'Early Interval', 'Late Interval']]
dinos['Late Interval'] = dinos['Late Interval'].fillna(dinos['Early Interval'])

eras = ['Triassic', 'Jurassic', 'Cretaceous']
conditions = [
    (dinos['Max First Appearance (MYA)'] >= 251.9) & (dinos['Min Last Appearance (MYA)'] < 201.4), 
    (dinos['Max First Appearance (MYA)'] >= 201.4) & (dinos['Min Last Appearance (MYA)'] < 145.1),
    (dinos['Max First Appearance (MYA)'] >= 145.1) & (dinos['Min Last Appearance (MYA)'] < 66.1)
    ]

dinos['Era'] = np.select(conditions, eras, default=pd.NaT) #--> Need to fix the type errors here
dinos.head()

Unnamed: 0,Name,Rank,Genus,Parent Taxon,Family,Order,Class,Number of Occurrences,Taxon Size,Max First Appearance (MYA),Min Last Appearance (MYA),Early Interval,Late Interval,Era
0,Ajkaceratops,genus,Ajkaceratops,Ceratopsoidea,NO_FAMILY_SPECIFIED,NO_ORDER_SPECIFIED,Ornithischia,2,2,86.3,83.6,Santonian,Santonian,NaT
1,Ajkaceratops kozmai,species,Ajkaceratops,Ajkaceratops,NO_FAMILY_SPECIFIED,NO_ORDER_SPECIFIED,Ornithischia,1,1,86.3,83.6,Santonian,Santonian,NaT
2,Turanoceratops,genus,Turanoceratops,Ceratopsoidea,NO_FAMILY_SPECIFIED,NO_ORDER_SPECIFIED,Ornithischia,12,2,93.9,89.8,Turonian,Turonian,NaT
3,Turanoceratops tardabilis,species,Turanoceratops,Turanoceratops,NO_FAMILY_SPECIFIED,NO_ORDER_SPECIFIED,Ornithischia,6,1,93.9,89.8,Turonian,Turonian,NaT
4,Zuniceratops,genus,Zuniceratops,Ceratopsoidea,NO_FAMILY_SPECIFIED,NO_ORDER_SPECIFIED,Ornithischia,6,2,93.9,89.8,Turonian,Turonian,NaT


In [143]:
# Splitting the Genus up from the Species to separate the Parent Taxon column (need to include that in the species dataframe)
genus = dinos[dinos["Rank"] == 'genus'].drop(columns=['Rank'])

In [144]:
# Creating the Species Dataframe to include the genus and parent taxon columns (this will be the main datafram we use)
species = dinos[dinos["Rank"] == 'species'].drop(columns=['Taxon Size', 'Rank', 'Parent Taxon'])
species = species.merge(genus[['Genus', 'Parent Taxon']], how='inner', on='Genus')[['Name', 'Genus', 'Parent Taxon', 'Family', 'Order', 'Class', 'Number of Occurrences', 'Max First Appearance (MYA)', 'Min Last Appearance (MYA)', 'Early Interval', 'Late Interval', 'Era']]
species.head()

Unnamed: 0,Name,Genus,Parent Taxon,Family,Order,Class,Number of Occurrences,Max First Appearance (MYA),Min Last Appearance (MYA),Early Interval,Late Interval,Era
0,Ajkaceratops kozmai,Ajkaceratops,Ceratopsoidea,NO_FAMILY_SPECIFIED,NO_ORDER_SPECIFIED,Ornithischia,1,86.3,83.6,Santonian,Santonian,NaT
1,Turanoceratops tardabilis,Turanoceratops,Ceratopsoidea,NO_FAMILY_SPECIFIED,NO_ORDER_SPECIFIED,Ornithischia,6,93.9,89.8,Turonian,Turonian,NaT
2,Zuniceratops christopheri,Zuniceratops,Ceratopsoidea,NO_FAMILY_SPECIFIED,NO_ORDER_SPECIFIED,Ornithischia,3,93.9,89.8,Turonian,Turonian,NaT
3,Cerasinops hodgskissi,Cerasinops,Leptoceratopsidae,Leptoceratopsidae,NO_ORDER_SPECIFIED,Ornithischia,3,83.6,72.1,Campanian,Campanian,NaT
4,Ferrisaurus sustutensis,Ferrisaurus,Leptoceratopsidae,Leptoceratopsidae,NO_ORDER_SPECIFIED,Ornithischia,1,72.1,66.0,Maastrichtian,Maastrichtian,NaT
