In [1]:
# Watermark is not required for this code, but is included for information:
# it prints the author plus the Python/IPython versions and machine details
# shown in the output below, so readers can see the environment that was used.
import watermark  # imported explicitly; the report below lists its version (2.3.1)
%load_ext watermark
%watermark -a "ELEANOR LUTZ" -d -v -iv -m

Author: ELEANOR LUTZ

Python implementation: CPython
Python version       : 3.11.0
IPython version      : 8.11.0

Compiler    : Clang 13.0.0 (clang-1300.0.29.30)
OS          : Darwin
Release     : 22.3.0
Machine     : arm64
Processor   : arm
CPU cores   : 8
Architecture: 64bit

watermark: 2.3.1



In [2]:
import numpy as np
import pandas as pd
import os.path

In [3]:
# Column names are assigned by hand, following the bundle_description.txt file
colnames = [
    'number', 'name', 'full_name', 'axis_AU', 'eccentricity', 'inclination',
    'type', 'companions', 'magnitude', 'abs_magnitude', 'abs_magnitude_err',
    'diameter_km', 'e_diameter_err1', 'e_diameter_err2', 'e_diameter_notecode',
    'p_diameter', 'p_diameter_err1', 'p_diameter_err2', 'p_diameter_notecode',
    'c_diameter', 'c_diameter_err1', 'c_diameter_err2', 'c_diameter_notecode',
    'albedo', 'albedo_err1', 'albedo_err2', 'albedo_notecode', 'albedo_colorcode',
    'density', 'density_err1', 'density_err2', 'density_notecode',
    'methods_da', 'methods_density', 'reference',
]

# Each record is read into a single string column (column 0); see the first preview.
tab_path = './data/diameters/tno-centaur_diam-albedo-density/data/tno_centaur_diam_alb_dens.tab'
diams = pd.read_csv(tab_path, header=None, sep='\t', encoding='latin-1')
display(diams.head())

# Split every record on whitespace. The designation occupies two fields
# (e.g. "1977" and "UB"), so field 3 is glued onto field 2 and then discarded,
# leaving exactly one field per entry in `colnames`.
diams = diams[0].str.split(expand=True)
diams[2] = diams[2] + diams[3]
diams = diams.drop(columns=[3])
diams.columns = colnames
display(diams.head())

# Keep only the identifying columns and the two diameter measurements.
key_cols = ['number', 'name', 'full_name', 'type']
measure_cols = ['diameter_km', 'p_diameter']
diams = diams[key_cols + measure_cols]

# Cast the measurements to float and blank out anything that is not strictly
# positive (presumably negative sentinels such as the -9.999 values visible in
# the preview mark missing data).
for col in measure_cols:
    diams[col] = diams[col].astype(float)
    diams[col] = diams[col].where(diams[col] > 0)

# Fall back to p_diameter where diameter_km is missing, then discard rows that
# still have no diameter at all.
diams['diameter_km'] = diams['diameter_km'].fillna(diams['p_diameter'])
diams = diams[diams['diameter_km'].notna()]

# Collapse repeated measurements of the same object to their median.
diams = diams.set_index(key_cols).groupby(key_cols).median().reset_index()

# Remove hyphens from names (presumably '-' is the placeholder for unnamed
# objects: the final preview shows empty names), and give Pluto the full_name
# that the later Pluto check looks up.
diams['name'] = diams['name'].str.replace('-', '')
diams.loc[diams['name'] == 'Pluto', 'full_name'] = '134340Pluto'
diams = diams.drop(columns=['p_diameter'])

display(diams.head())

# Only written when the file does not exist yet; an existing CSV is left
# untouched, so delete it first to regenerate the output.
savename = './data/diameters/TNO_Centaurs.csv'
if not os.path.isfile(savename):
    diams.to_csv(savename, index=False)

print(len(diams), 'unique asteroids')

Unnamed: 0,0
0,2060 Chiron 1977 UB 13.7 0...
1,2060 Chiron 1977 UB 13.7 0...
2,2060 Chiron 1977 UB 13.7 0...
3,2060 Chiron 1977 UB 13.7 0...
4,2060 Chiron 1977 UB 13.7 0...


Unnamed: 0,number,name,full_name,axis_AU,eccentricity,inclination,type,companions,magnitude,abs_magnitude,...,albedo_err2,albedo_notecode,albedo_colorcode,density,density_err1,density_err2,density_notecode,methods_da,methods_density,reference
0,2060,Chiron,1977UB,13.7,0.38,7.0,CEN,0,5.8,6.9,...,0.035,*,-,-9.999,-9.999,-9.999,-,T,-,L84a
1,2060,Chiron,1977UB,13.7,0.38,7.0,CEN,0,5.8,-99.999,...,-9.999,-,G,-9.999,-9.999,-9.999,-,T,-,S91a
2,2060,Chiron,1977UB,13.7,0.38,7.0,CEN,0,5.8,6.8,...,-9.999,*,G,-9.999,-9.999,-9.999,-,T,-,J92a
3,2060,Chiron,1977UB,13.7,0.38,7.0,CEN,0,5.8,-99.999,...,-9.999,-,-,-9.999,-9.999,-9.999,-,O,-,B93a
4,2060,Chiron,1977UB,13.7,0.38,7.0,CEN,0,5.8,6.9,...,0.03,*,-,-9.999,-9.999,-9.999,-,T,-,C94a


Unnamed: 0,number,name,full_name,type,diameter_km
0,0,,1996TS66,CUB,163.0
1,0,,1998WW31,CUB,170.3
2,0,,1999OJ4,CUB,103.95
3,0,,2000CF105,CUB,78.3
4,0,,2000GM137,CEN,8.8


179 unique asteroids


In [4]:
# Load the full asteroid table and normalise `full_name` so it can be matched
# against the concatenated designations stored in TNO_Centaurs.csv.
df = pd.read_csv('./data/all_asteroids.csv', low_memory=False)

# Strip spaces, parentheses and hyphens as *literal* characters. regex=False is
# passed explicitly because '(' and ')' are regex metacharacters: relying on
# the implicit default raises a pandas FutureWarning and is version-dependent.
for char in (' ', '(', ')', '-'):
    df['full_name'] = df['full_name'].str.replace(char, '', regex=False)
display(df.tail())

# Only full_name (the join key) and diameter_km are needed from the lookup.
diams = pd.read_csv('./data/diameters/TNO_Centaurs.csv', low_memory=False)
diams = diams.drop(columns=['number', 'name', 'type'])

# A left merge must keep one output row per asteroid; if the lookup contained a
# repeated full_name, matching rows would be silently duplicated.
n_rows_before = len(df)
df = pd.merge(df, diams, on='full_name', how='left')
assert len(df) == n_rows_before, (
    'merge changed the row count: TNO_Centaurs.csv has duplicate full_name values'
)
display(df.tail())

# Report how many lookup diameters matched, then use them only to fill gaps:
# diameters already present in `df` take precedence.
print(df['diameter_km'].notna().sum(), 'diameter values that can be joined')
original = df['diameter'].notna().sum()
df['diameter'] = df['diameter'].fillna(df['diameter_km'])
print(df['diameter'].notna().sum() - original, 'new diameter values added')
df = df.drop(columns=['diameter_km'])

# Only written when the file does not exist yet; an existing CSV is left
# untouched, so delete it first to regenerate the output.
savename = './data/all_asteroids_wrangled.csv'
if not os.path.isfile(savename):
    df.to_csv(savename, index=False)
display(df.tail())

  df['full_name'] = df['full_name'].str.replace(' ', '').str.replace('(', '').str.replace(')', '').str.replace('-', '')


Unnamed: 0,id,spkid,full_name,pdes,name,neo,pha,diameter,prefix,q,per,class
794557,bT3S3411,3246664,3411T3,3411 T-3,,N,N,,,2.073221,1428.185418,MBA
794558,bT3S3521,3246672,3521T3,3521 T-3,,N,N,,,1.546423,1386.029296,MCA
794559,bT3S4571,3248621,4571T3,4571 T-3,,N,N,,,1.813901,1484.222588,MBA
794560,bT3S4658,3248624,4658T3,4658 T-3,,N,N,,,1.718022,1138.438109,MBA
794561,bT3S5154,3248651,5154T3,5154 T-3,,N,N,,,2.239464,1985.374056,MBA


Unnamed: 0,id,spkid,full_name,pdes,name,neo,pha,diameter,prefix,q,per,class,diameter_km
794557,bT3S3411,3246664,3411T3,3411 T-3,,N,N,,,2.073221,1428.185418,MBA,
794558,bT3S3521,3246672,3521T3,3521 T-3,,N,N,,,1.546423,1386.029296,MCA,
794559,bT3S4571,3248621,4571T3,4571 T-3,,N,N,,,1.813901,1484.222588,MBA,
794560,bT3S4658,3248624,4658T3,4658 T-3,,N,N,,,1.718022,1138.438109,MBA,
794561,bT3S5154,3248651,5154T3,5154 T-3,,N,N,,,2.239464,1985.374056,MBA,


44 diameter values that can be joined
35 new diameter values added


Unnamed: 0,id,spkid,full_name,pdes,name,neo,pha,diameter,prefix,q,per,class
794557,bT3S3411,3246664,3411T3,3411 T-3,,N,N,,,2.073221,1428.185418,MBA
794558,bT3S3521,3246672,3521T3,3521 T-3,,N,N,,,1.546423,1386.029296,MCA
794559,bT3S4571,3248621,4571T3,4571 T-3,,N,N,,,1.813901,1484.222588,MBA
794560,bT3S4658,3248624,4658T3,4658 T-3,,N,N,,,1.718022,1138.438109,MBA
794561,bT3S5154,3248651,5154T3,5154 T-3,,N,N,,,2.239464,1985.374056,MBA


In [5]:
# Sanity check: show the Pluto row to confirm its diameter has been filled in
is_pluto = df['full_name'] == '134340Pluto'
df.loc[is_pluto]

Unnamed: 0,id,spkid,full_name,pdes,name,neo,pha,diameter,prefix,q,per,class
134339,a0134340,2134340,134340Pluto,134340,Pluto,N,N,2361.0,,29.573992,90487.276927,TNO
