In [2]:
%load_ext autoreload
%autoreload 2

In [58]:
import csv
import pandas as pd
import json
from pandas.io.json import json_normalize

In [19]:
# Load the NEO catalogue from its CSV export into a DataFrame.
neo_df = pd.read_csv(
    filepath_or_buffer='./data/neos.csv',
)

In [25]:
# Preview the first three rows of the NEO table.
neo_df.iloc[:3]

Unnamed: 0,id,spkid,full_name,pdes,name,prefix,neo,pha,H,G,...,n_obs_used,n_del_obs_used,n_dop_obs_used,condition_code,rms,two_body,A1,A2,A3,DT
0,a0000433,2000433,433 Eros (A898 PA),433,Eros,,Y,N,10.4,0.46,...,8767,4.0,2.0,0.0,0.28397,,,,,
1,a0000719,2000719,719 Albert (A911 TB),719,Albert,,Y,N,15.5,,...,1874,,,0.0,0.39148,,,,,
2,a0000887,2000887,887 Alinda (A918 AA),887,Alinda,,Y,N,13.8,-0.12,...,1424,,,0.0,0.48159,,,,,


In [54]:
# List every column label in the NEO table (head() elides the middle columns).
neo_df.columns

Index(['id', 'spkid', 'full_name', 'pdes', 'name', 'prefix', 'neo', 'pha', 'H',
       'G', 'M1', 'M2', 'K1', 'K2', 'PC', 'diameter', 'extent', 'albedo',
       'rot_per', 'GM', 'BV', 'UB', 'IR', 'spec_B', 'spec_T', 'H_sigma',
       'diameter_sigma', 'orbit_id', 'epoch', 'epoch_mjd', 'epoch_cal',
       'equinox', 'e', 'a', 'q', 'i', 'om', 'w', 'ma', 'ad', 'n', 'tp',
       'tp_cal', 'per', 'per_y', 'moid', 'moid_ld', 'moid_jup', 't_jup',
       'sigma_e', 'sigma_a', 'sigma_q', 'sigma_i', 'sigma_om', 'sigma_w',
       'sigma_ma', 'sigma_ad', 'sigma_n', 'sigma_tp', 'sigma_per', 'class',
       'producer', 'data_arc', 'first_obs', 'last_obs', 'n_obs_used',
       'n_del_obs_used', 'n_dop_obs_used', 'condition_code', 'rms', 'two_body',
       'A1', 'A2', 'A3', 'DT'],
      dtype='object')

In [56]:
# Distinct values that occur in the 'pha' column, in order of first appearance.
pd.unique(neo_df['pha'])

array(['N', 'Y', nan], dtype=object)

In [55]:
# Narrow the table to the designation, name, diameter and 'pha' columns.
neo_df.loc[:, ['pdes', 'name', 'diameter', 'pha']]

Unnamed: 0,pdes,name,diameter,pha
0,433,Eros,16.840,N
1,719,Albert,,N
2,887,Alinda,4.200,N
3,1036,Ganymed,37.675,N
4,1221,Amor,1.000,N
...,...,...,...,...
23962,2019 Y4-D,ATLAS,,
23963,2020 G1,Pimentel,,
23964,2020 M3,ATLAS,,
23965,2020 P4-B,,,


In [26]:
# Read the close-approach payload into a plain dict.
# JSON text is UTF-8, so pin the encoding rather than depending on the
# platform's locale default (which is not UTF-8 on every system).
with open('./data/cad.json', encoding='utf-8') as json_data:
    data = json.load(json_data)


In [70]:
# Look up the position of the 'cd' entry within the payload's field list.
data['fields'].index('cd')

3

In [67]:
# Show the field names carried by the payload; they are later used as the
# column labels for the rows in data['data'].
data['fields']

['des',
 'orbit_id',
 'jd',
 'cd',
 'dist',
 'dist_min',
 'dist_max',
 'v_rel',
 'v_inf',
 't_sigma_f',
 'h']

In [34]:
# Provenance block of the payload: which API produced it and its version.
data['signature']

{'source': 'NASA/JPL SBDB Close Approach Data API', 'version': '1.1'}

In [35]:
# Record count reported by the payload itself; note it is stored as a string,
# not an int.
data['count']

'406785'

In [40]:
# Build the close-approach table in one expression: rows come from
# data['data'], column labels from data['fields'], and the 'cd' strings are
# parsed into datetimes. All other columns keep the values as delivered.
cad_df = pd.DataFrame(data['data'], columns=data['fields']).assign(
    cd=lambda frame: pd.to_datetime(frame['cd'])
)

In [41]:
# Summarise dtypes and non-null counts to confirm that 'cd' was parsed as a
# datetime and to see which columns contain missing values.
cad_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 406785 entries, 0 to 406784
Data columns (total 11 columns):
 #   Column     Non-Null Count   Dtype         
---  ------     --------------   -----         
 0   des        406785 non-null  object        
 1   orbit_id   406785 non-null  object        
 2   jd         406785 non-null  object        
 3   cd         406785 non-null  datetime64[ns]
 4   dist       406785 non-null  object        
 5   dist_min   406785 non-null  object        
 6   dist_max   406785 non-null  object        
 7   v_rel      406785 non-null  object        
 8   v_inf      406766 non-null  object        
 9   t_sigma_f  406785 non-null  object        
 10  h          406277 non-null  object        
dtypes: datetime64[ns](1), object(10)
memory usage: 34.1+ MB


In [32]:
# Preview the first three close-approach records.
cad_df.iloc[:3]

Unnamed: 0,des,orbit_id,jd,cd,dist,dist_min,dist_max,v_rel,v_inf,t_sigma_f,h
0,170903,105,2415020.50766961,1900-Jan-01 00:11,0.0921795123769547,0.0912006569517418,0.0931589328621254,16.7523040362574,16.7505784933163,01:00,18.1
1,2005 OE3,52,2415020.60601349,1900-Jan-01 02:33,0.414975519685102,0.414968315685577,0.414982724454678,17.918395877175,17.9180375373357,< 00:01,20.3
2,2006 XO4,15,2415020.634068074,1900-Jan-01 03:13,0.114291499199114,0.114272705486348,0.114310301346124,7.39720266467069,7.3940503943318,00:23,23.4


### How many NEOs are in the neos.csv data set? 

In [7]:
# One row per NEO, so the row count answers the question.
neo_df.shape[0]

23967

### What is the primary designation of the first Near Earth Object in the neos.csv data set?

In [10]:
# Take the 'pdes' column as a Series and read its first entry by position.
neo_df['pdes'].iloc[0]

'433'

### What is the diameter (in kilometers) of the NEO whose name is "Apollo"?

In [12]:
# Select rows named "Apollo" and the 'diameter' column in a single .loc call
# rather than two chained indexing steps.
neo_df.loc[neo_df['name'] == 'Apollo', 'diameter']

10    1.5
Name: diameter, dtype: float64

### How many NEOs have IAU names in the data set?

In [16]:
# Series.count() tallies the non-missing entries, i.e. NEOs that have a name.
neo_df['name'].count()

343

### How many NEOs have diameters in the data set?

In [17]:
# Series.count() tallies the non-missing entries, i.e. NEOs with a diameter.
neo_df['diameter'].count()

1268

### How many close approaches are in the cad.json data set?

In [36]:
# One row per close approach, so the row count answers the question.
cad_df.shape[0]

406785

### On January 1st, 2000, how close did the NEO whose primary designation is "2015 CL" pass by Earth?

In [47]:
# Every close approach on 2000-01-01: strip the time of day from 'cd' and
# compare against that single calendar date.
cad_df[cad_df['cd'].dt.normalize() == pd.Timestamp('2000-01-01')]

Unnamed: 0,des,orbit_id,jd,cd,dist,dist_min,dist_max,v_rel,v_inf,t_sigma_f,h
180844,2015 CL,7,2451544.575085225,2000-01-01 01:48:00,0.144929602021186,0.144894711605919,0.144964493657327,12.0338907050642,12.0323628689746,00:19,25.3
180845,2002 PB,22,2451544.656861311,2000-01-01 03:46:00,0.499221505520251,0.499198890779505,0.499244120869353,29.3862908945476,29.3861092695424,< 00:01,20.5
180846,417655,32,2451544.967962443,2000-01-01 11:14:00,0.347809805138266,0.347809185789178,0.347810424487433,9.63652182082792,9.63572681897425,< 00:01,19.9
180847,2002 AY1,47,2451545.232240258,2000-01-01 17:34:00,0.271330274354689,0.271330198605043,0.271330350105786,24.6801874471394,24.6797895515273,< 00:01,20.8


### On January 1st, 2000, how fast did the NEO whose primary designation is "2002 PB" pass by Earth?

In [50]:
# Close approach of "2002 PB" on 2000-01-01; the speed is in the 'v_rel' column.
# Match the full calendar date, not only the year, so that any other approach
# by the same object during 2000 cannot slip into the result.
cad_df[
    (cad_df['des'] == '2002 PB')
    & (cad_df['cd'].dt.normalize() == pd.Timestamp('2000-01-01'))
]

Unnamed: 0,des,orbit_id,jd,cd,dist,dist_min,dist_max,v_rel,v_inf,t_sigma_f,h
180845,2002 PB,22,2451544.656861311,2000-01-01 03:46:00,0.499221505520251,0.499198890779505,0.499244120869353,29.3862908945476,29.3861092695424,< 00:01,20.5


In [71]:
# Look up the catalogue entry for "2015 CL". 'pdes' holds complete primary
# designations, so the comparison must use the whole string.
neo_df[neo_df['pdes'] == '2015 CL']

Unnamed: 0,id,spkid,full_name,pdes,name,prefix,neo,pha,H,G,...,n_obs_used,n_del_obs_used,n_dop_obs_used,condition_code,rms,two_body,A1,A2,A3,DT
12162,bK15C00L,3709715,(2015 CL),2015 CL,,,Y,N,25.3,,...,71,,,1.0,0.55564,,,,,
