### CNEOS Data Extractor
#### Batch Data Processing
#### Authored By -- Vaibhav Gupta

#### Importing Modules

In [None]:
import pandas as pd
import numpy as np
import requests
import json
from pyspark.sql import SparkSession, Row
import pyspark.pandas as ps
from pyspark.sql.functions import udf, col, explode, lit, split, concat, to_timestamp, to_date, date_format, round
from pyspark.sql.types import StructType, StructField, IntegerType, DoubleType, StringType, ArrayType, DateType, TimestampType
from datetime import datetime, timedelta

In [None]:
spark = SparkSession.builder.master('local[*]').appName('CNEOS_Data_Extractor').getOrCreate()
spark

In [None]:
date = datetime.today().date()
date_min = date + timedelta(days=59)
date_max = date + timedelta(days=60)
print(date_min, date_max)

2023-03-18 2023-03-19


#### Triggering Get API Request

In [None]:
def makeAPICall(url, parameters):
    try:
        response = requests.get(url, params=parameters)
    except Exception as e:
        return e

    if response.status_code == 200 and response != None:
        return response.json()
    return None

In [None]:
url = "https://ssd-api.jpl.nasa.gov/cad.api"
parameters = {
#     "date-min": str(datetime.today().date()),
    "date-min": "1900-01-04",
    "date-max": str(date_max),
    "dist-max": "2",
    'fullname': "true",
    # 'dist-max': "0.1",
    'diameter': "true"
}
# response = requests.get(url, parameters)
# data = response.json()

# response = makeAPICall(url, parameters)

In [None]:
response = makeAPICall(url, parameters)

In [None]:
response['count']

Out[68]: '337448'

#### Stating Data Attributes

In [None]:
# schema = StructType([
#     StructField("Signature", StringType(), True),
#     StructField("Count", StringType(), True),
#     StructField("Fields", ArrayType(
#         StructType([
#             StructField("Designation", StringType(), True),
#             StructField("Orbit_Id", StringType(), True),
#             StructField("Time of Close approach", StringType(), True),
#             StructField("Close-Approach Date", StringType(), True),
#             StructField("Nominal Approch distance (au)", StringType(), True),
#             StructField("Min Close-Approach Distance (au)",
#                         StringType(), True),
#             StructField("Max Close-Approach Distance (au)",
#                         StringType(), True),
#             StructField("V Reletive (Km/s)", StringType(), True),
#             StructField("V Infinite (Km/s)", StringType(), True),
#             StructField("Close-Approach Uncertain Time", StringType(), True),
#             StructField("Absolute Magnitude (mag)", StringType(), True),
#             StructField("Diameter (Km)", StringType(), True),
#             StructField("Diameter-Sigma (Km)", StringType(), True),
#             StructField("Designation", StringType(), True),
#         ])
#     ), True),
#     StructField("Data", ArrayType(
#         StructType([
#             StructField("Designation", StringType(), True),
#             StructField("Orbit_Id", StringType(), True),
#             StructField("Time of Close approach", DoubleType(), True),
#             StructField("Close-Approach Date", DateType(), True),
#             StructField("Nominal Approch distance (au)", DoubleType(), True),
#             StructField("Min Close-Approach Distance (au)",
#                         DoubleType(), True),
#             StructField("Max Close-Approach Distance (au)",
#                         DoubleType(), True),
#             StructField("V Reletive (Km/s)", DoubleType(), True),
#             StructField("V Infinite (Km/s)", DoubleType(), True),
#             StructField("Close-Approach Uncertain Time", StringType(), True),
#             StructField("Absolute Magnitude (mag)", DoubleType(), True),
#             StructField("Diameter (Km)", DoubleType(), True),
#             StructField("Diameter-Sigma (Km)", DoubleType(), True),
#             StructField("Designation", StringType(), True),
#         ])
#     ), True)
# ])
# schema


Out[66]: StructType([StructField('Signature', StringType(), True), StructField('Count', StringType(), True), StructField('Fields', ArrayType(StructType([StructField('Designation', StringType(), True), StructField('Orbit_Id', StringType(), True), StructField('Time of Close approach', StringType(), True), StructField('Close-Approach Date', StringType(), True), StructField('Nominal Approch distance (au)', StringType(), True), StructField('Min Close-Approach Distance (au)', StringType(), True), StructField('Max Close-Approach Distance (au)', StringType(), True), StructField('V Reletive (Km/s)', StringType(), True), StructField('V Infinite (Km/s)', StringType(), True), StructField('Close-Approach Uncertain Time', StringType(), True), StructField('Absolute Magnitude (mag)', StringType(), True), StructField('Diameter (Km)', StringType(), True), StructField('Diameter-Sigma (Km)', StringType(), True), StructField('Designation', StringType(), True)]), True), True), StructField('Data', ArrayType(

In [None]:
columns = [
    'Designation',
    'Orbit_Id',
    'Time_of_Close_approach',
    'Close_Approach_Date',
    'Nominal_Approch_distance_au',
    'Min_Close_Approach_Distance_au',
    'Max_Close_Approach_Distance_au',
    'V_Reletive_Kms',
    'V_Infinite_Kms',
    'Close_Approach_Uncertain_Time',
    'Absolute_Magnitude_mag',
    'Diameter_Km',
    'Diameter_Sigma_Km',
    'Object'
]


In [None]:
# def formattingData(data):
#     formattedData = []
#     for row in data:
#         temp = {}
#         for j in range(len(row)):
#             temp[columns[j]] = row[j]
#         formattedData.append(temp)
#     return formattedData


In [None]:
# formattingData(response['data'])

In [None]:
# data = formattingData(response['data'])

#### Loading Data to Spark Pandas Dataframe

In [None]:
df = ps.DataFrame(response['data'], columns=columns)
df.head()

Unnamed: 0,Designation,Orbit_Id,Time_of_Close_approach,Close_Approach_Date,Nominal_Approch_distance_au,Min_Close_Approach_Distance_au,Max_Close_Approach_Distance_au,V_Reletive_Kms,V_Infinite_Kms,Close_Approach_Uncertain_Time,Absolute_Magnitude_mag,Diameter_Km,Diameter_Sigma_Km,Object
0,2020 BN7,6,2415023.594589649,1900-Jan-04 02:16,0.0896607474147164,0.0882582365913522,0.0914306781836958,5.2581158476114,5.25246109718832,1_13:09,23.8,,,(2020 BN7)
1,2017 MW4,18,2415023.595882106,1900-Jan-04 02:18,0.0613004997707699,0.0612907488305461,0.0613102521172838,17.5916418448861,17.5891708464552,00:01,20.05,,,(2017 MW4)
2,2014 MF18,11,2415023.799642058,1900-Jan-04 07:11,0.241200417356034,0.241141521401975,0.241259428445274,7.09890002378741,7.09734373263039,04:38,26.1,0.069,0.02,(2014 MF18)
3,163026,17,2415024.061758172,1900-Jan-04 13:29,0.227139998718129,0.227132767166169,0.227147230794654,7.32876671403419,7.32716592067723,< 00:01,21.24,,,163026 (2001 XR30)
4,2020 WW,9,2415024.308174091,1900-Jan-04 19:24,0.343109945959218,0.342518186199474,0.343764786683642,5.55126928284656,5.54987020627105,10:36,21.16,,,(2020 WW)


##### Checking stats of raw API Data

In [None]:
df.shape

Out[215]: (337448, 14)

In [None]:
# df.describe()

Unnamed: 0,Designation,Orbit_Id,Time_of_Close_approach,Close_Approach_Date,Nominal_Approch_distance_au,Min_Close_Approach_Distance_au,Max_Close_Approach_Distance_au,V_Reletive_Kms,V_Infinite_Kms,Close_Approach_Uncertain_Time,Absolute_Magnitude_mag,Diameter_Km,Diameter_Sigma_Km,Object
count,337448,337448,337448.0,337448,337448.0,337448,337448.0,337448.0,337421.0,337448,337059.0,23401.0,21684.0,337448
unique,30685,406,337448.0,336429,337448.0,337432,337448.0,337448.0,337421.0,14287,2140.0,917.0,379.0,30685
top,277810,5,2415073.543858523,2014-Dec-09 14:31,0.447127720189524,0,0.313408665373468,15.5639527179313,,< 00:01,24.4,,,277810 (2006 FV35)
freq,246,15591,1.0,3,1.0,17,1.0,1.0,27.0,106714,2874.0,314047.0,315764.0,246


In [None]:
# df[columns[0]] = df[columns[0]].astype(str)
df.dtypes

Out[217]: Designation                       object
Orbit_Id                          object
Time_of_Close_approach            object
Close_Approach_Date               object
Nominal_Approch_distance_au       object
Min_Close_Approach_Distance_au    object
Max_Close_Approach_Distance_au    object
V_Reletive_Kms                    object
V_Infinite_Kms                    object
Close_Approach_Uncertain_Time     object
Absolute_Magnitude_mag            object
Diameter_Km                       object
Diameter_Sigma_Km                 object
Object                            object
dtype: object

In [None]:
df.tail()

Unnamed: 0,Designation,Orbit_Id,Time_of_Close_approach,Close_Approach_Date,Nominal_Approch_distance_au,Min_Close_Approach_Distance_au,Max_Close_Approach_Distance_au,V_Reletive_Kms,V_Infinite_Kms,Close_Approach_Uncertain_Time,Absolute_Magnitude_mag,Diameter_Km,Diameter_Sigma_Km,Object
337443,619492,30,2460022.120706278,2023-Mar-18 14:54,0.112340994675077,0.112340407446303,0.112341581904823,6.18840091576884,6.18456710920094,< 00:01,20.55,,,619492 (2004 KZ14)
337444,2022 UJ1,2,2460022.140308976,2023-Mar-18 15:22,0.111129130834281,0.110767528121713,0.111491094855885,5.13558625235879,5.1309154450453,03:03,27.91,,,(2022 UJ1)
337445,386504,68,2460022.207881971,2023-Mar-18 16:59,0.323967587340494,0.323966825914268,0.323968348767603,24.5521012251626,24.5517662403698,< 00:01,18.71,,,386504 (2009 BP58)
337446,2020 PF,14,2460022.221742335,2023-Mar-18 17:19,0.497889345197824,0.456743315204394,0.539047735220833,26.6188719540857,26.6186709099002,2_01:39,23.2,,,(2020 PF)
337447,2003 YG136,8,2460022.315938008,2023-Mar-18 19:35,0.339725591931309,0.327494331696837,0.352061292158272,11.7679606276373,11.7672941352632,5_14:22,25.3,,,(2003 YG136)


#### Loading Spark Pandas DataFrame to Spark SQL DataFrame

In [None]:
sdf = df.to_spark()


In [None]:
sdf.printSchema()

root
 |-- Designation: string (nullable = false)
 |-- Orbit_Id: string (nullable = false)
 |-- Time_of_Close_approach: string (nullable = false)
 |-- Close_Approach_Date: string (nullable = false)
 |-- Nominal_Approch_distance_au: string (nullable = false)
 |-- Min_Close_Approach_Distance_au: string (nullable = false)
 |-- Max_Close_Approach_Distance_au: string (nullable = false)
 |-- V_Reletive_Kms: string (nullable = false)
 |-- V_Infinite_Kms: string (nullable = true)
 |-- Close_Approach_Uncertain_Time: string (nullable = false)
 |-- Absolute_Magnitude_mag: string (nullable = true)
 |-- Diameter_Km: string (nullable = true)
 |-- Diameter_Sigma_Km: string (nullable = true)
 |-- Object: string (nullable = false)



In [None]:
sdf.show(4)

+--------+---------------------------+------------------------------+------------------------------+--------------+--------------+----------------------+-------------+------------------+---------------------------+------------------------------+------------------------------+-----------------------------+
|Orbit_Id|Nominal_Approch_distance_au|Min_Close_Approach_Distance_au|Max_Close_Approach_Distance_au|V_Reletive_Kms|V_Infinite_Kms|Absolute_Magnitude_mag|  Diameter_Km|            Object|Nominal_Approch_distance_km|Min_Close_Approach_Distance_km|Max_Close_Approach_Distance_km|Close_Approach_Date_formatted|
+--------+---------------------------+------------------------------+------------------------------+--------------+--------------+----------------------+-------------+------------------+---------------------------+------------------------------+------------------------------+-----------------------------+
|       6|                    0.08966|                       0.08826|          

##### Performing Some DataType Conversions

In [None]:
sdf2 = sdf.withColumn("Designation", col("Designation").cast(StringType())) \
    .withColumn("Orbit_Id", col("Orbit_Id").cast(StringType())) \
    .withColumn("Time_of_Close_approach", col("Time_of_Close_approach").cast(DoubleType())) \
    .withColumn("Close_Approach_Date", col("Close_Approach_Date").cast(StringType())) \
    .withColumn("Nominal_Approch_distance_au", col("Nominal_Approch_distance_au").cast(DoubleType())) \
    .withColumn("Min_Close_Approach_Distance_au", col("Min_Close_Approach_Distance_au").cast(DoubleType())) \
    .withColumn("Max_Close_Approach_Distance_au", col("Max_Close_Approach_Distance_au").cast(DoubleType())) \
    .withColumn("V_Reletive_Kms", col("V_Reletive_Kms").cast(DoubleType())) \
    .withColumn("V_Infinite_Kms", col("V_Infinite_Kms").cast(DoubleType())) \
    .withColumn("Close_Approach_Uncertain_Time", col("Close_Approach_Uncertain_Time").cast(StringType())) \
    .withColumn("Absolute_Magnitude_mag", col("Absolute_Magnitude_mag").cast(DoubleType())) \
    .withColumn("Diameter_Km", col("Diameter_Km").cast(DoubleType())) \
    .withColumn("Diameter_Sigma_Km", col("Diameter_Sigma_Km").cast(DoubleType())) \
    .withColumn("Object", col("Object").cast(StringType()))
sdf2.printSchema()

root
 |-- Designation: string (nullable = false)
 |-- Orbit_Id: string (nullable = false)
 |-- Time_of_Close_approach: double (nullable = true)
 |-- Close_Approach_Date: string (nullable = false)
 |-- Nominal_Approch_distance_au: double (nullable = true)
 |-- Min_Close_Approach_Distance_au: double (nullable = true)
 |-- Max_Close_Approach_Distance_au: double (nullable = true)
 |-- V_Reletive_Kms: double (nullable = true)
 |-- V_Infinite_Kms: double (nullable = true)
 |-- Close_Approach_Uncertain_Time: string (nullable = false)
 |-- Absolute_Magnitude_mag: double (nullable = true)
 |-- Diameter_Km: double (nullable = true)
 |-- Diameter_Sigma_Km: double (nullable = true)
 |-- Object: string (nullable = false)



In [None]:
sdf2.show(5)

+-----------+--------+----------------------+-------------------+---------------------------+------------------------------+------------------------------+--------------+--------------+-----------------------------+----------------------+-------------+-----------------+------------------+---------------------------+------------------------------+------------------------------+-----------------------------+
|Designation|Orbit_Id|Time_of_Close_approach|Close_Approach_Date|Nominal_Approch_distance_au|Min_Close_Approach_Distance_au|Max_Close_Approach_Distance_au|V_Reletive_Kms|V_Infinite_Kms|Close_Approach_Uncertain_Time|Absolute_Magnitude_mag|  Diameter_Km|Diameter_Sigma_Km|            Object|Nominal_Approch_distance_km|Min_Close_Approach_Distance_km|Max_Close_Approach_Distance_km|Close_Approach_Date_formatted|
+-----------+--------+----------------------+-------------------+---------------------------+------------------------------+------------------------------+--------------+----------

In [None]:
# sdf.withColumn()

In [None]:
# def auToKm(row):
#     return row['Nominal Approch distance (au)'] * 149597871


In [None]:
# def kmpsToMps(kms):
#     return kms * 1000

#### Transforming Data Based On Following Conditions

- ###### Object

    Object primary designation
- ###### Close-Approach (Close Approach) Date

    Date and time (TDB) of closest Earth approach. "Nominal Date" is given to appropriate precision. The 3-sigma uncertainty in the time is given in the +/- column in days_hours:minutes format (for example, "2_15:23" is 2 days, 15 hours, 23 minutes; "< 00:01" is less than 1 minute).
    
- ###### Close Approach Distance Nominal (au)/(km)

    The most likely (Nominal) close-approach distance (Earth center to NEO center), in astronomical units.
- ###### Close Approach Distance Minimum (au)/(km)

    The minimum possible close-approach distance (Earth center to NEO center), in astronomical units. The minimum possible distance is based on the 3-sigma Earth target-plane error ellipse.
- ###### Close Approach Distance Maximum (au)/(km)

    The maximum possible close-approach distance (Earth center to NEO center), in astronomical units. The maximum possible distance is based on the 3-sigma Earth target-plane error ellipse.
- ###### V relative (km/s)

    Object velocity relative to Earth at close-approach.
- ###### V infinity (km/s)

    Object velocity relative to a massless Earth at close-approach.
- ###### Absolute Magnitute H (mag)

    Asteroid absolute magnitude (in general, smaller H implies larger asteroid diameter). Undefined for comets.
- ###### Diameter (km)

    Diameter value when known or a range (min - max) estimated using the asteroid's absolute magnitude (H) and limiting albedos of 0.25 and 0.05.
- ###### au

    One Astronomical Unit (au) is approximately 150 million kilometers (see glossary for definition).
- ###### LD

    One Lunar Distance (LD) is approximately 384,000 kilometers (see glossary for definition).

In [None]:
sdf2 = sdf2.withColumn('Nominal_Approch_distance_km', lit(round(col('Nominal_Approch_distance_au')*149597871, 0))) \
        .withColumn('Time_of_Close_approach', lit(round(col('Time_of_Close_approach'), 2))) \
        .withColumn('Nominal_Approch_distance_au', lit(round(col('Nominal_Approch_distance_au'), 5))) \
        .withColumn('Min_Close_Approach_Distance_au', lit(round(col('Min_Close_Approach_Distance_au'), 5))) \
        .withColumn('Max_Close_Approach_Distance_au', lit(round(col('Max_Close_Approach_Distance_au'), 5))) \
        .withColumn('Min_Close_Approach_Distance_km', lit(round(col('Min_Close_Approach_Distance_au')*149597871, 0))) \
        .withColumn('Max_Close_Approach_Distance_km', lit(round(col('Max_Close_Approach_Distance_au')*149597871, 0))) \
        .withColumn('V_Reletive_Kms', lit(round(col('V_Reletive_Kms'), 2))) \
        .withColumn('V_Infinite_Kms', lit(round(col('V_Infinite_Kms'), 2))) \
        .withColumn('Diameter_Km', concat(lit(round(((1329 * (10 ** ((-0.2) * col('Absolute_Magnitude_mag'))))/0.25 ** 0.5), 3)), \
                                          lit(' - '), \
                                          lit(round(((1329 * (10 ** ((-0.2) * col('Absolute_Magnitude_mag'))))/0.05 ** 0.5), 3)))) \
        .withColumn('Close_Approach_Date_formatted', concat(col('Close_Approach_Date'), lit(':00:000 ± '), lit(col('Close_Approach_Uncertain_Time'))))


In [None]:
display(sdf2)

Designation,Orbit_Id,Time_of_Close_approach,Close_Approach_Date,Nominal_Approch_distance_au,Min_Close_Approach_Distance_au,Max_Close_Approach_Distance_au,V_Reletive_Kms,V_Infinite_Kms,Close_Approach_Uncertain_Time,Absolute_Magnitude_mag,Diameter_Km,Diameter_Sigma_Km,Object,Nominal_Approch_distance_km,Min_Close_Approach_Distance_km,Max_Close_Approach_Distance_km,Close_Approach_Date_formatted
2020 BN7,6,2415023.59,1900-Jan-04 02:16,0.08966,0.08826,0.09143,5.26,5.25,1_13:09,23.8,0.046 - 0.103,,(2020 BN7),13413057.0,13203508.0,13677733.0,1900-Jan-04 02:16:00:000 ± 1_13:09
2017 MW4,18,2415023.6,1900-Jan-04 02:18,0.0613,0.06129,0.06131,17.59,17.59,00:01,20.05,0.26 - 0.581,,(2017 MW4),9170424.0,9168854.0,9171845.0,1900-Jan-04 02:18:00:000 ± 00:01
2014 MF18,11,2415023.8,1900-Jan-04 07:11,0.2412,0.24114,0.24126,7.1,7.1,04:38,26.1,0.016 - 0.036,0.02,(2014 MF18),36083069.0,36074031.0,36091982.0,1900-Jan-04 07:11:00:000 ± 04:38
163026,17,2415024.06,1900-Jan-04 13:29,0.22714,0.22713,0.22715,7.33,7.33,< 00:01,21.24,0.15 - 0.336,,163026 (2001 XR30),33979660.0,33978164.0,33981156.0,1900-Jan-04 13:29:00:000 ± < 00:01
2020 WW,9,2415024.31,1900-Jan-04 19:24,0.34311,0.34252,0.34376,5.55,5.55,10:36,21.16,0.156 - 0.348,,(2020 WW),51328517.0,51240263.0,51425764.0,1900-Jan-04 19:24:00:000 ± 10:36
485823,43,2415024.34,1900-Jan-04 20:07,0.30599,0.30599,0.306,10.46,10.46,00:02,20.61,0.201 - 0.449,,485823 (2012 DF61),45776140.0,45775453.0,45776949.0,1900-Jan-04 20:07:00:000 ± 00:02
509352,57,2415024.43,1900-Jan-04 22:25,0.00963,0.00962,0.00964,8.69,8.65,00:02,20.16,0.247 - 0.552,,509352 (2007 AG),1440903.0,1439132.0,1442123.0,1900-Jan-04 22:25:00:000 ± 00:02
2002 AA29,23,2415024.52,1900-Jan-05 00:35,0.41127,0.4112,0.41134,13.26,13.26,00:02,24.1,0.04 - 0.09,,(2002 AA29),61525214.0,61514645.0,61535588.0,1900-Jan-05 00:35:00:000 ± 00:02
68347,81,2415024.6,1900-Jan-05 02:23,0.40052,0.40052,0.40053,15.9,15.9,< 00:01,19.92,0.276 - 0.617,,68347 (2001 KB67),59917634.0,59916939.0,59918435.0,1900-Jan-05 02:23:00:000 ± < 00:01
2022 LA1,9,2415024.89,1900-Jan-05 09:25,0.49266,0.48605,0.50297,11.43,11.43,3_05:41,21.97,0.107 - 0.24,,(2022 LA1),73701171.0,72712045.0,75243241.0,1900-Jan-05 09:25:00:000 ± 3_05:41


In [None]:
# # display(sdf2.withColumn('Close_Approach_Uncertain_Time_formatted', ))
# display(sdf2.withColumn('Close_Approach_Date_formatted', concat(col('Close_Approach_Date'), lit(':00:000 ± '), lit(col('Close_Approach_Uncertain_Time')))))

In [None]:
# def format_close_approch_date(df):
#     return df.withColumn('Close_Approach_Date', col('Close_Approach_Date').)

In [None]:
# display(sdf2.withColumn('Close_Approach_Date_formatted', to_date(date_format(col('Close_Approach_Date'), format='dd-MMM-yyyy hh:mm'))))

In [None]:
del sdf

In [None]:
sdf = sdf2.alias('sdf')


In [None]:
sdf.printSchema()

root
 |-- Orbit_Id: string (nullable = false)
 |-- Nominal_Approch_distance_au: double (nullable = true)
 |-- Min_Close_Approach_Distance_au: double (nullable = true)
 |-- Max_Close_Approach_Distance_au: double (nullable = true)
 |-- V_Reletive_Kms: double (nullable = true)
 |-- V_Infinite_Kms: double (nullable = true)
 |-- Absolute_Magnitude_mag: double (nullable = true)
 |-- Diameter_Km: string (nullable = true)
 |-- Object: string (nullable = false)
 |-- Nominal_Approch_distance_km: double (nullable = true)
 |-- Min_Close_Approach_Distance_km: double (nullable = true)
 |-- Max_Close_Approach_Distance_km: double (nullable = true)
 |-- Close_Approach_Date_formatted: string (nullable = false)



In [None]:
# sdf = sdf.drop('Designation', 'Time_of_Close_approach','Close_Approach_Date', 'Close_Approach_Uncertain_Time', 'Diameter_Sigma_Km')
# sdf.printSchema()

root
 |-- Orbit_Id: string (nullable = false)
 |-- Nominal_Approch_distance_au: double (nullable = true)
 |-- Min_Close_Approach_Distance_au: double (nullable = true)
 |-- Max_Close_Approach_Distance_au: double (nullable = true)
 |-- V_Reletive_Kms: double (nullable = true)
 |-- V_Infinite_Kms: double (nullable = true)
 |-- Absolute_Magnitude_mag: double (nullable = true)
 |-- Diameter_Km: string (nullable = true)
 |-- Object: string (nullable = false)
 |-- Nominal_Approch_distance_km: double (nullable = true)
 |-- Min_Close_Approach_Distance_km: double (nullable = true)
 |-- Max_Close_Approach_Distance_km: double (nullable = true)
 |-- Close_Approach_Date_formatted: string (nullable = false)



#### Finalizing Formatted and Transformed Data

In [None]:
final_sdf = sdf.select('Object', 'Close_Approach_Date_formatted', 'Orbit_Id', 'Nominal_Approch_distance_au', 'Nominal_Approch_distance_km', 'Min_Close_Approach_Distance_au', 'Min_Close_Approach_Distance_km', 'Max_Close_Approach_Distance_au', 'Max_Close_Approach_Distance_km', 'V_Reletive_Kms', 'V_Infinite_Kms', 'Absolute_Magnitude_mag', 'Diameter_Km')

In [None]:
display(sdf)

Orbit_Id,Nominal_Approch_distance_au,Min_Close_Approach_Distance_au,Max_Close_Approach_Distance_au,V_Reletive_Kms,V_Infinite_Kms,Absolute_Magnitude_mag,Diameter_Km,Object,Nominal_Approch_distance_km,Min_Close_Approach_Distance_km,Max_Close_Approach_Distance_km,Close_Approach_Date_formatted
6,0.08966,0.08826,0.09143,5.26,5.25,23.8,0.046 - 0.103,(2020 BN7),13413057.0,13203508.0,13677733.0,1900-Jan-04 02:16:00:000 ± 1_13:09
18,0.0613,0.06129,0.06131,17.59,17.59,20.05,0.26 - 0.581,(2017 MW4),9170424.0,9168854.0,9171845.0,1900-Jan-04 02:18:00:000 ± 00:01
11,0.2412,0.24114,0.24126,7.1,7.1,26.1,0.016 - 0.036,(2014 MF18),36083069.0,36074031.0,36091982.0,1900-Jan-04 07:11:00:000 ± 04:38
17,0.22714,0.22713,0.22715,7.33,7.33,21.24,0.15 - 0.336,163026 (2001 XR30),33979660.0,33978164.0,33981156.0,1900-Jan-04 13:29:00:000 ± < 00:01
9,0.34311,0.34252,0.34376,5.55,5.55,21.16,0.156 - 0.348,(2020 WW),51328517.0,51240263.0,51425764.0,1900-Jan-04 19:24:00:000 ± 10:36
43,0.30599,0.30599,0.306,10.46,10.46,20.61,0.201 - 0.449,485823 (2012 DF61),45776140.0,45775453.0,45776949.0,1900-Jan-04 20:07:00:000 ± 00:02
57,0.00963,0.00962,0.00964,8.69,8.65,20.16,0.247 - 0.552,509352 (2007 AG),1440903.0,1439132.0,1442123.0,1900-Jan-04 22:25:00:000 ± 00:02
23,0.41127,0.4112,0.41134,13.26,13.26,24.1,0.04 - 0.09,(2002 AA29),61525214.0,61514645.0,61535588.0,1900-Jan-05 00:35:00:000 ± 00:02
81,0.40052,0.40052,0.40053,15.9,15.9,19.92,0.276 - 0.617,68347 (2001 KB67),59917634.0,59916939.0,59918435.0,1900-Jan-05 02:23:00:000 ± < 00:01
9,0.49266,0.48605,0.50297,11.43,11.43,21.97,0.107 - 0.24,(2022 LA1),73701171.0,72712045.0,75243241.0,1900-Jan-05 09:25:00:000 ± 3_05:41


In [None]:
display(final_sdf)

Object,Close_Approach_Date_formatted,Orbit_Id,Nominal_Approch_distance_au,Nominal_Approch_distance_km,Min_Close_Approach_Distance_au,Min_Close_Approach_Distance_km,Max_Close_Approach_Distance_au,Max_Close_Approach_Distance_km,V_Reletive_Kms,V_Infinite_Kms,Absolute_Magnitude_mag,Diameter_Km
(2020 BN7),1900-Jan-04 02:16:00:000 ± 1_13:09,6,0.08966,13413057.0,0.08826,13203508.0,0.09143,13677733.0,5.26,5.25,23.8,0.046 - 0.103
(2017 MW4),1900-Jan-04 02:18:00:000 ± 00:01,18,0.0613,9170424.0,0.06129,9168854.0,0.06131,9171845.0,17.59,17.59,20.05,0.26 - 0.581
(2014 MF18),1900-Jan-04 07:11:00:000 ± 04:38,11,0.2412,36083069.0,0.24114,36074031.0,0.24126,36091982.0,7.1,7.1,26.1,0.016 - 0.036
163026 (2001 XR30),1900-Jan-04 13:29:00:000 ± < 00:01,17,0.22714,33979660.0,0.22713,33978164.0,0.22715,33981156.0,7.33,7.33,21.24,0.15 - 0.336
(2020 WW),1900-Jan-04 19:24:00:000 ± 10:36,9,0.34311,51328517.0,0.34252,51240263.0,0.34376,51425764.0,5.55,5.55,21.16,0.156 - 0.348
485823 (2012 DF61),1900-Jan-04 20:07:00:000 ± 00:02,43,0.30599,45776140.0,0.30599,45775453.0,0.306,45776949.0,10.46,10.46,20.61,0.201 - 0.449
509352 (2007 AG),1900-Jan-04 22:25:00:000 ± 00:02,57,0.00963,1440903.0,0.00962,1439132.0,0.00964,1442123.0,8.69,8.65,20.16,0.247 - 0.552
(2002 AA29),1900-Jan-05 00:35:00:000 ± 00:02,23,0.41127,61525214.0,0.4112,61514645.0,0.41134,61535588.0,13.26,13.26,24.1,0.04 - 0.09
68347 (2001 KB67),1900-Jan-05 02:23:00:000 ± < 00:01,81,0.40052,59917634.0,0.40052,59916939.0,0.40053,59918435.0,15.9,15.9,19.92,0.276 - 0.617
(2022 LA1),1900-Jan-05 09:25:00:000 ± 3_05:41,9,0.49266,73701171.0,0.48605,72712045.0,0.50297,75243241.0,11.43,11.43,21.97,0.107 - 0.24


In [None]:
final_sdf.write.option('overwrite').csv('./cneosData')

In [None]:
# from databricks import sql
# import os

# with sql.connect(server_hostname = os.getenv("DATABRICKS_SERVER_HOSTNAME"),
#                  http_path       = os.getenv("DATABRICKS_HTTP_PATH"),
#                  access_token    = os.getenv("DATABRICKS_TOKEN")) as connection:

# with connection.cursor() as cursor:
#     cursor.execute("SELECT * FROM default.diamonds LIMIT 2")
#     result = cursor.fetchall()

#     for row in result:
#         print(row)
