In [54]:
import numpy as np
import pandas as pd
import requests
import html5lib
from bs4 import BeautifulSoup

# Close-approach table query. Parameters visible in the URL: type=NEO, hmax=24,
# dmax=5LD, tlim=all, sorted by minimum distance in ascending order.
url = "http://neo.jpl.nasa.gov/cgi-bin/neo_ca?type=NEO&hmax=24&sort=dist_min&sdir=ASC&tlim=all&dmax=5LD&max_rows=0&action=Display+Table&show=1"
# A timeout keeps the cell from blocking forever if the server never answers.
response = requests.get(url, timeout=30)
# Stop here on an HTTP error instead of handing an error page to the parser.
response.raise_for_status()
website_html = response.text
#fetches website HTML
#OUR DATA - from years 1900 - 2200, objects over 67meters in diameter, closer than 4 Lunar Distances
# NOTE(review): the URL above requests dmax=5LD while the note says 4 Lunar Distances - confirm which is intended.


In [55]:
soupdata = BeautifulSoup(website_html, "html5lib")
# The table we want is the one whose border attribute equals "1".
table = soupdata.find("table", { "border" : "1" })
if table is None:
    # find() returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on the next line.
    raise ValueError("Could not find the table with border=1 in the downloaded page")

# Flatten the table: the text of every <td> cell, in document order.
asteroiddata = [cell.text for cell in table.find_all('td')]

# Original author's workaround: "some reason the last value wasn't added, so had to manually add".
# NOTE(review): this hard-coded value goes stale as soon as the table contents change -
# confirm whether the final cell is really missing from the parse.
asteroiddata.append('20.3')
def magnitudetodiameter(m):
    """Convert a magnitude (brightness) value to a diameter in meters.

    The magnitude is rounded to the nearest 0.5 and looked up in a fixed table
    covering 15.0 through 24.0. A rounded value outside that table raises
    KeyError.

    NOTE(review): the original comment calls this apparent magnitude; the
    `hmax` query parameter used when fetching the data suggests it may be the
    absolute magnitude H - confirm.
    """
    #magnitude keys and their diameters (meters), paired position by position
    magnitudes = ("15.0", "15.5", "16.0", "16.5", "17.0", "17.5", "18.0",
                  "18.5", "19.0", "19.5", "20.0", "20.5", "21.0", "21.5",
                  "22.0", "22.5", "23.0", "23.5", "24.0")
    diameters = (4500, 3500, 3000, 2000, 1750, 1250, 1085,
                 865, 680, 540, 430, 340, 275, 215,
                 175, 137, 107, 85, 67)
    lookup = dict(zip(magnitudes, diameters))
    #doubling, rounding and halving snaps the value to the nearest .5 so it matches a key
    nearest_half = round(m * 2) / 2
    return lookup[str(nearest_half)]



In [80]:
# Every table row contributes this many <td> cells to the flat list.
FIELDS_PER_ROW = 8

asteroiddata2=asteroiddata[FIELDS_PER_ROW:]
#had to clean the data by removing the header values (the first 8 cells)
dic = {'Object Name':[], 'Closest Distance (LD)':[],'Relative Velocity (km/s)':[],'Diameter (m)':[]}
#initalized dictionary that will later hold all of the asteroid data as a DataFrame

# Only whole rows are parsed. Counting complete groups of 8 (instead of looping
# while i < len - 1) yields the same rows whether or not a trailing padding
# element is present, and never drops the last row's magnitude.
complete_rows = len(asteroiddata2) // FIELDS_PER_ROW

for row_number in range(complete_rows):
    start = row_number * FIELDS_PER_ROW
    row = asteroiddata2[start:start + FIELDS_PER_ROW]

    #position 0 of each row is the object name
    dic['Object Name'].append(row[0])

    #position 3 is the distance; keep only its first four characters
    #(the original author's way of removing the AU part) and convert for analytics later
    dic['Closest Distance (LD)'].append(float(row[3][:4]))

    #position 4 is the velocity, cast to float for analytics
    dic['Relative Velocity (km/s)'].append(float(row[4]))

    #position 7 is the magnitude, converted to a diameter in meters
    magnitude_text = row[7]
    if magnitude_text == "n/a":
        #had to change n/a value in data to an acceptable, averaged value
        dic['Diameter (m)'].append(magnitudetodiameter(20.0))
    else:
        dic['Diameter (m)'].append(magnitudetodiameter(float(magnitude_text)))


    

In [57]:
#the date sits at position 1 of every 8-value row; keep its first 11 characters
#and collect them as the index of the future DataFrame
index1 = [asteroiddata2[position][:11]
          for position in range(1, len(asteroiddata2), 8)]

In [58]:
from pandas import DataFrame

#columns are listed explicitly so they come out in this order
column_order = [
    'Object Name',
    'Closest Distance (LD)',
    'Relative Velocity (km/s)',
    'Diameter (m)',
]
#build the DataFrame from the parsed columns, indexed by the date strings in index1
frame2 = DataFrame(data=dic, index=index1, columns=column_order)
frame2

Unnamed: 0,Object Name,Closest Distance (LD),Relative Velocity (km/s),Diameter (m)
2013-Feb-15,367943 Duende,0.09,7.82,67
2029-Apr-13,99942 Apophis,0.10,7.42,540
1973-Jan-17,(2009 BH2),0.11,16.83,137
2129-Oct-19,(2007 UW1),0.16,5.50,137
2153-Apr-15,(2004 HM),0.16,12.32,107
1927-Apr-10,(2002 JE9),0.20,12.83,275
2068-Jan-07,(2010 VB1),0.25,7.84,85
2041-Apr-08,(2012 UE34),0.27,6.13,85
2002-Jun-14,(2002 MN),0.31,10.57,85
1945-Mar-02,(2015 DR),0.32,15.86,67


In [59]:
#DATA ANALYTICS
#column means over the whole DataFrame, rounded to two decimals
mean_velocity = round(frame2["Relative Velocity (km/s)"].mean(), 2)
mean_diameter = round(frame2["Diameter (m)"].mean(), 2)
print("The average Relative Velocity of asteroids in the database is " + str(mean_velocity) + " (KM/S)")
print("The average Diameter of an asteroid in the database is " + str(mean_diameter) + " (m)")


The average Relative Velocity of asteroids in the database is 13.19 (KM/S)
The average Diameter of an asteroid in the database is 340.05 (m)


In [91]:
#DATA ANALYTICS
list1 = frame2.index.values
#array of the DataFrame's index labels (the date strings)

#keep the row positions whose year (first four characters of the label)
#lies strictly between 1996 and 2016; used later for the 20 year statistics
listindexes = [position
               for position, label in enumerate(list1)
               if 1996 < int(label[:4]) < 2016]


In [95]:
def _rounded_mean(samples):
    """Return the mean of `samples` rounded to 2 decimals, or NaN if `samples` is empty."""
    if len(samples) == 0:
        # Avoids ZeroDivisionError when no row matched the filter.
        return float("nan")
    return round(sum(samples) / len(samples), 2)


values = np.array(frame2.values)
#2D array with one row per DataFrame row, excluding the index
averagediameter=[]
averagevelocity=[]
averagediameter1LD=[]
averagevelocity1LD=[]
withinlunar=[]

for x in listindexes:
    #loops through only the row positions collected in listindexes
    row = values[x]
    #column 1 is the closest distance, 2 the velocity, 3 the diameter
    if row[1] <= 1:
        withinlunar.append(row[1])
        averagediameter1LD.append(row[3])
        averagevelocity1LD.append(row[2])
    averagevelocity.append(row[2])
    averagediameter.append(row[3])

print("In the past 20 years, " + str(len(averagevelocity)) + " asteroids in total have come within 4 Lunar Distances of Earth")
print("The average velocity of total asteroids that came in past 20 years was " + str(_rounded_mean(averagevelocity)) + " (KM/S)")
print("The average diameter of total asteroids that came in past 20 years was " + str(_rounded_mean(averagediameter))+ " (m)")
print("")
print("In the past 20 years, " + str(len(withinlunar)) + " asteroids came within 1 Lunar Distance of Earth")
print("The average velocity of these asteroids that came in past 20 years was " + str(_rounded_mean(averagevelocity1LD)) + " (KM/S)")
print("The average diameter of these asteroids that came in past 20 years was " + str(_rounded_mean(averagediameter1LD))+ " (m)")
print("")
#fixed message; it is not derived from the numbers computed above
print("It seems that asteroids farther away may be bigger on average")

In the past 20 years, 95 asteroids in total have come within 4 Lunar Distances of Earth
The average velocity of total asteroids that came in past 20 years was 13.51 (KM/S)
The average diameter of total asteroids that came in past 20 years was 217.0 (m)

In the past 20 years, 4 asteroids came within 1 Lunar Distance of Earth
The average velocity of these asteroids that came in past 20 years was 13.25 (KM/S)
The average diameter of these asteroids that came in past 20 years was 108.0 (m)

It seems that asteroids farther away may be bigger on average


In [62]:
def getYearIndexes(list1, year):
    """Return the positions in `list1` whose date string starts with `year`.

    list1 -- sequence of date strings whose first four characters are the year
    year  -- the year to match, as a string ('2013') or an int (2013)

    Returns a list of integer positions, in ascending order.
    """
    # Compare as text so an int year matches instead of silently finding nothing.
    target = str(year)
    return [position for position, date in enumerate(list1) if date[:4] == target]

def getMonthIndexes(list1, month):
    """Return the positions in `list1` whose characters 5 to 7 equal `month`.

    list1 -- sequence of date strings
    month -- the text to compare against that slice of each date string

    Returns a list of integer positions, in ascending order.
    """
    return [position
            for position, date in enumerate(list1)
            if date[5:8] == month]


def getObjectsPerIndex(Indexes, dataFrame, distanceMax):
    """Count the rows listed in `Indexes` whose second column is at most `distanceMax`.

    Indexes     -- iterable of row positions into `dataFrame.values`
    dataFrame   -- object exposing `.values`; column 1 of each row is compared
    distanceMax -- inclusive upper bound for that column

    Returns the number of matching rows as an int.
    """
    rows = dataFrame.values
    return sum(1 for position in Indexes if rows[position][1] <= distanceMax)
            

In [71]:
#DATA VISUALIZATION Bar Graph Asteroids within 2 lunar distances per year
import matplotlib.pyplot as plt
#ten consecutive years starting at 2006, kept as strings to match the date labels
years = [str(2006 + offset) for offset in range(10)]

#for each year, count the objects whose closest distance is at most 2.0
valuesbar = [getObjectsPerIndex(getYearIndexes(frame2.index.values, label), frame2, 2.0)
             for label in years]

#same width and color for every bar
widths = [0.7] * 10
colors = ['b'] * 10

fig, ax = plt.subplots()

ax.bar(range(10), valuesbar, width=widths,
       color=colors, align='center')

#one tick per bar, labelled with its year
ax.set_xticks(range(10))
ax.set_xticklabels(years)

ax.set_ylabel('Asteroid Count')

ax.set_title('Asteroids Closer than 2 Lunar Distances past 10 years')

plt.show()


In [74]:
#Data visualization bar graph asteroids within 1 lunar distances per month
months=['Jan','Feb', 'Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']

#for each month, count the objects whose closest distance is at most 1.0
valuesbar = [getObjectsPerIndex(getMonthIndexes(frame2.index.values, name), frame2, 1.0)
             for name in months]

#same width and color for every bar
widths2 = [0.7] * 12
colors2 = ['r'] * 12

fig, ax = plt.subplots()

ax.bar(range(12), valuesbar, width=widths2,
       color=colors2, align='center')

#one tick per bar, labelled with its month
ax.set_xticks(range(12))
ax.set_xticklabels(months)

ax.set_ylabel('Asteroid Count')

ax.set_title('Asteroids Closer than 1 Lunar Distances/Month')

plt.show()


In [75]:
#Data visualization bar graph asteroids within 2 lunar distances per month
#PREDICTION - expect beginning and end of year to be highest count

#for each month, count the objects whose closest distance is at most 2.0
valuesbar = [getObjectsPerIndex(getMonthIndexes(frame2.index.values, name), frame2, 2.0)
             for name in months]

#same width and color for every bar
widths2 = [0.7] * 12
colors2 = ['b'] * 12

fig, ax = plt.subplots()

ax.bar(range(12), valuesbar, width=widths2,
       color=colors2, align='center')

#one tick per bar, labelled with its month
ax.set_xticks(range(12))
ax.set_xticklabels(months)

ax.set_ylabel('Asteroid Count')

ax.set_title('Asteroids Closer than 2 Lunar Distances/Month')

plt.show()



In [76]:
#Data visualization bar graph asteroids within 4 lunar distances per month
#PREDICTION - expect beginning and end of year to be highest count

#for each month, count the objects whose closest distance is at most 4.0
valuesbar = [getObjectsPerIndex(getMonthIndexes(frame2.index.values, name), frame2, 4.0)
             for name in months]

#same width and color for every bar
widths2 = [0.7] * 12
colors2 = ['g'] * 12

fig, ax = plt.subplots()

ax.bar(range(12), valuesbar, width=widths2,
       color=colors2, align='center')

#one tick per bar, labelled with its month
ax.set_xticks(range(12))
ax.set_xticklabels(months)

ax.set_ylabel('Asteroid Count')

ax.set_title('Asteroids Closer than 4 Lunar Distances/Month')

plt.show()


In [102]:
#Shows velocity vs diameter for the past 20 years
#averagevelocity / averagediameter are the per-object lists filled by the 20 year statistics cell
plt.scatter(averagevelocity, averagediameter , marker="D", c='b')
plt.title('Speed vs Diameter of Asteroids in the past 20 Years')
plt.xlabel('Velocity (KM/S)')
plt.ylabel('Diameter (m)')
#gca() returns the axes holding the scatter; a bare plt.axes() call adds a new,
#empty axes in current Matplotlib, so the limit would not reach the plot
plt.gca().set_ylim([0,4000])
plt.show()


In [110]:
#shows velocity vs diameter for all time
alllist = frame2.values
#read from alllist itself rather than a `values` variable left over from another cell;
#column 2 is the velocity and column 3 the diameter
allvel = [row[2] for row in alllist]
alldia = [row[3] for row in alllist]

plt.scatter(allvel, alldia , marker="D", c='r')
plt.title('Speed vs Diameter of all Asteroids in DataBase Sample')
plt.xlabel('Velocity (KM/S)')
plt.ylabel('Diameter (m)')
#gca() returns the axes holding the scatter; a bare plt.axes() call adds a new,
#empty axes in current Matplotlib, so the limit would not reach the plot
plt.gca().set_ylim([0,4000])
plt.show()
#PREDICTION - based on previous scatterplot, we estimate faster asteroids to be smaller and larger ones to be slower
