In [2]:
import urllib.request, urllib.parse, urllib.error
import json, datetime, time
from pymongo import MongoClient

# Connection string to MongoDB cluster
uri = "mongodb+srv://vanas_31:SSGY5WwQaAybundd@cluster0-hsnh5.mongodb.net/test?retryWrites=true&w=majority"
client = MongoClient(uri)

# All earthquakes magnitude 2.5 and above from 5 days before to 25 days after a major quake
# IRAN, Dec. 26, 2003 
# Original date of "2003-12-21T00:00:00" did not produce enough earthquake data so changed starttime

earthquakeURL = "http://earthquake.usgs.gov/fdsnws/event/1/query?"
paramD = dict()
paramD["format"] = "geojson"                # the format the data will be in
paramD["starttime"] = "2003-11-20T00:00:00" # the minimum date that might be retrieved
paramD["endtime"] = "2004-01-20T23:59:59"   # the maximum date/time that might be retrieved
paramD["minmag"] = 2.5                      # the smallest earthquake magnitude to return
paramD["limit"] = 500                         # the maximum number of earthquakes to return
                                            # starts with the most recent
    
# Connects to database or creates if not present
db = client['earthquakesdb']

# Determines if data exists in DB by finding time and if so it reassigns "endtime" to last
# downloaded data.  Also, it converts the time from epoch to ISO format
projection = {"properties.time": 1}
for earthquakes in db.earthquakes.find({}, projection).sort("properties.time", 1).limit(1):
    ttime = earthquakes["properties"]["time"]
    ctime = datetime.datetime.fromtimestamp(int(ttime/1000))
    paramD["endtime"] = ctime.isoformat()
    
count = paramD["limit"] # Sets count for the while loop

# While loop runs as long as the limit parameter equals count and also only when 
# starttime is less than endtime to prevent query issues
while count == paramD["limit"] and paramD["starttime"] < paramD["endtime"]:
    params = urllib.parse.urlencode(paramD)
    print('Retrieving', earthquakeURL+params)
    uh = urllib.request.urlopen(earthquakeURL+params)
    data = uh.read().decode()
    print('Retrieved', len(data), 'characters')

    try:
        js = json.loads(data)
    except:
        js = None
    
    # After loading data read the count field of the returned data
    count = (js["metadata"]["count"])
    print('The current count of metadata returned is:', count, '\n')
    
    # Error handling, only continue while loop only if count equals "limit"
    if count != paramD["limit"]:
        break
    
    if not js or 'type' not in js :
        print('==== Failure To Retrieve ====')
        print(data)
    
    # Insert new earthquake data into MongoDB
    db.earthquakes.insert_many(js["features"])
    #print("The new data are {}".format(db.earthquakes.inserted_ids)) #Not needed, used for verification
    
    # Assigns features document to eqk var to reduce wordiness and ease of use
    eqk = js["features"]
    
    # Create list to store batch of time and a for loop to append time to list.
    # Then we convert time from epoch time to ISO format
    etime = []
    i = 0
    for e in eqk:
        etime.append(e["properties"]["time"])
        ts = datetime.datetime.fromtimestamp(int(etime[i]/1000))
        etime[i] = ts.isoformat()
        i += 1
    
    # Assign the earliest time to the endtime var to avoid duplicate data on query
    paramD["endtime"] = str(etime[-1])
    print("Earliest time in batch(End Time):", paramD["endtime"]) #Used for troubleshooting
    
    # Sleep timer to avoid being blocked or timedout of site for too many requests
    time.sleep(2)

Retrieving http://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2003-11-20T00%3A00%3A00&endtime=2003-11-26T11%3A37%3A58&minmag=2.5&limit=500
Retrieved 316628 characters
The current count of metadata returned is: 418 

