In [20]:

from google.colab import drive
drive.mount('/content/drive')

import os

BASE_DIR = "/content/drive/MyDrive/MSSP6070"
DATA_DIR = os.path.join(BASE_DIR, "data")
NB_DIR   = os.path.join(BASE_DIR, "notebooks")

os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(NB_DIR,   exist_ok=True)

print("Base folder:", BASE_DIR)
print("Put parsed.csv in:", DATA_DIR)
print("Notebooks go in:", NB_DIR)




Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Base folder: /content/drive/MyDrive/MSSP6070
Put parsed.csv in: /content/drive/MyDrive/MSSP6070/data
Notebooks go in: /content/drive/MyDrive/MSSP6070/notebooks


In [21]:
import pandas as pd
import numpy as np
import re
from IPython.display import display


csv_path = f"{DATA_DIR}/parsed.csv"

df = pd.read_csv(csv_path)
df.columns = [c.strip().lower() for c in df.columns]
cols = set(df.columns)


if 'mag' not in cols and 'magnitude' in cols:
    df['mag'] = pd.to_numeric(df['magnitude'], errors='coerce')
else:
    df['mag'] = pd.to_numeric(df.get('mag'), errors='coerce')

# Column names
magtype_col  = 'magtype' if 'magtype' in cols else ('mag_type' if 'mag_type' in cols else None)
tsunami_col  = 'tsunami' if 'tsunami' in cols else None
place_col    = 'place' if 'place' in cols else None
country_col  = 'country' if 'country' in cols else None
state_col    = 'state' if 'state' in cols else None

print("Rows:", len(df))
print("Columns:", sorted(df.columns))

# --- Helper functions (always return bools) ---
def _has(word, text):
    """True if `word` appears as a whole word in `text` (case-insensitive)."""
    if not isinstance(text, str):
        return False
    return bool(re.search(rf"\b{re.escape(word)}\b", text, re.IGNORECASE))

def is_country(row, name):
    if country_col and isinstance(row.get(country_col), str):
        return row[country_col].strip().lower() == name.lower()
    return _has(name, row.get(place_col))

def is_state(row, name):
    if state_col and isinstance(row.get(state_col), str):
        return row[state_col].strip().lower() == name.lower()
    return _has(name, row.get(place_col))

def is_japan(row):     return is_country(row, 'japan')
def is_indonesia(row): return is_country(row, 'indonesia')
def is_nevada(row):    return is_state(row, 'nevada') or _has('nevada', row.get(place_col))


Rows: 9332
Columns: ['alert', 'cdi', 'code', 'detail', 'dmin', 'felt', 'gap', 'ids', 'mag', 'magtype', 'mmi', 'net', 'nst', 'parsed_place', 'place', 'rms', 'sig', 'sources', 'status', 'time', 'title', 'tsunami', 'type', 'types', 'tz', 'updated', 'url']


In [16]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [22]:
#a: 95th percentile magnitude in Japan (magType='mb')
if magtype_col is not None:
    mask_japan_mb = df.apply(is_japan, axis=1) & (df[magtype_col].astype(str).str.lower() == 'mb')
    mag_95_japan_mb = df.loc[mask_japan_mb, 'mag'].quantile(0.95)
else:
    # If the dataset has no magType, compute for all Japan records
    mask_japan_mb = df.apply(is_japan, axis=1)
    mag_95_japan_mb = df.loc[mask_japan_mb, 'mag'].quantile(0.95)

print("2a) 95th percentile magnitude (Japan, magType='mb'):",
      None if pd.isna(mag_95_japan_mb) else round(float(mag_95_japan_mb), 3))


2a) 95th percentile magnitude (Japan, magType='mb'): 4.9


In [23]:
#(b) % of Indonesia quakes that had tsunamis
if tsunami_col is None:
    pct_indo_tsunami = np.nan
    total_indo = np.nan
    tsunami_indo = np.nan
    print("2b) No 'tsunami' column in this dataset.")
else:
    mask_indo = df.apply(is_indonesia, axis=1)
    total_indo = int(mask_indo.sum())
    tsunami_indo = int((df.loc[mask_indo, tsunami_col].fillna(0).astype(float) > 0).sum())
    pct_indo_tsunami = (tsunami_indo / total_indo * 100.0) if total_indo > 0 else np.nan

    print("2b) Indonesia quakes:", total_indo)
    print("    Indonesia quakes w/ tsunami:", tsunami_indo)
    print("    % with tsunami:", None if pd.isna(pct_indo_tsunami) else round(float(pct_indo_tsunami), 2))


2b) Indonesia quakes: 147
    Indonesia quakes w/ tsunami: 34
    % with tsunami: 23.13


In [24]:
#c) Summary statistics for Nevada
mask_nv = df.apply(is_nevada, axis=1)
nv = df.loc[mask_nv]

print("2c) Nevada summary (describe):")
display(nv.describe(include='all'))

num_summary = nv.select_dtypes(include=[np.number]).describe().T
display(num_summary)


2c) Nevada summary (describe):


Unnamed: 0,alert,cdi,code,detail,dmin,felt,gap,ids,mag,magtype,...,status,time,title,tsunami,type,types,tz,updated,url,parsed_place
count,0.0,15.0,677.0,677,677.0,15.0,677.0,677,677.0,677,...,677,677.0,677,677.0,677,677,677.0,677.0,677,677
unique,0.0,,677.0,677,,,,677,,1,...,2,,626,,2,6,,,677,1
top,,,657031.0,https://earthquake.usgs.gov/fdsnws/event/1/que...,,,,",nn00657031,",,ml,...,reviewed,,"M -0.3 - 55km ENE of Beatty, Nevada",,earthquake,",geoserve,origin,phase-data,",,,https://earthquake.usgs.gov/earthquakes/eventp...,Nevada
freq,,,1.0,1,,,,1,,677,...,671,,4,,646,651,,,1,677
mean,,2.44,,,0.166982,2.4,154.029527,,0.491728,,...,,1538314000000.0,,0.0,,,-480.0,1538402000000.0,,
std,,0.501142,,,0.1664,4.626013,68.769713,,0.68956,,...,,595407000.0,,0.0,,,0.0,600026700.0,,
min,,2.0,,,0.001,1.0,29.14,,-0.5,,...,,1537247000000.0,,0.0,,,-480.0,1537307000000.0,,
25%,,2.0,,,0.054,1.0,97.67,,-0.1,,...,,1537854000000.0,,0.0,,,-480.0,1537928000000.0,,
50%,,2.2,,,0.113,1.0,149.55,,0.4,,...,,1538280000000.0,,0.0,,,-480.0,1538428000000.0,,
75%,,2.9,,,0.234,1.0,200.47,,0.9,,...,,1538821000000.0,,0.0,,,-480.0,1538878000000.0,,


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
cdi,15.0,2.44,0.5011416,2.0,2.0,2.2,2.9,3.3
dmin,677.0,0.1669823,0.1664003,0.001,0.054,0.113,0.234,1.414
felt,15.0,2.4,4.626013,1.0,1.0,1.0,1.0,19.0
gap,677.0,154.0295,68.76971,29.14,97.67,149.55,200.47,355.91
mag,677.0,0.4917282,0.6895604,-0.5,-0.1,0.4,0.9,2.9
mmi,1.0,2.84,,2.84,2.84,2.84,2.84,2.84
nst,677.0,12.60857,9.89062,3.0,6.0,9.0,16.0,61.0
rms,677.0,0.1519092,0.08474153,0.0005,0.1069,0.1463,0.1867,0.8634
sig,677.0,10.68833,19.25273,0.0,0.0,2.0,12.0,129.0
time,677.0,1538314000000.0,595407000.0,1537247000000.0,1537854000000.0,1538280000000.0,1538821000000.0,1539461000000.0


In [25]:
#d: Add ring_of_fire column
ring_terms = [
    r'\bbolivia\b',
    r'\bchile\b',
    r'\becuador\b',
    r'\bperu\b',
    r'\bcosta\s*rica\b',
    r'\bguatemala\b',
    r'(?<!new\s)\bmexico\b',   # Mexico but not "New Mexico"
    r'\bjapan\b',
    r'\bphilippines?\b',
    r'\bindonesia\b',
    r'\bnew\s*zealand\b',
    r'\bantarctic\w*',         # Antarctica / Antarctic
    r'\bcanada\b',
    r'\bfiji\b',
    r'\balaska\b',
    r'\bwashington\b',
    r'\bcalifornia\b',
    r'\brussia\b',
    r'\btaiwan\b',
    r'\btonga\b',
    r'\bkermadec\s+islands?\b'
]
ring_regex = re.compile("|".join(ring_terms), re.IGNORECASE)

def on_ring_of_fire(row):
    pieces = []
    for k in ['country', 'state', 'place']:
        if k in df.columns and isinstance(row.get(k), str):
            pieces.append(row[k])
    return bool(ring_regex.search(" | ".join(pieces)))

df['ring_of_fire'] = df.apply(on_ring_of_fire, axis=1)
print("2d) Added column: ring_of_fire")
df[['place','country' if 'country' in df.columns else 'mag', 'ring_of_fire']].head()


2d) Added column: ring_of_fire


Unnamed: 0,place,mag,ring_of_fire
0,"9km NE of Aguanga, CA",1.35,False
1,"9km NE of Aguanga, CA",1.29,False
2,"8km NE of Aguanga, CA",3.42,False
3,"9km NE of Aguanga, CA",0.44,False
4,"10km NW of Avenal, CA",2.16,False


In [26]:
#(e) Counts on/off Ring of Fire
counts = df['ring_of_fire'].value_counts(dropna=False)
in_ring  = int(counts.get(True, 0))
out_ring = int(counts.get(False, 0))

print("2e) In Ring of Fire:", in_ring)
print("    Outside Ring of Fire:", out_ring)


2e) In Ring of Fire: 4426
    Outside Ring of Fire: 4906


In [27]:
#f:Tsunami count along the Ring of Fire
if tsunami_col is None:
    print("2f) No 'tsunami' column in this dataset.")
else:
    tsunami_on_ring = int((df.loc[df['ring_of_fire'] == True, tsunami_col].fillna(0).astype(float) > 0).sum())
    print("2f) Tsunami count along the Ring of Fire:", tsunami_on_ring)


2f) Tsunami count along the Ring of Fire: 43
