# Cloud merge demo: NOAA ISD + GHCN
We'll read two datasets directly from the cloud and perform a simple merge using rounded lat/lon keys.

- ISD metadata (CSV): `https://noaa-isd-pds.s3.amazonaws.com/isd-history.csv`
- GHCN stations (fixed-width): `https://noaa-ghcn-pds.s3.amazonaws.com/ghcnd-stations.txt`

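> Rounding coordinates to two decimals (about 1 km in latitude) is only a rough proximity match: neighbouring stations can land on opposite sides of a rounding boundary, and unrelated stations can share a key. For production work, use a spatial join or consistent IDs.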


In [None]:
import pandas as pd
from dfmerge import read_table, read_ghcn_stations, merge_frames
# Public S3 locations of the two station catalogues used in this demo.
isd_url = 'https://noaa-isd-pds.s3.amazonaws.com/isd-history.csv'
ghcn_url = 'https://noaa-ghcn-pds.s3.amazonaws.com/ghcnd-stations.txt'

# Load both tables straight from their URLs via the dfmerge readers.
isd = read_table(isd_url)
ghcn = read_ghcn_stations(ghcn_url)

# Map the ISD headers onto snake_case names; the mapping's value order is
# also the column order kept in the trimmed frame.
isd_column_names = {
    'USAF': 'usaf',
    'WBAN': 'wban',
    'STATION NAME': 'station_name',
    'CTRY': 'country',
    'LAT': 'lat',
    'LON': 'lon',
}
isd = isd.rename(columns=isd_column_names)
isd = isd[list(isd_column_names.values())].dropna(subset=['lat', 'lon'])

# Keep only the GHCN columns of interest; rows lacking a coordinate cannot
# be keyed, so they are dropped from both frames.
ghcn_columns = ['station_id', 'name', 'state', 'lat', 'lon']
ghcn = ghcn[ghcn_columns].dropna(subset=['lat', 'lon'])

# Build the join keys in place: each coordinate rounded to two decimals.
round_keys = ('lat_round', 'lon_round')
for df in (isd, ghcn):
    for key_col, coord_col in zip(round_keys, ('lat', 'lon')):
        df[key_col] = df[coord_col].round(2)

# Inner join on the rounded coordinates.
# NOTE(review): merge_frames is a project helper; it appears to attach a
# 'merge_info' entry to the result's attrs -- confirm against dfmerge.
merged = merge_frames(
    isd,
    ghcn,
    left_on=list(round_keys),
    right_on=list(round_keys),
    how='inner',
    indicator=True,
)

# Last expression of the cell: shown as the notebook output.
merged.attrs['merge_info'], merged.head()

In [None]:
# Repeat the join as an outer merge so unmatched rows from either side are
# kept, then summarise how many rows fall into each '_merge' category.
# NOTE(review): the '_merge' column is presumably produced by indicator=True,
# as in pandas' own merge -- confirm against dfmerge.merge_frames.
outer = merge_frames(
    isd,
    ghcn,
    left_on=['lat_round', 'lon_round'],
    right_on=['lat_round', 'lon_round'],
    how='outer',
    indicator=True,
)
outer_info = outer.attrs['merge_info']
outer_counts = outer['_merge'].value_counts().to_frame('count')

# Last expression of the cell: shown as the notebook output.
outer_info, outer_counts