## Download Landsat 8 Data


In [6]:
# load libraries
import json
import requests
from dotenv import dotenv_values
import sys
import os
import time
import re
import geopandas as gpd

# show the current working directory; the relative paths used below
# (e.g. '.env') are resolved against it
os.getcwd()

'/Users/chasedawson/dev/uva_equity_center/climate_equity'

In [18]:
## Methods for working with USGS API ##

# API base URL; endpoint names such as "login" or "scene-search" are
# appended to it to build the full request URL
SERVICE_URL = "https://m2m.cr.usgs.gov/api/api/json/stable/"

def login(username, password):
    """
    Logs in to the USGS API and hands back the API key for the session.
    
    Parameters
    ----------
    username : str, required
        USGS account username.
        
    password : str, required
        USGS account password.
        
    Output
    ------
    API key returned by the "login" endpoint, or None if no key came back.
        
    Notes 
    -----
    Accounts and API access are requested at
    https://ers.cr.usgs.gov/profile/access.
    
    """
    credentials = {'username': username, 'password': password}

    # the "login" endpoint is the one call that needs no API key
    token = sendRequest(SERVICE_URL + "login", credentials)

    status = "Login Failed\n\n" if token is None else "Login Successful\n\n"
    print(status)

    return token

def logout(apiKey):
    """
    Ends the session by invalidating the given API key.
    
    Parameters
    ----------
    apiKey : str, required
        Valid API key, as returned by login().
        
    Notes
    -----
    Call this once you are done working so the key cannot be reused
    by an unauthorized user.
    
    """
    result = sendRequest(SERVICE_URL + "logout", None, apiKey)

    # the call counts as successful when the endpoint sends back no data
    if result is not None:
        print("Logout Failed\n\n")
        return

    print("Logged Out\n\n")

def sendRequest(url, data, apiKey = None):
    """
    Sends an HTTPS POST request to the specified API endpoint. Main method for
    interacting with the API.
    
    Parameters
    ----------
    url : str, required
        API endpoint you wish to access. Typical format is SERVICE_URL + endpoint, 
        where endpoint might be something like "login" or "data-search". See
        https://m2m.cr.usgs.gov/api/docs/reference/ for all available endpoints.
        
    data : dict, required
        Request payload (serialized to JSON). The data required changes based on
        the API endpoint. See https://m2m.cr.usgs.gov/api/docs/reference/ for input
        parameters, sample requests, and sample responses for available endpoints.
        
    apiKey : str, optional (default is None)
        Valid API key. Must be specified for most requests. The "login" endpoint
        doesn't require an API key since you use that endpoint to retrieve one.
        
    Output
    ------
    The 'data' entry of the decoded JSON response.
    
    Notes
    -----
    On any failure (API error code in the body, HTTP status >= 400, or a body
    that is not JSON) a message is printed and sys.exit() is called, so callers
    only ever see successful results. The response is always closed.
    
    """
    json_data = json.dumps(data)
    
    # the auth header is only sent when a key is supplied
    headers = None if apiKey is None else {'X-Auth-Token': apiKey}
    response = requests.post(url, json_data, headers = headers)
          
    try:
        httpStatusCode = response.status_code
        
        # error pages (e.g. a plain 404) are not necessarily JSON; decode
        # defensively so the status checks below can still report them
        try:
            output = json.loads(response.text)
        except ValueError:
            output = None
            
        # an API-level error message is more informative than the bare status,
        # so report it first
        if isinstance(output, dict) and output.get('errorCode') is not None:
            print(output['errorCode'], "- ", output.get('errorMessage'))
            sys.exit()
            
        if httpStatusCode == 404:
            print("404 Not Found")
            sys.exit()
            
        elif httpStatusCode == 401:
            print("401 Unauthorized")
            sys.exit()
            
        elif httpStatusCode >= 400:
            print("Error Code", httpStatusCode)
            sys.exit()
            
        if output is None:
            print("No output from service!")
            sys.exit()
            
    except Exception as e:
        print(e)
        sys.exit()
        
    finally:
        # runs for SystemExit as well, which `except Exception` does not catch
        response.close()
    
    return output['data']

def getFilename_fromCd(cd):
    """
    Uses content-disposition to infer filename and filetype.
    
    Parameters
    ----------
    cd : str, required
        The Content-Disposition response header from HTTP request 
        to download a file. May be None or empty.
        
    Output
    ------
    Inferred filename (including extension) of provided file : str
        None is returned when the header is missing or carries no
        usable filename parameter.
    """
    if not cd:
        return None
    
    # A quoted value runs to the closing quote (it may contain ';'); an
    # unquoted value stops at the next ';' so trailing header parameters
    # such as "; size=123" do not end up in the filename.
    match = re.search(r'filename=\s*(?:"([^"]*)"|([^;]+))', cd, flags=re.IGNORECASE)
    if match is None:
        return None
    
    quoted, bare = match.groups()
    name = quoted if quoted is not None else bare
    
    # drop stray quotes (e.g. an unterminated opening quote) and padding
    name = name.replace('"', '').strip()
    return name or None

def download_file(url):
    """
    Saves file to local system (current working directory).
    
    Parameters
    ----------
        url: str, required
            Link to file to be downloaded.
            
    Output
    ------
    Path to downloaded file : str
    
    Notes
    -----
    The body is streamed to disk in chunks instead of being held in memory.
    The filename comes from the Content-Disposition header, falling back to
    the last path segment of the URL. Raises requests.HTTPError for an HTTP
    error status and ValueError when no filename can be determined.
    """
    from urllib.parse import urlparse  # only needed for the filename fallback
    
    with requests.get(url, stream=True) as res:
        # don't save an HTML/JSON error page under the archive's name
        res.raise_for_status()
        
        filename = getFilename_fromCd(res.headers.get('content-disposition'))
        if filename is None:
            filename = urlparse(res.url).path
            
        # keep only the final component so a server-supplied name cannot
        # point outside the working directory
        filename = os.path.basename(filename)
        if not filename:
            raise ValueError("Could not determine a filename for " + url)
        
        with open(filename, 'wb') as out:
            for chunk in res.iter_content(chunk_size=1024 * 1024):
                out.write(chunk)
                
    return filename
    
def search_scenes(apiKey, bounds, start_date, end_date, dataset = "landsat_ot_c2_l2", cloud_cover_min = 0, cloud_cover_max = 10, max_results = 5):
    """
    Search specified dataset for scenes given spatial and temporal filters.
    
    Parameters
    ----------
    apiKey : str, required
        Valid API key.
        
    bounds: dict, required
        Dictionary with two entries: 'lowerLeft' and 'upperRight', each a dict
        with 'latitude' and 'longitude' keys, giving the corners of the
        bounding box covering the area of interest.
        
    start_date: str, required
        Format: YYYY-MM-DD
        
    end_date: str, required
        Format: YYYY-MM-DD
        
    dataset: str, optional (default is 'landsat_ot_c2_l2')
        Dataset alias. Use the 'dataset-search' endpoint to discover
        which datasets are available.
        
    cloud_cover_min : int, optional (default is 0)
        Minimum cloud coverage percentage. Scenes with cloud coverage less
        than this value will not be included in the result.
        
    cloud_cover_max: int, optional (default is 10)
        Maximum cloud coverage percentage. Scenes with cloud coverage greater
        than this value will not be included in the result.
        
    max_results : int, optional (default is 5)
        Maximum number of scenes requested from the API.
        
    Output
    ------
    Data returned by the 'scene-search' endpoint; it includes the
    'recordsReturned' count and the 'results' list read by get_sceneIds().
        
    """
    payload = {
        'datasetName': dataset,
        'maxResults': max_results,
        'startingNumber': 1,
        'sceneFilter': {
            'spatialFilter': {
                'filterType': 'mbr',
                'lowerLeft': bounds['lowerLeft'],
                'upperRight': bounds['upperRight']
            },
            'acquisitionFilter': {
                'start': start_date,
                'end': end_date
            },
            'cloudCoverFilter': {
                # honor the caller's limits (these were previously fixed at 0 and 10)
                'max': cloud_cover_max,
                'min': cloud_cover_min,
                'includeUnknown': False,
            }
        }
    }
    
    print("Searching Scenes...")
    scenes = sendRequest(SERVICE_URL + "scene-search", payload, apiKey)
    print("Found {num_scenes} Scene(s).".format(num_scenes = scenes['recordsReturned']))
    
    return scenes

def get_sceneIds(scenes):
    """
    Collects the entity id of every scene in a search result.
    
    Parameters
    ----------
    scenes : object, required
        Output from search_scenes(); its 'results' entry is read.
        
    
    Output
    ------
    scene ids, in result order : list
    
    """
    return [scene['entityId'] for scene in scenes['results']]

def download_scenes(apiKey, scenes, label, dataset = "landsat_ot_c2_l2"):
    """
    Downloads scenes.
    
    Requests every available product for the given scenes and saves each one
    to the current working directory. Products that are ready immediately are
    downloaded right away; if some are still being prepared, the
    'download-retrieve' endpoint is polled every 30 seconds and each product
    is downloaded as soon as it is listed as available.
    
    Parameters
    ----------
    apiKey : str, required
        Valid API key.
        
    scenes : object, required
        Scenes you wish to download. Returned from search_scenes().
        
    label : str, required
        Label for your download request.
        
    dataset : str, optional (default is 'landsat_ot_c2_l2')
        Must be the dataset the scenes are from. 
        
    Output
    ------
    Paths to downloaded files : list
        Empty when no product is available for the scenes.
    """
    
    sceneIds = get_sceneIds(scenes)
    
    # download options
    payload = {
        'datasetName': dataset,
        'entityIds': sceneIds,
    }
    
    downloadOptions = sendRequest(SERVICE_URL + "download-options", payload, apiKey)
    
    # aggregate list of available products
    downloads = [
        {'entityId': product['entityId'], 'productId': product['id']}
        for product in downloadOptions
        if product['available']
    ]
    
    if not downloads:
        print("No available products.")
        return []
    
    payload = {
        'downloads': downloads,
        'label': label
    }
    requestResults = sendRequest(SERVICE_URL + "download-request", payload, apiKey)
    
    files = []
    
    if requestResults['preparingDownloads']:
        # Some products are not ready yet: poll by label and download each
        # one as it becomes available.
        # NOTE(review): assumes a 'failed' list in the response names products
        # that will never become available — confirm against the API docs.
        expectedCount = len(downloads) - len(requestResults.get('failed') or [])
        
        # NOTE(review): retrieval is by label only, so reusing a label from an
        # earlier request may pull in that request's downloads too — confirm,
        # or use a unique label per call.
        payload = {'label': label}
        downloadIds = set()
        
        while True:
            downloadUrls = sendRequest(SERVICE_URL + "download-retrieve", payload, apiKey)
            for download in downloadUrls['available']:
                if download['downloadId'] not in downloadIds:
                    downloadIds.add(download['downloadId'])
                    files.append(download_file(download['url']))
            
            if len(downloadIds) >= expectedCount:
                break
            
            preparingDownloads = expectedCount - len(downloadIds)
            print('\n', preparingDownloads, "download(s) are not yet available. Waiting for 30 seconds.\n")
            time.sleep(30)
            print("Trying to retrieve data.\n")
    else:
        # get all available downloads
        for download in requestResults['availableDownloads']:
            files.append(download_file(download['url']))
    
    print("\nAll files have been downloaded.\n")
    return files
        
        

In [19]:
# log in to retrieve API key
# credentials are read from a local .env file (keys USGS_USERNAME and
# USGS_PASSWORD) so they are not hard-coded in the notebook
config = dotenv_values('.env')
apiKey = login(config['USGS_USERNAME'], config['USGS_PASSWORD'])

Login Successful




In [20]:
# dataset alias; same value as the default `dataset` argument of
# search_scenes() and download_scenes()
# NOTE: the calls below do not pass it explicitly, they rely on that default
DATASET_NAME = "landsat_ot_c2_l2"

In [21]:
# define bounds
# each bounding box gives its 'lowerLeft' and 'upperRight' corners as
# latitude/longitude pairs; search_scenes() forwards them to the spatial filter
easternShore_bounds = {'upperRight': {'latitude': 38.08422, 'longitude': -75.07674},
                       'lowerLeft': {'latitude': 37.02165, 'longitude': -76.16713}}

cville_bounds = {'upperRight': {'latitude': 38.07493, 'longitude': -78.43942},
                'lowerLeft': {'latitude': 38.00855, 'longitude': -78.54259}} 

In [22]:
# define temporal filter start and end constants
# (YYYY-MM-DD strings, the format search_scenes() documents for its dates)
SUMMER_START = "2020-06-20"
SUMMER_END = "2020-09-22"

In [23]:
# get data for Charlottesville
# search the summer window, then download the matching scenes;
# the label names this download request
cville_scenes = search_scenes(apiKey, cville_bounds, SUMMER_START, SUMMER_END)
cville_files = download_scenes(apiKey, cville_scenes, 'cville_summer_2020')
print(cville_files)

Searching Scenes...
Found 5 Scene(s).

All files have been downloaded.

['LC08_L2SP_016034_20200922_20201005_02_T1.tar', 'LC08_L2SP_016034_20200906_20200918_02_T1.tar', 'LC08_L2SP_016034_20200720_20210330_02_T1.tar', 'LC08_L2SP_016033_20200922_20201005_02_T1.tar', 'LC08_L2SP_016033_20200906_20200918_02_T1.tar']


In [24]:
# get data for the Eastern Shore (same summer window as above)
easternShore_scenes = search_scenes(apiKey, easternShore_bounds, SUMMER_START, SUMMER_END)
easternShore_files = download_scenes(apiKey, easternShore_scenes, 'easternShore_summer_2020')
print(easternShore_files)

Searching Scenes...
Found 5 Scene(s).

All files have been downloaded.

['LC08_L2SP_015034_20200713_20200912_02_T1.tar', 'LC08_L2SP_015033_20200729_20210330_02_T1.tar', 'LC08_L2SP_014034_20200722_20200911_02_T1.tar', 'LC08_L2SP_013035_20200715_20200912_02_T1.tar', 'LC08_L2SP_013034_20200715_20200912_02_T2.tar']


In [None]:
# log out of the API now that the searches and downloads are done
# if successful, apiKey is now invalid and login() must be called again
# before any further request
logout(apiKey)

In [27]:
# Create the folders the scenes are extracted into. Each `!` line runs in its
# own subshell, so a `!cd` does not carry over to the following commands and
# the sub-folders would be created next to (not inside) landsat8_c2_l2_data.
# Building the nested paths explicitly avoids that; exist_ok makes the cell
# safe to re-run.
os.makedirs("landsat8_c2_l2_data/cville", exist_ok=True)
os.makedirs("landsat8_c2_l2_data/easternShore", exist_ok=True)

In [31]:
import tarfile

def extract_tar(tar, extract_to):
    """
    Extracts tar file to specified location.
    
    Parameters
    ----------
    tar : str, required
        Path to tar file that will be extracted.
        
    extract_to : str, required
        Path to folder in which the tar file will be extracted.
        
    Outputs
    -------
    None
    
    Notes
    -----
    NOTE(review): extractall() trusts the member paths stored in the archive;
    only use this on archives from a trusted source.
    """
    print("Extracting", tar, "...")
    # the context manager closes the archive even if extraction fails
    with tarfile.open(tar) as my_tar:
        my_tar.extractall(extract_to)
    print("Done.")

In [30]:
# define the destination folders the tar files are extracted into,
# one per geographic region of interest (relative to the working directory)
cville_tar_path = "./landsat8_c2_l2_data/cville"
easternShore_tar_path = "./landsat8_c2_l2_data/easternShore"

In [32]:
# extract cville files and easternShore files
# every archive of a region is unpacked into that region's single folder
for tar in cville_files:
    extract_tar(tar, cville_tar_path)
    
for tar in easternShore_files:
    extract_tar(tar, easternShore_tar_path)

Extracting LC08_L2SP_016034_20200922_20201005_02_T1.tar ...
Done.
Extracting LC08_L2SP_016034_20200906_20200918_02_T1.tar ...
Done.
Extracting LC08_L2SP_016034_20200720_20210330_02_T1.tar ...
Done.
Extracting LC08_L2SP_016033_20200922_20201005_02_T1.tar ...
Done.
Extracting LC08_L2SP_016033_20200906_20200918_02_T1.tar ...
Done.
Extracting LC08_L2SP_015034_20200713_20200912_02_T1.tar ...
Done.
Extracting LC08_L2SP_015033_20200729_20210330_02_T1.tar ...
Done.
Extracting LC08_L2SP_014034_20200722_20200911_02_T1.tar ...
Done.
Extracting LC08_L2SP_013035_20200715_20200912_02_T1.tar ...
Done.
Extracting LC08_L2SP_013034_20200715_20200912_02_T2.tar ...
Done.


In [33]:
# verify that cville data has been extracted
! ls landsat8_c2_l2_data/cville

LC08_L2SP_016033_20200906_20200918_02_T1_ANG.txt
LC08_L2SP_016033_20200906_20200918_02_T1_MD5.txt
LC08_L2SP_016033_20200906_20200918_02_T1_MTL.txt
LC08_L2SP_016033_20200906_20200918_02_T1_MTL.xml
LC08_L2SP_016033_20200906_20200918_02_T1_QA_PIXEL.TIF
LC08_L2SP_016033_20200906_20200918_02_T1_QA_RADSAT.TIF
LC08_L2SP_016033_20200906_20200918_02_T1_SR_B1.TIF
LC08_L2SP_016033_20200906_20200918_02_T1_SR_B2.TIF
LC08_L2SP_016033_20200906_20200918_02_T1_SR_B3.TIF
LC08_L2SP_016033_20200906_20200918_02_T1_SR_B4.TIF
LC08_L2SP_016033_20200906_20200918_02_T1_SR_B5.TIF
LC08_L2SP_016033_20200906_20200918_02_T1_SR_B6.TIF
LC08_L2SP_016033_20200906_20200918_02_T1_SR_B7.TIF
LC08_L2SP_016033_20200906_20200918_02_T1_SR_QA_AEROSOL.TIF
LC08_L2SP_016033_20200906_20200918_02_T1_ST_ATRAN.TIF
LC08_L2SP_016033_20200906_20200918_02_T1_ST_B10.TIF
LC08_L2SP_016033_20200906_20200918_02_T1_ST_CDIST.TIF
LC08_L2SP_016033_20200906_20200918_02_T1_ST_DRAD.TIF
LC08_L2SP_016033_20200906_20200918_02_T1_ST_EMIS

In [35]:
# verify that easternShore data has been extracted
! ls landsat8_c2_l2_data/easternShore

LC08_L2SP_013034_20200715_20200912_02_T2_ANG.txt
LC08_L2SP_013034_20200715_20200912_02_T2_MD5.txt
LC08_L2SP_013034_20200715_20200912_02_T2_MTL.txt
LC08_L2SP_013034_20200715_20200912_02_T2_MTL.xml
LC08_L2SP_013034_20200715_20200912_02_T2_QA_PIXEL.TIF
LC08_L2SP_013034_20200715_20200912_02_T2_QA_RADSAT.TIF
LC08_L2SP_013034_20200715_20200912_02_T2_SR_B1.TIF
LC08_L2SP_013034_20200715_20200912_02_T2_SR_B2.TIF
LC08_L2SP_013034_20200715_20200912_02_T2_SR_B3.TIF
LC08_L2SP_013034_20200715_20200912_02_T2_SR_B4.TIF
LC08_L2SP_013034_20200715_20200912_02_T2_SR_B5.TIF
LC08_L2SP_013034_20200715_20200912_02_T2_SR_B6.TIF
LC08_L2SP_013034_20200715_20200912_02_T2_SR_B7.TIF
LC08_L2SP_013034_20200715_20200912_02_T2_SR_QA_AEROSOL.TIF
LC08_L2SP_013034_20200715_20200912_02_T2_ST_ATRAN.TIF
LC08_L2SP_013034_20200715_20200912_02_T2_ST_B10.TIF
LC08_L2SP_013034_20200715_20200912_02_T2_ST_CDIST.TIF
LC08_L2SP_013034_20200715_20200912_02_T2_ST_DRAD.TIF
LC08_L2SP_013034_20200715_20200912_02_T2_ST_EMIS

In [36]:
# delete original tar files
# (their contents were extracted above, so the archives are no longer needed)
for tar in cville_files:
    os.remove(tar)
    
for tar in easternShore_files:
    os.remove(tar)

In [39]:
def remove_tar_endings(files):
    """
    Removes .tar endings from a list of filenames.
    
    Only a trailing ".tar" is stripped; any other dots in the name or path
    (e.g. "./data/scene.tar") are left untouched, and names without the
    ending are returned unchanged.
    
    Parameters
    ----------
    files : list, required
        List of tar filenames.
        
    Output
    ------
    list of filenames with .tar endings removed : list
    
    """
    suffix = '.tar'
    return [
        file[:-len(suffix)] if file.endswith(suffix) else file
        for file in files
    ]

In [41]:
# remove .tar endings
# from here on both lists hold scene names rather than paths to .tar files
# (the archives themselves were deleted above)
cville_files = remove_tar_endings(cville_files)
easternShore_files = remove_tar_endings(easternShore_files)

## Read in Spatial Data

In [45]:
import pyreadr

In [48]:
def read_RDS(rds_file):
    """
    Loads an RDS file through pyreadr.
    
    Parameters
    ----------
    rds_file : str, required
        Path to RDS file that will be read.
        
    Output
    ------
    The object stored under the key None in pyreadr's result
    (presumably a pandas DataFrame).
    """
    print("Reading", rds_file, "...")
    return pyreadr.read_r(rds_file)[None]

In [52]:
# change working directory to the spatial_units data folder
# NOTE: this affects every relative path used afterwards; the
# "./landsat8_c2_l2_data/..." paths defined earlier no longer point at the
# extracted scenes once this cell has run
os.chdir("../spatial_units/data")
os.getcwd()

'/Users/chasedawson/dev/uva_equity_center/spatial_units/data'

In [53]:
# NOTE(review): the recorded run of this cell failed with
# "LibrdataError: Invalid file, or file has unsupported features",
# so cville_counties was not loaded; the input file or the reader
# needs to be revisited before the clipping step
cville_counties = read_RDS("cville_counties.RDS")

Reading cville_counties.RDS ...


LibrdataError: Invalid file, or file has unsupported features

## Clip Raster Data with Shapefile

In [42]:
import matplotlib.pyplot as plt
import seaborn as sns
from shapely.geometry import mapping
import rioxarray as rxr
import xarray as xr
import earthpy as et
import earthpy.plot as ep

%matplotlib inline

In [43]:
# filename ending for raster temperature data
# (matches the "<scene name>_ST_B10.TIF" files in the extracted folders)
ST_ENDING = "_ST_B10.TIF"

In [None]:
# TODO
# 2. clip raster images by shp files of cville city and counties in eastern shore
# 3. compute zonal raster statistics for areas of interest: county level, block level, tract level etc
# 4. save these stats in csvs

In [None]:
# References
# [Downloading Files From Web Using Python](https://www.tutorialspoint.com/downloading-files-from-web-using-python)
# [Jupyter Tips and Tricks](https://chrieke.medium.com/jupyter-tips-and-tricks-994fdddb2057)
# [USGS/EROS Inventory Service Documentation (Machine-to-Machine) API](https://m2m.cr.usgs.gov/api/docs/json/#section-overview)
# [How are files extracted from a tar file using Python?](https://www.tutorialspoint.com/How-are-files-extracted-from-a-tar-file-using-Python)