In [None]:
import sys, os, json, importlib, zipfile
import rasterio, geohash
import geopandas as gpd
import pandas as pd
import numpy as np

#Get reference to imagery object
sys.path.append('../src')

from ImageryObjects import imageryExtents

# Load the official Admin0 boundary polygons and reproject them to EPSG:4326.
# Later cells intersect image footprints with this layer to look up ISO3 codes.
# The path lives in its own variable so that `globalBoundaries` only ever
# refers to the GeoDataFrame, never to a string.
globalBoundariesFile = r"R:\GLOBAL\ADMIN\Official Bank Borders\Polygons\Admin0\Admin0_Polys.shp"
# `epsg=4326` replaces the deprecated {'init': 'epsg:4326'} dict syntax.
globalBoundaries = gpd.read_file(globalBoundariesFile).to_crs(epsg=4326)

# Extracting Imagery Metadata
This notebook extracts metadata from our imagery repositories. How the information will be processed downstream is yet to be determined, but the following metadata needs to be extracted:

### Metadata extracted from imagery
1. Title
2. Country ISO3
3. Storage location
4. Size zipped
5. Resolution
6. Number of bands

### Metadata extracted from deliverable
1. Vendor
2. Sensor
3. Date of Capture

### Manually entered information
1. WB project number
2. Security classification



## Generate information to process

In [None]:
# Root folder that is walked (recursively) for archives and .tif imagery; edit this per run
sourceFolder =    r"S:\COUNTRY\TJK\IMAGERY"# The folder of imagery to process

# These two should not be changed - if you do not have access to the I drive, contact Robert Mansour
# outFolder is handed to imageryExtents.deliveredImageryFolder when a folder is processed;
# processedFolder is checked for an existing JSON file so a folder is not processed twice.
outFolder =       r"I:\ddhfiles\internal\imagerysource\Ingest"
processedFolder = r"I:\ddhfiles\internal\imagerysource\Processed" 

In [None]:
# Walk sourceFolder and build two lists:
#   zipFiles   - full path of every .zip / .rar archive found
#   imgFolders - every folder (listed once, in walk order) that contains at
#                least one .tif file and whose path contains none of the
#                excluded tags below
EXCLUDED_FOLDER_TAGS = ('spfeas', 'MappyFeatures', 'Spatial_features', 'LandScan_2012')

zipFiles = []
imgFolders = []
for root, dirs, files in os.walk(sourceFolder):
    # The exclusion depends only on the folder path, so evaluate it once per
    # folder and against ALL tags. (Previously the append ran inside the tag
    # loop, so only the first tag could ever exclude a folder.)
    skipFolder = any(tag in root for tag in EXCLUDED_FOLDER_TAGS)
    for f in files:
        # Compare extensions case-insensitively so e.g. ".Tif" / ".ZIP" are not missed
        extension = f[-4:].lower()
        if extension in (".zip", ".rar"):
            zipFiles.append(os.path.join(root, f))
        elif extension == ".tif" and not skipFolder and root not in imgFolders:
            imgFolders.append(root)

In [None]:
# Report how many archives, then how many imagery folders, the scan found
for foundItems in (zipFiles, imgFolders):
    print(len(foundItems))

In [None]:
# Process every imagery folder that has not been ingested yet.
# For each folder an imagery object is built; if its JSON file already exists
# in either the processed folder or at the object's own jsonFile path the
# folder is skipped, otherwise metadata is extracted and - when valid - a
# thumbnail, a zip and the JSON record are generated.
importlib.reload(imageryExtents)  # re-import the module so edits to it take effect

badData = []           # metadata objects that failed imgObj.valid_metadata
errorData = []         # folders whose processing raised an exception
errorMessages = {}     # folder -> repr of the exception that was raised
newFolders = []        # folders handled in this run (valid or not)
processedFolders = []  # folders skipped because a JSON file already exists

# Uncomment to process a single folder instead of the full scan result:
#imgFolders = [r"R:\Imagery\DRC\DRC Tasking (8242019)\010740474010_01\010740474010_01_P001_MUL"]
for inFolder in imgFolders:
    try:
        imgObj = imageryExtents.deliveredImageryFolder(inFolder, outFolder, globalBoundaries, "")
        # Check if this imgObj has already been processed
        processedFile = os.path.join(processedFolder, os.path.basename(imgObj.jsonFile))
        if os.path.exists(processedFile) or os.path.exists(imgObj.jsonFile):
            processedFolders.append(inFolder)
        else:
            metaData = imgObj.getMetadata()
            if imgObj.valid_metadata(metaData):
                thumbnail = imgObj.generateThumbnails()
                zipFile = imgObj.zipData()
                imgJSON = imgObj.createJSON(pNumber="NA", securityClassification="Official Use Only")
            else:
                badData.append(metaData)
            newFolders.append(inFolder)
    except Exception as err:
        # Keep going on per-folder failures, but (unlike a bare `except:`) let
        # KeyboardInterrupt/SystemExit through and remember why the folder failed.
        errorData.append(inFolder)
        errorMessages[inFolder] = repr(err)
        print(f"FAILED: {inFolder} ({err!r})")
    print(inFolder)

In [None]:
# Summary counts, in order: folders scanned, invalid metadata, errors,
# newly handled folders, folders skipped as already processed
for resultList in (imgFolders, badData, errorData, newFolders, processedFolders):
    print(len(resultList))

# What to do with error data


In [None]:
from shapely.wkt import loads
import json

In [None]:
# For these broken SPOT files, run the following commands in arcpy: the actual
#  .tif files are not spatially referenced (reason unknown), so the raster is
#  opened through the delivery's .XML file instead.
# NOTE(review): `arcpy` is not imported in the visible cells - presumably this cell
#  is run in an environment where arcpy is already available; confirm.
original_location = r'S:\COUNTRY\TJK\IMAGERY\3020003_HEIN_01800_071401_Tajik_Hazard_SO17014201-8-01_DS_SPOT6_201310200533280_FR1_FR1_SE1_SE1_E073N38_01952\PROD_SPOT6_001\VOL_SPOT6_001_A\IMG_SPOT6_MS_001_A\DIM_SPOT6_MS_201310200533280_SEN_2406078101.XML'
xx = arcpy.Raster(original_location)
# Print the extent polygon as WKT, the number of bands and the mean cell height
print(xx.extent.polygon.WKT)
print(xx.bandCount)
print(xx.meanCellHeight)

In [None]:

# Manually assemble the metadata (and the resulting JSON file name) for imagery
# that could not be processed automatically.

# Raw string: the path contains backslash sequences such as "\d" and "\i" that
# are invalid escapes in a normal string literal. The value is unchanged.
zip_file_base = r"I:\ddhfiles\internal\imagerysource\Ingest"
# Image footprint as WKT
bbox = "MULTIPOLYGON (((73.083069374010321 38.361483576899239, 73.529317605137848 38.361483576899239, 73.529317605137848 38.682390625431829, 73.083069374010321 38.682390625431829, 73.083069374010321 38.361483576899239)))"
bbox_shp = loads(bbox)
# ISO3 codes of every boundary polygon that intersects the footprint, ';'-separated
iso3 = ";".join(globalBoundaries[globalBoundaries.intersects(bbox_shp)]['ISO3'])
# Geohash of the footprint centroid; arguments are passed as (y, x)
bbox_centroid = bbox_shp.centroid
g_hash = geohash.encode(bbox_centroid.y, bbox_centroid.x)
band_count = 4
resolution = 5
vendor = "SPOT"
date = "20131020"
filename = f"{iso3}_{g_hash}_{band_count}_{resolution}_{date}.json"
filename

In [None]:
# Build the metadata record from the manually entered values and write it as
# JSON into zip_file_base; 'location' is the same file name with a .zip extension.
zip_location = os.path.join(zip_file_base, filename.replace(".json",".zip"))
json_location = os.path.join(zip_file_base, filename)

vals = {
    "title": f"Satellite imagery for {iso3}",
    'iso3': str(iso3),
    'location': str(zip_location),
    'zipped_size': 563444000,
    'resolution': str(resolution),
    'nBands': str(band_count),
    'vendor': str(vendor),
    'capture_date': str(date),
    'pNumber': 'NA',
    "securityClassification": "Official Use Only",
    "ImageExtent": str(bbox_shp),
    'originalLocation': original_location,
}

with open(json_location, 'w') as json_out:
    json.dump(vals, json_out)

In [None]:
# Display the geohash that was computed from the footprint centroid
g_hash