# Data Preparation
This Python notebook walks through obtaining and preprocessing the soil moisture data.

This is notebook 1 of the repository.

In [8]:
import os 
from osgeo import gdal
import matplotlib.pyplot as plt
import numpy as np
import rasterio
from rasterio.plot import show
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

# checkpoint - shivagpandey

In [9]:
import io

import requests
import tifffile as tiff

# Sample NASA-USDA SMAP soil-moisture TIFF (per the URL) used to exercise remote reading.
url = 'https://gimms.gsfc.nasa.gov/SMOS/SMAP/SMAP_10KM_tiff/NASA_USDA_SMAP_SM20150402_20150404.tif'


def read_image_from_url(url, timeout=60):
    """Download a TIFF over HTTP and decode it from memory.

    Parameters
    ----------
    url : str
        Address of the TIFF file to fetch.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout
        ``requests`` can block indefinitely on an unresponsive host.

    Returns
    -------
    The decoded image, exactly as returned by ``tifffile.imread``.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.RequestException
        For connection problems or when the timeout expires.
    """
    resp = requests.get(url, timeout=timeout)
    # Fail loudly on an HTTP error instead of handing an error page to the
    # TIFF decoder, which would fail with an unrelated parsing message.
    resp.raise_for_status()
    return tiff.imread(io.BytesIO(resp.content))


read_image_from_url(url)
print("Done")

Done


# checkpoint end

## Importing data

In [13]:
# Directory (relative to the working directory) where the soil moisture .tif files live.
path = "SM//Telangana"

In [14]:
# os.listdir makes no promise about ordering, so sort the names: this keeps
# "the first file" (previewed below) the same on every machine and every run.
global_dataset = sorted(ds for ds in os.listdir(path) if ds.endswith('.tif'))
print("Imported", len(global_dataset), "items")

Imported 88 items


In [15]:
# Preview band 1 of the first listed raster as a NumPy array.
img = gdal.Open(os.path.join(path, global_dataset[0]))
if img is None:
    # With GDAL exceptions disabled, Open() signals failure by returning None;
    # stop here with a clear message rather than an AttributeError below.
    raise RuntimeError("GDAL could not open raster: " + os.path.join(path, global_dataset[0]))
image = np.array(img.GetRasterBand(1).ReadAsArray())
print(image)

[[-9999. -9999. -9999. ... -9999. -9999. -9999.]
 [-9999. -9999. -9999. ... -9999. -9999. -9999.]
 [-9999. -9999. -9999. ... -9999. -9999. -9999.]
 ...
 [-9999. -9999. -9999. ... -9999. -9999. -9999.]
 [-9999. -9999. -9999. ... -9999. -9999. -9999.]
 [-9999. -9999. -9999. ... -9999. -9999. -9999.]]


In [16]:
# File names of every .tif in the data directory. Sorted because os.listdir
# returns entries in arbitrary order, and the order of this list becomes the
# row order of the DataFrame built later.
# NOTE(review): lexicographic order is chronological only if the file names
# embed the date in a sortable form (e.g. yyyymmdd) - confirm for this folder.
dataset = sorted(ds for ds in os.listdir(path) if ds.endswith('.tif'))

In [17]:
def getImages(dataset,path):
    """Read band 1 of each raster file into a NumPy array.

    Parameters
    ----------
    dataset : iterable of str
        File names of the rasters, relative to ``path``.
    path : str
        Directory that contains the files.

    Returns
    -------
    list of numpy.ndarray
        One array per file, in the same order as ``dataset``.

    Raises
    ------
    RuntimeError
        If GDAL cannot open one of the files.
    """
    Imagedata = []
    for data in dataset:
        raster_path = os.path.join(path, data)
        img = gdal.Open(raster_path)
        if img is None:
            # With GDAL exceptions disabled, Open() returns None on failure;
            # name the offending file instead of failing on None below.
            raise RuntimeError("GDAL could not open raster: " + raster_path)
        Imagedata.append(np.array(img.GetRasterBand(1).ReadAsArray()))
    return Imagedata

In [18]:
# Load band 1 of every listed raster; one array per file name in `dataset`.
imagedata = getImages(dataset=dataset, path=path)

In [19]:
# Label for the single group of images handled below.
columns = ["Year1"]
# Wrap the loaded images in an outer list so the loop below sees one "year"
# holding all of them. NOTE: this rebinds `dataset` from a list of file names
# to a list of image lists, so re-running the earlier getImages cell after this
# one will not work without first re-running the file-listing cell.
dataset = [imagedata]

## Creating a pixel-wise DataFrame of soil moisture data from TIFF images

In [29]:
# Target layout: one row per image (time step), one column per pixel.
#   Year1  month1  pixel1 pixel2 pixel3
#   Year1  month2  pixel1 pixel2 pixel3
from itertools import chain  # no longer needed in this cell; kept in case later cells use it

count = 0  # not used in this cell; kept in case later cells rely on it
yearlydata = []
yearly_frames = []

for months in dataset:
    # Flatten every 2-D raster row by row into a single 1-D pixel vector.
    yearlydata = [np.asarray(month).ravel() for month in months]
    yearly_frames.append(pd.DataFrame(yearlydata))

# DataFrame.append was removed in pandas 2.0. Concatenating once gives the same
# frame (each year keeps its own 0..n-1 row labels) and avoids copying the
# accumulated frame on every iteration.
df = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()
   

In [30]:
# Column filtering, done in one chain:
#   1. keep a pixel column only if at least one row is non-zero
#      (drops columns that are 0 in every row - white pixels);
#   2. keep a pixel column only if at least one row differs from -999
#      (drops columns that are -999 in every row - black pixels);
#   3. the double transpose does not change the orientation; its effect is to
#      relabel the surviving columns as 0..k-1.
# NOTE(review): the raster preview printed earlier in this notebook shows -9999
# fill values while this filter targets -999 - confirm which sentinel applies.
df = (
    df.loc[:, lambda frame: (frame != 0).any(axis=0)]
      .loc[:, lambda frame: (frame != -999).any(axis=0)]
      .T
      .reset_index(drop=True)
      .T
)
# Persist the filtered table, row index included, for further processing.
df.to_csv("MaharashtraData.csv", index=True)
df.head(2)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4151,4152,4153,4154,4155,4156,4157,4158,4159,4160
0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,...,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0
1,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,...,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0
2,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,...,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0
3,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,...,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0
4,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,...,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0


## Add geographical coordinates into the dataframe 
We use the raster2xyz library and a reference TIFF image to assign geographic coordinates to the extracted data.

In [41]:
from raster2xyz.raster2xyz import Raster2xyz

# Reference raster whose cells supply the coordinates.
# NOTE(review): absolute, machine-specific path - the notebook will not run
# elsewhere until this points at a location inside the project.
input_raster = "C://Users//007sh//Desktop//UNDP//SM//NASA_USDA_SMAP_SM10.ssm.tif"
out_csv = "temp.csv"

# Convert the raster into a CSV table (includes a "z" value column, used below).
rtxyz = Raster2xyz()
rtxyz.translate(input_raster, out_csv)

# Reload the table and keep only rows whose z is neither 0 nor -999.
myRasterDF = pd.read_csv(out_csv)
keep_rows = (myRasterDF["z"] != 0) & (myRasterDF["z"] != -999)
myRasterDF = myRasterDF[keep_rows]
# Coordinates table for further processing (default index column is written too).
myRasterDF.to_csv("Maharashtracoordinates.csv")

[2022-07-27 09:39:06 - INFO] - Getting geotransform and data...
[2022-07-27 09:39:06 - INFO] - Getting XYZ data...
[2022-07-27 09:39:06 - INFO] - Getting geotransformed coordinates...
[2022-07-27 09:39:06 - INFO] - Building XYZ data...
[2022-07-27 09:39:06 - INFO] - New XYZ (csv file) created...
