In [1]:
import pandas as pd  
import numpy as np
from glob import glob
from scipy.io import savemat
from datetime import datetime

import pdb

import psycopg2
import os

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [2]:
def convert_lon(lon):
    '''
    Shift a longitude so the result is never below 20.

    Values under 20 are moved up by one full turn (360); every other
    value is handed back unchanged. For inputs in [-180, 180] the result
    therefore lies in [20, 380).
    '''
    return lon + 360 if lon < 20 else lon

def format_df(df, obs=None):
    '''
    Clean a raw profile table and return it typed, de-duplicated and sorted.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'profile_id', 'date', 'lat', 'lon' and the
        observation column named by `obs`.
    obs : str, optional
        Name of the observation column. When omitted, the module-level
        variable `obs` is used, so existing `format_df(df)` calls keep working.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified) in which
        * rows with a missing observation are dropped,
        * repeated header rows (a 'date' cell containing the text 'date')
          are dropped,
        * 'date' is datetime, 'lat' / 'lon' / `obs` are float,
        * 'profile_id' and the ('lat', 'lon') pair are each unique,
        * rows are sorted by ('lat', 'lon'),
        * 'wmo' holds the part of 'profile_id' before the first '_' as float.

    Raises
    ------
    NameError
        If `obs` is not passed and no module-level `obs` is defined.
    '''
    if obs is None:
        obs = globals().get('obs')
        if obs is None:
            raise NameError("name 'obs' is not defined")

    df = df.dropna(how='any', axis=0, subset=[obs])

    # Remove repeated header rows *before* any casting, de-duplication or
    # sorting: while they are present the numeric columns are strings, and
    # string ordering ('10.0' < '5.0') is not numeric ordering.
    is_header = df['date'].astype(str).str.contains('date', na=False)
    df = df[~is_header].copy()  # own copy -> safe to assign columns below

    df['date'] = pd.to_datetime(df['date'].values)
    df['lon'] = df['lon'].astype(float)
    df['lat'] = df['lat'].astype(float)
    df[obs] = df[obs].astype(float)

    df = df.drop_duplicates(subset=['profile_id'])
    df = df.drop_duplicates(subset=['lat', 'lon'])  # need to have lat long be unique when making mask
    df = df.sort_values(by=['lat', 'lon'])  # sorting columns needed for binary search

    df = df.assign(wmo=df['profile_id'].apply(lambda x: float(x.split('_')[0])))

    return df

def make_dict_for_mat(df, obs, minYear, maxYear):
    '''
    Build the dictionary of arrays that is written to a .mat file.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain `obs`, 'profile_id', 'wmo', 'date', 'lat' and 'lon'.
        'date' must be datetime-like. The frame is NOT modified.
    obs : str
        Name of the observation column.
    minYear, maxYear : int
        Inclusive range of calendar years to keep.

    Returns
    -------
    dict
        Maps each output name to a NumPy array, one entry per kept row:
        'obsProf', 'profIdAggrSel', 'profFloatIDAggrSel' (string form of
        'wmo'), 'profJulDayAggrSel' (date text formatted as
        "%d-%b-%Y %H:%M:%S"), 'profYearAggrSel', 'profMonthAggrSel',
        'profLatAggrSel', 'profLongAggrSel' (longitudes below 20 shifted
        up by 360).

    Rows whose year is outside [minYear, maxYear] or whose observation
    equals the sentinel -999 are dropped.
    '''
    # Work on a private copy of just the needed columns so the caller's
    # frame does not silently gain 'year' / 'month' columns.
    sel = df[[obs, 'profile_id', 'wmo', 'date', 'lat', 'lon']].copy()
    sel['date'] = pd.to_datetime(sel['date'])

    years = sel['date'].dt.year
    keep = (years >= minYear) & (years <= maxYear) & (sel[obs] != -999)
    sel = sel[keep]

    dates = sel['date']
    lon = sel['lon']

    return {
        'obsProf': sel[obs].values,
        'profIdAggrSel': sel['profile_id'].values,
        'profFloatIDAggrSel': sel['wmo'].values.astype(str),
        'profJulDayAggrSel': dates.dt.strftime("%d-%b-%Y %H:%M:%S").astype(object).values,
        'profYearAggrSel': dates.dt.year.values.astype('int64'),
        'profMonthAggrSel': dates.dt.month.values.astype('int64'),
        'profLatAggrSel': sel['lat'].values,
        # Same rule as convert_lon: anything below 20 is wrapped by +360.
        'profLongAggrSel': lon.where(lon >= 20, lon + 360).values,
    }

def make_file_name(presLevel, obs, minYear, maxYear, prefix='/storage/Data/'):
    '''
    Compose the output path for one pressure level's .mat file.

    The result is `prefix` joined with
    'prof<Obs>_at<presLevel>dbar_<minYear>_<maxYear>.mat', where <Obs> is
    str(obs) with only its first character upper-cased.
    '''
    pieces = [
        os.path.join(prefix, 'prof'),
        str(obs).capitalize(),
        '_at{}dbar'.format(str(presLevel)),
        '_{0}_{1}'.format(minYear, maxYear),
        '.mat',
    ]
    return ''.join(pieces)


In [3]:
# Export one .mat file per pressure level from the interpolated-profile CSVs.
# Alternative input set (single 50x dbar levels):
#files = glob('/storage/kuusela-stein-intrep-profiles/iTempData_pres_50*.0.csv')
minYear=2007
maxYear=2018
prefix='/storage/Data/'
obs='temp'


def _pres_level_token(path):
    """Return the text between the last '_' and '.csv' in a file path."""
    return path.split('_')[-1].split('.csv')[0]


def _pres_sort_key(path):
    """Sort numerically by pressure level; unparsable names go last."""
    token = _pres_level_token(path)
    try:
        return (0, float(token), token)
    except ValueError:
        return (1, 0.0, token)


# glob() returns matches in arbitrary, filesystem-dependent order, so the
# positional slice below is only meaningful on a sorted list.
files = sorted(glob('/storage/JG-interpolated-profiles/iTempData_*'), key=_pres_sort_key)
print(len(files))

# Starts at the 20th file; presumably the earlier levels were already
# exported in a previous run -- TODO confirm, or change to `files` to redo all.
for file in files[19:]:
    presLevel = _pres_level_token(file)
    print(presLevel)
    df = pd.read_csv(file, index_col=0)
    df = format_df(df)
    presDict = make_dict_for_mat(df, obs, minYear, maxYear)
    newDir = os.path.join(prefix, presLevel)
    # exist_ok avoids the check-then-create race and also creates `prefix`
    # itself when it is missing.
    os.makedirs(newDir, exist_ok=True)
    fileName = make_file_name(presLevel, obs, minYear, maxYear, newDir)
    print(fileName)
    savemat(fileName, presDict)

58
200.0


  interactivity=interactivity, compiler=compiler, result=result)
  mask |= (ar1 == a)


/storage/Data/200.0/profTemp_at200.0dbar_2007_2018.mat


  arr[empties] = ' '


220.0
/storage/Data/220.0/profTemp_at220.0dbar_2007_2018.mat
240.0
/storage/Data/240.0/profTemp_at240.0dbar_2007_2018.mat
260.0
/storage/Data/260.0/profTemp_at260.0dbar_2007_2018.mat
280.0
/storage/Data/280.0/profTemp_at280.0dbar_2007_2018.mat
300.0
/storage/Data/300.0/profTemp_at300.0dbar_2007_2018.mat
320.0
/storage/Data/320.0/profTemp_at320.0dbar_2007_2018.mat
340.0
/storage/Data/340.0/profTemp_at340.0dbar_2007_2018.mat
360.0
/storage/Data/360.0/profTemp_at360.0dbar_2007_2018.mat
380.0
/storage/Data/380.0/profTemp_at380.0dbar_2007_2018.mat
400.0
/storage/Data/400.0/profTemp_at400.0dbar_2007_2018.mat
420.0
/storage/Data/420.0/profTemp_at420.0dbar_2007_2018.mat
440.0
/storage/Data/440.0/profTemp_at440.0dbar_2007_2018.mat
462.5
/storage/Data/462.5/profTemp_at462.5dbar_2007_2018.mat
500.0
/storage/Data/500.0/profTemp_at500.0dbar_2007_2018.mat
550.0
/storage/Data/550.0/profTemp_at550.0dbar_2007_2018.mat
600.0
/storage/Data/600.0/profTemp_at600.0dbar_2007_2018.mat
650.0
/storage/Data/650.

In [37]:
# Write `presDict` to `fileName` again. Both names come from kernel state
# (they are assigned inside the export loops in other cells), so this cell
# only works after one of those loops has run at least one iteration.
savemat(fileName, presDict)

In [14]:
# Export selected pressure levels fetched through `get_pressure_level_df`.
# NOTE(review): `get_pressure_level_df` and `conn` are not defined in the
# visible part of this notebook -- they must already exist in the kernel.
minYear=2007
maxYear=2018
presLevels = [10.0]
obs='temp'
for presLevel in presLevels:
    df = get_pressure_level_df(conn, presLevel)
    if df.empty:
        # '{}' (not '[]') is the str.format placeholder; with '[]' the
        # pressure level was never shown in the message.
        print('no presLevel {}'.format(presLevel))
        continue
    presDict = make_dict_for_mat(df, obs, minYear, maxYear)
    fileName = make_file_name(presLevel, obs, minYear, maxYear)
    print(fileName)

    savemat(fileName, presDict)

/storage/kuusela-stein-intrep-profiles/profTemp_at10.0dbar_2007_2018.mat


  arr[empties] = ' '


In [10]:
# Show which arrays the most recently built `presDict` contains.
# NOTE(review): the recorded output below lists 7 keys and has no
# 'profIdAggrSel', while make_dict_for_mat as written names 8 columns --
# the output looks stale; re-run after Restart & Run All to confirm.
presDict.keys()

dict_keys(['obsProf', 'profFloatIDAggrSel', 'profJulDayAggrSel', 'profYearAggrSel', 'profMonthAggrSel', 'profLatAggrSel', 'profLongAggrSel'])