In [2]:
import numpy as np
import pandas as pd
import gpstk
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter('ignore', np.RankWarning)
%matplotlib inline

# Get station pairs with baseline < 100 km

In [3]:
# Load the station catalogue and give its seven columns working names.
# NOTE(review): read_csv treats the first line of the file as a header by
# default; confirm the CSV really has one, otherwise the first station is lost.
data = pd.read_csv("conus_2003_324_324.csv")
data.columns = [
    "Station",
    "a", "b", "c",
    "x", "y", "z",
]

In [4]:
# Keep only the station name and its x/y/z coordinates. Selecting the wanted
# columns (instead of dropping "a", "b", "c") makes the cell safe to re-run.
data=data[["Station","x","y","z"]]
# .values replaces the removed DataFrame.as_matrix(); each row is
# [Station, x, y, z], which is the layout get_pairs indexes into.
array=data.values

In [5]:
def get_pairs(array):
    """Return every pair of stations whose separation is below 100e3.

    Parameters
    ----------
    array : sequence of rows
        Each row is indexed as [name, x, y, z].
        (Presumably Cartesian coordinates in metres, so 100e3 is 100 km --
        TODO confirm against the catalogue file.)

    Returns
    -------
    list of [name_i, name_j]
        One entry per unordered pair (i < j), in row order.
    """
    max_baseline = 100e3
    n_rows = len(array)
    close_pairs = []
    for first in range(n_rows):
        row_a = array[first]
        for second in range(first + 1, n_rows):
            row_b = array[second]
            dx = row_a[1] - row_b[1]
            dy = row_a[2] - row_b[2]
            dz = row_a[3] - row_b[3]
            # Euclidean distance between the two stations.
            if np.sqrt(dx**2 + dy**2 + dz**2) < max_baseline:
                close_pairs.append([row_a[0], row_b[0]])
    return close_pairs

In [6]:
pairs=get_pairs(array)
# Single-argument print(...) gives the same text on Python 2 and Python 3.
print("Number of Stations with baseline less than 100 Km  %d" % len(pairs))
pairs[:10]

Number of Stations with baseline less than 100 Km  455


[['1ulm', 'sihs'],
 ['1ulm', 'wnfl'],
 ['abq1', 'nmsf'],
 ['abq1', 'zab1'],
 ['acu1', 'npri'],
 ['adks', 'ang1'],
 ['adks', 'ang2'],
 ['adks', 'lkhu'],
 ['adks', 'netp'],
 ['adks', 'txhu']]

## Functions

In [7]:
def adjust_times(df, interval=30):
    """Sort observations by TIME and round every TIME up onto a regular grid.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a numeric "TIME" column. The caller's frame is left
        untouched; a sorted copy is returned.
    interval : number, optional
        Grid spacing in the same units as TIME (default 30). Values that are
        already a multiple of `interval` are kept as they are.

    Returns
    -------
    pandas.DataFrame
        Rows of `df` ordered by TIME, with TIME rounded up to the next
        multiple of `interval`.
    """
    # sort_values replaces DataFrame.sort, which newer pandas no longer has.
    df = df.sort_values("TIME")
    times = df["TIME"].values
    remainder = times % interval
    # Only off-grid samples are moved; on-grid ones are passed through.
    df["TIME"] = np.where(remainder != 0, times + (interval - remainder), times)
    return df

In [8]:
def label_arcs(df, max_gap=3600):
    """Add an "ARCS" column numbering runs of observations separated by time gaps.

    A new arc starts at every row whose TIME exceeds the previous row's TIME
    by more than `max_gap`. Arcs are numbered 1, 2, 3, ... in row order and
    stored as floats.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a "TIME" column, expected to be already sorted.
        The column "ARCS" is added to this frame in place.
    max_gap : number, optional
        Largest TIME difference that still belongs to the same arc
        (default 3600, same units as TIME).

    Returns
    -------
    pandas.DataFrame
        The same frame, now carrying the "ARCS" column.
    """
    time = df["TIME"].values
    # True on the first row of every arc except the very first one.
    starts_new_arc = np.zeros(time.size, dtype=bool)
    starts_new_arc[1:] = np.diff(time) > max_gap
    # Counting the arc starts seen so far yields the arc number (1-based, float).
    df["ARCS"] = np.cumsum(starts_new_arc) + 1.0
    return df

In [9]:
def cycle_slips(PhaseDelay,L1,L2,threshold=0.5):
    """Return the indices where the phase delay jumps by more than `threshold`.

    Parameters
    ----------
    PhaseDelay : array-like
        Phase-delay series of one arc, in time order.
    L1, L2 : array-like
        Carrier-phase observations of the same arc; only used to report
        (print) the positions of missing (NaN) values.
    threshold : float, optional
        Minimum absolute sample-to-sample change counted as a slip.

    Returns
    -------
    numpy.ndarray of int
        Index i is returned when |PhaseDelay[i] - PhaseDelay[i-1]| > threshold.
        Index 0 is never returned: the first sample has no predecessor, and
        comparing it against a prepended 0 (as was done before) flagged it
        whenever its absolute value exceeded the threshold.
    """
    phase = np.asarray(PhaseDelay, dtype=float)
    # Prepend the first sample itself so the output stays aligned with the
    # input while the first difference is exactly zero.
    jumps = np.abs(np.diff(np.hstack((phase[:1], phase))))
    slips = np.where(jumps > threshold)[0]

    # Report missing carrier-phase samples; they are not removed here.
    noL1 = np.where(np.isnan(L1))[0]
    noL2 = np.where(np.isnan(L2))[0]
    if len(noL1) > 0:
        print("No L1 %s" % (noL1,))
    if len(noL2) > 0:
        print("No L2 %s" % (noL2,))

    return slips
    

In [10]:
def subarcs(arc,dfarc,splits):
    """Add a "SUBARCS" column that numbers the pieces of one arc.

    The arc is cut at the row positions listed in `splits`; consecutive
    pieces are labelled arc*10+1, arc*10+2, ... (floats). An arc without any
    cut is a single piece labelled arc*10+1, so the first piece carries the
    same label whether or not the arc is split.

    Parameters
    ----------
    arc : number
        Identifier of the arc, used as the base of the sub-arc labels.
    dfarc : pandas.DataFrame
        Rows of that arc. The "SUBARCS" column is written into this frame.
    splits : array-like of int
        Row positions (0-based, relative to `dfarc`) where a new piece
        starts. Position 0 and positions past the end do not create a piece.

    Returns
    -------
    pandas.DataFrame
        `dfarc` with the "SUBARCS" column set.
    """
    n_rows = len(dfarc)
    cuts = np.unique(np.asarray(splits, dtype=int))
    # A cut at row 0 (or beyond the last row) would only create an empty piece.
    cuts = cuts[(cuts > 0) & (cuts < n_rows)]
    starts_new_piece = np.zeros(n_rows, dtype=int)
    starts_new_piece[cuts] = 1
    # Running count of cuts = 0-based piece number of every row.
    subarcsID = float(arc) * 10 + 1 + np.cumsum(starts_new_piece)
    dfarc["SUBARCS"] = subarcsID
    return dfarc

In [11]:
def del_arcs(df, min_points=10):
    """Drop every sub-arc that has fewer than `min_points` rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain "SUBARCS" and "TIME" columns. Any index is accepted:
        rows are selected with boolean masks, not with positions used as
        index labels (which picked the wrong rows whenever the index did
        not start at 0).
    min_points : int, optional
        Sub-arcs with fewer rows than this are removed (default 10).

    Returns
    -------
    (pandas.DataFrame, numpy.ndarray or None)
        The frame without the short sub-arcs, and the sorted unique TIME
        values of the removed rows (NaN excluded) so the same epochs can be
        removed from the other station. The second element is None when no
        sub-arc was examined for removal.
    """
    removed_times = []
    for subarc in np.unique(df.SUBARCS.values):
        in_subarc = (df.SUBARCS == subarc).values
        if in_subarc.sum() < min_points:
            # Remember the epochs before dropping the rows.
            removed_times.append(df.TIME.values[in_subarc])
            df = df[~in_subarc]

    if len(removed_times) > 0:
        times = np.concatenate(removed_times)
        times = times[~np.isnan(times)]
        times = np.unique(times)
    else:
        times = None

    return df, times
            

In [None]:
def poly_fit(dfarc, window=10, degree=2):
    """Fit piecewise polynomials to alfa*(L1-L2) and store them in "POLYFIT".

    For every sub-arc the combination lI = alfa*(L1 - L2) is cut into
    consecutive windows of `window` samples; each window is fitted with a
    polynomial in TIME and the fitted values are written to the "POLYFIT"
    column of the rows of that sub-arc.

    Parameters
    ----------
    dfarc : pandas.DataFrame
        Must contain "SUBARCS", "TIME", "L1" and "L2" columns. It is not
        modified; the result is a copy.
    window : int, optional
        Number of samples per fitted window (default 10).
    degree : int, optional
        Polynomial degree (default 2). It is lowered for a trailing window
        that has too few samples to determine that many coefficients.

    Returns
    -------
    pandas.DataFrame
        Copy of `dfarc` with the "POLYFIT" column filled in.

    Notes
    -----
    The fitted values used to be assigned through chained indexing
    (``dfarc[mask]["POLYFIT"] = ...``), which writes to a temporary frame and
    leaves `dfarc` unchanged; ``.loc`` is used instead.
    TODO: detecting jumps in the residual lI - POLYFIT is not implemented yet.
    """
    f1,f2=gpstk.L1_FREQ_GPS,gpstk.L2_FREQ_GPS
    alfa=1.0/((f1**2/f2**2)-1)

    dfarc = dfarc.copy()
    if "POLYFIT" not in dfarc.columns:
        dfarc["POLYFIT"] = np.nan

    for subarc in np.unique(dfarc.SUBARCS.values):
        in_subarc = (dfarc.SUBARCS == subarc).values
        lI = alfa * (dfarc.L1.values[in_subarc] - dfarc.L2.values[in_subarc])
        time = dfarc.TIME.values[in_subarc]

        fitted = np.empty(lI.size)
        for start in range(0, lI.size, window):
            x = time[start:start + window]
            y = lI[start:start + window]
            # A window of k samples can determine at most k coefficients.
            deg = min(degree, x.size - 1)
            fitted[start:start + window] = np.polyval(np.polyfit(x, y, deg), x)

        dfarc.loc[in_subarc, "POLYFIT"] = fitted
    return dfarc

# Preprocessing of ten pairs of stations


In [None]:
from os import listdir
dir_txt="txtcors/"
# Sorted so the file picked for a station does not depend on directory order.
files = sorted(listdir(dir_txt))

f1=gpstk.L1_FREQ_GPS
f2=gpstk.L2_FREQ_GPS
factor_alfa=f2**2/(f1**2-f2**2)
c=3e8
alfa=1.0/((f1**2/f2**2)-1)

# Column layout assigned to the per-station text files.
columns=["PRN","TIME","C1","C2","L1","L2","Tgd","IPP","Elevation","Azimuth"]
# Quantities that exist once per station after the merge (suffix _x / _y).
per_station=["PhaseDelay","CodeDelay","L1","L2","C1","C2","IPP","Elevation","Azimuth"]
station1_only=[name+"_x" for name in per_station]
station2_only=[name+"_y" for name in per_station]
# Layout of the single-station frames handed to poly_fit.
station_columns=['PRN','TIME','C1','C2','L1','L2','Tgd', 'IPP', 'Elevation', 'Azimuth', 'PhaseDelay', 'CodeDelay', 'ARCS', 'SUBARCS']


def _strip_suffix(frame, suffix, layout):
    """Rename '<name><suffix>' columns to '<name>' and return them in `layout` order."""
    renamed = frame.rename(
        columns=lambda name: name[:-len(suffix)] if name.endswith(suffix) else name)
    # Renaming by name (not by position) matters for station 2: after the _x
    # columns are dropped, Tgd sits before C1_y, so a positional rename would
    # label Tgd as C1, C1_y as C2, and so on.
    return renamed[layout].copy()


for stations in pairs[:10]:
    #Load Files
    st1,st2=stations[0],stations[1]
    # NOTE(review): raises IndexError when no file name contains the station id.
    file1=dir_txt+[f for f in files if st1 in f ][0]
    file2=dir_txt+[f for f in files if st2 in f ][0]

    df1=pd.read_csv(file1,sep=",")
    df1.columns=columns
    df2=pd.read_csv(file2,sep=",")
    df2.columns=columns
    df1,df2=adjust_times(df1),adjust_times(df2)
    # Keep only epochs observed by both stations; station 1 gets suffix _x, station 2 _y.
    df3=pd.merge(df1,df2,on=["TIME","PRN","Tgd"])

    #For each satellite observed by the stations
    for sat in np.unique(df3.PRN.values):
        df=df3[df3.PRN==sat] #dataframe with times of a specific satellite
        df=df.reset_index(drop=True)
        #Estimate delay measures
        df["PhaseDelay_x"]=alfa*(df.L1_x-df.L2_x)
        df["PhaseDelay_y"]=alfa*(df.L1_y-df.L2_y)
        df["CodeDelay_x"]=alfa*(df.C2_x-df.C1_x)
        df["CodeDelay_y"]=alfa*(df.C2_y-df.C1_y)
        #Add column with indicators of time separation
        df=label_arcs(df)

        # Visit every arc once (iterating df.ARCS.values repeats each arc per row).
        for arc in np.unique(df.ARCS.values):
            #Search for cycle slips on each arc
            dfarc=df[df.ARCS==arc]
            slips1=cycle_slips(dfarc.PhaseDelay_x.values,dfarc.L1_x.values,dfarc.L2_x.values,2.5)
            slips2=cycle_slips(dfarc.PhaseDelay_y.values,dfarc.L1_y.values,dfarc.L2_y.values,2.5)
            #Dataframes with subarcs on stations; each call gets its own copy
            #because subarcs writes the SUBARCS column into the frame it receives.
            dfarc1=subarcs(arc,dfarc.copy(),slips1).drop(station2_only,axis=1)
            dfarc2=subarcs(arc,dfarc.copy(),slips2).drop(station1_only,axis=1)
            #Remove short-arcs
            new_dfarc1,times1=del_arcs(dfarc1)
            new_dfarc2,times2=del_arcs(dfarc2)
            # Epochs removed at one station are removed at the other as well.
            # "is not None": comparing an ndarray with != None is element-wise.
            if times1 is not None:
                new_dfarc2=new_dfarc2[~new_dfarc2.TIME.isin(times1)]
            if times2 is not None:
                new_dfarc1=new_dfarc1[~new_dfarc1.TIME.isin(times2)]

            new_dfarc1=_strip_suffix(new_dfarc1,"_x",station_columns)
            new_dfarc2=_strip_suffix(new_dfarc2,"_y",station_columns)
            #Polynomial fit and outlier detection
            new_dfarc1["POLYFIT"]=np.nan
            new_dfarc2["POLYFIT"]=np.nan
            new_dfarc1=poly_fit(new_dfarc1)
            new_dfarc2=poly_fit(new_dfarc2)
            #TODO: outlier removal
            #Both dataframes now should have same number of observations and we can merge them

        # NOTE(review): only the first satellite of each pair is processed;
        # this break looks like a development shortcut -- remove for a full run.
        break

In [None]:
# Scratch: sub-arc ids and phase delay of station 1 for the last processed arc.
# The single-station frame has its columns renamed without the _x suffix,
# so the phase delay lives in "PhaseDelay".
print(np.unique(new_dfarc1.SUBARCS.values))
new_dfarc1.PhaseDelay

In [None]:
# Scratch: pick one sub-arc of station 1 for inspection.
# NOTE(review): 17.0 is a hard-coded sub-arc id taken from an interactive run;
# the selection is empty if that id does not occur in new_dfarc1.
dfsubarc=new_dfarc1[new_dfarc1.SUBARCS==17.0]

In [None]:
# Scratch: display the rows of the hard-coded sub-arc 17.0 of station 1.
new_dfarc1[new_dfarc1.SUBARCS==17.0]

In [None]:
# Scratch: remove from station 1 the epochs dropped at station 2 and show the
# frame shape before and after.
print(times2)
print(new_dfarc1.shape)
# times2 is either None or an ndarray; "!= None" on an ndarray is element-wise.
if times2 is not None:
    new_dfarc1=new_dfarc1[~new_dfarc1.TIME.isin(times2)]
print(new_dfarc1.shape)

In [None]:
# Scratch: display the epochs left for station 2 in the last processed arc.
new_dfarc2.TIME

In [None]:
# Scratch: epochs removed at station 2 (None when nothing was removed).
print(times2)

In [None]:
# Station-1 phase delay of the last processed arc, before short sub-arcs are removed.
print(np.unique(dfarc1.SUBARCS.values))
plt.scatter(dfarc1.TIME,dfarc1.PhaseDelay_x,color="r",alpha=.5)
plt.xlabel("TIME")
plt.ylabel("PhaseDelay_x");

In [None]:
# Station-1 phase delay of the last processed arc, after short sub-arcs are removed.
# The single-station frame has no _x suffix any more, hence "PhaseDelay".
print(np.unique(new_dfarc1.SUBARCS.values))
plt.scatter(new_dfarc1.TIME,new_dfarc1.PhaseDelay,color="r",alpha=.5)
plt.xlabel("TIME")
plt.ylabel("PhaseDelay");

In [None]:
# Scratch: step-by-step version of del_arcs on station 1 that prints what is removed.
# The ids are kept in subarc_ids (not "subarcs") so the subarcs() function is
# not overwritten, and rows are selected with boolean masks so the result does
# not depend on the frame's index labels.
subarc_ids=np.unique(dfarc1.SUBARCS.values)
print(subarc_ids)
times=[] #Epochs to delete in the other station as well
for subarc in subarc_ids:
    in_subarc=(dfarc1.SUBARCS==subarc).values
    if in_subarc.sum()<10:
        removed=dfarc1.TIME.values[in_subarc] #Save times so we can delete in other station
        times.append(removed)
        print("Subarco  %s Tiempos a  eliminar %s" % (subarc, removed))
        dfarc1=dfarc1[~in_subarc]

if len(times)>0:
    times=np.concatenate(times)
    times=times[~np.isnan(times)]
else:
    times=None

            

In [None]:
# Scratch: overwrite the POLYFIT column of the selected sub-arc with None.
# NOTE(review): dfsubarc was produced by filtering new_dfarc1, so pandas may
# warn about setting a value on a copy; new_dfarc1 itself is presumably not
# changed by this assignment -- confirm if that matters.
dfsubarc["POLYFIT"]=None

In [None]:
# Scratch: display the selected sub-arc.
dfsubarc