In [1]:
import os, sys, importlib

import geopandas as gpd
import pandas as pd

import shapely.ops
from shapely.geometry import Point, LineString
from shapely import ops

from osgeo import ogr


In [2]:
baseFolder = os.path.join(os.getcwd(), "SampleData","Canals_sample_2")
#This points to the input lines file - points to sample data right now
inLines = os.path.join(baseFolder, 'Canals_sample_2.shp')
#Enter the folder where you want output created
unqID = "UID"
inL = gpd.read_file(inLines)
#inL = inL.to_crs({'init':'epsg:3857'}) #need to project to a CRS with metres 
inL.head()

Unnamed: 0,CHANNEL_NA,CHANNEL_TY,SHAPE_Leng,Head_X,Head_y,Tail_X,Tail_Y,LENGTH,Shape_Le_1,UID,geometry
0,2R/3R Minor,M,2366.669831,72.34525,30.151624,72.320696,30.151392,2366.669831,2366.669831,26005000,"LINESTRING (244300.021 3338563.505, 241933.537..."
1,6CR Disty,D,2057.445263,73.033955,30.636666,73.026671,30.654103,2057.445263,2057.445263,20000000,"LINESTRING (311583.662 3390984.321, 311325.528..."
2,6BR Disty,D,2708.487795,73.022003,30.633134,72.999,30.645735,2708.487795,2708.487795,21000000,"LINESTRING (310431.169 3390612.871, 310376.282..."
3,7CR Disty,D,368.060039,72.581546,30.548341,72.581563,30.551659,368.060039,368.060039,34000000,"LINESTRING (268005.187 3382038.088, 268016.465..."
4,1 L Disty,D,31090.99327,73.615491,30.814278,73.615494,30.814281,31090.99327,31090.99327,4000000,"MULTILINESTRING ((367564.749 3409839.289, 3674..."


In [3]:
def generate_subset_line(allCoords, length):
    #Generate the 
    allPts = []
    for cIdx in range(0, len(allCoords)):
        cPoint = Point(allCoords[cIdx])
        allPts.append(cPoint)
        if cIdx > 0:
            curLine = LineString(allPts)
            if curLine.length > length:
                break
    return(curLine)

In [5]:
start = []
end = []
riverNameColumn = unqID
inL['startPer'] = 0
inL['endPer'] = 0
for idx, row in inL.iterrows():
    x = row['geometry']
    if x.geom_type == 'MultiLineString':
        x = ops.linemerge(x)
    x = ogr.CreateGeometryFromWkt(x.wkt)
    if x.GetGeometryType() == 2:
        totalLength = x.Length()
        x.Segmentize(round(totalLength/100))    
        startLength = totalLength * 0.4
        endLength   = totalLength * 0.2
        allCoords = list(x.GetPoints())

        startLen = generate_subset_line(allCoords, startLength)
        allCoords.reverse()
        endLen   = generate_subset_line(allCoords, endLength)

        start.append([row[riverNameColumn], startLen])
        end.append([row[riverNameColumn], endLen])
        
        inL['startPer'].loc[idx] = startLen.length/totalLength
        inL['endPer'].loc[idx] = endLen.length/totalLength
        
        print("%s - %s - %s - %s" % (row[riverNameColumn], x.Length(), startLen.length, endLen.length))
    else:
        break
startDF = pd.DataFrame(start, columns=["NAME", "GEOM"])
endDF = pd.DataFrame(end,  columns=["NAME", "GEOM"])


26005000 - 2366.669831125254 - 956.2302347980929 - 478.11511739903466
20000000 - 2057.4452628933905 - 842.1621346999596 - 421.4315999874266
21000000 - 2708.4877945779704 - 1092.3617720710258 - 565.6441078773947
34000000 - 368.06003869097054 - 150.56805723844582 - 74.848558805527


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


In [6]:
from shapely.wkt import loads

startDF_geom =startDF['GEOM']
startDF = startDF.drop(['GEOM'], axis=1)
startGDF = gpd.GeoDataFrame(startDF, geometry=startDF_geom, crs=inL.crs)

endDF_geom =endDF['GEOM']
endDF = endDF.drop(['GEOM'], axis=1)
endGDF = gpd.GeoDataFrame(endDF, geometry=endDF_geom, crs=inL.crs)



In [7]:
'''startDF.to_csv(os.path.join(baseFolder,"canal_starts.csv"))
endDF.to_csv(os.path.join(baseFolder,"canal_ends.csv"))
inL.to_csv(os.path.join(baseFolder,"canals_with_length.csv"))
'''
startGDF.to_file(os.path.join(baseFolder,"canal_starts.shp"))
endGDF.to_file(os.path.join(baseFolder,"canal_ends.shp"))
inL.to_file(os.path.join(baseFolder,"canal_with_length.shp"))

# Buffering and clipping
For each start and end segment, the feature needs to be buffered, but the buffer needs to be limited to not cover anything covered by other canals

In [8]:
bufferDist = 1000 # metres
bufferSet = startDF # endDF

for bufferSet, outName in [[startDF, 'canal_start_buffered_clipped'],[endDF, 'canal_end_buffered_clipped']]:
    bufferSet['GEOM2'] = ''
    outFile = os.path.join(baseFolder,"%s.shp" % outName)
    for idx, row in bufferSet.iterrows():
        curName = row['NAME']
        curShape = row['geometry']        
        bufferShape = curShape.buffer(bufferDist,cap_style=2, join_style=2, mitre_limit=1.0)

        #Select canals that intersect current buffered shape
        intersecting_canals = inL[inL.intersects(bufferShape)]
        if intersecting_canals.shape[0] > 1:
            intersecting_canals = intersecting_canals[intersecting_canals[riverNameColumn] != curName]        
            for selCanal in intersecting_canals['geometry']:
                xx = shapely.ops.split(bufferShape, selCanal)
                maxArea = 0            
                selArea = None
                for x in xx:
                    if x.area > maxArea:
                        maxArea = x.area
                        selArea = x
                bufferShape = selArea
        bufferSet['GEOM2'].iloc[idx] = bufferShape
    bufferSet_geom = bufferSet['GEOM2']
    bufferSetGDF = bufferSet.drop('GEOM2', axis=1)
    try:
        bufferSetGDF = bufferSetGDF.drop('GEOM', axis=1)
    except:
        pass
    bufferSetGDF = gpd.GeoDataFrame(bufferSetGDF, geometry=bufferSet_geom, crs=inL.crs)
    bufferSetGDF.to_file(outFile)
    #bufferSet.to_csv(outFile)