In [1]:
import os, sys, importlib

import geopandas as gpd
import pandas as pd

import shapely.ops
from shapely.geometry import Point, LineString
from osgeo import ogr


In [2]:
#This points to the input lines file - points to sample data right now
inLines = os.path.join(os.getcwd(), "SampleData","Canals_sample","Canals_edit.shp")
#Enter the folder where you want output created
baseFolder = os.getcwd()
unqID = "UID"
inL = gpd.read_file(inLines)
#inL = inL.to_crs({'init':'epsg:3857'}) #need to project to a CRS with metres 
inL.head()

Unnamed: 0,CHANNEL_NA,CHANNEL_TY,Head_X,Head_y,Tail_X,Tail_Y,LENGTH,Shape_Leng,UID,geometry
0,Hakra Right Disty,D,73.064017,29.243012,72.327767,29.198326,76047.642965,76047.642965,7000000,"LINESTRING (311859.252 3236462.930, 311601.392..."
1,3L/Hakra Right Minor,M,72.42337,29.218352,72.400381,29.178819,5058.059486,5058.059486,7003000,"LINESTRING (249521.108 3234927.959, 249429.216..."
2,1R/Hakra Right Minor,M,72.403612,29.220909,72.319315,29.232406,9367.445783,9367.445783,7004000,"LINESTRING (247606.028 3235253.798, 247494.453..."
3,1R/1R/Hakra Right Sub Minor,SM,72.388077,29.231403,72.365456,29.252722,4054.301643,4054.301643,7004010,"LINESTRING (246121.259 3236450.660, 246068.893..."
4,4L/Hakra Right Minor,M,72.364962,29.200598,72.322297,29.089492,13764.598028,13764.598028,7005000,"LINESTRING (243796.922 3233086.065, 243731.254..."


In [3]:
def generate_subset_line(allCoords, length):
    #Generate the 
    allPts = []
    for cIdx in range(0, len(allCoords)):
        cPoint = Point(allCoords[cIdx])
        allPts.append(cPoint)
        if cIdx > 0:
            curLine = LineString(allPts)
            if curLine.length > length:
                break
    return(curLine)

In [4]:
start = []
end = []
riverNameColumn = unqID
inL['startPer'] = 0
inL['endPer'] = 0
for idx, row in inL.iterrows():
    x = row['geometry']
    x = ogr.CreateGeometryFromWkt(x.wkt)
    if x.GetGeometryType() == 2:
        totalLength = x.Length()
        x.Segmentize(round(totalLength/100))    
        startLength = totalLength * 0.4
        endLength   = totalLength * 0.2
        allCoords = list(x.GetPoints())

        startLen = generate_subset_line(allCoords, startLength)
        allCoords.reverse()
        endLen   = generate_subset_line(allCoords, endLength)

        start.append([row[riverNameColumn], startLen])
        end.append([row[riverNameColumn], endLen])
        
        inL['startPer'].loc[idx] = startLen.length/totalLength
        inL['endPer'].loc[idx] = endLen.length/totalLength
        
        print("%s - %s - %s - %s" % (row[riverNameColumn], x.Length(), startLen.length, endLen.length))
    else:
        FUBAR='FOFF'
startDF = pd.DataFrame(start, columns=["NAME", "GEOM"])
endDF = pd.DataFrame(end,  columns=["NAME", "GEOM"])


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


7000000 - 76047.6429654982 - 30737.128032557655 - 15468.08469248922
7003000 - 5058.059486412522 - 2060.2183924886344 - 1013.4100968515376
7004000 - 9367.445782842664 - 3769.3363978247485 - 1935.6047828539624
7004010 - 4054.3016427293423 - 1654.1383030962536 - 821.7251657410249
7005000 - 13764.598028022854 - 5538.767070709772 - 2824.277024757403
7005010 - 2583.1891041002655 - 1035.2865802099236 - 531.4522669046517
7002000 - 12650.000335465495 - 5072.000143702569 - 2650.175915761297


In [5]:
from shapely.wkt import loads

startDF_geom =startDF['GEOM']
startDF = startDF.drop(['GEOM'], axis=1)
startGDF = gpd.GeoDataFrame(startDF, geometry=startDF_geom, crs=inL.crs)

endDF_geom =endDF['GEOM']
endDF = endDF.drop(['GEOM'], axis=1)
endGDF = gpd.GeoDataFrame(endDF, geometry=endDF_geom, crs=inL.crs)



In [6]:
startDF.to_csv(os.path.join(baseFolder, "SampleData","Canals_sample","canal_starts.csv"))
endDF.to_csv(os.path.join(baseFolder, "SampleData","Canals_sample","canal_ends.csv"))
inL.to_csv(os.path.join(baseFolder, "SampleData","Canals_sample","canals_with_length.csv"))

startGDF.to_file(os.path.join(baseFolder, "SampleData","Canals_sample","canal_starts.shp"))
endGDF.to_file(os.path.join(baseFolder, "SampleData","Canals_sample","canal_ends.shp"))
inL.to_file(os.path.join(baseFolder, "SampleData","Canals_sample","canal_with_length.shp"))

# Buffering and clipping
For each start and end segment, the feature needs to be buffered, but the buffer needs to be limited to not cover anything covered by other canals

In [7]:
bufferDist = 1000 # metres
bufferSet = startDF # endDF

for bufferSet, outName in [[startDF, 'canal_start_buffered_clipped'],[endDF, 'canal_end_buffered_clipped']]:
    bufferSet['GEOM2'] = ''
    outFile = os.path.join(baseFolder, "SampleData","Canals_sample","%s.shp" % outName)
    for idx, row in bufferSet.iterrows():
        curName = row['NAME']
        curShape = row['geometry']        
        bufferShape = curShape.buffer(bufferDist,cap_style=2, join_style=2, mitre_limit=1.0)

        #Select canals that intersect current buffered shape
        intersecting_canals = inL[inL.intersects(bufferShape)]
        if intersecting_canals.shape[0] > 1:
            intersecting_canals = intersecting_canals[intersecting_canals[riverNameColumn] != curName]        
            for selCanal in intersecting_canals['geometry']:
                xx = shapely.ops.split(bufferShape, selCanal)
                maxArea = 0            
                selArea = None
                for x in xx:
                    if x.area > maxArea:
                        maxArea = x.area
                        selArea = x
                bufferShape = selArea
        bufferSet['GEOM2'].iloc[idx] = bufferShape
    bufferSet_geom = bufferSet['GEOM2']
    bufferSetGDF = bufferSet.drop('GEOM2', axis=1)
    try:
        bufferSetGDF = bufferSetGDF.drop('GEOM', axis=1)
    except:
        pass
    bufferSetGDF = gpd.GeoDataFrame(bufferSetGDF, geometry=bufferSet_geom, crs=inL.crs)
    bufferSetGDF.to_file(outFile)
    #bufferSet.to_csv(outFile)