# Larger Training Data Sample Size

In [1]:
import geopandas as gp
import os
import numpy as np
from laspy.file import File
import pandas as pd
from shapely.geometry import Point
import matplotlib.pyplot as plt
import pickle

In [3]:
# Gather the LAS tile filenames from the training-data folder,
# skipping hidden entries (names that begin with a dot).
las_files = list(
    filter(
        lambda name: name[0] != '.',
        os.listdir('../training_data/las_files/'),
    )
)

las_files

['MC_01951645.las',
 'MC_01951650.las',
 'MC_01951640.las',
 'MC_02051640.las',
 'MC_02051650.las',
 'MC_02051645.las',
 'MC_02001645.las',
 'MC_02001650.las',
 'MC_02001640.las']

In [4]:
def las_to_geodf(filename, crs=None):
    '''Convert an .las file to a Geopandas GeoDataFrame object.

    Parameters
    ----------
    filename : str
        Name of the .las file inside ``../training_data/las_files/``.
    crs : optional
        Passed unchanged to ``gp.GeoDataFrame`` as the frame's CRS.

    Returns
    -------
    geopandas.GeoDataFrame or None
        One row per point with the columns ``x``, ``y``, ``z`` (each multiplied
        by ``usft_to_meter``), ``intensity``, ``classification`` and a
        ``geometry`` column. ``None`` is returned when the file cannot be
        opened, so callers must be prepared to skip failed files.
    '''

    print('Trying file', filename, '\n')
    # US survey foot -> metre conversion factor (1200 / 3937).
    usft_to_meter = 0.30480060960121924
    base_path = '../training_data/las_files/'

    #Read LAS file
    try:
        inFile = File(base_path + filename)
        print('inFile accepted.')
    except Exception as err:
        # Deliberately best-effort: an unreadable tile is reported and skipped.
        # `Exception` (not a bare except) lets Ctrl-C / SystemExit through,
        # and the reason is printed so the failure is not silent.
        print(filename, " failed.")
        print('Reason:', repr(err))
        return None

    # The handle is closed in `finally` so it is released even if reading the
    # point records or building the frame raises.
    try:
        #Import LAS into numpy array (one row per point after the transpose)
        lidar_points = np.array((inFile.x*usft_to_meter,
                                 inFile.y*usft_to_meter,
                                 inFile.z*usft_to_meter,
                                 inFile.intensity,
                                 inFile.classification)).transpose()
        print('information accepted.')

        #Transform to geopandas GeoDataFrame
        # NOTE(review): the geometry is built from the unscaled inFile.x / inFile.y
        # while the x / y columns are scaled to metres - presumably so the points
        # stay in the native units of `crs`; confirm this is intended.
        lidar_geodf = gp.GeoDataFrame(lidar_points,
                                      geometry=gp.points_from_xy(inFile.x, inFile.y),
                                      crs=crs)
        lidar_geodf = lidar_geodf.rename(
            columns={0: 'x', 1: 'y', 2: 'z', 3: 'intensity', 4: 'classification'})
        print('points accepted.')
    finally:
        inFile.close()

    print(filename, " successful.\n")

    return lidar_geodf

In [5]:
def merge_geodfs(geodfs):
    '''Stack several (Geo)DataFrames into one frame with a fresh 0..n-1 index.

    Parameters
    ----------
    geodfs : list
        DataFrames / GeoDataFrames to stack, in order. ``None`` entries
        (e.g. files that failed to load) are skipped.

    Returns
    -------
    A new frame holding the rows of every non-None input, re-indexed from 0.

    Raises
    ------
    ValueError
        If ``geodfs`` is empty or contains only ``None``.
    '''
    loaded = [gdf for gdf in geodfs if gdf is not None]
    if not loaded:
        raise ValueError('merge_geodfs needs at least one non-None dataframe.')
    # One concat instead of repeated DataFrame.append: append was removed in
    # pandas 2.0, and appending in a loop re-copies the accumulated rows each time.
    return pd.concat(loaded, ignore_index=True)

In [None]:
# Convert all las files to geodataframes (entries are None for files that failed).
# NOTE(review): `crs_input` is not defined in any cell visible here - it must be
# assigned before this cell runs.
lidar_geodfs = [las_to_geodf(file, crs=crs_input) for file in las_files]

# Show a compact summary rather than the raw list of frames:
# row count per file, or None where the load failed.
{file: (None if gdf is None else len(gdf)) for file, gdf in zip(las_files, lidar_geodfs)}

Expect these files to fail:  
MC_02051640.las  
MC_02001645.las  
MC_02001640.las  

In [None]:
# Create a single geodataframe from the files that loaded successfully.
# Failed loads are represented by None and are dropped here so they never reach the merge.
lidar_geodf = merge_geodfs([gdf for gdf in lidar_geodfs if gdf is not None])
lidar_geodf

Resulting dataframe is:  
64464045 rows × 7 columns