In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [0]:
from pyspark.sql.types import StructType

class WITS_DATA_PREP(object):
  """Select, filter and unit-convert WITS drilling records for one well.

  The class reads from ``SOURCE_TABLE`` through the global ``spark`` session.
  ``spark`` is not defined in this notebook; it is expected to be supplied by
  the notebook runtime. The frame produced by the latest ``prep_data`` call is
  kept on ``self.df`` so that ``write_to_table`` can persist it.
  """

  # Factor the depth columns are multiplied by (feet -> metres).
  FT_TO_METER = 0.3048
  # Table the records are read from.
  SOURCE_TABLE = "03_corva.corva_drilling_wits_silver"
  # Columns returned by prep_data, in output order.
  OUTPUT_COLUMNS = (
    "RecordDateTime", "bit_depth", "diff_press", "hole_depth", "rop",
    "rotary_rpm", "rotary_torque", "true_vertical_depth", "weight_on_bit",
    "state",
  )
  # Columns that get multiplied by FT_TO_METER.
  DEPTH_COLUMNS = ("bit_depth", "hole_depth", "true_vertical_depth")

  def __init__(self):
    # Start with an empty, column-less frame until prep_data is called.
    self.df = spark.createDataFrame(spark.sparkContext.emptyRDD(),StructType([]))

  @staticmethod
  def _unquote_well_name(well_name):
    """Return ``well_name`` without one pair of surrounding SQL quotes.

    Earlier versions of ``prep_data`` pasted the name into the SQL text, so
    callers pass it already quoted (``"'BdC-29(h)'"``). Those callers keep
    working: a matching pair of leading/trailing ``'`` or ``"`` is removed.
    Any other value is returned unchanged.
    """
    if isinstance(well_name, str):
      trimmed = well_name.strip()
      if len(trimmed) >= 2 and trimmed[0] == trimmed[-1] and trimmed[0] in "'\"":
        return trimmed[1:-1]
    return well_name

  def prep_data(self, well_name, start_depth, end_depth):
    """Build the prepared frame for one well and remember it on ``self.df``.

    Steps: keep rows of the requested well; keep rows where
    ``bit_depth == hole_depth`` or ``state == 'In Slips'``; multiply the depth
    columns by ``FT_TO_METER``; keep rows whose converted ``bit_depth`` lies
    strictly between ``start_depth`` and ``end_depth``; order by
    ``RecordDateTime`` ascending.

    Args:
      well_name: Value compared against the ``WellName`` column. May be given
        bare or wrapped in one pair of quotes (legacy call style).
      start_depth: Exclusive lower bound on the converted ``bit_depth``.
      end_depth: Exclusive upper bound on the converted ``bit_depth``.

    Returns:
      The prepared Spark DataFrame (also stored on ``self.df``).
    """
    name = self._unquote_well_name(well_name)

    # The query text is constant: the well name is compared as a value below
    # instead of being formatted into the SQL string.
    source = spark.sql("SELECT WellName, {} FROM {}".format(
      ", ".join(self.OUTPUT_COLUMNS), self.SOURCE_TABLE))
    df = source.filter(source["WellName"] == name).select(*self.OUTPUT_COLUMNS)

    # This comparison runs on the raw depth values, before conversion.
    df = df.filter("bit_depth == hole_depth or state == 'In Slips'")

    for depth_col in self.DEPTH_COLUMNS:
      df = df.withColumn(depth_col, self.FT_TO_METER * df[depth_col])

    # The bounds are applied to the converted bit_depth, as Column
    # comparisons rather than formatted SQL text.
    df = df.filter((df["bit_depth"] > start_depth) & (df["bit_depth"] < end_depth))

    # Sort as the final step so the returned frame is ordered by time.
    df = df.orderBy('RecordDateTime', ascending=True)

    self.df = df
    return df

  def write_to_table(self, delta_path, table_name):
    """Save ``self.df`` in Delta format and register it as ``sandbox.<table_name>``.

    Args:
      delta_path: Location the Delta files are written to and the table points at.
      table_name: Name of the table created in the ``sandbox`` schema.

    NOTE(review): no save mode is set and the statement is a plain
    ``CREATE TABLE``, so re-running with an existing path or table is expected
    to fail - confirm against the Spark/Delta defaults in use.
    """
    print("Writing to Delta Lake")
    self.df.write.format("delta").save(delta_path)
    print("Creating Table")
    spark.sql("CREATE TABLE sandbox.{} USING DELTA LOCATION '{}'".format(table_name, delta_path))

In [0]:
# Well BdC-29(h): keep records whose converted bit depth lies strictly
# between 3000 and 6000. The well name is passed with its own single quotes.
data_prep = WITS_DATA_PREP()
df_29 = data_prep.prep_data("'BdC-29(h)'", 3000, 6000)
# Uncomment to persist the prepared frame as a Delta table:
#data_prep.write_to_table(delta_path = '/mnt/delta/BdC_29_3000_6000', table_name = 'BdC_29_3000_6000')

In [0]:
# Render the prepared BdC-29(h) frame; the rows below are this cell's output.
# NOTE(review): `display` is not defined in this notebook - presumably the
# notebook runtime's builtin; confirm.
display(df_29)

RecordDateTime,bit_depth,diff_press,hole_depth,rop,rotary_rpm,rotary_torque,true_vertical_depth,weight_on_bit,state
2021-05-23T00:34:28.000+0000,3000.0232608,609.3416,3000.0232608,134.9543,71.45271,8.34201,2999.4727920000005,15.05646,Rotary Drilling
2021-05-23T00:34:29.000+0000,3000.0232608,608.9204,3000.0232608,134.9543,71.45271,8.34201,2999.4727920000005,15.05646,Rotary Drilling
2021-05-23T00:34:30.000+0000,3000.0430728,608.9204,3000.0430728,132.9851,69.42567,8.5476,2999.49108,15.28141,Rotary Drilling
2021-05-23T00:34:31.000+0000,3000.0430728,616.0997,3000.0430728,132.9851,69.42567,8.5476,2999.49108,15.28141,Rotary Drilling
2021-05-23T00:34:32.000+0000,3000.0601416000004,616.0997,3000.0601416000004,130.4077,69.02027,8.37285,2999.509368,14.77845,Rotary Drilling
2021-05-23T00:34:33.000+0000,3000.0601416000004,621.9483,3000.0601416000004,130.4077,69.02027,8.37285,2999.509368,14.77845,Rotary Drilling
2021-05-23T00:34:34.000+0000,3000.0726384,621.9483,3000.0726384,126.3451,72.33109,7.55561,2999.5215600000006,14.52055,Rotary Drilling
2021-05-23T00:34:35.000+0000,3000.0726384,624.021,3000.0726384,126.3451,72.33109,7.55561,2999.5215600000006,14.52055,Rotary Drilling
2021-05-23T00:34:36.000+0000,3000.1012896,624.021,3000.1012896,127.9926,72.43244,8.05418,2999.5489920000005,14.56927,Rotary Drilling
2021-05-23T00:34:37.000+0000,3000.1012896,625.8753,3000.1012896,127.9926,72.43244,8.05418,2999.5489920000005,14.56927,Rotary Drilling


In [0]:
# Size of the prepared BdC-29(h) frame: number of rows, then number of columns.
print(df_29.count(), len(df_29.columns))

In [0]:
# Well BdC-45(h) (Aislacion): same depth window as the BdC-29(h) cell.
# The well name is passed with its own single quotes.
well_name = "'BdC-45(h) (Aislacion)'"
data_prep = WITS_DATA_PREP()
df_45 = data_prep.prep_data(well_name, 3000, 6000)
# Uncomment to persist the prepared frame as a Delta table:
#data_prep.write_to_table(delta_path = '/mnt/delta/BdC_45_3000_6000', table_name = 'BdC_45_3000_6000')

In [0]:
# Render the prepared BdC-45(h) frame; the rows below are this cell's output.
# NOTE(review): `display` is not defined in this notebook - presumably the
# notebook runtime's builtin; confirm.
display(df_45)

RecordDateTime,bit_depth,diff_press,hole_depth,rop,rotary_rpm,rotary_torque,true_vertical_depth,weight_on_bit,state
2020-08-24T13:03:05.000+0000,3347.5452480000004,484.2154,3347.5452480000004,43.26626,59.86487,10.31433,3347.5452480000004,13.43368,
2020-08-24T13:03:10.000+0000,3347.6244960000004,455.6682,3347.6244960000004,45.92374,59.93243,10.31433,3347.6244960000004,12.17676,
2020-08-24T13:03:15.000+0000,3347.670216,425.1949,3347.670216,45.92374,59.93243,9.15925,3347.670216,11.13266,
2020-08-24T13:03:20.000+0000,3347.734224,423.1349,3347.734224,45.92374,59.89864,9.9734,3347.734224,9.30727,
2020-08-24T13:03:25.000+0000,3347.798232,407.8146,3347.798232,45.92374,59.7973,8.75217,3347.798232,10.85267,
2020-08-24T13:03:30.000+0000,3347.840904,413.4442,3347.840904,44.30314,60.16892,9.16943,3347.840904,11.09194,
2020-08-24T13:03:35.000+0000,3347.9232,413.6254,3347.9232,44.30314,60.0,8.64022,3347.9232,11.76521,
2020-08-24T13:03:40.000+0000,3347.9719680000003,423.5367,3347.9719680000003,44.30314,60.23649,9.65283,3347.9719680000003,12.78582,
2020-08-24T13:03:45.000+0000,3348.042072,449.6354,3348.042072,48.56155,60.0,9.98358,3348.042072,11.51374,
2020-08-24T13:03:50.000+0000,3348.099984,456.7662,3348.099984,48.56155,60.03379,11.27605,3348.099984,13.58975,


In [0]:
# Size of the prepared BdC-45(h) frame: number of rows, then number of columns.
print(df_45.count(), len(df_45.columns))