# Thickness
* This Notebook is used to calculate several thickness-related propeties from the WellCad data
Main parts include:
- A cumulative thickness per geochem group function/algorithm
- XXXXXXXXXXXXXXXXXXXXXXX

## Importing Libraries and Data

In [41]:
# Importing libraries
import pandas as pd
import re

# Reading .csv
data = pd.read_csv(r'Data/CSV/1-AV-001-PR.csv', sep=';', decimal=',') ## Dataset1
#data = pd.read_csv(r'Data/CSV/1-CS-002-PR.csv', sep=';', decimal=',') ##Dataset2
data.head()

Unnamed: 0,Well,Unit,Facies,Facies Thickness,Flow thickness,Core Proportion
0,1-AV-001-PR,Paranapanema,Simple lava (basic | U. crust),11.2,31.36,0.589286
1,1-AV-001-PR,Paranapanema,Simple lava (basic | massive interior),18.48,,
2,1-AV-001-PR,Paranapanema,Simple lava (basic | L. crust),1.68,,
3,1-AV-001-PR,Paranapanema,Siliciclastics,1.23,1.23,
4,1-AV-001-PR,Paranapanema,Simple lava (basic | U. crust),29.81,88.48,0.643196


## Cumulative thickness of lava flows per geochem group
-----------------------------------------------------------------------------------------------------------------   
* This code runs throught the WellCad data and calculates the following:
    * Cumulative thickness of lavas per geochem group
    * Cumulative thickness of sedimentary beds per geochem group
    * Cumulative thickness of volcaniclastic beds per geochem group
-----------------------------------------------------------------------------------------------------------------   

In [39]:
## Cumulative thickness function definition v0.1
## PROBLEM #1 - Some siliciclastic units have "flow thickness" value in them while other dont. Needs more standards!!
def cum_thickness (data, cumul_thick, thick_dict):
    'Calculates the cumulative thickness of lava flows per geochemical group'
    'Input: data - pandas df derived from wellcad int; thick_dict - empty dict to append unit:cul_thick pair; cumul_thick - empty list to append data to' 
    'Output: a dictionary contaning (unit:cumulative thickness)' 
    'Output: a list with the same number of elements as input data containing cumulative flow thickness values. Used as a new row for input dataframe'
    
    # defining variables
    thick_counter = 0
    
    # Algorithm:
    for i in range(len(data)):
        if i == 0: # Setting up first iteration
            if data['Facies'].iloc[i] == 'Simple lava (basic | U. crust)' or data['Facies'].iloc[i] == 'Compound lava (basic)' \
            or data['Facies'].iloc[i] == 'Simple lava (basic | rubbly flow top)': # If we're dealing with a new lava flow, grab thickness
                thick_counter += round(data['Flow thickness'].iloc[i])
                thick_dict[data['Unit'].iloc[i]] = round(thick_counter)
                cumul_thick.append(thick_counter)
            else:
                cumul_thick.append('0')
        else: # After first iteration
            if data['Unit'].iloc[i] == data['Unit'].iloc[i-1]: # If unit is the same as before, keep going.
                if data['Facies'].iloc[i] == 'Simple lava (basic | U. crust)' or data['Facies'].iloc[i] == 'Compound lava (basic)' \
                or data['Facies'].iloc[i] == 'Simple lava (basic | rubbly flow top)': # Look for facies with flow thickness data
                    thick_counter += round(data['Flow thickness'].iloc[i])
                    thick_dict[data['Unit'].iloc[i]] = round(thick_counter)
                    cumul_thick.append(thick_counter)
                else: # If no row with flow thickness data is being iterated
                    if re.search(r'\slava\s', data['Facies'].iloc[i]) != None: # If facies contain the word 'lava':
                        cumul_thick.append(thick_counter)
                    else:
                        cumul_thick.append('-')
            else: # If unit changes
                if data['Unit'].iloc[i] in thick_dict:  # If unit has already been added as a key to the thick_dict
                    thick_dict[data['Unit'].iloc[i-1]] = round(thick_counter)
                    thick_counter = round(thick_dict[data['Unit'].iloc[i]])
                    cumul_thick.append(thick_counter)
                else: # If a new unit is found
                    thick_dict[data['Unit'].iloc[i-1]] = round(thick_counter)
                    thick_counter = round(data['Flow thickness'].iloc[i])
                    thick_dict[data['Unit'].iloc[i]] = round(thick_counter)
                    cumul_thick.append(thick_counter)
                               
  

In [42]:
# Testing / Debugging
cs_pr_dict = {}
cs_pr_list = []

cum_thickness(data, cs_pr_list, cs_pr_dict)
print(cs_pr_dict)
print(cs_pr_list)

{'Paranapanema': 654, 'Pitanga': 754, 'Vale do Sol': 117}
[31, 31, 31, '-', 119, 119, 119, '-', 132, 132, 132, '-', 166, 166, 166, 168, '-', 176, 184, 184, 184, 196, 211, 211, 211, '-', 237, 237, 237, '-', 265, 265, 265, '-', 273, 273, 273, '-', 282, 282, 282, 306, 306, 306, '-', 322, 332, 332, 332, 332, 345, 345, 345, '-', 365, 365, 365, '-', 389, 389, 389, 405, '-', 455, 455, 455, '-', 459, 474, 474, 482, '-', 495, 495, 495, 542, 542, 542, '-', 565, 565, 565, 21, 21, 21, 70, 115, 115, 115, 128, 565, 565, 565, '-', 580, 636, 636, 636, 643, 654, 654, 654, '-', 128, '-', 138, 175, 175, 175, '-', 184, 193, 193, 193, 197, 203, 217, 217, 217, '-', 233, 233, 233, '-', 264, 264, 264, 297, 297, 297, '-', 303, 323, 323, 323, 339, 339, 339, 373, '-', 395, 395, 395, 414, 414, 414, 427, 427, 427, '-', 478, 478, 478, 494, 521, 521, 521, '-', 556, 556, 556, 591, 591, 591, '-', 607, 624, 624, 624, 648, 648, 648, '-', 669, 669, 669, 718, 729, 729, 729, '-', 754, 754, 754, 14, 51, 51, 51, 63, 99, 99, 

-------------------------------------------------------------------------
# SEPARATION CELL
-------------------------------------------------------------------------

In [None]:
## Cumulative thickness of lavas per geochem v0.2
# Output:
#   --> thick_dict (dict) containg the thickness of lava flow per geochem group
#   --> A cumulative thickness row is added to the data table.
## PROBLEMS --> Reallyyy verbose. Let's try to generalize this a bit by using regex expressions.


# Setting up variables:
thick_counter = 0
cumul_thick = []
thick_dict = {}

# Algorithm:
for i in range(len(data)):
    if i == 0: # Setting up first iteration
        if data['Facies'].iloc[i] == 'Simple lava (basic | U. crust)' or data['Facies'].iloc[i] == 'Compound lava (basic)' \
        or data['Facies'].iloc[i] == 'Simple lava (basic | rubbly flow top)': # If we're dealing with a new lava flow, grab thickness
            thick_counter += round(data['Flow thickness'].iloc[i])
            thick_dict[data['Unit'].iloc[i]] = round(thick_counter)
            cumul_thick.append(thick_counter)
        else:
            cumul_thick.append('0')
    else: # After first iteration
        if data['Unit'].iloc[i] == data['Unit'].iloc[i-1]: # If unit is the same as before, keep going.
            if data['Facies'].iloc[i] == 'Simple lava (basic | U. crust)' or data['Facies'].iloc[i] == 'Compound lava (basic)' \
            or data['Facies'].iloc[i] == 'Simple lava (basic | rubbly flow top)': # Look for facies with flow thickness data
                thick_counter += round(data['Flow thickness'].iloc[i])
                thick_dict[data['Unit'].iloc[i]] = round(thick_counter)
                cumul_thick.append(thick_counter)
            else: # If no row with flow thickness data is being iterated
                if re.search(r'\slava\s', data['Facies'].iloc[i]) != None: # If facies contain the word 'lava':
                    cumul_thick.append(thick_counter)
                else:
                    cumul_thick.append('-')
        else: # If unit changes
            if data['Unit'].iloc[i] in thick_dict:  # If unit has already been added as a key to the thick_dict
                thick_dict[data['Unit'].iloc[i-1]] = round(thick_counter)
                thick_counter = round(thick_dict[data['Unit'].iloc[i]])
                cumul_thick.append(thick_counter)
            else: # If a new unit is found
                thick_dict[data['Unit'].iloc[i-1]] = round(thick_counter)
                thick_counter = round(data['Flow thickness'].iloc[i])
                thick_dict[data['Unit'].iloc[i]] = round(thick_counter)
                cumul_thick.append(thick_counter)

# Checking
print(thick_dict)
data['Cumul thick'] = cumul_thick
data.head(84)

In [None]:
# Regex debugging 
if re.search(r'\sU.\s', data['Facies'].iloc[0]) != None:
    print('Working!')
else:
    print('Not working :(')

In [None]:
x = re.split(r'\s', data['Facies'].iloc[0])
print(re.search(r'U.', data['Facies'].iloc[0]))