In [1]:
import pandas as pd
import numpy as np
import random
import math
import scipy as sp
import scipy.stats as stats
import scipy.signal as correlate
from scipy.interpolate import interp1d
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as plticker
from matplotlib import cm
from sklearn.linear_model import LinearRegression

In [3]:
Acounts = pd.read_csv('./CLPollenTotal/Data Files/m-counts-raw.csv')
rcounts = pd.read_csv('./CLPollenTotal/Data Files/r-counts-raw.csv')

In [4]:
#replace NaNs with 0s
Acounts = Acounts.fillna(0)
rcounts = rcounts.fillna(0)

#remove columns of notes such that dfs only contain data and have the same indices
a = Acounts.drop('Unnamed: 0', axis=1)
b = a.drop('Unnamed: 1', axis=1)
Acounts = b.drop('Unnamed: 2', axis=1)

##pull out columns##
Acol_vals = Acounts.iloc[0]
rcol_vals = rcounts.iloc[0]

##use genus labels from row 1 for column headers
Acounts = Acounts.rename(columns=Acounts.iloc[0])
Acounts = Acounts.drop(0, axis=0)

rcounts = rcounts.rename(columns=rcounts.iloc[0])
rcounts = rcounts.drop(0, axis=0)

##Standardize column names for depth and pollen sums
Acounts = Acounts.rename(columns={"Roger's Depth(Original Field Depths)": "Depth"})
Acounts_clean = Acounts.rename(columns={"Sum of all pollen": "Total"})

rcounts_clean = rcounts.rename(columns={"Total Pollen and Spores": "Total"})

##make sure type(data) == str, this causes problems graphing later
#Acounts_clean["Depth"] = Acounts_clean["Depth"].apply(float)
#Acounts_clean["Total"] = Acounts_clean["Total"].apply(float)

#rcounts_clean["Depth"] = rcounts_clean["Depth"].apply(float)
#rcounts_clean["Total"] = rcounts_clean["Total"].apply(float)

#Drop depths
rcounts_clean = rcounts_clean.drop("Depth", axis = 1)
Acounts_clean = Acounts_clean.drop("Depth", axis = 1)

In [5]:
Acounts_clean.tail()

Unnamed: 0,Indeterminate Conifer,Abies,Picea,Pinus,Pinus (white),Ephedra,Pseudotsuga,TCT,Juniperus/Cupressus,Tsuga mertensiana,...,Unknown E,Unknown F,Indeterminate,Other unknowns,Zea,Lycopodium,APFAC,Total,0.0,0.0.1
20,12,17,11,122,0,0,2,181,0,0,...,0,0,25,7,121,185,0,452,0.0,0.0
21,27,17,10,154,0,0,6,141,0,6,...,0,0,38,5,169,256,0,470,0.0,0.0
22,17,8,4,104,0,0,1,197,0,6,...,0,0,52,5,121,279,0,475,0.0,0.0
23,13,10,20,156,0,0,2,115,0,1,...,0,0,36,3,150,199,0,450,0.0,0.0
24,239,18,88,182,0,0,6,34,0,15,...,0,0,8,4,0,150,0,616,0.0,0.0


In [6]:
rcounts_clean.tail()

Unnamed: 0,Abies,Picea,Pinus,Pinus (white),Ephedra,Pseudotsuga,TCT,Juniperus/Cupressus,Tsuga mertensiana,Tsuga heterophylla,...,Unknown C,Unknown D,Unknown E,Unknown F,Indeterminate,Other unknowns,Zea,Lycopodium,APFAC,Total
38,20,0,285,0,0,0,7,69,0,4,...,0,0,0,0,12,11,448,294,2085,507
39,16,2,224,0,0,3,11,84,5,4,...,0,0,0,0,7,7,325,255,2085,430
40,16,2,246,0,0,3,10,111,3,3,...,0,0,0,0,10,9,255,343,2085,506
41,5,2,312,0,0,4,0,88,4,2,...,0,0,0,0,15,13,482,389,2085,557
42,21,5,480,0,0,4,16,201,14,5,...,0,0,0,0,16,19,944,749,2085,977


# Return columns where count == 0:

In [7]:
#sum the values for each column in Acounts
Acol_sums = Acounts_clean.sum(axis = 0)

#make a list of the types present, the indices of the df 'Acounts_clean'
Atypes = list(Acol_sums.index)

#print a list of the sums, Acol_sums
Acol_sum_list = list(Acol_sums)
#print(Acol_sum_list)

###combine the list of types and sums into one dict using dictionary comprehension###   
#initialize lists 
test_keys = Atypes
test_values = Acol_sum_list
  
# using dictionary comprehension to convert lists to dictionary 
Ares = {test_keys[i]: test_values[i] for i in range(len(test_keys))} 
  
# Printing resultant dictionary  
#print ("Resultant dictionary is : " +  str(Ares)) 

In [8]:
#repeat the process for rcounts:
rcol_sums = rcounts_clean.sum(axis = 0)
rtypes = list(rcol_sums.index)

#print a list of the sums, Acol_sums
rcol_sum_list = list(rcol_sums)
#print(rcol_sum_list)

###combine the list of types and sums into one dict using dictionary comprehension###   
#initialize lists 
rtest_keys = rtypes
rtest_values = rcol_sum_list
  
# using dictionary comprehension to convert lists to dictionary 
Rres = {rtest_keys[i]: rtest_values[i] for i in range(len(rtest_keys))} 
  
# Printing resultant dictionary  
#print ("Resultant dictionary is : " +  str(Rres)) 

# Absent Types (A counts)

In [9]:
###Filter the 'res' dictionary containing types and their sum for sums == 0, return a new dict with those values###
absent_types = dict()
for (key, value) in Ares.items():
    # Check if key is even then add pair to new dictionary
    if int(value) < 5:
        absent_types[key] = value

print('The following empty columns will be dropped from the total count data:')
print(absent_types)

The following empty columns will be dropped from the total count data:
{'Pinus (white)': 0, 'Ephedra': 0, 'Juniperus/Cupressus': 0, 'Acer negundo': 0, 'Q. chrysolepis/Q. vaccinifolia': 0, 'Aesculus': 0, 'Platanus': 0, 'Fremontia': 0, 'Adenostoma': 0, 'Cercocarpus': 0, 'Ceanothus': 0, 'Cephalanthus': 0, 'Sambucus': 0, 'Shepherdia': 0, 'Elaeagnus': 0, 'Sanguisorba': 0, 'Ribes': 0, 'Purshia': 0, 'Myrica': 0, 'Ambrosia': 0, 'Compositae LS': 0, 'Ericaceae': 0, 'Linaceae': 0, 'Cruciferae': 0, 'Arceuthobium': 0, 'Malvaceae': 0, 'Liliaceae': 0, 'Leguminosae': 0, 'Onagraceae': 0, 'Labiatae': 0, 'Salvia': 0, 'Eriogonum': 0, 'Polygonum amphibium': 0, 'Polygonum californica': 0, 'Vitis': 0, 'Rumex ': 0, 'Plantago': 0, 'Navarettia': 0, 'Rubiaceae': 0, 'Thalictrum': 0, 'Rhus': 0, 'Lonicera': 0, 'Sarcobatus': 0, 'Equisetum': 0, 'Ruppia': 0, 'Brasenia': 0, 'Menyanthes': 0, 'Polypodium': 0, 'Athyrium': 0, 'Polystichum': 0, 'Selaginella': 0, 'Trilete': 0, 'Colonial fungi': 0, 'Unknown B': 0, 'Unknown D'

# Absent Types (R counts)

In [10]:
###Filter the 'res' dictionary containing types and their sum for sums == 0, return a new dict with those values###
absent_Rtypes = dict()
for (rkey, rvalue) in Rres.items():
    # Check if key is even then add pair to new dictionary
    if int(rvalue) < 5:
        absent_Rtypes[rkey] = rvalue

print('The following empty columns will be dropped from the total count data:')
print(absent_Rtypes)

The following empty columns will be dropped from the total count data:
{'Fremontia': 0.0, 'Adenostoma': 0.0, 'Sambucus': 0.0, 'Shepherdia': 0.0, 'Elaeagnus': 0.0, 'Sanguisorba': 0.0, 'Ribes': 0.0, 'Purshia': 0.0, 'Linaceae': 0.0, 'Onagraceae': 0.0, 'Vitis': 0.0, 'Rumex ': 0.0, 'Lonicera': 0.0, 'Equisetum': 0.0, 'Ruppia': 0.0, 'Menyanthes': 0.0, 'Athyrium': 0.0, 'Polystichum': 0.0, 'Unknown A': 0.0, 'Unknown B': 0.0, 'Unknown C': 0.0, 'Unknown D': 0.0, 'Unknown E': 0.0, 'Unknown F': 0.0}


In [11]:
#make list of missing types from Rcounts
R_absent = absent_Rtypes.keys()
R_absent = list(R_absent)

#make list of missing types from Mcounts
A_absent = absent_types.keys()
A_absent = list(A_absent)

In [12]:
#A's missing types
print(A_absent)

['Pinus (white)', 'Ephedra', 'Juniperus/Cupressus', 'Acer negundo', 'Q. chrysolepis/Q. vaccinifolia', 'Aesculus', 'Platanus', 'Fremontia', 'Adenostoma', 'Cercocarpus', 'Ceanothus', 'Cephalanthus', 'Sambucus', 'Shepherdia', 'Elaeagnus', 'Sanguisorba', 'Ribes', 'Purshia', 'Myrica', 'Ambrosia', 'Compositae LS', 'Ericaceae', 'Linaceae', 'Cruciferae', 'Arceuthobium', 'Malvaceae', 'Liliaceae', 'Leguminosae', 'Onagraceae', 'Labiatae', 'Salvia', 'Eriogonum', 'Polygonum amphibium', 'Polygonum californica', 'Vitis', 'Rumex ', 'Plantago', 'Navarettia', 'Rubiaceae', 'Thalictrum', 'Rhus', 'Lonicera', 'Sarcobatus', 'Equisetum', 'Ruppia', 'Brasenia', 'Menyanthes', 'Polypodium', 'Athyrium', 'Polystichum', 'Selaginella', 'Trilete', 'Colonial fungi', 'Unknown B', 'Unknown D', 'Unknown E', 'Unknown F', 'APFAC', 0.0]


In [13]:
#Rogers's missing types
print(R_absent)

['Fremontia', 'Adenostoma', 'Sambucus', 'Shepherdia', 'Elaeagnus', 'Sanguisorba', 'Ribes', 'Purshia', 'Linaceae', 'Onagraceae', 'Vitis', 'Rumex ', 'Lonicera', 'Equisetum', 'Ruppia', 'Menyanthes', 'Athyrium', 'Polystichum', 'Unknown A', 'Unknown B', 'Unknown C', 'Unknown D', 'Unknown E', 'Unknown F']


In [None]:
#write a function that processes the data...