In [108]:
!pip install datascience 



In [109]:
import numpy as np
from datascience import *
import pandas as pd

# Davis Sunflower Project: Plant Counts

**Author**: Chris Zhan, as part of the Blackman lab.

**Summary**: This JupyterHub notebook is designed to take in 3 spreadsheets: a field map, a greenhouse plant count and a desired plant count. It returns several tables and data points. Firstly, the notebook produces a total count of all the plants currently in the field, grouped by species/family line. Secondly, it calculates a total sum of all plants currently in the field. Thirdly, it produces a table of how many more plants should be germinated based on the difference between the desired plant count and the total plants in the field / lathhouse.

## Important Tables:

**Davis Field Map**

In [110]:
davis_map = pd.read_csv('datasets/Davis_2021 - Current Map (Dead Plants Removed) (1).csv')
davis_map.head(10)

Unnamed: 0,Column,Border_1,Row_2,Row_3,Row_4,Row_5,Row_6,Row_7,Row_8,Row_9,...,Row_16,Row_17,Row_18,Row_19,Border,Plants,count (7/26/21),Unnamed: 23,Plants.1,count (8/2/21)
0,1.0,,,DB_1973p,,,F4_35,F4_35,KSp,KSp,...,,,,,,DB_1797,5.0,,DB_1797p,3.0
1,2.0,,,DB_1815p,DB_1844p,F4_60,F4_61,F5_117,DB_1815p,DB_1844p,...,,,,,,,,,DB_1800p,7.0
2,3.0,,F5_117,DB_1951p,DB_1841p,F4_33,F4_26,F4_58,DB_1951p,DB_1841p,...,,,,,,DB_1800,6.0,,DB_1806p,8.0
3,4.0,,F4_43,DB_1811p,DB_1880p,F4_65,F5_117,F4_58,DB_1811p,DB_1880p,...,,,,,,,,,DB_1811p,1.0
4,5.0,,F4_36,MBp,DB_1800p,F5_117,F5_116,F4_61,,DB_1800p,...,,,,,,DB_1806,6.0,,DB_1815p,1.0
5,6.0,,F4_01,KSp,DB_1806p,F4_70,F5_118,F4_63,KSp,DB_1806p,...,,,,,,,,,DB_1822p,4.0
6,7.0,,F4_28,DB_1806p,DB_1976p,F4_44,F4_35,F4_58,DB_1806p,DB_1976p,...,,,,,,DB_1811,3.0,,DB_1828p,3.0
7,8.0,,KS,DB_1976p,,F4_35,F4_63,F4_44,,DB_1845p,...,,,,,,,,,DB_1841p,3.0
8,9.0,,F5_136,DB_1828p,HEAN3p,F5_124,F4_21,F4_65,DB_1828p,HEAN3p,...,,,,,,DB_1815,2.0,,DB_1844p,4.0
9,10.0,,F4_03,DB_1841p,,F4_51,F4_69,F4_70,DB_1841p,,...,,,,,,,,,DB_1845p,5.0


**Lathhouse/Greenhouse Plant Counts**

In [111]:
potted_lathhouse = pd.read_csv('datasets/Davis_2021 - potted_seedling_lathhouse.csv')
selected_potted_lathhouse = potted_lathhouse.loc[:,'F4/5 Family ID':'counts(8/3)']
selected_potted_lathhouse.head(5)

Unnamed: 0,F4/5 Family ID,counts (8/3),DB_lines,counts (8/3).1,Species,Pop,counts(8/3)
0,F5_MBxKS_101,,DB_1973,8.0,agrestis,AGR-2740,8.0
1,F5_MBxKS_102,2.0,DB_1976,9.0,agrestis,AGR-2741,5.0
2,F5_MBxKS_103,1.0,DB_1880,6.0,agrestis,AGR-2744,4.0
3,F5_MBxKS_104,2.0,DB_1990,4.0,angustifolius,ANG-2424,
4,F5_MBxKS_105,,DB_1954,6.0,angustifolius,CRP-ANG,5.0


**Desired Germination Counts**

In [112]:
desired_counts = pd.read_csv('datasets/Davis_2021 - germination_batches.csv')
desired_counts = desired_counts.iloc[:, 0:2]
desired_counts

Unnamed: 0,F4/5 Family ID,No of plants required
0,F5_MBxKS_101,5
1,F5_MBxKS_102,5
2,F5_MBxKS_103,5
3,F5_MBxKS_104,5
4,F5_MBxKS_105,5
...,...,...
76,MBxKSF4_61,10
77,MBxKSF4_63,10
78,MBxKSF4_64,10
79,MBxKSF4_70,10


# Preliminary Steps

We need to make the F4/5 Family ID in `selected_potted_lathhouse` and `desired_counts` to be the same as the entered data in `davis_map`. 

The following cell defines a function that removes MBxKS from the Family ID in selected_potted_lathhouse so that the data can be joined with the future davis_plant_table. 

In [113]:
def remove_MBxKS(column_label, dataframe):
    ''' Takes a column label as a string and a dataframe. 
    Replaces all instances of MBxKS with '' and replaces all instances 'F5__' with 'F5_'
    for consistency purposes.
    
    Returns a cleaned dataframe
    '''
    dataframe[column_label] = dataframe[column_label].str.replace('MBxKS', '').str.replace('F5__', 'F5_')
    return dataframe

**Making Dataframes**

Now that `remove_MBxKS` is implemented, we can create some intermediate dataframes that aggregate our data into the proper format. We want to eventually join the count of plants in the lathhouse with the count of plants in the field, so `selected_potted_lathhouse` needs to be reformatted to have all the species lines in one column, and their counts in another column.

In [206]:
clean_potted_lathhouse = remove_MBxKS('F4/5 Family ID', potted_lathhouse)
clean_potted_lathhouse = clean_potted_lathhouse.iloc[:, 0:2]
clean_potted_lathhouse.head(5)

Unnamed: 0,F4/5 Family ID,counts (8/3)
0,F5_101,
1,F5_102,2.0
2,F5_103,1.0
3,F5_104,2.0
4,F5_105,


In [207]:
clean_desired_counts = remove_MBxKS('F4/5 Family ID', desired_counts)
clean_desired_counts.head(5)

Unnamed: 0,F4/5 Family ID,No of plants required
0,F5_101,5
1,F5_102,5
2,F5_103,5
3,F5_104,5
4,F5_105,5


In [116]:
davis_map.shape[0]

99

In [151]:
def append_plant_counts(davis_df):
    ''' This function iterates through the field map and appends the Plant IDs to an array.
    Returns a series of plant IDs
    '''
    result = pd.Series()
    i = 2
    copy = davis_df
    while i < 14:
        result = result.append(copy[~copy.iloc[:, i].isna()].iloc[:, i])
        i += 1
    return result

In [163]:
plant_series = append_plant_counts(davis_map)
plant_series = plant_series.to_frame().groupby(0).size().to_frame()

  result = pd.Series()


In [208]:
merged_lathhouse = clean_potted_lathhouse.merge(plant_series, how = 'left', left_on = 'F4/5 Family ID', right_index = True)
merged_lathhouse = merged_lathhouse.rename(columns = {'counts (8/3)': 'Lathhouse Counts', 0:'Field Count'})[['F4/5 Family ID', 'Field Count', 'Lathhouse Counts']]
merged_desired = merged_lathhouse.merge(clean_desired_counts, how = 'left')

difference_desired_available = merged_desired['No of plants required'] - (merged_desired['Field Count'] + merged_desired['Lathhouse Counts'])

merged_desired['Remaining Plants Required'] = difference_desired_available
merged_desired.sort_values('F4/5 Family ID')


Unnamed: 0,F4/5 Family ID,Field Count,Lathhouse Counts,No of plants required,Remaining Plants Required
37,F4_01,7.0,8.0,10.0,-5.0
38,F4_02,5.0,3.0,10.0,2.0
39,F4_03,7.0,2.0,10.0,1.0
40,F4_05,1.0,3.0,10.0,6.0
41,F4_13,5.0,3.0,10.0,2.0
...,...,...,...,...,...
32,F5_133,1.0,,5.0,
33,F5_134,1.0,1.0,5.0,3.0
34,F5_135,1.0,1.0,5.0,3.0
35,F5_136,4.0,5.0,5.0,-4.0
