This Jupyter Notebook is used to plot application rates (as a scatter plot) of all xCropProtection applications in a user-defined year. Cells 1 and 2 include user parameters. Cell 6 displays the result of the notebook.

Version 2.0 - 11/6/2024 (Symbolize application rates by substance)

Version 1.0 - 5/23/2024

Input file path

In [None]:
# User parameter: path to the arr.dat file. The notebook opens it read-only
# with h5py and reads the datasets stored under its 'xCropProtection' group.
xcrop_arrdat_path = r'C:\path\to\arr.dat'

Select year to chart (integer)

In [None]:
# User parameter: calendar year to plot. Only applications dated from
# 1 January of this year up to (but not including) 1 January of the next
# year are drawn.
year_to_chart = 1996

In [None]:
import h5py
import datetime
import pandas
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np

In [None]:
def checkInstance(datasets):
    """Return True when every item in *datasets* is an ``h5py.Dataset``.

    Returns False as soon as one item is of a different type. An empty
    iterable yields True because there is nothing to reject.
    """
    return all(isinstance(item, h5py.Dataset) for item in datasets)

# Open the arr.dat file read-only. If it cannot be opened, report the path and
# re-raise so execution stops here instead of continuing and failing with a
# NameError on the undefined arr_file. OSError is caught (FileNotFoundError is
# a subclass) so other open failures are reported the same way.
# The file is left open on purpose: the datasets are read in a later cell.
try:
    arr_file = h5py.File(xcrop_arrdat_path, 'r')
except OSError:
    print("The file", xcrop_arrdat_path, "could not be accessed")
    raise

dataset = arr_file['xCropProtection']

# Get data for subgroups
application_dates_subgroup = dataset['ApplicationDates']
application_rates_subgroup = dataset['ApplicationRates']
application_PPP = dataset['AppliedPPP']

# Check that every subgroup that is read later is an h5py dataset. Raising
# stops the notebook; a bare `quit` expression would do nothing.
if not checkInstance([application_dates_subgroup, application_rates_subgroup, application_PPP]):
    raise TypeError("Error retrieving subgroup data.")

In [None]:
# Read the full contents of each dataset.
applied_ppp_data = application_PPP[:]
application_rates_data = application_rates_subgroup[:]
application_dates_data = application_dates_subgroup[:]

# The dates are stored as ordinals; turn each one into a datetime.date so it
# is easier to read and can be placed on a date axis.
application_dates = list(map(datetime.date.fromordinal, application_dates_data))

In [None]:
# Build one row per application: date, rate and applied product/substance.
# The product names are decoded from bytes to str while the frame is built.
df = pandas.DataFrame({
    'application_date': application_dates,
    'application_rate': application_rates_data,
    'appliedPPP': [x.decode() for x in applied_ppp_data],
})

# Sorted, de-duplicated substance names; one plotted series per name.
substances = np.unique(df['appliedPPP'])

# Restrict the data to the selected calendar year once, outside the loop.
# The window is half-open: [1 Jan of year_to_chart, 1 Jan of the next year).
year_start = datetime.date(year_to_chart, 1, 1)
year_end = datetime.date(year_to_chart + 1, 1, 1)
in_year = (df['application_date'] >= year_start) & (df['application_date'] < year_end)
df_year = df.loc[in_year]

# Markers only (no connecting line), so the result reads as a scatter plot.
# Every substance is plotted, even when it has no application in the selected
# year, so that the legend and the colour order do not depend on the year.
for substance in substances:
    applications = df_year.loc[df_year['appliedPPP'] == substance]
    plt.plot(applications['application_date'], applications['application_rate'], label=substance, marker='o', linestyle='')

plt.title("Application rates (" + str(year_to_chart) + ")")
plt.xlabel("Date")

# Pin the x-axis to the selected year with exactly one tick per month.
# With the automatic date locator a short application window gets several
# ticks inside the same month, and the '%b' formatter then repeats the same
# month name on each of them. The small pad keeps markers that fall on the
# first or last day of the year from being clipped at the axis edge.
axis_pad = datetime.timedelta(days=5)
plt.xlim(year_start - axis_pad, year_end + axis_pad)
x_axis = plt.gca().xaxis
x_axis.set_major_locator(mdates.MonthLocator())
myFmt = mdates.DateFormatter('%b')
x_axis.set_major_formatter(myFmt)

plt.ylabel("Application Rate (g/ha)")
# Anchor the legend outside the axes, to the right of the plot area.
plt.legend(loc='upper left', ncols=1, bbox_to_anchor=(1, 1.05), fancybox=True, shadow=True)
plt.show()