# Avian data 2022-2023
This notebook performs and documents the transformations applied to the data received from Colibri by The Water Institute as part of this project.

The main goals of these transformations are:
  - Create a unified dataset for 2010-2023 data (This notebook will only process the data from 2022-2023 but using the schema from the previous years)
  - Rename both high resolution photos and screenshots to have them in a unified folder structure
  - Create thumbnails for the high resolution photos to be visualized on the web dashboard
  - Generate datasets for being used in the visualizations

In [2]:
#%pip install tqdm pandas boto3
import getpass

### Parameters

In [3]:
# Bounding box handed to PIL's Image.thumbnail() inside generate_thumbnail.
thumbnail_size = (518, 345)
# When True, the thumbnail-generation cell further down is executed.
create_thumbnails = True
# NOTE(review): rename_files and replace are not referenced in the part of the
# notebook reviewed here; presumably they gate the copy steps - TODO confirm.
rename_files=False
replace = False
# Root prefix of every key generated for the unified folder structure.
_base_folder = "avian_monitoring"
# Value passed as RoleArn to STS assume_role; read with getpass so it is not echoed.
role_to_assume = getpass.getpass("Input the role")


Input the role········


### Common imports

In [4]:
from PIL import Image
import re
import pandas as pd

# import geopandas as gpd
import pandas_access
from datetime import datetime
import pyodbc
import numpy as np
from multiprocessing import Pool
from functools import partial
import geopandas as gp
import boto3
import re
from tqdm.notebook import tqdm
from dateutil.parser import parse
from concurrent.futures import ThreadPoolExecutor
from io import BytesIO
from functools import partial

# Register tqdm's progress-aware pandas helpers.
tqdm.pandas()

# Base credentials come from the local "GLO" profile; they are only used to
# call STS and assume the role entered in the parameters cell.
boto3.setup_default_session(profile_name="GLO")
bucket_name = "twi-aviandata"
starting_folder = "HighResolutionImages"
new_folder_hr = "avian_monitoring_"
sts_client = boto3.client('sts')
# Request temporary credentials valid for 43000 seconds (just under 12 hours).
assumed_role_object=sts_client.assume_role(
    RoleArn= role_to_assume,
    RoleSessionName="AssumeRoleSession1",
    DurationSeconds=43000
)
credentials=assumed_role_object['Credentials']

# S3 resource bound to the temporary credentials of the assumed role.
aws_s3 = boto3.resource(
    "s3",
    aws_access_key_id=credentials['AccessKeyId'],
    aws_secret_access_key=credentials['SecretAccessKey'],
    aws_session_token=credentials['SessionToken'],
)
# Bucket handle used as a global by every helper in the Utilities section.
avian_data = aws_s3.Bucket(bucket_name)

### Utilities

In [5]:
def clean_date(text):
    """Normalise a date string to ``YYYY-Mon-DD`` (e.g. ``2022-May-15``).

    The input is interpreted by ``dateutil.parser.parse`` and the month is
    rendered with ``%b`` (abbreviated month name).
    """
    parsed = parse(text)
    return parsed.strftime("%Y-%b-%d")


def update_mime_type(t, mime="image/png"):
    """Set the Content-Type of the object stored under key ``t`` to ``mime``.

    The object is copied onto itself with ``MetadataDirective="REPLACE"``,
    passing its current user metadata along with the new content type.
    """
    target = avian_data.Object(t)
    self_source = {"Bucket": avian_data.name, "Key": t}
    target.copy_from(
        CopySource=self_source,
        Metadata=target.metadata,
        MetadataDirective="REPLACE",
        ContentType=mime,
    )


def copy_wnew_mime_type(t, k, mime="image/png"):
    """Copy the object at key ``t`` to key ``k``, setting its mime type to ``mime``.

    The source object's user metadata is passed along with the new content type.

    Args:
        t: key of the existing object in the ``avian_data`` bucket.
        k: destination key in the same bucket.
        mime: Content-Type to store on the copy.
    """
    s3_object = avian_data.Object(t)
    # Bucket.copy() is a managed transfer: per-object options are not keyword
    # arguments of the call and must be supplied through ExtraArgs.
    avian_data.copy(
        {"Bucket": avian_data.name, "Key": t},
        k,
        ExtraArgs={
            "Metadata": s3_object.metadata,
            "MetadataDirective": "REPLACE",
            "ContentType": mime,
        },
    )

def rename(key, new_name, replace=False):
    """Copy an object (key) to a new location (new_name) on the avian_data bucket.

    Args:
        key: source key. If the copy fails, the ``.jpg`` / ``.JPG`` spelling
            variants of the key are tried as prefixes.
        new_name: destination key.
        replace: when False, nothing is copied if ``new_name`` already exists.

    Returns:
        True when the destination exists after the call (already present or
        copied now), False when no source object could be found.
    """
    # Compare full keys: a bare prefix listing would also match longer keys
    # that merely start with new_name.
    if not replace and any(
        obj.key == new_name for obj in avian_data.objects.filter(Prefix=new_name)
    ):
        return True

    # Missing photo information yields NaN instead of a key; there is nothing to copy.
    if not isinstance(key, str):
        print(f"{key} not found")
        return False

    bucket = avian_data.name
    try:
        avian_data.copy({"Bucket": bucket, "Key": key}, new_name)
        return True
    except Exception as e:
        # Best effort: report the failure and fall back to the extension variants.
        print(e)

    # The original files use either extension case; try lower case first, then upper.
    for old_ext, new_ext in ((".JPG", ".jpg"), (".jpg", ".JPG")):
        matches = list(
            avian_data.objects.filter(Prefix=key.replace(old_ext, new_ext))
        )
        if matches:
            avian_data.copy({"Bucket": bucket, "Key": matches[0].key}, new_name)
            return True

    print(f"{key} not found")
    return False


def generate_thumbnail(high_res_key, thumb_key, regenerate=False):
    """Write a PNG thumbnail of ``high_res_key`` to ``thumb_key``.

    The image is shrunk with the global ``thumbnail_size``. Unless
    ``regenerate`` is set, nothing is done when an object already exists under
    the ``thumb_key`` prefix.

    Returns True when the thumbnail exists or was written, False when no
    object is found under the ``high_res_key`` prefix.
    """
    existing_thumbs = avian_data.objects.filter(Prefix=thumb_key)
    if not regenerate:
        if list(existing_thumbs):
            return True

    sources = list(avian_data.objects.filter(Prefix=high_res_key))
    if len(sources) == 0:
        print(f"there is no {high_res_key}")
        return False

    # The first object listed under the prefix is taken as the source image.
    raw_bytes = sources[0].get()["Body"].read()
    picture = Image.open(BytesIO(raw_bytes))
    png_buffer = BytesIO()
    picture.thumbnail(thumbnail_size)
    picture.save(png_buffer, format="png")
    png_buffer.seek(0)  # rewind so the upload reads from the start
    avian_data.put_object(Key=thumb_key, Body=png_buffer, ContentType="image/png")
    return True

def createListings(tree, path):
    """Flatten a nested tree into per-folder listings for jquery browse files.

    Every entry of ``tree`` with a truthy value is treated as a sub-folder and
    listed recursively under ``path + "/" + name``; every other entry whose
    name contains a "." is treated as a file. Entries that are neither are
    ignored.

    Returns a dict mapping each folder path to ``{"dirs": [...], "files": [...]}``.
    """
    listings = {}
    dirs, files = [], []
    for name, subtree in tree.items():
        if subtree:
            dirs.append(name)
            listings.update(createListings(subtree, path + "/" + name))
            continue
        if "." in name:
            files.append(name)

    listings[path] = {"dirs": dirs, "files": files}
    return listings

def exists_key(key, replace=False):
    """Tell whether any object in the avian_data bucket starts with ``key``.

    ``replace`` is accepted but not used. Any error raised while listing is
    printed and reported as "does not exist" (False).
    """
    try:
        matches = list(avian_data.objects.filter(Prefix=key))
    except Exception as e:
        print(f"key:{key}, exception {e}")
        return False
    return len(matches) > 0



# Unified 2010-2021 Data

Data has been unified by Colibri into a single Access database; however, it contains tables split by year, with some differences in the schema. This process will combine all the datasets into a single one with a common definition of the total birds and total nests.

In [6]:
# Note: in order to use all the fields, the columns whose names contain '?' or
# '/' were renamed.
acc_db = "Colibri2010-2021CWBColonies_2Jan2023.accdb"

# Mapping of table name -> {column name -> Access column type}.
schema = pandas_access.read_schema(acc_db)


In [7]:
# Name of the colony inventory table in the Access database.
ct_name = "tblRWCWB_ColonyInventory_10Nov22"
# Table name used previously (kept for reference):
# colonies_table = schema.pop("tblRWCWB_ColonyInventory_13Sept2022")
# pop() removes the entry, so `schema` afterwards holds only the remaining tables.
colonies_table = schema.pop(ct_name)


There are three tables with slightly different schemas

In [8]:
schema


{'tblSpeciesCodes': {'SpeciesCode': 'Text (100) NOT NULL'},
 'tblSpeciesData2010': {'AutoID': 'Double',
  'Year': 'Double',
  'Date': 'DateTime',
  'ColonyName': 'Text (510)',
  'Latitude': 'Text (510)',
  'Longitude': 'Text (510)',
  'DottingAreaNumber': 'Text (510)',
  'CameraNumber': 'Text (510)',
  'CardNumber': 'Text (510)',
  'PhotoNumber': 'Text (510)',
  'PQ': 'Text (510)',
  'SpeciesCode': 'Text (510)',
  'WBN': 'Double',
  'ChickNestwithoutAdult': 'Double',
  'AbandNest': 'Double',
  'EmptyNest': 'Double',
  'PBN': 'Double',
  'Site': 'Double',
  'Brood': 'Double',
  'OtherAdultsInColony': 'Double',
  'OtherImmInColony': 'Double',
  'ChicksNestlings': 'Double',
  'RoostingBirds': 'Double',
  'RoostingAdults': 'Double',
  'RoostingImmatures': 'Double',
  'UnknownAge': 'Double',
  'Dotter': 'Text (510)',
  'DateDotted': 'DateTime'},
 'tblSpeciesData2011_2013': {'AutoID': 'Double',
  'Year': 'Double',
  'Date': 'DateTime',
  'ColonyName': 'Text (510)',
  'DottingAreaNumber': 'Te

Taking the 2015 to 2021 table as a reference, let's compare the schemas.

Fields in the reference but not in the `tblSpeciesData2011_2013` table

In [9]:
schema["tblSpeciesData2015_2018_2021"].keys() - schema["tblSpeciesData2011_2013"].keys()


{'ChickNest', 'OtherBirds', 'Subcolony', 'Territory'}

Fields in the `tblSpeciesData2011_2013` table but not in the reference

In [10]:
schema["tblSpeciesData2011_2013"].keys() - schema["tblSpeciesData2015_2018_2021"].keys()

{'ChicksNestlings',
 'OtherAdultsInColony',
 'OtherImmInColony',
 'RoostingAdults',
 'RoostingBirds',
 'RoostingImmatures',
 'UnknownAge'}

Fields in the reference but not in the 2010 table

In [11]:
schema["tblSpeciesData2015_2018_2021"].keys() - schema["tblSpeciesData2010"].keys()


{'BestForBPE', 'ChickNest', 'Notes', 'OtherBirds', 'Subcolony', 'Territory'}

Fields in the 2010 table but not in the reference

In [12]:
schema["tblSpeciesData2010"].keys() - schema["tblSpeciesData2015_2018_2021"].keys()


{'ChicksNestlings',
 'Latitude',
 'Longitude',
 'OtherAdultsInColony',
 'OtherImmInColony',
 'RoostingAdults',
 'RoostingBirds',
 'RoostingImmatures',
 'UnknownAge'}

That also means that we have different formulas to calculate the totals. The `BestForBPE` field is used as a filter for the 2011 to 2021 tables, but it is not available in the 2010 table.

### Formulas

|                | 2010                                                                                                                                                               | 2011-2013                                                                                                                                                          | 2015-2021                                                                                                 |
|----------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------|
| Nests          | sum(<br>[WBN]<br>+[ChickNestw/outAdult]<br>+[AbandNest]<br>+[EmptyNest]<br>+[PBN]<br>+[Site]<br>+[Brood])                                                          | Sum(<br>[WBN]<br>+[ChickNestw/outAdult]<br>+[AbandNest]<br>+[EmptyNest]<br>+[PBN]<br>+[Site]<br>+[Brood])                                                          | Sum(<br>[WBN]<br>+[ChickNest]<br>+[ChickNestw/outAdult]<br>+[AbandNest]<br>+[PBN]<br>+[Site]<br>+[Brood]) |
| Birds          | sum([WBN]<br>+[PBN]<br>+[Site]<br>+[OtherAdultsInColony]<br>+[OtherImmInColony]<br>+[RoostingBirds]<br>+[RoostingAdults]<br>+[RoostingImmatures]<br>+[UnknownAge]) | Sum([WBN]<br>+[PBN]<br>+[Site]<br>+[OtherAdultsInColony]<br>+[OtherImmInColony]<br>+[RoostingBirds]<br>+[RoostingAdults]<br>+[RoostingImmatures]<br>+[UnknownAge]) | Sum(<br>[WBN]<br>+[ChickNest]<br>+[PBN]<br>+[Territory]<br>+[Site]<br>+[OtherBirds])                      |
| SumOfEmptyNest | sum(EmptyNest)                                                                                                                                                     | EmptyNest                                                                                                                                                          | EmptyNest                                                                                                 |


But 2015 to 2021 has additional categories: 

```SQL

Sum(WBN) AS SumOfWBN

Sum(ChickNest) AS SumOfChickNest

Sum([ChickNestw/outAdult]) AS [SumOfChickNestw/outAdult] 

Sum(Brood) AS SumOfBrood

Sum(AbandNest) AS SumOfAbandNest

Sum(PBN) AS SumOfPBN

Sum(Territory) AS SumOfTerritory
```

## Generating a common dataset:
- merge 2010 species with the colonies. 
- select only the common columns 

In [13]:
# Column names of each species table (iterating a dict yields its keys).
cols_2015_2021 = set(schema["tblSpeciesData2015_2018_2021"])
cols_2011_2013 = set(schema["tblSpeciesData2011_2013"])
cols_2010 = set(schema["tblSpeciesData2010"])
# Columns shared by the 2011-2013 and the 2015-2021 tables.
common_fields = cols_2011_2013 & cols_2015_2021


In [14]:
cols_2010 - common_fields


{'ChicksNestlings',
 'Latitude',
 'Longitude',
 'OtherAdultsInColony',
 'OtherImmInColony',
 'RoostingAdults',
 'RoostingBirds',
 'RoostingImmatures',
 'UnknownAge'}

In [15]:
common_fields - cols_2010


{'BestForBPE', 'Notes'}

# UPDATE Mime types 

# 2022-2023 data

In [16]:
acc_db2023 = "./LACWB_2022-2023.accdb"
schema2023 = pandas_access.read_schema(acc_db2023)

In [17]:
fields2023 = set( schema2023["tblSpeciesData"].keys())

In [18]:
fields2023

{'AbandNest',
 'AutoID',
 'BestForBPE',
 'Brood',
 'CameraNumber',
 'CardNumber',
 'ChickNest',
 'ChickNestwithoutAdult',
 'ColonyName',
 'Date',
 'DateDotted',
 'Dotter',
 'DottingAreaNumber',
 'EmptyNest',
 'Notes',
 'OtherBirds',
 'PBN',
 'PQ',
 'PhotoNumber',
 'Site',
 'SpeciesCode',
 'Subcolony',
 'Territory',
 'WBN',
 'Year'}

In [19]:
common_fields - fields2023

set()

In [20]:
fields2023 - common_fields

{'ChickNest', 'OtherBirds', 'Subcolony', 'Territory'}

In [21]:
common_fields

{'AbandNest',
 'AutoID',
 'BestForBPE',
 'Brood',
 'CameraNumber',
 'CardNumber',
 'ChickNestwithoutAdult',
 'ColonyName',
 'Date',
 'DateDotted',
 'Dotter',
 'DottingAreaNumber',
 'EmptyNest',
 'Notes',
 'PBN',
 'PQ',
 'PhotoNumber',
 'Site',
 'SpeciesCode',
 'WBN',
 'Year'}

In [22]:
# Species counts for 2022-2023, one row per AutoID (see the tblSpeciesData schema above).
pd_species_2023 = pandas_access.read_table(acc_db2023, "tblSpeciesData")
# Colony inventory (ColonyID, ColonyName, State, coordinates, GeoRegion, ...).
colonies_table2023 =  pandas_access.read_table(acc_db2023, "RWCWB_ColonyInventory_13Septemb")

In [23]:
colonies_table2023

Unnamed: 0,ColonyID,ActiveInventory,ColonyGroupBuffer,ColonyName,State,Longitude,Latitude,GeoRegion,ExtrapArea,TerrestEcoRegion,MarineEcoRegion,FormerNames,OrigDotterID,NOTES August 2022
0,H_3RK,No,3 Rooker Key,3 Rooker Key,FL,-82.839000,28.113800,Tampa Bay,South Florida,Southwestern Florida Flatwoods,Eastern Gulf Neritic,,3 Rooker Key,Anclote Key State Preserve; surveyed 2010 only.
1,ALP,Yes,Alligator Point,Alligator Point,FL,-84.441670,29.915440,Apalachicola East,Coastal Marshes-Barrier Islands,Gulf Barrier Islands and Coastal Marshes,Eastern Gulf Neritic,PP19Colony01,,
2,API,Yes,Anderson Point Island,Anderson Point Island,LA,-89.327300,29.892000,Biloxi North,Coastal Marshes,Deltaic Coastal Marshes and Barrier Islands,Mississippi Estuarine Area,Biloxi North 39,KKN9,
3,ABI,Yes,Apalachicola Bird Island,Apalachicola Bird Island,FL,-84.975398,29.714977,Apalachicola East,Coastal Marshes-Barrier Islands,Gulf Barrier Islands and Coastal Marshes,Eastern Gulf Neritic,,,ColonyGroupBuffer centroid location coordinates.
4,ABI_A,Yes,Apalachicola Bird Island,Apalachicola Bird Island A,FL,-84.975800,29.715770,Apalachicola East,Coastal Marshes-Barrier Islands,Gulf Barrier Islands and Coastal Marshes,Eastern Gulf Neritic,,,Colony coordinates provided by Florida Shorebi...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
587,WHPI,Yes,White Pelican Island,White Pelican Island,TX,-97.363590,27.390970,Upper Laguna Madre,Lower Texas Coast,Laguna Madre Barrier Islands and Coastal Marshes,Texas Estuarine Area,614-345 White Pelican Island,,Island name: Chaney #38 White Pelican; Colony...
588,WLI,Yes,Willet Island,Willet Island,LA,-89.290300,29.407000,Birdsfoot East,Coastal Marshes,Deltaic Coastal Marshes and Barrier Islands,Mississippi Estuarine Area,BirdsfootEast_2,MWP4,Land loss/submerged by 2012.
589,WII,Yes,Wine Island,Wine Island,LA,-90.610800,29.094800,Terrebonne Bay,Coastal Marshes-Barrier Islands,Deltaic Coastal Marshes and Barrier Islands,Mississippi Estuarine Area,,Wine Island,
590,H_WLSA,No,Wolf Lake Skimmer Area,Wolf Lake Skimmer Area,TX,-95.240900,29.033700,Galveston,Mid-Upper Texas Coast,Mid-Coast TX Barrier Islands and Coastal Marshes,Texas Estuarine Area,TexasCoast_14,WAW8,TCWB database - active 2014.


In [24]:
# Attach the colony attributes (State, GeoRegion, Longitude, Latitude, ...) to
# every species row; the left join keeps rows whose ColonyName has no match.
# NOTE(review): this assumes ColonyName is unique in colonies_table2023,
# otherwise species rows are duplicated - TODO confirm. The cell also overwrites
# its own input, so running it twice merges the colony columns a second time.
pd_species_2023 = pd.merge(pd_species_2023, colonies_table2023, on="ColonyName", how="left")


In [25]:
def _photo_key(df, category, extension):
    """Build, for every row of ``df``, the unified S3 key of its photo.

    Keys have the form
    ``<_base_folder>/<category>/<Year>/<GeoRegion>/<ColonyName>/<ddMonthyy>Camera<n>-Card<n>-<PhotoNumber><extension>``.

    A missing ``CardNumber`` is replaced by "1". A missing value in any other
    component leaves the whole key as NaN.

    Args:
        df: frame holding Year, Date, GeoRegion, ColonyName, CameraNumber,
            CardNumber and PhotoNumber.
        category: sub-folder under ``_base_folder`` (e.g. "thumbnails").
        extension: file extension including the dot.
    """
    survey_day = pd.to_datetime(
        df["Date"].astype("str"), format="%m/%d/%y %H:%M:%S"
    ).dt.strftime("%d%B%y")
    card = df["CardNumber"].where(~df["CardNumber"].isnull(), other="1")
    return (
        f"{_base_folder}/{category}/"
        + df["Year"].astype(str)
        + "/"
        + df["GeoRegion"]
        + "/"
        + df["ColonyName"]
        + "/"
        + survey_day
        + "Camera"
        + df["CameraNumber"]
        + "-Card"
        + card
        + "-"
        + df["PhotoNumber"]
        + extension
    )


# The three keys differ only in their sub-folder and, for thumbnails, the extension.
pd_species_2023["HighResImage_new"] = _photo_key(
    pd_species_2023, "high_resolution_photos", ".jpg"
)
pd_species_2023["screenshot_new"] = _photo_key(pd_species_2023, "screenshots", ".jpg")
pd_species_2023["thumbnail_new"] = _photo_key(pd_species_2023, "thumbnails", ".png")


# There are some rows without photo information
They are from the locations using the new method

In [26]:

v  = pd_species_2023[pd_species_2023["HighResImage_new"].isna()][["Year","ColonyName"]].value_counts()

In [27]:
print(v.sort_index().to_string())

Year  ColonyName              
2022  Chandeleur Islands North    16
      Chandeleur Islands South    14
      New Harbor Island 1          9
      New Harbor Island 3         13
      Rabbit Island               52
2023  Queen Bess Island           15


In [28]:
 pd_species_2023["total_nests"] = (pd_species_2023["WBN"]
    + pd_species_2023["ChickNestwithoutAdult"]
    + pd_species_2023["AbandNest"]
    + pd_species_2023["ChickNest"]
    + pd_species_2023["PBN"]
    + pd_species_2023["Site"]
    + pd_species_2023["Brood"])
pd_species_2023["total_birds"] = (pd_species_2023["WBN"]
    + pd_species_2023["ChickNest"]
    + pd_species_2023["PBN"]
    + pd_species_2023["Territory"]
    + pd_species_2023["Site"]
    + pd_species_2023["OtherBirds"])

In [29]:
# date2 has the yyyy-Mon-dd form produced by clean_date; month is its middle part.
pd_species_2023["date2"] = pd_species_2023["Date"].astype("str").map(clean_date)
pd_species_2023["month"] = pd_species_2023["date2"].str.split("-").str[1]
order_by_field = "total_nests"

In [30]:
pd_species_2023["SpeciesCode"] = pd_species_2023["SpeciesCode"].str.upper()


In [31]:
pd_species_2023[pd_species_2023["BestForBPE"]=='Y']

Unnamed: 0,AutoID,Year,Date,ColonyName,Subcolony,DottingAreaNumber,CameraNumber,CardNumber,PhotoNumber,PQ,...,FormerNames,OrigDotterID,NOTES August 2022,HighResImage_new,screenshot_new,thumbnail_new,total_nests,total_birds,date2,month
0,698,2023,06/24/23 00:00:00,Breton Island,,88,1,2,5367,E,...,"MWP12, KKN17","MWP12, KKN17",,avian_monitoring/high_resolution_photos/2023/B...,avian_monitoring/screenshots/2023/Breton-Chand...,avian_monitoring/thumbnails/2023/Breton-Chande...,12,22,2023-Jun-24,Jun
1,699,2023,06/24/23 00:00:00,Breton Island,,88,1,2,5367,E,...,"MWP12, KKN17","MWP12, KKN17",,avian_monitoring/high_resolution_photos/2023/B...,avian_monitoring/screenshots/2023/Breton-Chand...,avian_monitoring/thumbnails/2023/Breton-Chande...,32,43,2023-Jun-24,Jun
2,700,2023,06/24/23 00:00:00,Breton Island,,88,1,2,5367,E,...,"MWP12, KKN17","MWP12, KKN17",,avian_monitoring/high_resolution_photos/2023/B...,avian_monitoring/screenshots/2023/Breton-Chand...,avian_monitoring/thumbnails/2023/Breton-Chande...,0,4,2023-Jun-24,Jun
3,702,2023,06/24/23 00:00:00,Breton Island,,89,2,1,9529,E,...,"MWP12, KKN17","MWP12, KKN17",,avian_monitoring/high_resolution_photos/2023/B...,avian_monitoring/screenshots/2023/Breton-Chand...,avian_monitoring/thumbnails/2023/Breton-Chande...,21,34,2023-Jun-24,Jun
4,703,2023,06/24/23 00:00:00,Breton Island,,90,1,2,5407,E,...,"MWP12, KKN17","MWP12, KKN17",,avian_monitoring/high_resolution_photos/2023/B...,avian_monitoring/screenshots/2023/Breton-Chand...,avian_monitoring/thumbnails/2023/Breton-Chande...,22,38,2023-Jun-24,Jun
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
755,336,2022,06/21/22 00:00:00,Chandeleur Islands South,,,,,,E,...,,,ColonyGroupBuffer centroid location coordinates.,,,,4328,4413,2022-Jun-21,Jun
756,664,2023,06/24/23 00:00:00,Breton Island,,74,2,1,9465,E,...,"MWP12, KKN17","MWP12, KKN17",,avian_monitoring/high_resolution_photos/2023/B...,avian_monitoring/screenshots/2023/Breton-Chand...,avian_monitoring/thumbnails/2023/Breton-Chande...,15,26,2023-Jun-24,Jun
757,705,2023,06/24/23 00:00:00,Breton Island,,91,1,2,5409,E,...,"MWP12, KKN17","MWP12, KKN17",,avian_monitoring/high_resolution_photos/2023/B...,avian_monitoring/screenshots/2023/Breton-Chand...,avian_monitoring/thumbnails/2023/Breton-Chande...,13,23,2023-Jun-24,Jun
758,713,2023,06/24/23 00:00:00,Breton Island,,94,1,2,5342,E,...,"MWP12, KKN17","MWP12, KKN17",,avian_monitoring/high_resolution_photos/2023/B...,avian_monitoring/screenshots/2023/Breton-Chand...,avian_monitoring/thumbnails/2023/Breton-Chande...,276,310,2023-Jun-24,Jun


In [32]:
# Identifier of a count: the high resolution photo key when there is one,
# otherwise "<date2>+<ColonyName>", followed by "#" and the species code
# ("N/A" when the code is missing).
visit_label = pd_species_2023["date2"].astype(str) + "+" + pd_species_2023["ColonyName"]
photo_or_visit = pd_species_2023["HighResImage_new"].fillna(visit_label)
species_label = pd_species_2023["SpeciesCode"].fillna("N/A")
pd_species_2023["uid"] = photo_or_visit + "#" + species_label

In [33]:
# Collapse every species code that starts with "REEG " (note the trailing
# space) into plain "REEG". na=False makes rows without a species code count
# as "no match"; without it the mask contains NaN and .loc refuses it.
# NOTE(review): "uid" is built in the previous cell, so it still carries the
# un-normalised code for these rows - confirm that is intended.
is_reeg_variant = pd_species_2023["SpeciesCode"].str.startswith("REEG ", na=False)
pd_species_2023.loc[is_reeg_variant, "SpeciesCode"] = "REEG"

In [34]:
pd_species_2023

Unnamed: 0,AutoID,Year,Date,ColonyName,Subcolony,DottingAreaNumber,CameraNumber,CardNumber,PhotoNumber,PQ,...,OrigDotterID,NOTES August 2022,HighResImage_new,screenshot_new,thumbnail_new,total_nests,total_birds,date2,month,uid
0,698,2023,06/24/23 00:00:00,Breton Island,,88,1,2,5367,E,...,"MWP12, KKN17",,avian_monitoring/high_resolution_photos/2023/B...,avian_monitoring/screenshots/2023/Breton-Chand...,avian_monitoring/thumbnails/2023/Breton-Chande...,12,22,2023-Jun-24,Jun,avian_monitoring/high_resolution_photos/2023/B...
1,699,2023,06/24/23 00:00:00,Breton Island,,88,1,2,5367,E,...,"MWP12, KKN17",,avian_monitoring/high_resolution_photos/2023/B...,avian_monitoring/screenshots/2023/Breton-Chand...,avian_monitoring/thumbnails/2023/Breton-Chande...,32,43,2023-Jun-24,Jun,avian_monitoring/high_resolution_photos/2023/B...
2,700,2023,06/24/23 00:00:00,Breton Island,,88,1,2,5367,E,...,"MWP12, KKN17",,avian_monitoring/high_resolution_photos/2023/B...,avian_monitoring/screenshots/2023/Breton-Chand...,avian_monitoring/thumbnails/2023/Breton-Chande...,0,4,2023-Jun-24,Jun,avian_monitoring/high_resolution_photos/2023/B...
3,702,2023,06/24/23 00:00:00,Breton Island,,89,2,1,9529,E,...,"MWP12, KKN17",,avian_monitoring/high_resolution_photos/2023/B...,avian_monitoring/screenshots/2023/Breton-Chand...,avian_monitoring/thumbnails/2023/Breton-Chande...,21,34,2023-Jun-24,Jun,avian_monitoring/high_resolution_photos/2023/B...
4,703,2023,06/24/23 00:00:00,Breton Island,,90,1,2,5407,E,...,"MWP12, KKN17",,avian_monitoring/high_resolution_photos/2023/B...,avian_monitoring/screenshots/2023/Breton-Chand...,avian_monitoring/thumbnails/2023/Breton-Chande...,22,38,2023-Jun-24,Jun,avian_monitoring/high_resolution_photos/2023/B...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
755,336,2022,06/21/22 00:00:00,Chandeleur Islands South,,,,,,E,...,,ColonyGroupBuffer centroid location coordinates.,,,,4328,4413,2022-Jun-21,Jun,2022-Jun-21+Chandeleur Islands South#SATE
756,664,2023,06/24/23 00:00:00,Breton Island,,74,2,1,9465,E,...,"MWP12, KKN17",,avian_monitoring/high_resolution_photos/2023/B...,avian_monitoring/screenshots/2023/Breton-Chand...,avian_monitoring/thumbnails/2023/Breton-Chande...,15,26,2023-Jun-24,Jun,avian_monitoring/high_resolution_photos/2023/B...
757,705,2023,06/24/23 00:00:00,Breton Island,,91,1,2,5409,E,...,"MWP12, KKN17",,avian_monitoring/high_resolution_photos/2023/B...,avian_monitoring/screenshots/2023/Breton-Chand...,avian_monitoring/thumbnails/2023/Breton-Chande...,13,23,2023-Jun-24,Jun,avian_monitoring/high_resolution_photos/2023/B...
758,713,2023,06/24/23 00:00:00,Breton Island,,94,1,2,5342,E,...,"MWP12, KKN17",,avian_monitoring/high_resolution_photos/2023/B...,avian_monitoring/screenshots/2023/Breton-Chand...,avian_monitoring/thumbnails/2023/Breton-Chande...,276,310,2023-Jun-24,Jun,avian_monitoring/high_resolution_photos/2023/B...


In [35]:
pd_species_2023.to_excel("avianmonitoring_2023.xlsx", index=False)


In [36]:
# Totals per survey date, colony and species, restricted to the rows flagged
# as best for BPE.
# NOTE(review): groupby drops rows with a missing value in any key by default;
# rows whose colony attributes did not match would be left out - TODO confirm.
group_keys = [
    "Year",
    "Date",
    "State",
    "GeoRegion",
    "ColonyName",
    "Longitude",
    "Latitude",
    "SpeciesCode",
]
count_columns = ["total_nests", "total_birds"]

best_for_bpe = pd_species_2023[pd_species_2023["BestForBPE"] == "Y"]
agg_pd_species = best_for_bpe.groupby(group_keys)[count_columns].sum()

In [37]:
agg_pd_species

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,total_nests,total_birds
Year,Date,State,GeoRegion,ColonyName,Longitude,Latitude,SpeciesCode,Unnamed: 8_level_1,Unnamed: 9_level_1
2022,05/16/22 00:00:00,LA,Breton-Chandeleur Islands,Breton Island,-89.1742,29.4955,AWPE,0,15
2022,05/16/22 00:00:00,LA,Breton-Chandeleur Islands,Breton Island,-89.1742,29.4955,BCNH,0,1
2022,05/16/22 00:00:00,LA,Breton-Chandeleur Islands,Breton Island,-89.1742,29.4955,BNST,0,3
2022,05/16/22 00:00:00,LA,Breton-Chandeleur Islands,Breton Island,-89.1742,29.4955,BRPE,5633,8064
2022,05/16/22 00:00:00,LA,Breton-Chandeleur Islands,Breton Island,-89.1742,29.4955,COGA,1,1
...,...,...,...,...,...,...,...,...,...
2023,06/24/23 00:00:00,LA,Breton-Chandeleur Islands,Breton Island,-89.1742,29.4955,HERG,0,2
2023,06/24/23 00:00:00,LA,Breton-Chandeleur Islands,Breton Island,-89.1742,29.4955,LAGU,704,2052
2023,06/24/23 00:00:00,LA,Breton-Chandeleur Islands,Breton Island,-89.1742,29.4955,ROSA,0,0
2023,06/24/23 00:00:00,LA,Breton-Chandeleur Islands,Breton Island,-89.1742,29.4955,ROYT,8779,9549


In [38]:
agg_pd_species.reset_index().rename(columns={"total_nests": "Nests", "total_birds": "Birds"}).to_excel("SummaryFileGenerated2023.xlsx", index=False)

# Reorganize Files
2022 photos are organized by date, camera, card and photo number:
e.g. `15May2022_Camera1Card1/15May2022_Cam1Card1_1.jpg`
2023 photos also have a month folder, and a small change in naming:
`2023/June 2023/21June2023Cam2Card1/21June2023_Cam2Card1_1.jpg`
Following the existing schema, for photos used in the dotting database, we want to organize them in the `f"{_base_folder}/high_resolution_photos/"` folder under a GeoRegion/ColonyName subfolder

In [49]:
_hr_original_folder = "HighResolutionImages/"
# the candidate will have this format 2022/15May2022_Camera1Card1/15May2022_Cam1Card1_1.jpg
# if year is 2023, then there is a folder for each month 'June 2023' or 'May 2023'

# Parse the survey date once and derive the two labels used in the paths.
survey_dates = pd.to_datetime(
    pd_species_2023["Date"].astype("str"), format="%m/%d/%y %H:%M:%S"
)
day_label = survey_dates.dt.strftime("%d%B%Y")
month_label = survey_dates.dt.strftime("%B %Y")
camera = pd_species_2023["CameraNumber"]
card = pd_species_2023["CardNumber"]

# Only 2023 has the extra "<Month> <Year>/" folder level.
month_folder = np.where(pd_species_2023["Year"] == 2023, month_label + "/", "")
# June 2023 folders are named "<date>Cam..."; all others "<date>_Camera...".
camera_prefix = np.where(month_label == "June 2023", "Cam", "_Camera")

# NOTE(review): unlike HighResImage_new, a missing CardNumber is not replaced
# by "1" here, so such rows get a NaN candidate - TODO confirm.
pd_species_2023["candidate_hr"] = (
    _hr_original_folder
    + pd_species_2023["Year"].astype("str")
    + "/"
    + month_folder
    + day_label
    + camera_prefix
    + camera
    + "Card"
    + card
    + "/"
    + day_label
    + "_Cam"
    + camera
    + "Card"
    + card
    + "_"
    + pd_species_2023["PhotoNumber"].str.strip()
    + ".jpg"
)

m_grouped = pd_species_2023[
    ["HighResImage_new", "candidate_hr", "thumbnail_new"]
].drop_duplicates()
# remove the ones that do not have a HighResImage_new
m_grouped = m_grouped[~m_grouped["HighResImage_new"].isna()]

# Copy every original photo to its unified key; `replace` comes from the
# parameters cell.
# NOTE(review): the `rename_files` parameter is not consulted here - confirm
# whether this step should be gated by it.
with ThreadPoolExecutor(max_workers=16) as e:
    futures = list(
        tqdm(
            e.map(
                partial(rename, replace=replace),
                m_grouped["candidate_hr"].tolist(),
                m_grouped["HighResImage_new"].tolist(),
            ),
            total=m_grouped.shape[0],
        )
    )

  0%|          | 0/290 [00:00<?, ?it/s]

In [50]:
if create_thumbnails:
    # Call generate_thumbnail(high_res_key, thumbnail_key, regenerate=False) for
    # every photo pair, 16 at a time.
    _make_thumbnail = partial(generate_thumbnail, regenerate=False)
    _thumb_inputs = m_grouped["HighResImage_new"].tolist()
    _thumb_outputs = m_grouped["thumbnail_new"].tolist()
    with ThreadPoolExecutor(max_workers=16) as e:
        futures = list(
            tqdm(
                e.map(_make_thumbnail, _thumb_inputs, _thumb_outputs),
                total=m_grouped.shape[0],
            )
        )


  0%|          | 0/290 [00:00<?, ?it/s]

For 2023 screenshots, the original files are available at
`DottedImages/2023/Breton Screen Captures/` and have the format `24June23BRETArea1.JPG`.
Given that we have the area and the date in the dataframe, we can generate the candidate names.

In [39]:
# Inspect the working frame; the screenshot candidates built next are derived
# from its Year, ColonyName, Date and DottingAreaNumber columns.
pd_species_2023

Unnamed: 0,AutoID,Year,Date,ColonyName,Subcolony,DottingAreaNumber,CameraNumber,CardNumber,PhotoNumber,PQ,...,OrigDotterID,NOTES August 2022,HighResImage_new,screenshot_new,thumbnail_new,total_nests,total_birds,date2,month,uid
0,698,2023,06/24/23 00:00:00,Breton Island,,88,1,2,5367,E,...,"MWP12, KKN17",,avian_monitoring/high_resolution_photos/2023/B...,avian_monitoring/screenshots/2023/Breton-Chand...,avian_monitoring/thumbnails/2023/Breton-Chande...,12,22,2023-Jun-24,Jun,avian_monitoring/high_resolution_photos/2023/B...
1,699,2023,06/24/23 00:00:00,Breton Island,,88,1,2,5367,E,...,"MWP12, KKN17",,avian_monitoring/high_resolution_photos/2023/B...,avian_monitoring/screenshots/2023/Breton-Chand...,avian_monitoring/thumbnails/2023/Breton-Chande...,32,43,2023-Jun-24,Jun,avian_monitoring/high_resolution_photos/2023/B...
2,700,2023,06/24/23 00:00:00,Breton Island,,88,1,2,5367,E,...,"MWP12, KKN17",,avian_monitoring/high_resolution_photos/2023/B...,avian_monitoring/screenshots/2023/Breton-Chand...,avian_monitoring/thumbnails/2023/Breton-Chande...,0,4,2023-Jun-24,Jun,avian_monitoring/high_resolution_photos/2023/B...
3,702,2023,06/24/23 00:00:00,Breton Island,,89,2,1,9529,E,...,"MWP12, KKN17",,avian_monitoring/high_resolution_photos/2023/B...,avian_monitoring/screenshots/2023/Breton-Chand...,avian_monitoring/thumbnails/2023/Breton-Chande...,21,34,2023-Jun-24,Jun,avian_monitoring/high_resolution_photos/2023/B...
4,703,2023,06/24/23 00:00:00,Breton Island,,90,1,2,5407,E,...,"MWP12, KKN17",,avian_monitoring/high_resolution_photos/2023/B...,avian_monitoring/screenshots/2023/Breton-Chand...,avian_monitoring/thumbnails/2023/Breton-Chande...,22,38,2023-Jun-24,Jun,avian_monitoring/high_resolution_photos/2023/B...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
755,336,2022,06/21/22 00:00:00,Chandeleur Islands South,,,,,,E,...,,ColonyGroupBuffer centroid location coordinates.,,,,4328,4413,2022-Jun-21,Jun,2022-Jun-21+Chandeleur Islands South#SATE
756,664,2023,06/24/23 00:00:00,Breton Island,,74,2,1,9465,E,...,"MWP12, KKN17",,avian_monitoring/high_resolution_photos/2023/B...,avian_monitoring/screenshots/2023/Breton-Chand...,avian_monitoring/thumbnails/2023/Breton-Chande...,15,26,2023-Jun-24,Jun,avian_monitoring/high_resolution_photos/2023/B...
757,705,2023,06/24/23 00:00:00,Breton Island,,91,1,2,5409,E,...,"MWP12, KKN17",,avian_monitoring/high_resolution_photos/2023/B...,avian_monitoring/screenshots/2023/Breton-Chand...,avian_monitoring/thumbnails/2023/Breton-Chande...,13,23,2023-Jun-24,Jun,avian_monitoring/high_resolution_photos/2023/B...
758,713,2023,06/24/23 00:00:00,Breton Island,,94,1,2,5342,E,...,"MWP12, KKN17",,avian_monitoring/high_resolution_photos/2023/B...,avian_monitoring/screenshots/2023/Breton-Chand...,avian_monitoring/thumbnails/2023/Breton-Chande...,276,310,2023-Jun-24,Jun,avian_monitoring/high_resolution_photos/2023/B...


In [46]:
# Day stamp used in the original screenshot names, e.g. "24June23".
_shot_day = pd.to_datetime(
    pd_species_2023["Date"].astype("str"), format="%m/%d/%y %H:%M:%S"
).dt.strftime("%d%B%y")
# Candidate names are only generated for the 2023 Breton Island rows.
_is_breton_2023 = (pd_species_2023["Year"] == 2023) & (
    pd_species_2023["ColonyName"] == "Breton Island"
)
_breton_capture = (
    "DottedImages/2023/Breton Screen Captures/"
    + _shot_day
    + "BRETArea"
    + pd_species_2023["DottingAreaNumber"]
    + ".JPG"
)
pd_species_2023["candidate_screenshot"] = np.where(
    _is_breton_2023, _breton_capture, None
)

# Distinct (new key, original key) pairs for the rows that have a candidate.
_has_candidate = pd_species_2023["candidate_screenshot"].notnull()
s_grouped = pd_species_2023.loc[
    _has_candidate, ["screenshot_new", "candidate_screenshot"]
].drop_duplicates()


In [49]:
# Call rename(candidate_screenshot, screenshot_new) for every pair on a thread pool.
_shot_sources = s_grouped["candidate_screenshot"].tolist()
_shot_targets = s_grouped["screenshot_new"].tolist()
with ThreadPoolExecutor(max_workers=16) as e:
    futures = list(
        tqdm(e.map(rename, _shot_sources, _shot_targets), total=s_grouped.shape[0])
    )

  0%|          | 0/185 [00:00<?, ?it/s]

## Update list of files on AWS (for file browsing)

In [50]:
# List every object in the bucket and fold the keys into a nested dict:
# one level per "/"-separated path component, with files as empty-dict leaves.
files_in_bucket = list(avian_data.objects.all())
# Keys ending in "/" are skipped (presumably folder placeholder objects).
filenames = [obj.key for obj in files_in_bucket if not obj.key.endswith("/")]
tree = {}
for file in filenames:
    node = tree
    for part in file.split("/"):
        # Reuse the existing sub-dict for this component or create it.
        node = node.setdefault(part, {})

In [51]:
# Root-level files that should not appear in the generated file listing.
remove_from_root = [
    "403.html",
    "index.html",
    "list_files.html",
    "list_files_ns.html",
    "explorer.css",
    "explorer.js",
    "browser.html",
    "test.txt",
    "image_unavailable.png",
]
for root_file in remove_from_root:
    # Entries that are absent from the tree are simply ignored.
    tree.pop(root_file, None)


In [52]:
# Top-level entries left in the tree after dropping the root files listed above.
tree.keys()

dict_keys(['DottedImages', 'HighResolutionImages', 'avian_monitoring', 'file_listing.json'])

In [53]:
import json

# Build the listing structure from the key tree (createListings is a helper
# defined elsewhere in this notebook) and keep a local JSON copy of it.
listings = createListings(tree, "")

with open("file_listing.json", "w") as listing_file:
    json.dump(listings, listing_file)

In [54]:
# Upload the listing to the bucket root as a JSON document.
avian_data.put_object(
    Key="file_listing.json",
    Body=json.dumps(listings),
    ContentType="application/json",
)

s3.Object(bucket_name='twi-aviandata', key='file_listing.json')

# Check existing files

In [55]:
# This is equivalent, but faster than: pd_species["thumbnail_new"] = pd_species["thumbnail_new"].progress_apply(exists_key)
with ThreadPoolExecutor(max_workers=128) as e:
    futures = {}
    _grouped = pd_species_2023[
        filter(lambda x: "_new" in x, pd_species_2023.columns)
    ].drop_duplicates()
    for f in _grouped.columns:
        futures[f] = list(
            tqdm(
                e.map(lambda y: exists_key(y), _grouped[f].tolist()),
                total=_grouped.shape[0],
            )
        )

key:nan, exception Parameter validation failed:
Invalid type for parameter Prefix, value: nan, type: <class 'float'>, valid types: <class 'str'>


  0%|          | 0/291 [00:00<?, ?it/s]

  0%|          | 0/291 [00:00<?, ?it/s]

key:nan, exception Parameter validation failed:
Invalid type for parameter Prefix, value: nan, type: <class 'float'>, valid types: <class 'str'>


  0%|          | 0/291 [00:00<?, ?it/s]

key:nan, exception Parameter validation failed:
Invalid type for parameter Prefix, value: nan, type: <class 'float'>, valid types: <class 'str'>


In [56]:
# For each checked column, show the keys that were not found, with full-width paths.
with pd.option_context("display.max_colwidth", None):
    for column, found in futures.items():
        display(_grouped.loc[~np.array(found), column])


48    NaN
Name: HighResImage_new, dtype: object

48                                                                                                          NaN
100     avian_monitoring/screenshots/2022/Breton-Chandeleur Islands/Breton Island/16May22Camera1-Card1-5119.jpg
102     avian_monitoring/screenshots/2022/Breton-Chandeleur Islands/Breton Island/16May22Camera1-Card1-5097.jpg
103     avian_monitoring/screenshots/2022/Breton-Chandeleur Islands/Breton Island/16May22Camera2-Card2-1925.jpg
104     avian_monitoring/screenshots/2022/Breton-Chandeleur Islands/Breton Island/16May22Camera2-Card2-1676.jpg
                                                         ...                                                   
735    avian_monitoring/screenshots/2022/Breton-Chandeleur Islands/Breton Island/21June22Camera2-Card2-3364.jpg
736    avian_monitoring/screenshots/2022/Breton-Chandeleur Islands/Breton Island/21June22Camera2-Card2-3361.jpg
737    avian_monitoring/screenshots/2022/Breton-Chandeleur Islands/Breton Island/21June22Camera2-Card2-3

48    NaN
Name: thumbnail_new, dtype: object

In [58]:
# Rows of the working frame whose high-resolution key was reported as missing.
x = "HighResImage_new"
_missing_keys = _grouped.loc[~np.array(futures[x]), x]
pd_species_2023[pd_species_2023[x].isin(_missing_keys)]


Unnamed: 0,AutoID,Year,Date,ColonyName,Subcolony,DottingAreaNumber,CameraNumber,CardNumber,PhotoNumber,PQ,...,NOTES August 2022,HighResImage_new,screenshot_new,thumbnail_new,total_nests,total_birds,date2,month,uid,candidate_screenshot
48,748,2023,05/23/23 00:00:00,Queen Bess Island,,,,,,E,...,,,,,0,1,2023-May-23,May,2023-May-23+Queen Bess Island#AMOY,
49,750,2023,05/23/23 00:00:00,Queen Bess Island,,,,,,E,...,,,,,1,1,2023-May-23,May,2023-May-23+Queen Bess Island#BCNH,
50,751,2023,05/23/23 00:00:00,Queen Bess Island,,,,,,E,...,,,,,1,6,2023-May-23,May,2023-May-23+Queen Bess Island#BNST,
51,752,2023,05/23/23 00:00:00,Queen Bess Island,,,,,,E,...,,,,,6070,9940,2023-May-23,May,2023-May-23+Queen Bess Island#BRPE,
172,252,2022,05/16/22 00:00:00,New Harbor Island 1,,,,,,P,...,,,,,0,0,2022-May-16,May,2022-May-16+New Harbor Island 1#SETU,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
751,332,2022,06/21/22 00:00:00,Chandeleur Islands South,,,,,,E,...,ColonyGroupBuffer centroid location coordinates.,,,,8,13,2022-Jun-21,Jun,2022-Jun-21+Chandeleur Islands South#GBTE,
752,333,2022,06/21/22 00:00:00,Chandeleur Islands South,,,,,,E,...,ColonyGroupBuffer centroid location coordinates.,,,,90,117,2022-Jun-21,Jun,2022-Jun-21+Chandeleur Islands South#LAGU,
753,334,2022,06/21/22 00:00:00,Chandeleur Islands South,,,,,,E,...,ColonyGroupBuffer centroid location coordinates.,,,,4,0,2022-Jun-21,Jun,2022-Jun-21+Chandeleur Islands South#ROSA,
754,335,2022,06/21/22 00:00:00,Chandeleur Islands South,,,,,,E,...,ColonyGroupBuffer centroid location coordinates.,,,,1602,1663,2022-Jun-21,Jun,2022-Jun-21+Chandeleur Islands South#ROYT,


In [59]:
# For every checked column: report how many keys are missing, show the affected
# rows and a per-year count, then blank the missing keys in the working frame.
for column, found in futures.items():
    _not_found = ~np.array(found)
    print(column, np.sum(_not_found))
    # Rows whose value in this column is one of the keys that were not found.
    _missing_rows = pd_species_2023[column].isin(_grouped.loc[_not_found, column])
    with pd.option_context("display.max_colwidth", None):
        _no_exists = pd_species_2023[_missing_rows]
        display(_no_exists)
        display(
            _no_exists[["Year", column]].drop_duplicates().groupby(["Year"]).count()
        )
        pd_species_2023.loc[_missing_rows, column] = None


HighResImage_new 1


Unnamed: 0,AutoID,Year,Date,ColonyName,Subcolony,DottingAreaNumber,CameraNumber,CardNumber,PhotoNumber,PQ,...,NOTES August 2022,HighResImage_new,screenshot_new,thumbnail_new,total_nests,total_birds,date2,month,uid,candidate_screenshot
48,748,2023,05/23/23 00:00:00,Queen Bess Island,,,,,,E,...,,,,,0,1,2023-May-23,May,2023-May-23+Queen Bess Island#AMOY,
49,750,2023,05/23/23 00:00:00,Queen Bess Island,,,,,,E,...,,,,,1,1,2023-May-23,May,2023-May-23+Queen Bess Island#BCNH,
50,751,2023,05/23/23 00:00:00,Queen Bess Island,,,,,,E,...,,,,,1,6,2023-May-23,May,2023-May-23+Queen Bess Island#BNST,
51,752,2023,05/23/23 00:00:00,Queen Bess Island,,,,,,E,...,,,,,6070,9940,2023-May-23,May,2023-May-23+Queen Bess Island#BRPE,
172,252,2022,05/16/22 00:00:00,New Harbor Island 1,,,,,,P,...,,,,,0,0,2022-May-16,May,2022-May-16+New Harbor Island 1#SETU,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
751,332,2022,06/21/22 00:00:00,Chandeleur Islands South,,,,,,E,...,ColonyGroupBuffer centroid location coordinates.,,,,8,13,2022-Jun-21,Jun,2022-Jun-21+Chandeleur Islands South#GBTE,
752,333,2022,06/21/22 00:00:00,Chandeleur Islands South,,,,,,E,...,ColonyGroupBuffer centroid location coordinates.,,,,90,117,2022-Jun-21,Jun,2022-Jun-21+Chandeleur Islands South#LAGU,
753,334,2022,06/21/22 00:00:00,Chandeleur Islands South,,,,,,E,...,ColonyGroupBuffer centroid location coordinates.,,,,4,0,2022-Jun-21,Jun,2022-Jun-21+Chandeleur Islands South#ROSA,
754,335,2022,06/21/22 00:00:00,Chandeleur Islands South,,,,,,E,...,ColonyGroupBuffer centroid location coordinates.,,,,1602,1663,2022-Jun-21,Jun,2022-Jun-21+Chandeleur Islands South#ROYT,


Unnamed: 0_level_0,HighResImage_new
Year,Unnamed: 1_level_1
2022,0
2023,0


screenshot_new 107


Unnamed: 0,AutoID,Year,Date,ColonyName,Subcolony,DottingAreaNumber,CameraNumber,CardNumber,PhotoNumber,PQ,...,NOTES August 2022,HighResImage_new,screenshot_new,thumbnail_new,total_nests,total_birds,date2,month,uid,candidate_screenshot
48,748,2023,05/23/23 00:00:00,Queen Bess Island,,,,,,E,...,,,,,0,1,2023-May-23,May,2023-May-23+Queen Bess Island#AMOY,
49,750,2023,05/23/23 00:00:00,Queen Bess Island,,,,,,E,...,,,,,1,1,2023-May-23,May,2023-May-23+Queen Bess Island#BCNH,
50,751,2023,05/23/23 00:00:00,Queen Bess Island,,,,,,E,...,,,,,1,6,2023-May-23,May,2023-May-23+Queen Bess Island#BNST,
51,752,2023,05/23/23 00:00:00,Queen Bess Island,,,,,,E,...,,,,,6070,9940,2023-May-23,May,2023-May-23+Queen Bess Island#BRPE,
100,14,2022,05/16/22 00:00:00,Breton Island,,8,1,1,5119,E,...,,avian_monitoring/high_resolution_photos/2022/Breton-Chandeleur Islands/Breton Island/16May22Camera1-Card1-5119.jpg,avian_monitoring/screenshots/2022/Breton-Chandeleur Islands/Breton Island/16May22Camera1-Card1-5119.jpg,avian_monitoring/thumbnails/2022/Breton-Chandeleur Islands/Breton Island/16May22Camera1-Card1-5119.png,6,32,2022-May-16,May,avian_monitoring/high_resolution_photos/2022/Breton-Chandeleur Islands/Breton Island/16May22Camera1-Card1-5119.jpg#BRPE,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
752,333,2022,06/21/22 00:00:00,Chandeleur Islands South,,,,,,E,...,ColonyGroupBuffer centroid location coordinates.,,,,90,117,2022-Jun-21,Jun,2022-Jun-21+Chandeleur Islands South#LAGU,
753,334,2022,06/21/22 00:00:00,Chandeleur Islands South,,,,,,E,...,ColonyGroupBuffer centroid location coordinates.,,,,4,0,2022-Jun-21,Jun,2022-Jun-21+Chandeleur Islands South#ROSA,
754,335,2022,06/21/22 00:00:00,Chandeleur Islands South,,,,,,E,...,ColonyGroupBuffer centroid location coordinates.,,,,1602,1663,2022-Jun-21,Jun,2022-Jun-21+Chandeleur Islands South#ROYT,
755,336,2022,06/21/22 00:00:00,Chandeleur Islands South,,,,,,E,...,ColonyGroupBuffer centroid location coordinates.,,,,4328,4413,2022-Jun-21,Jun,2022-Jun-21+Chandeleur Islands South#SATE,


Unnamed: 0_level_0,screenshot_new
Year,Unnamed: 1_level_1
2022,106
2023,0


thumbnail_new 1


Unnamed: 0,AutoID,Year,Date,ColonyName,Subcolony,DottingAreaNumber,CameraNumber,CardNumber,PhotoNumber,PQ,...,NOTES August 2022,HighResImage_new,screenshot_new,thumbnail_new,total_nests,total_birds,date2,month,uid,candidate_screenshot
48,748,2023,05/23/23 00:00:00,Queen Bess Island,,,,,,E,...,,,,,0,1,2023-May-23,May,2023-May-23+Queen Bess Island#AMOY,
49,750,2023,05/23/23 00:00:00,Queen Bess Island,,,,,,E,...,,,,,1,1,2023-May-23,May,2023-May-23+Queen Bess Island#BCNH,
50,751,2023,05/23/23 00:00:00,Queen Bess Island,,,,,,E,...,,,,,1,6,2023-May-23,May,2023-May-23+Queen Bess Island#BNST,
51,752,2023,05/23/23 00:00:00,Queen Bess Island,,,,,,E,...,,,,,6070,9940,2023-May-23,May,2023-May-23+Queen Bess Island#BRPE,
172,252,2022,05/16/22 00:00:00,New Harbor Island 1,,,,,,P,...,,,,,0,0,2022-May-16,May,2022-May-16+New Harbor Island 1#SETU,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
751,332,2022,06/21/22 00:00:00,Chandeleur Islands South,,,,,,E,...,ColonyGroupBuffer centroid location coordinates.,,,,8,13,2022-Jun-21,Jun,2022-Jun-21+Chandeleur Islands South#GBTE,
752,333,2022,06/21/22 00:00:00,Chandeleur Islands South,,,,,,E,...,ColonyGroupBuffer centroid location coordinates.,,,,90,117,2022-Jun-21,Jun,2022-Jun-21+Chandeleur Islands South#LAGU,
753,334,2022,06/21/22 00:00:00,Chandeleur Islands South,,,,,,E,...,ColonyGroupBuffer centroid location coordinates.,,,,4,0,2022-Jun-21,Jun,2022-Jun-21+Chandeleur Islands South#ROSA,
754,335,2022,06/21/22 00:00:00,Chandeleur Islands South,,,,,,E,...,ColonyGroupBuffer centroid location coordinates.,,,,1602,1663,2022-Jun-21,Jun,2022-Jun-21+Chandeleur Islands South#ROYT,


Unnamed: 0_level_0,thumbnail_new
Year,Unnamed: 1_level_1
2022,0
2023,0


In [60]:
# Save the frame (missing keys blanked) as CSV; the ".gz" suffix makes pandas gzip it.
pd_species_2023.to_csv(
    path_or_buf="avianmonitoring_2022-2023_Nulls.csv.gz",
    index=False,
)


In [61]:
# Same export as an Excel workbook.
pd_species_2023.to_excel(
    excel_writer="avianmonitoring_2022-2023_Nulls.xlsx",
    index=False,
)


In [None]:
# Only when thumbnails are both generated and replaced: run update_mime_type on
# every thumbnail key, using its default mime argument.
if replace and create_thumbnails:
    m_grouped["thumbnail_new"].progress_apply(update_mime_type)


In [None]:
if rename_files:
    # Run update_mime_type(key, mime="image/jpeg") for every high-resolution key.
    _set_jpeg_mime = partial(update_mime_type, mime="image/jpeg")
    _hr_keys = m_grouped["HighResImage_new"].tolist()
    with ThreadPoolExecutor(max_workers=16) as e:
        futures = list(
            tqdm(e.map(_set_jpeg_mime, _hr_keys), total=m_grouped.shape[0])
        )
