In [1]:
import polars as pl
from pathlib import Path

camtrapdp_package_path = "/home/wsyxbcl/Downloads/yunta_202306-202404-1.0/"
maze_taglist_path = "/home/wsyxbcl/Emberiza/maze-taxonomy/data/maze_taglist.csv"

In [2]:
df_observations = pl.read_csv(Path(camtrapdp_package_path).joinpath("observations.csv")).select(
    ["mediaID", "deploymentID", "scientificName", "_id"]
).rename({"_id": "observationID"})
df_media = pl.read_csv(Path(camtrapdp_package_path).joinpath("media.csv")).select(
    ["mediaID", "timestamp", "filePath", "fileName"])
df = df_observations.join(df_media, on="mediaID", how="left").with_columns(pl.col("timestamp").str.to_datetime())
df

mediaID,deploymentID,scientificName,observationID,timestamp,filePath,fileName
i64,str,str,i64,"datetime[μs, UTC]",str,str
1361895,"""1110_YT202306-202309""","""Lutra lutra""",1394198,2023-06-28 22:06:47 UTC,"""https://localhost/storage/reso…","""1110_YT202306-202309-Animal-IM…"
1361896,"""1110_YT202306-202309""","""Lutra lutra""",1394199,2023-06-28 22:06:48 UTC,"""https://localhost/storage/reso…","""1110_YT202306-202309-Animal-IM…"
1362495,"""1110_YT202306-202309""","""Lutra lutra""",1394798,2023-06-28 22:07:10 UTC,"""https://localhost/storage/reso…","""1110_YT202306-202309-Animal-IM…"
1361897,"""1110_YT202306-202309""","""Lutra lutra""",1394200,2023-06-29 13:06:50 UTC,"""https://localhost/storage/reso…","""1110_YT202306-202309-Animal-IM…"
1361898,"""1110_YT202306-202309""","""Lutra lutra""",1394201,2023-06-29 13:06:50 UTC,"""https://localhost/storage/reso…","""1110_YT202306-202309-Animal-IM…"
…,…,…,…,…,…,…
1361707,"""山水相机2_YT202309-202312""","""Blank""",1474409,2023-12-22 07:10:20 UTC,"""https://localhost/storage/reso…","""山水相机2_YT202309-202312-Animal-E…"
1361709,"""山水相机2_YT202309-202312""","""Blank""",1474411,2023-12-22 07:10:36 UTC,"""https://localhost/storage/reso…","""山水相机2_YT202309-202312-Animal-E…"
1361710,"""山水相机2_YT202309-202312""","""Panthera uncia""",1474412,2023-12-22 21:08:27 UTC,"""https://localhost/storage/reso…","""山水相机2_YT202309-202312-Animal-E…"
1361711,"""山水相机2_YT202309-202312""","""Panthera uncia""",1474413,2023-12-22 21:08:28 UTC,"""https://localhost/storage/reso…","""山水相机2_YT202309-202312-Animal-E…"


In [3]:
# Temporal independence
df_sorted = df.lazy().sort("timestamp").sort("scientificName").sort("deploymentID").collect()

df_independent = df_sorted.rolling(
    index_column="timestamp",
    group_by=["deploymentID", "scientificName"],
    period="30m",
).agg([
    pl.count("scientificName").alias("count"),
    pl.last("mediaID"),
    pl.last("observationID"),
    pl.last("filePath"),
    pl.last("fileName")
]).filter(
    pl.col("count") == 1
)
df_independent

deploymentID,scientificName,timestamp,count,mediaID,observationID,filePath,fileName
str,str,"datetime[μs, UTC]",u32,i64,i64,str,str
"""1110_YT202306-202309""","""Lutra lutra""",2023-06-28 22:06:47 UTC,1,1361895,1394198,"""https://localhost/storage/reso…","""1110_YT202306-202309-Animal-IM…"
"""1110_YT202306-202309""","""Lutra lutra""",2023-07-10 21:06:41 UTC,1,1361901,1394204,"""https://localhost/storage/reso…","""1110_YT202306-202309-Animal-IM…"
"""1110_YT202306-202309""","""Phoenicurus leucocephalus""",2023-07-14 06:27:11 UTC,1,1361907,1394210,"""https://localhost/storage/reso…","""1110_YT202306-202309-Animal-IM…"
"""1110_YT202306-202309""","""Phoenicurus leucocephalus""",2023-08-13 04:10:13 UTC,1,1362303,1394606,"""https://localhost/storage/reso…","""1110_YT202306-202309-Animal-IM…"
"""1110_YT202306-202309""","""Phoenicurus leucocephalus""",2023-08-13 07:17:44 UTC,1,1362333,1394636,"""https://localhost/storage/reso…","""1110_YT202306-202309-Animal-IM…"
…,…,…,…,…,…,…,…
"""山水相机2_YT202309-202312""","""Aves""",2023-09-17 02:27:40 UTC,1,1361517,1474219,"""https://localhost/storage/reso…","""山水相机2_YT202309-202312-Animal-E…"
"""山水相机2_YT202309-202312""","""Aves""",2023-10-11 03:23:39 UTC,1,1361561,1474263,"""https://localhost/storage/reso…","""山水相机2_YT202309-202312-Animal-E…"
"""山水相机2_YT202309-202312""","""Perdix hodgsoniae""",2023-09-17 03:50:30 UTC,1,1361518,1474220,"""https://localhost/storage/reso…","""山水相机2_YT202309-202312-Animal-E…"
"""山水相机2_YT202309-202312""","""Otocolobus manul""",2023-10-12 08:50:28 UTC,1,1361782,1474484,"""https://localhost/storage/reso…","""山水相机2_YT202309-202312-Animal-E…"


In [4]:
# Patch species info
df_taglist = pl.read_csv(maze_taglist_path).select(
    ["mazeScientificName", "mazeNameCN", "taxonID", "rank"]
).rename({"mazeScientificName": "scientificName"})
df_species_info = df_independent.join(df_taglist, on="scientificName", how="left")
df_species_info

deploymentID,scientificName,timestamp,count,mediaID,observationID,filePath,fileName,mazeNameCN,taxonID,rank
str,str,"datetime[μs, UTC]",u32,i64,i64,str,str,str,str,str
"""1110_YT202306-202309""","""Lutra lutra""",2023-06-28 22:06:47 UTC,1,1361895,1394198,"""https://localhost/storage/reso…","""1110_YT202306-202309-Animal-IM…","""欧亚水獭""","""72PQL""","""species"""
"""1110_YT202306-202309""","""Lutra lutra""",2023-07-10 21:06:41 UTC,1,1361901,1394204,"""https://localhost/storage/reso…","""1110_YT202306-202309-Animal-IM…","""欧亚水獭""","""72PQL""","""species"""
"""1110_YT202306-202309""","""Phoenicurus leucocephalus""",2023-07-14 06:27:11 UTC,1,1361907,1394210,"""https://localhost/storage/reso…","""1110_YT202306-202309-Animal-IM…","""白顶溪鸲""","""TF6L""","""species"""
"""1110_YT202306-202309""","""Phoenicurus leucocephalus""",2023-08-13 04:10:13 UTC,1,1362303,1394606,"""https://localhost/storage/reso…","""1110_YT202306-202309-Animal-IM…","""白顶溪鸲""","""TF6L""","""species"""
"""1110_YT202306-202309""","""Phoenicurus leucocephalus""",2023-08-13 07:17:44 UTC,1,1362333,1394636,"""https://localhost/storage/reso…","""1110_YT202306-202309-Animal-IM…","""白顶溪鸲""","""TF6L""","""species"""
…,…,…,…,…,…,…,…,…,…,…
"""山水相机2_YT202309-202312""","""Aves""",2023-09-17 02:27:40 UTC,1,1361517,1474219,"""https://localhost/storage/reso…","""山水相机2_YT202309-202312-Animal-E…","""鸟类""","""V2""","""class"""
"""山水相机2_YT202309-202312""","""Aves""",2023-10-11 03:23:39 UTC,1,1361561,1474263,"""https://localhost/storage/reso…","""山水相机2_YT202309-202312-Animal-E…","""鸟类""","""V2""","""class"""
"""山水相机2_YT202309-202312""","""Perdix hodgsoniae""",2023-09-17 03:50:30 UTC,1,1361518,1474220,"""https://localhost/storage/reso…","""山水相机2_YT202309-202312-Animal-E…","""高原山鹑""","""9KMCS""","""species"""
"""山水相机2_YT202309-202312""","""Otocolobus manul""",2023-10-12 08:50:28 UTC,1,1361782,1474484,"""https://localhost/storage/reso…","""山水相机2_YT202309-202312-Animal-E…","""兔狲""","""3DXVG""","""species"""


In [5]:
# Patch deployments
df_deployments = pl.read_csv(Path(camtrapdp_package_path).joinpath("deployments.csv")).select("deploymentID", "latitude", "longitude", "coordinateUncertainty", "setupBy", "deploymentTags", "deploymentComments")
df_deployments = df_deployments.with_columns(
    pl.col("deploymentTags")
    .str.split(by=" | ").list.first()
    .str.split(by=":").list.last()
    .alias("gpsSource")
)
df_deployments

deploymentID,latitude,longitude,coordinateUncertainty,setupBy,deploymentTags,deploymentComments,gpsSource
str,f64,f64,i64,str,str,str,str
"""1110_YT202306-202309""",33.63217,96.42413,10,"""藏格""","""coordinate:GPS | name:1110""","""有效工作65日; ""","""GPS"""
"""1110_YT202312-202404""",0.0,0.0,10,"""藏格""","""coordinate:null | name:1110""","""有效工作127日; 水獭相机""","""null"""
"""400101_YT202306-202309""",33.62486,96.47024,10,,"""coordinate:guess | name:400101""","""有效工作65日; 跟后面季度4042位点背景一样""","""guess"""
"""4002_YT202309-202312""",33.61848,96.46279,10,"""当珍文德""","""coordinate:GPS | name:4002""","""有效工作0日; 无有效数据，可能开机没开对""","""GPS"""
"""4002_YT202312-202404""",33.61848,96.46279,10,"""当珍文德""","""coordinate:GPS | name:4002""","""有效工作23日; ""","""GPS"""
…,…,…,…,…,…,…,…
"""4043_YT202312-202404""",0.0,0.0,10,"""当文/旦增""","""coordinate:null | name:4043""","""有效工作120日; ""","""null"""
"""LJR_YT202306-202309""",33.59443,96.40628,10,,"""coordinate:GPS | name:LJR""","""有效工作55日; ""","""GPS"""
"""shanshui01realtime_YT202312-20…",33.59443,96.40628,10,,"""coordinate:GPS | name:shanshui…","""有效工作30日; ""","""GPS"""
"""山水相机1_YT202309-202312""",33.59443,96.40628,10,,"""coordinate:GPS | name:山水相机1""","""有效工作131日; LJR LHY其中一个""","""GPS"""


In [6]:
# Patch GPS
import math
def haversine(coord1_deg, coord2_deg):
    R = 6371.0
    lat1_deg, lon1_deg = coord1_deg
    lat2_deg, lon2_deg = coord2_deg

    lat1_rad = math.radians(lat1_deg)
    lon1_rad = math.radians(lon1_deg)
    lat2_rad = math.radians(lat2_deg)
    lon2_rad = math.radians(lon2_deg)

    dlat_rad = lat2_rad - lat1_rad
    dlon_rad = lon2_rad - lon1_rad
    a = math.sin(dlat_rad / 2)**2 + math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(dlon_rad / 2)**2
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    distance_km = R * c
    return distance_km

s_latitude = df_deployments["latitude"]
s_longitude = df_deployments["longitude"]

average_coord = (s_latitude.mean(), s_longitude.mean())

max_distance = 0
for i in range(s_latitude.len()):
    coord = (s_latitude[i], s_longitude[i])
    if coord[0] == 0:
        continue
    distance_km = haversine(coord, average_coord)
    if distance_km > max_distance:
        max_distance = distance_km


gps_uncertainty = {
    "GPS": 100,
    "guess": 150 * 1000, # Estimate by grid setup
    "null": int(max_distance) * 1000 # Calculated
}


df_deployments_patched = df_deployments.with_columns(
    coordinateUncertainty = pl.col("gpsSource").replace(gps_uncertainty),
    latitude = pl.col("latitude").replace(0.0, average_coord[0]),
    longitude = pl.col("latitude").replace(0.0, average_coord[1]),
)
df_deployments_patched

deploymentID,latitude,longitude,coordinateUncertainty,setupBy,deploymentTags,deploymentComments,gpsSource
str,f64,f64,str,str,str,str,str
"""1110_YT202306-202309""",33.63217,33.63217,"""100""","""藏格""","""coordinate:GPS | name:1110""","""有效工作65日; ""","""GPS"""
"""1110_YT202312-202404""",32.336575,92.728208,"""377000""","""藏格""","""coordinate:null | name:1110""","""有效工作127日; 水獭相机""","""null"""
"""400101_YT202306-202309""",33.62486,33.62486,"""150000""",,"""coordinate:guess | name:400101""","""有效工作65日; 跟后面季度4042位点背景一样""","""guess"""
"""4002_YT202309-202312""",33.61848,33.61848,"""100""","""当珍文德""","""coordinate:GPS | name:4002""","""有效工作0日; 无有效数据，可能开机没开对""","""GPS"""
"""4002_YT202312-202404""",33.61848,33.61848,"""100""","""当珍文德""","""coordinate:GPS | name:4002""","""有效工作23日; ""","""GPS"""
…,…,…,…,…,…,…,…
"""4043_YT202312-202404""",32.336575,92.728208,"""377000""","""当文/旦增""","""coordinate:null | name:4043""","""有效工作120日; ""","""null"""
"""LJR_YT202306-202309""",33.59443,33.59443,"""100""",,"""coordinate:GPS | name:LJR""","""有效工作55日; ""","""GPS"""
"""shanshui01realtime_YT202312-20…",33.59443,33.59443,"""100""",,"""coordinate:GPS | name:shanshui…","""有效工作30日; ""","""GPS"""
"""山水相机1_YT202309-202312""",33.59443,33.59443,"""100""",,"""coordinate:GPS | name:山水相机1""","""有效工作131日; LJR LHY其中一个""","""GPS"""


In [7]:
df_result = df_species_info.join(df_deployments_patched, on="deploymentID", how="left")
df_result

deploymentID,scientificName,timestamp,count,mediaID,observationID,filePath,fileName,mazeNameCN,taxonID,rank,latitude,longitude,coordinateUncertainty,setupBy,deploymentTags,deploymentComments,gpsSource
str,str,"datetime[μs, UTC]",u32,i64,i64,str,str,str,str,str,f64,f64,str,str,str,str,str
"""1110_YT202306-202309""","""Lutra lutra""",2023-06-28 22:06:47 UTC,1,1361895,1394198,"""https://localhost/storage/reso…","""1110_YT202306-202309-Animal-IM…","""欧亚水獭""","""72PQL""","""species""",33.63217,33.63217,"""100""","""藏格""","""coordinate:GPS | name:1110""","""有效工作65日; ""","""GPS"""
"""1110_YT202306-202309""","""Lutra lutra""",2023-07-10 21:06:41 UTC,1,1361901,1394204,"""https://localhost/storage/reso…","""1110_YT202306-202309-Animal-IM…","""欧亚水獭""","""72PQL""","""species""",33.63217,33.63217,"""100""","""藏格""","""coordinate:GPS | name:1110""","""有效工作65日; ""","""GPS"""
"""1110_YT202306-202309""","""Phoenicurus leucocephalus""",2023-07-14 06:27:11 UTC,1,1361907,1394210,"""https://localhost/storage/reso…","""1110_YT202306-202309-Animal-IM…","""白顶溪鸲""","""TF6L""","""species""",33.63217,33.63217,"""100""","""藏格""","""coordinate:GPS | name:1110""","""有效工作65日; ""","""GPS"""
"""1110_YT202306-202309""","""Phoenicurus leucocephalus""",2023-08-13 04:10:13 UTC,1,1362303,1394606,"""https://localhost/storage/reso…","""1110_YT202306-202309-Animal-IM…","""白顶溪鸲""","""TF6L""","""species""",33.63217,33.63217,"""100""","""藏格""","""coordinate:GPS | name:1110""","""有效工作65日; ""","""GPS"""
"""1110_YT202306-202309""","""Phoenicurus leucocephalus""",2023-08-13 07:17:44 UTC,1,1362333,1394636,"""https://localhost/storage/reso…","""1110_YT202306-202309-Animal-IM…","""白顶溪鸲""","""TF6L""","""species""",33.63217,33.63217,"""100""","""藏格""","""coordinate:GPS | name:1110""","""有效工作65日; ""","""GPS"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""山水相机2_YT202309-202312""","""Aves""",2023-09-17 02:27:40 UTC,1,1361517,1474219,"""https://localhost/storage/reso…","""山水相机2_YT202309-202312-Animal-E…","""鸟类""","""V2""","""class""",33.59443,33.59443,"""100""",,"""coordinate:GPS | name:山水相机2""","""有效工作131日; LJR LHY另一个""","""GPS"""
"""山水相机2_YT202309-202312""","""Aves""",2023-10-11 03:23:39 UTC,1,1361561,1474263,"""https://localhost/storage/reso…","""山水相机2_YT202309-202312-Animal-E…","""鸟类""","""V2""","""class""",33.59443,33.59443,"""100""",,"""coordinate:GPS | name:山水相机2""","""有效工作131日; LJR LHY另一个""","""GPS"""
"""山水相机2_YT202309-202312""","""Perdix hodgsoniae""",2023-09-17 03:50:30 UTC,1,1361518,1474220,"""https://localhost/storage/reso…","""山水相机2_YT202309-202312-Animal-E…","""高原山鹑""","""9KMCS""","""species""",33.59443,33.59443,"""100""",,"""coordinate:GPS | name:山水相机2""","""有效工作131日; LJR LHY另一个""","""GPS"""
"""山水相机2_YT202309-202312""","""Otocolobus manul""",2023-10-12 08:50:28 UTC,1,1361782,1474484,"""https://localhost/storage/reso…","""山水相机2_YT202309-202312-Animal-E…","""兔狲""","""3DXVG""","""species""",33.59443,33.59443,"""100""",,"""coordinate:GPS | name:山水相机2""","""有效工作131日; LJR LHY另一个""","""GPS"""


In [10]:
df_result_under_species = df_result.filter(pl.col("rank").is_in(["species", "subspecies"]))
df_result_under_species.write_csv(Path(camtrapdp_package_path).joinpath("yunta_202306-202404_encounter_under_species.csv"), include_bom=True)
df_result_under_species

deploymentID,scientificName,timestamp,count,mediaID,observationID,filePath,fileName,mazeNameCN,taxonID,rank,latitude,longitude,coordinateUncertainty,setupBy,deploymentTags,deploymentComments,gpsSource
str,str,"datetime[μs, UTC]",u32,i64,i64,str,str,str,str,str,f64,f64,str,str,str,str,str
"""1110_YT202306-202309""","""Lutra lutra""",2023-06-28 22:06:47 UTC,1,1361895,1394198,"""https://localhost/storage/reso…","""1110_YT202306-202309-Animal-IM…","""欧亚水獭""","""72PQL""","""species""",33.63217,33.63217,"""100""","""藏格""","""coordinate:GPS | name:1110""","""有效工作65日; ""","""GPS"""
"""1110_YT202306-202309""","""Lutra lutra""",2023-07-10 21:06:41 UTC,1,1361901,1394204,"""https://localhost/storage/reso…","""1110_YT202306-202309-Animal-IM…","""欧亚水獭""","""72PQL""","""species""",33.63217,33.63217,"""100""","""藏格""","""coordinate:GPS | name:1110""","""有效工作65日; ""","""GPS"""
"""1110_YT202306-202309""","""Phoenicurus leucocephalus""",2023-07-14 06:27:11 UTC,1,1361907,1394210,"""https://localhost/storage/reso…","""1110_YT202306-202309-Animal-IM…","""白顶溪鸲""","""TF6L""","""species""",33.63217,33.63217,"""100""","""藏格""","""coordinate:GPS | name:1110""","""有效工作65日; ""","""GPS"""
"""1110_YT202306-202309""","""Phoenicurus leucocephalus""",2023-08-13 04:10:13 UTC,1,1362303,1394606,"""https://localhost/storage/reso…","""1110_YT202306-202309-Animal-IM…","""白顶溪鸲""","""TF6L""","""species""",33.63217,33.63217,"""100""","""藏格""","""coordinate:GPS | name:1110""","""有效工作65日; ""","""GPS"""
"""1110_YT202306-202309""","""Phoenicurus leucocephalus""",2023-08-13 07:17:44 UTC,1,1362333,1394636,"""https://localhost/storage/reso…","""1110_YT202306-202309-Animal-IM…","""白顶溪鸲""","""TF6L""","""species""",33.63217,33.63217,"""100""","""藏格""","""coordinate:GPS | name:1110""","""有效工作65日; ""","""GPS"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""山水相机2_YT202309-202312""","""Pseudois nayaur""",2023-11-29 09:02:23 UTC,1,1361812,1474514,"""https://localhost/storage/reso…","""山水相机2_YT202309-202312-Animal-E…","""岩羊""","""4NYLG""","""species""",33.59443,33.59443,"""100""",,"""coordinate:GPS | name:山水相机2""","""有效工作131日; LJR LHY另一个""","""GPS"""
"""山水相机2_YT202309-202312""","""Pseudois nayaur""",2023-12-09 09:19:29 UTC,1,1361780,1474482,"""https://localhost/storage/reso…","""山水相机2_YT202309-202312-Animal-E…","""岩羊""","""4NYLG""","""species""",33.59443,33.59443,"""100""",,"""coordinate:GPS | name:山水相机2""","""有效工作131日; LJR LHY另一个""","""GPS"""
"""山水相机2_YT202309-202312""","""Perdix hodgsoniae""",2023-09-17 03:50:30 UTC,1,1361518,1474220,"""https://localhost/storage/reso…","""山水相机2_YT202309-202312-Animal-E…","""高原山鹑""","""9KMCS""","""species""",33.59443,33.59443,"""100""",,"""coordinate:GPS | name:山水相机2""","""有效工作131日; LJR LHY另一个""","""GPS"""
"""山水相机2_YT202309-202312""","""Otocolobus manul""",2023-10-12 08:50:28 UTC,1,1361782,1474484,"""https://localhost/storage/reso…","""山水相机2_YT202309-202312-Animal-E…","""兔狲""","""3DXVG""","""species""",33.59443,33.59443,"""100""",,"""coordinate:GPS | name:山水相机2""","""有效工作131日; LJR LHY另一个""","""GPS"""
