In [43]:
import polars as pl
from pathlib import Path

camtrapdp_package_path = "/home/wsyxbcl/Downloads/qilian-202312-202405-1.0"
maze_taglist_path = "/home/wsyxbcl/Emberiza/maze-taxonomy/data/maze_taglist.csv"
pl.__version__

'1.8.1'

In [44]:
df_observations = pl.read_csv(Path(camtrapdp_package_path).joinpath("observations.csv")).select(
    ["mediaID", "deploymentID", "scientificName", "_id"]
).rename({"_id": "observationID"})
df_media = pl.read_csv(Path(camtrapdp_package_path).joinpath("media.csv")).select(
    ["mediaID", "timestamp", "filePath", "fileName"])
df = df_observations.join(df_media, on="mediaID", how="left").with_columns(pl.col("timestamp").str.to_datetime())
df

mediaID,deploymentID,scientificName,observationID,timestamp,filePath,fileName
i64,str,str,i64,"datetime[μs, UTC]",str,str
1257806,"""0004_202312-202405""","""Homo sapiens""",1322999,2023-12-08 02:47:39 UTC,"""https://localhost/storage/reso…","""0004_202312-202405-DSCF0001"""
1257807,"""0004_202312-202405""","""Homo sapiens""",1323000,2023-12-08 02:47:40 UTC,"""https://localhost/storage/reso…","""0004_202312-202405-DSCF0002"""
1285420,"""0004_202312-202405""","""Homo sapiens""",1350613,2023-12-08 02:47:42 UTC,"""https://localhost/storage/reso…","""0004_202312-202405-DSCF0003"""
1257808,"""0004_202312-202405""","""Homo sapiens""",1323001,2023-12-08 02:48:28 UTC,"""https://localhost/storage/reso…","""0004_202312-202405-DSCF0004"""
1257809,"""0004_202312-202405""","""Homo sapiens""",1323002,2023-12-08 02:48:29 UTC,"""https://localhost/storage/reso…","""0004_202312-202405-DSCF0005"""
…,…,…,…,…,…,…
1285417,"""TJA20_202312-202405""","""Pseudois nayaur""",1350610,2023-12-16 03:41:16 UTC,"""https://localhost/storage/reso…","""202312-202405-TJA20_202312-202…"
1293733,"""TJA20_202312-202405""","""Pseudois nayaur""",1358926,2023-12-16 03:41:38 UTC,"""https://localhost/storage/reso…","""202312-202405-TJA20_202312-202…"
1285419,"""TJA20_202312-202405""","""Pseudois nayaur""",1350612,2023-12-16 03:41:44 UTC,"""https://localhost/storage/reso…","""202312-202405-TJA20_202312-202…"
1285418,"""TJA20_202312-202405""","""Pseudois nayaur""",1350611,2023-12-16 03:41:44 UTC,"""https://localhost/storage/reso…","""202312-202405-TJA20_202312-202…"


In [45]:
# Clean
df_cleaned = df.drop_nulls(subset=["scientificName"]).filter(~pl.col("scientificName").eq("Homo sapiens"))
df_cleaned = df_cleaned.unique(subset=["deploymentID", "scientificName", "timestamp"])

# Temporal independence
df_sorted = df_cleaned.sort("timestamp").sort("scientificName").sort("deploymentID")

df_independent = df_sorted.rolling(
    index_column="timestamp",
    group_by=[pl.col("deploymentID"), pl.col("scientificName")],
    period="30m",
    offset="0",
    closed="left"
).agg([
    pl.col("scientificName").count().alias("count"),
    pl.last("mediaID"),
    pl.last("observationID"),
    pl.last("filePath"),
    pl.last("fileName")
]).filter(pl.col("count").eq(1))
df_independent

deploymentID,scientificName,timestamp,count,mediaID,observationID,filePath,fileName
str,str,"datetime[μs, UTC]",u32,i64,i64,str,str
"""0004_202312-202405""","""Blank""",2023-12-08 06:19:50 UTC,1,1285505,1350698,"""https://localhost/storage/reso…","""0004_202312-202405-DSCF0258"""
"""0004_202312-202405""","""Blank""",2023-12-08 07:08:00 UTC,1,1285506,1350699,"""https://localhost/storage/reso…","""0004_202312-202405-DSCF0261"""
"""0004_202312-202405""","""Blank""",2023-12-08 19:35:54 UTC,1,1285507,1350700,"""https://localhost/storage/reso…","""0004_202312-202405-DSCF0264"""
"""0004_202312-202405""","""Blank""",2023-12-09 11:20:00 UTC,1,1285508,1350701,"""https://localhost/storage/reso…","""0004_202312-202405-DSCF0267"""
"""0004_202312-202405""","""Blank""",2023-12-10 05:39:22 UTC,1,1285511,1350704,"""https://localhost/storage/reso…","""0004_202312-202405-DSCF0276"""
…,…,…,…,…,…,…,…
"""TJA20_202312-202405""","""Lepus oiostolus""",2023-12-11 14:47:48 UTC,1,1293710,1358903,"""https://localhost/storage/reso…","""202312-202405-TJA20_202312-202…"
"""TJA20_202312-202405""","""Lepus oiostolus""",2023-12-11 18:32:06 UTC,1,1293714,1358907,"""https://localhost/storage/reso…","""202312-202405-TJA20_202312-202…"
"""TJA20_202312-202405""","""Lepus oiostolus""",2023-12-14 20:52:34 UTC,1,1285391,1350584,"""https://localhost/storage/reso…","""202312-202405-TJA20_202312-202…"
"""TJA20_202312-202405""","""Leucosticte brandti""",2023-12-15 01:06:58 UTC,1,1293725,1358918,"""https://localhost/storage/reso…","""202312-202405-TJA20_202312-202…"


In [46]:
# Patch species info
df_taglist = pl.read_csv(maze_taglist_path).select(
    ["mazeScientificName", "mazeNameCN", "taxonID", "rank"]
).rename({"mazeScientificName": "scientificName"})
df_species_info = df_independent.join(df_taglist, on="scientificName", how="left")
df_species_info

deploymentID,scientificName,timestamp,count,mediaID,observationID,filePath,fileName,mazeNameCN,taxonID,rank
str,str,"datetime[μs, UTC]",u32,i64,i64,str,str,str,str,str
"""0004_202312-202405""","""Blank""",2023-12-08 06:19:50 UTC,1,1285505,1350698,"""https://localhost/storage/reso…","""0004_202312-202405-DSCF0258""","""无动物""","""BLANK""","""null"""
"""0004_202312-202405""","""Blank""",2023-12-08 07:08:00 UTC,1,1285506,1350699,"""https://localhost/storage/reso…","""0004_202312-202405-DSCF0261""","""无动物""","""BLANK""","""null"""
"""0004_202312-202405""","""Blank""",2023-12-08 19:35:54 UTC,1,1285507,1350700,"""https://localhost/storage/reso…","""0004_202312-202405-DSCF0264""","""无动物""","""BLANK""","""null"""
"""0004_202312-202405""","""Blank""",2023-12-09 11:20:00 UTC,1,1285508,1350701,"""https://localhost/storage/reso…","""0004_202312-202405-DSCF0267""","""无动物""","""BLANK""","""null"""
"""0004_202312-202405""","""Blank""",2023-12-10 05:39:22 UTC,1,1285511,1350704,"""https://localhost/storage/reso…","""0004_202312-202405-DSCF0276""","""无动物""","""BLANK""","""null"""
…,…,…,…,…,…,…,…,…,…,…
"""TJA20_202312-202405""","""Lepus oiostolus""",2023-12-11 14:47:48 UTC,1,1293710,1358903,"""https://localhost/storage/reso…","""202312-202405-TJA20_202312-202…","""高原兔""","""6PPPY""","""species"""
"""TJA20_202312-202405""","""Lepus oiostolus""",2023-12-11 18:32:06 UTC,1,1293714,1358907,"""https://localhost/storage/reso…","""202312-202405-TJA20_202312-202…","""高原兔""","""6PPPY""","""species"""
"""TJA20_202312-202405""","""Lepus oiostolus""",2023-12-14 20:52:34 UTC,1,1285391,1350584,"""https://localhost/storage/reso…","""202312-202405-TJA20_202312-202…","""高原兔""","""6PPPY""","""species"""
"""TJA20_202312-202405""","""Leucosticte brandti""",2023-12-15 01:06:58 UTC,1,1293725,1358918,"""https://localhost/storage/reso…","""202312-202405-TJA20_202312-202…","""高山岭雀""","""6Q5TT""","""species"""


In [47]:
# Patch deployments
df_deployments = pl.read_csv(Path(camtrapdp_package_path).joinpath("deployments.csv")).select("deploymentID", "latitude", "longitude", "coordinateUncertainty", "setupBy", "deploymentTags", "deploymentComments")
df_deployments = df_deployments.with_columns(
    pl.col("deploymentTags")
    .str.split(by=" | ").list.first()
    .str.split(by=":").list.last()
    .alias("gpsSource")
)
df_deployments

deploymentID,latitude,longitude,coordinateUncertainty,setupBy,deploymentTags,deploymentComments,gpsSource
str,f64,f64,i64,str,str,str,str
"""0004_202312-202405""",38.88777,97.28101,10,,"""coordinate:null | name:0004""","""有效工作10日; 202405OMG换UOV""","""null"""
"""0004_QL202305-202312""",38.88777,97.28101,10,,"""coordinate:null | name:0004""","""有效工作0日; 该型号相机视频过大""","""null"""
"""0008_202312-202405""",38.90337,97.52539,10,,"""coordinate:null | name:0008""","""有效工作99日; 点位撤销；但有雪豹""","""null"""
"""0008_QL202305-202312""",38.90337,97.52539,10,,"""coordinate:null | name:0008""","""有效工作82日; 这个型号相机视频太大，占空间""","""null"""
"""0018_202312-202405""",38.91918,97.51084,10,,"""coordinate:null | name:0018""","""有效工作142日; 点位撤销；但有雪豹""","""null"""
…,…,…,…,…,…,…,…
"""TJA04_QL202305-202312""",38.911,97.83537,10,,"""coordinate:null | name:TJA04""","""有效工作211日; 本次维护换位点：38.91173，97.…","""null"""
"""TJA1_202312-202405""",38.9497,97.12435,10,,"""coordinate:null | name:TJA1""","""有效工作178日; 202405换UOV""","""null"""
"""TJA1_QL202305-202312""",38.9497,97.12435,10,,"""coordinate:null | name:TJA1""","""有效工作383日; ""","""null"""
"""TJA20_202312-202405""",38.94872,97.62878,10,,"""coordinate:null | name:TJA20""","""有效工作7日; 202405OMG换UOV；里面有历史数据，…","""null"""


In [28]:
# df_DQ = df_deployments.filter(pl.col("deploymentID").str.starts_with("DQ"))
# df_A = df_deployments.filter(pl.col("deploymentID").str.starts_with("A"))

# def haversine(coord1_deg, coord2_deg):
#     R = 6371.0
#     lat1_deg, lon1_deg = coord1_deg
#     lat2_deg, lon2_deg = coord2_deg

#     lat1_rad = math.radians(lat1_deg)
#     lon1_rad = math.radians(lon1_deg)
#     lat2_rad = math.radians(lat2_deg)
#     lon2_rad = math.radians(lon2_deg)

#     dlat_rad = lat2_rad - lat1_rad
#     dlon_rad = lon2_rad - lon1_rad
#     a = math.sin(dlat_rad / 2)**2 + math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(dlon_rad / 2)**2
#     c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

#     distance_km = R * c
#     return distance_km

# s_latitude = df_DQ["latitude"]
# s_longitude = df_DQ["longitude"]

# average_coord = (s_latitude.mean(), s_longitude.mean())

# max_distance = 0
# for i in range(s_latitude.len()):
#     coord = (s_latitude[i], s_longitude[i])
#     if coord[0] == 0:
#         continue
#     distance_km = haversine(coord, average_coord)
#     if distance_km > max_distance:
#         max_distance = distance_km

# print(max_distance)

# s_latitude = df_A["latitude"]
# s_longitude = df_A["longitude"]

# average_coord = (s_latitude.mean(), s_longitude.mean())

# max_distance = 0
# for i in range(s_latitude.len()):
#     coord = (s_latitude[i], s_longitude[i])
#     if coord[0] == 0:
#         continue
#     distance_km = haversine(coord, average_coord)
#     if distance_km > max_distance:
#         max_distance = distance_km

# print(max_distance)

In [49]:
# Patch GPS
import math
def haversine(coord1_deg, coord2_deg):
    R = 6371.0
    lat1_deg, lon1_deg = coord1_deg
    lat2_deg, lon2_deg = coord2_deg

    lat1_rad = math.radians(lat1_deg)
    lon1_rad = math.radians(lon1_deg)
    lat2_rad = math.radians(lat2_deg)
    lon2_rad = math.radians(lon2_deg)

    dlat_rad = lat2_rad - lat1_rad
    dlon_rad = lon2_rad - lon1_rad
    a = math.sin(dlat_rad / 2)**2 + math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(dlon_rad / 2)**2
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    distance_km = R * c
    return distance_km

s_latitude = df_deployments["latitude"]
s_longitude = df_deployments["longitude"]

average_coord = (s_latitude.mean(), s_longitude.mean())

max_distance = 0
for i in range(s_latitude.len()):
    coord = (s_latitude[i], s_longitude[i])
    if coord[0] == 0:
        continue
    distance_km = haversine(coord, average_coord)
    if distance_km > max_distance:
        max_distance = distance_km

print(max_distance)

gps_uncertainty = {
    "GPS": 100,
    "guess": 150*1000, # Estimate by grid setup
    "null": int(max_distance) * 1000 # Calculated
}


df_deployments_patched = df_deployments.with_columns(
    coordinateUncertainty = pl.col("gpsSource").replace(gps_uncertainty),
    latitude = pl.col("latitude").replace(0.0, average_coord[0]),
    longitude = pl.col("latitude").replace(0.0, average_coord[1]),
)
df_deployments_patched

50.45037504108415


deploymentID,latitude,longitude,coordinateUncertainty,setupBy,deploymentTags,deploymentComments,gpsSource
str,f64,f64,str,str,str,str,str
"""0004_202312-202405""",38.88777,38.88777,"""50000""",,"""coordinate:null | name:0004""","""有效工作10日; 202405OMG换UOV""","""null"""
"""0004_QL202305-202312""",38.88777,38.88777,"""50000""",,"""coordinate:null | name:0004""","""有效工作0日; 该型号相机视频过大""","""null"""
"""0008_202312-202405""",38.90337,38.90337,"""50000""",,"""coordinate:null | name:0008""","""有效工作99日; 点位撤销；但有雪豹""","""null"""
"""0008_QL202305-202312""",38.90337,38.90337,"""50000""",,"""coordinate:null | name:0008""","""有效工作82日; 这个型号相机视频太大，占空间""","""null"""
"""0018_202312-202405""",38.91918,38.91918,"""50000""",,"""coordinate:null | name:0018""","""有效工作142日; 点位撤销；但有雪豹""","""null"""
…,…,…,…,…,…,…,…
"""TJA04_QL202305-202312""",38.911,38.911,"""50000""",,"""coordinate:null | name:TJA04""","""有效工作211日; 本次维护换位点：38.91173，97.…","""null"""
"""TJA1_202312-202405""",38.9497,38.9497,"""50000""",,"""coordinate:null | name:TJA1""","""有效工作178日; 202405换UOV""","""null"""
"""TJA1_QL202305-202312""",38.9497,38.9497,"""50000""",,"""coordinate:null | name:TJA1""","""有效工作383日; ""","""null"""
"""TJA20_202312-202405""",38.94872,38.94872,"""50000""",,"""coordinate:null | name:TJA20""","""有效工作7日; 202405OMG换UOV；里面有历史数据，…","""null"""


In [50]:
df_result = df_species_info.join(df_deployments_patched, on="deploymentID", how="left")
df_result

deploymentID,scientificName,timestamp,count,mediaID,observationID,filePath,fileName,mazeNameCN,taxonID,rank,latitude,longitude,coordinateUncertainty,setupBy,deploymentTags,deploymentComments,gpsSource
str,str,"datetime[μs, UTC]",u32,i64,i64,str,str,str,str,str,f64,f64,str,str,str,str,str
"""0004_202312-202405""","""Blank""",2023-12-08 06:19:50 UTC,1,1285505,1350698,"""https://localhost/storage/reso…","""0004_202312-202405-DSCF0258""","""无动物""","""BLANK""","""null""",38.88777,38.88777,"""50000""",,"""coordinate:null | name:0004""","""有效工作10日; 202405OMG换UOV""","""null"""
"""0004_202312-202405""","""Blank""",2023-12-08 07:08:00 UTC,1,1285506,1350699,"""https://localhost/storage/reso…","""0004_202312-202405-DSCF0261""","""无动物""","""BLANK""","""null""",38.88777,38.88777,"""50000""",,"""coordinate:null | name:0004""","""有效工作10日; 202405OMG换UOV""","""null"""
"""0004_202312-202405""","""Blank""",2023-12-08 19:35:54 UTC,1,1285507,1350700,"""https://localhost/storage/reso…","""0004_202312-202405-DSCF0264""","""无动物""","""BLANK""","""null""",38.88777,38.88777,"""50000""",,"""coordinate:null | name:0004""","""有效工作10日; 202405OMG换UOV""","""null"""
"""0004_202312-202405""","""Blank""",2023-12-09 11:20:00 UTC,1,1285508,1350701,"""https://localhost/storage/reso…","""0004_202312-202405-DSCF0267""","""无动物""","""BLANK""","""null""",38.88777,38.88777,"""50000""",,"""coordinate:null | name:0004""","""有效工作10日; 202405OMG换UOV""","""null"""
"""0004_202312-202405""","""Blank""",2023-12-10 05:39:22 UTC,1,1285511,1350704,"""https://localhost/storage/reso…","""0004_202312-202405-DSCF0276""","""无动物""","""BLANK""","""null""",38.88777,38.88777,"""50000""",,"""coordinate:null | name:0004""","""有效工作10日; 202405OMG换UOV""","""null"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""TJA20_202312-202405""","""Lepus oiostolus""",2023-12-11 14:47:48 UTC,1,1293710,1358903,"""https://localhost/storage/reso…","""202312-202405-TJA20_202312-202…","""高原兔""","""6PPPY""","""species""",38.94872,38.94872,"""50000""",,"""coordinate:null | name:TJA20""","""有效工作7日; 202405OMG换UOV；里面有历史数据，…","""null"""
"""TJA20_202312-202405""","""Lepus oiostolus""",2023-12-11 18:32:06 UTC,1,1293714,1358907,"""https://localhost/storage/reso…","""202312-202405-TJA20_202312-202…","""高原兔""","""6PPPY""","""species""",38.94872,38.94872,"""50000""",,"""coordinate:null | name:TJA20""","""有效工作7日; 202405OMG换UOV；里面有历史数据，…","""null"""
"""TJA20_202312-202405""","""Lepus oiostolus""",2023-12-14 20:52:34 UTC,1,1285391,1350584,"""https://localhost/storage/reso…","""202312-202405-TJA20_202312-202…","""高原兔""","""6PPPY""","""species""",38.94872,38.94872,"""50000""",,"""coordinate:null | name:TJA20""","""有效工作7日; 202405OMG换UOV；里面有历史数据，…","""null"""
"""TJA20_202312-202405""","""Leucosticte brandti""",2023-12-15 01:06:58 UTC,1,1293725,1358918,"""https://localhost/storage/reso…","""202312-202405-TJA20_202312-202…","""高山岭雀""","""6Q5TT""","""species""",38.94872,38.94872,"""50000""",,"""coordinate:null | name:TJA20""","""有效工作7日; 202405OMG换UOV；里面有历史数据，…","""null"""


In [51]:
df_result_under_species = df_result.filter(pl.col("rank").is_in(["species", "subspecies"]))
df_result_under_species.write_csv(Path(camtrapdp_package_path).joinpath("qilian_202312-202405_encounter_under_species.csv"), include_bom=True)
df_result_under_species

deploymentID,scientificName,timestamp,count,mediaID,observationID,filePath,fileName,mazeNameCN,taxonID,rank,latitude,longitude,coordinateUncertainty,setupBy,deploymentTags,deploymentComments,gpsSource
str,str,"datetime[μs, UTC]",u32,i64,i64,str,str,str,str,str,f64,f64,str,str,str,str,str
"""0004_202312-202405""","""Pseudois nayaur""",2023-12-13 05:43:02 UTC,1,1285522,1350715,"""https://localhost/storage/reso…","""0004_202312-202405-DSCF0309""","""岩羊""","""4NYLG""","""species""",38.88777,38.88777,"""50000""",,"""coordinate:null | name:0004""","""有效工作10日; 202405OMG换UOV""","""null"""
"""0004_202312-202405""","""Pseudois nayaur""",2023-12-17 03:51:48 UTC,1,1285537,1350730,"""https://localhost/storage/reso…","""0004_202312-202405-DSCF0354""","""岩羊""","""4NYLG""","""species""",38.88777,38.88777,"""50000""",,"""coordinate:null | name:0004""","""有效工作10日; 202405OMG换UOV""","""null"""
"""0004_202312-202405""","""Pseudois nayaur""",2023-12-17 07:04:24 UTC,1,1285598,1350791,"""https://localhost/storage/reso…","""0004_202312-202405-DSCF0537""","""岩羊""","""4NYLG""","""species""",38.88777,38.88777,"""50000""",,"""coordinate:null | name:0004""","""有效工作10日; 202405OMG换UOV""","""null"""
"""0008_202312-202405""","""Bos grunniens""",2024-03-18 09:07:16 UTC,1,1285891,1351084,"""https://localhost/storage/reso…","""0008_202312-202405-DSCF0708""","""家牦牛""","""MLPW""","""species""",38.90337,38.90337,"""50000""",,"""coordinate:null | name:0008""","""有效工作99日; 点位撤销；但有雪豹""","""null"""
"""0008_202312-202405""","""Buteo hemilasius""",2024-02-05 06:11:09 UTC,1,1258484,1323677,"""https://localhost/storage/reso…","""0008_202312-202405-DSCF0310""","""大鵟""","""NYV9""","""species""",38.90337,38.90337,"""50000""",,"""coordinate:null | name:0008""","""有效工作99日; 点位撤销；但有雪豹""","""null"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""TJA20_202312-202405""","""Lepus oiostolus""",2023-12-11 14:47:48 UTC,1,1293710,1358903,"""https://localhost/storage/reso…","""202312-202405-TJA20_202312-202…","""高原兔""","""6PPPY""","""species""",38.94872,38.94872,"""50000""",,"""coordinate:null | name:TJA20""","""有效工作7日; 202405OMG换UOV；里面有历史数据，…","""null"""
"""TJA20_202312-202405""","""Lepus oiostolus""",2023-12-11 18:32:06 UTC,1,1293714,1358907,"""https://localhost/storage/reso…","""202312-202405-TJA20_202312-202…","""高原兔""","""6PPPY""","""species""",38.94872,38.94872,"""50000""",,"""coordinate:null | name:TJA20""","""有效工作7日; 202405OMG换UOV；里面有历史数据，…","""null"""
"""TJA20_202312-202405""","""Lepus oiostolus""",2023-12-14 20:52:34 UTC,1,1285391,1350584,"""https://localhost/storage/reso…","""202312-202405-TJA20_202312-202…","""高原兔""","""6PPPY""","""species""",38.94872,38.94872,"""50000""",,"""coordinate:null | name:TJA20""","""有效工作7日; 202405OMG换UOV；里面有历史数据，…","""null"""
"""TJA20_202312-202405""","""Leucosticte brandti""",2023-12-15 01:06:58 UTC,1,1293725,1358918,"""https://localhost/storage/reso…","""202312-202405-TJA20_202312-202…","""高山岭雀""","""6Q5TT""","""species""",38.94872,38.94872,"""50000""",,"""coordinate:null | name:TJA20""","""有效工作7日; 202405OMG换UOV；里面有历史数据，…","""null"""
