In [1]:
import numpy as np
import pandas as pd
import time

# 1. Build a NumPy array of sample wing lengths, measured in millimetres
MM_PER_CM = 10.0
wing_lengths = np.array([74.5, 72.1, 78.0, 69.5, 75.3])

# 2. Vectorized unit conversion
# One array expression replaces an explicit Python loop: NumPy applies the
# division to every element in compiled code (which may use SIMD
# instructions, depending on the NumPy build and the CPU).
wing_lengths_cm = wing_lengths / MM_PER_CM

print(f"Original (mm): {wing_lengths}")
print(f"Converted (cm): {wing_lengths_cm}")

# 3. Summary statistics are available as built-in array reductions
mean_mm = wing_lengths.mean()
max_mm = wing_lengths.max()
print(f"Mean: {mean_mm}")
print(f"Max:  {max_mm}")

Original (mm): [74.5 72.1 78.  69.5 75.3]
Converted (cm): [7.45 7.21 7.8  6.95 7.53]
Mean: 73.88000000000001
Max:  78.0


In [2]:
# Sample capture records as a column-oriented mapping (column name -> values).
# Every list holds one entry per ringed bird, in the same row order.
data = {
    "ring_id": ["AX-01", "AX-02", "AX-03", "AX-04", "AX-05"],
    "species": [
        "Parus major",
        "Turdus merula",
        "Parus major",
        "Cyanistes caeruleus",
        "Turdus merula",
    ],
    "wing_length_mm": [74.5, 128.0, 73.2, 62.1, 125.5],
    "weight_g": [18.5, 98.2, 17.9, 11.2, 95.0],
    "date": [
        "2024-09-01",
        "2024-09-01",
        "2024-09-02",
        "2024-09-02",
        "2024-09-03",
    ],
}

# Each key becomes a column; rows receive the default integer index.
df = pd.DataFrame.from_dict(data)

# With real data on disk you would load the table instead:
# df = pd.read_csv("capture_data.csv")

# Preview the top of the table (five rows, which is head()'s default)
display(df.head(n=5))

Unnamed: 0,ring_id,species,wing_length_mm,weight_g,date
0,AX-01,Parus major,74.5,18.5,2024-09-01
1,AX-02,Turdus merula,128.0,98.2,2024-09-01
2,AX-03,Parus major,73.2,17.9,2024-09-02
3,AX-04,Cyanistes caeruleus,62.1,11.2,2024-09-02
4,AX-05,Turdus merula,125.5,95.0,2024-09-03


In [5]:
# Print a structural summary of the frame: index range, per-column non-null
# counts and dtypes, and the memory footprint.
# Note that `date` holds plain strings here (object dtype), not datetimes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   ring_id         5 non-null      object 
 1   species         5 non-null      object 
 2   wing_length_mm  5 non-null      float64
 3   weight_g        5 non-null      float64
 4   date            5 non-null      object 
dtypes: float64(2), object(3)
memory usage: 332.0+ bytes


In [6]:
# A. Column selection: indexing with a single label yields a Series
species_col = df['species']
print(f"Selected Column type: {type(species_col)}\n")

# B. Boolean mask: one True/False per row, True where weight_g exceeds 20.0
mask_heavy = df['weight_g'].gt(20.0)
print("Filter Mask:")
print(mask_heavy)

# C. Row filtering: keep only the rows the mask flags as True
heavy_birds = df.loc[mask_heavy]
print("\nFiltered DataFrame (Heavy Birds):")
display(heavy_birds)

# D. Combined conditions: element-wise AND (&) of two masks selects rows
#    that satisfy both the species test and the weight test
is_parus_major = df['species'].eq('Parus major')
over_18_g = df['weight_g'].gt(18.0)
target_birds = df.loc[is_parus_major & over_18_g]
display(target_birds)

Selected Column type: <class 'pandas.core.series.Series'>

Filter Mask:
0    False
1     True
2    False
3    False
4     True
Name: weight_g, dtype: bool

Filtered DataFrame (Heavy Birds):


Unnamed: 0,ring_id,species,wing_length_mm,weight_g,date
1,AX-02,Turdus merula,128.0,98.2,2024-09-01
4,AX-05,Turdus merula,125.5,95.0,2024-09-03


Unnamed: 0,ring_id,species,wing_length_mm,weight_g,date
0,AX-01,Parus major,74.5,18.5,2024-09-01
