### 1. Libraries

In [2]:
import pandas as pd
import time
from datetime import datetime
import re

### 2. Split the raw readings by 'moteid' (54 sensors) and save each sensor's data as a .csv file

In [None]:
# Field layout of one space-separated record in the raw dump
column_names = ['date', 'time', 'epoch', 'moteid', 'temperature', 'humidity', 'light', 'voltage']

# Parse the raw text file into a list of per-record field lists
data = []
with open('../data/raw/data.txt', 'r') as raw_file:
    invalid_row = []
    for raw_line in raw_file:
        fields = raw_line.strip().split(' ')
        # Every record is kept in `data`; records with fewer than 8 fields are
        # additionally tracked in `invalid_row` so they can be inspected later.
        # NOTE(review): short records are NOT filtered out here - confirm this is intended.
        data.append(fields)
        if len(fields) < 8:
            invalid_row.append(fields)

df = pd.DataFrame(data, columns=column_names)

# Write one CSV per distinct 'moteid' value
for moteid, mote_df in df.groupby('moteid'):
    mote_df.to_csv(f'../data/processed/unsorted/moteid_{moteid}.csv', index=False)

print("Files saved successfully!")

### 3. Combine date and time columns and sort the data based on the resulting datetime values

In [21]:
# Normalise a time string so that every value carries a fractional-seconds part
def clean_time(time_str):
    """Return ``time_str`` with a fractional-seconds suffix guaranteed.

    Parameters
    ----------
    time_str : str
        A time-of-day string. If it already contains a ``'.'`` it is returned
        unchanged; otherwise ``'.000000'`` is appended.

    Returns
    -------
    str or None
        The normalised string, or ``None`` when ``time_str`` is not a string
        (for example a missing value read from a CSV).
    """
    # Explicit type check instead of a bare ``except:`` so that unrelated
    # errors (and KeyboardInterrupt) are never silently swallowed.
    if not isinstance(time_str, str):
        return None
    if '.' in time_str:
        return time_str
    return time_str + '.000000'

# Sort each per-mote CSV chronologically (mote ids 1 to 54 inclusive)
for moteid in range(1, 55):
    file_path = f'../data/processed/unsorted/moteid_{moteid}.csv'

    # Guard only the read: a missing input file is skipped, while any failure
    # in the later steps (e.g. while writing the output) is not misreported
    # as a missing input file.
    try:
        df = pd.read_csv(file_path)
    except FileNotFoundError:
        print(f"File not found: {file_path}")
        continue

    # Give every time value a fractional-seconds part before combining
    df['time'] = df['time'].apply(clean_time)

    # Build a single timestamp column; unparseable values become NaT
    df['datetime'] = pd.to_datetime(df['date'] + ' ' + df['time'], errors='coerce')

    # Discard rows whose timestamp could not be parsed
    df = df.dropna(subset=['datetime'])

    # Chronological order with a fresh 0..n-1 index
    df_sorted = df.sort_values(by='datetime').reset_index(drop=True)

    # The helper 'datetime' column is kept in the saved file
    output_path = f'../data/processed/sorted/moteid_{moteid}.csv'
    df_sorted.to_csv(output_path, index=False)

    print(f"Processed and saved: {output_path}")

Processed and saved: ../data/processed/sorted/moteid_1.csv
Processed and saved: ../data/processed/sorted/moteid_2.csv
Processed and saved: ../data/processed/sorted/moteid_3.csv
Processed and saved: ../data/processed/sorted/moteid_4.csv
Processed and saved: ../data/processed/sorted/moteid_5.csv
Processed and saved: ../data/processed/sorted/moteid_6.csv
Processed and saved: ../data/processed/sorted/moteid_7.csv
Processed and saved: ../data/processed/sorted/moteid_8.csv
Processed and saved: ../data/processed/sorted/moteid_9.csv
Processed and saved: ../data/processed/sorted/moteid_10.csv
Processed and saved: ../data/processed/sorted/moteid_11.csv
Processed and saved: ../data/processed/sorted/moteid_12.csv
Processed and saved: ../data/processed/sorted/moteid_13.csv
Processed and saved: ../data/processed/sorted/moteid_14.csv
Processed and saved: ../data/processed/sorted/moteid_15.csv
Processed and saved: ../data/processed/sorted/moteid_16.csv
Processed and saved: ../data/processed/sorted/mot