In [None]:
from google.colab import drive

drive.mount('/content/drive')
import os
import pandas as pd
from datetime import datetime

In [None]:
#Goal of the function below is to calculate the average CO2 and NO2 concentrations for a given data point at a specific timeframe

# Function to extract home ID from the file name
def extract_home_id(file_name):
    return file_name.replace('DataTable', '').replace('.csv', '')

# Function to calculate average concentrations for a given date, time range, and home ID
def calculate_average_concentrations(data, start_time, end_time):
    # Check if the required columns exist
    required_columns = ['Time', 'CO2_ETC_IN1', 'NO2_AQL_IN1']
    if not all(column in data.columns for column in required_columns):
        return None, None

    # Filter data based on the specified time range
    time_mask = (data['Time'].dt.time >= start_time) & (data['Time'].dt.time <= end_time)
    filtered_data = data[time_mask]

    # Calculate average concentrations for CO2 and NO2
    avg_co2 = filtered_data['CO2_ETC_IN1'].mean()
    avg_no2 = filtered_data['NO2_AQL_IN1'].mean()

    return avg_co2, avg_no2

# Specify the path to your IAQ_Monitoring folder
folder_path = '/content/drive/MyDrive/Yazan/data/'

# Specify the initial and end times
start_time = datetime.strptime('0:00:00', '%H:%M:%S').time()
end_time = datetime.strptime('7:00:00', '%H:%M:%S').time()

# Specify the date
date = '2017-03-29'

# Create an empty DataFrame to store the results
results_df = pd.DataFrame(columns=['HomeID', 'Date', 'CO2', 'NO2'])

# Loop through the files in the specified folder
for file_name in os.listdir(folder_path):
    if file_name.endswith('.csv') and file_name.startswith('DataTable'):
        # Extract home ID from the file name
        home_id = extract_home_id(file_name)

        # Construct the file path
        file_path = os.path.join(folder_path, file_name)

        # Read the CSV file and explicitly parse the 'Time' column as datetime
        data = pd.read_csv(file_path, parse_dates=['Time'])

        # Check if the required columns exist in the file
        avg_co2, avg_no2 = calculate_average_concentrations(data, start_time, end_time)

        # If the required columns exist in the file
        if avg_co2 is not None and avg_no2 is not None:
            # Append the results to the DataFrame
            results_df = results_df.append({'HomeID': home_id, 'Date': date, 'CO2': avg_co2, 'NO2': avg_no2}, ignore_index=True)

# Display the results
print(results_df)

  results_df = results_df.append({'HomeID': home_id, 'Date': date, 'CO2': avg_co2, 'NO2': avg_no2}, ignore_index=True)
  results_df = results_df.append({'HomeID': home_id, 'Date': date, 'CO2': avg_co2, 'NO2': avg_no2}, ignore_index=True)
  results_df = results_df.append({'HomeID': home_id, 'Date': date, 'CO2': avg_co2, 'NO2': avg_no2}, ignore_index=True)
  results_df = results_df.append({'HomeID': home_id, 'Date': date, 'CO2': avg_co2, 'NO2': avg_no2}, ignore_index=True)
  results_df = results_df.append({'HomeID': home_id, 'Date': date, 'CO2': avg_co2, 'NO2': avg_no2}, ignore_index=True)
  results_df = results_df.append({'HomeID': home_id, 'Date': date, 'CO2': avg_co2, 'NO2': avg_no2}, ignore_index=True)
  results_df = results_df.append({'HomeID': home_id, 'Date': date, 'CO2': avg_co2, 'NO2': avg_no2}, ignore_index=True)
  results_df = results_df.append({'HomeID': home_id, 'Date': date, 'CO2': avg_co2, 'NO2': avg_no2}, ignore_index=True)
  results_df = results_df.append({'HomeID': home

   HomeID        Date         CO2       NO2
0     025  2017-03-29  411.736342  0.065259
1     028  2017-03-29  689.170003  0.051153
2     029  2017-03-29  649.184255  0.001987
3     030  2017-03-29  536.232779  0.061103
4     032  2017-03-29  694.531388  0.010958
5     034  2017-03-29  590.460808  0.008686
6     037  2017-03-29  511.217170  0.006065
7     038  2017-03-29  767.554462  0.008964
8     039  2017-03-29  769.329827  0.001206
9     040  2017-03-29  756.907363  0.002724
10    041  2017-03-29  617.482525  0.039026
11    042  2017-03-29  580.050221  0.102122
12    024  2017-03-29  530.907024  0.055314
13    026  2017-03-29  573.893790  0.009090
14    102  2017-03-29  712.175093  0.024091
15    104  2017-03-29  408.437648  0.029457
16    105  2017-03-29  577.315575  0.040518
17    107  2017-03-29  491.006108  0.054170
18    109  2017-03-29  503.625382  0.031272
19    112  2017-03-29  446.473702  0.018772
20    115  2017-03-29  475.444774  0.004600
21    121  2017-03-29  662.83966

  results_df = results_df.append({'HomeID': home_id, 'Date': date, 'CO2': avg_co2, 'NO2': avg_no2}, ignore_index=True)
  results_df = results_df.append({'HomeID': home_id, 'Date': date, 'CO2': avg_co2, 'NO2': avg_no2}, ignore_index=True)


In [None]:
# this is another test point; the goal is to compute average NO2 and average CO2 concentrations

# Function to calculate average concentrations for a given date, time range, and home ID
def calculate_average_concentrations(data, start_time, end_time, dates, timestamp_column):
    # Convert the timestamp column to datetime format
    data[timestamp_column] = pd.to_datetime(data[timestamp_column], format='%Y-%m-%d %H:%M')

    # Filter data based on the specified date range
    filtered_data = data[data[timestamp_column].dt.date.isin(dates)]

    time_mask = (filtered_data[timestamp_column].dt.time >= start_time) & (filtered_data[timestamp_column].dt.time <= end_time)
    filtered_data = filtered_data[time_mask]

    # Calculate average concentrations for CO2 and NO2
    avg_co2 = filtered_data['CO2_ETC_IN1'].mean()
    avg_no2 = filtered_data['NO2_AQL_IN1'].mean()

    return avg_co2, avg_no2

# Specify the path to your IAQ_Monitoring folder
folder_path = '/content/drive/MyDrive/Yazan/data/'

# Specify the home ID
target_home_id = '024'

# Specify the initial and end times
start_time = datetime.strptime('0:00', '%H:%M').time()
end_time = datetime.strptime('7:00', '%H:%M').time()

# Specify the dates
dates = ['2017-02-18']

# Create an empty DataFrame to store the results
results_df = pd.DataFrame(columns=['HomeID', 'Date', 'CO2', 'NO2'])

# Loop through the files in the specified folder
for file_name in os.listdir(folder_path):
    if file_name.endswith('.csv') and file_name.startswith(f'DataTable{target_home_id}'):

        # Construct the file path
        file_path = os.path.join(folder_path, file_name)

        # Read the CSV file
        data = pd.read_csv(file_path)

        # Calculate average concentrations for the specified parameters
        avg_co2, avg_no2 = calculate_average_concentrations(data, start_time, end_time, dates, 'Time')

        # Append the results to the DataFrame
        results_df = results_df.append({'HomeID': target_home_id, 'Date': dates, 'CO2': avg_co2, 'NO2': avg_no2}, ignore_index=True)

# Display the results
print(results_df)



  HomeID          Date  CO2  NO2
0    024  [2017-02-18]  NaN  NaN


  results_df = results_df.append({'HomeID': target_home_id, 'Date': dates, 'CO2': avg_co2, 'NO2': avg_no2}, ignore_index=True)


In [None]:
# this is the complete list to analyze for all the timeframes and dates
#the goal is to calculate mean NO2 and CO2 concentrations

# Function to extract home ID from the file name
def extract_home_id(file_name):
    return file_name.replace('DataTable', '').replace('.csv', '')

# Function to calculate average concentrations for a given date, time range, and home ID
def calculate_average_concentrations(data, start_time, end_time):
    # Check if the required columns exist
    required_columns = ['Time', 'CO2_ETC_IN1', 'NO2_AQL_IN1']
    if not all(column in data.columns for column in required_columns):
        return None, None

    # Filter data based on the specified date
    data['Time'] = pd.to_datetime(data['Time'], format='%Y-%m-%d %H:%M:%S')

    # Extract date from the 'Time' column
    data['Date'] = data['Time'].dt.date

    # Filter data based on the specified time range
    time_mask = (data['Time'].dt.time >= start_time) & (data['Time'].dt.time <= end_time)
    filtered_data = data[time_mask]

    # Calculate average concentrations for CO2 and NO2
    avg_co2 = filtered_data['CO2_ETC_IN1'].mean()
    avg_no2 = filtered_data['NO2_AQL_IN1'].mean()

    return avg_co2, avg_no2

# Specify the path to your IAQ_Monitoring folder in Google Drive
folder_path = '/content/drive/MyDrive/Yazan/data/'

# Specify multiple dates
dates = ['2017-05-12', '2017-05-14', '2017-05-15', '2017-05-16', '2017-05-17', '2017-05-18',
         '2016-07-12', '2016-07-13', '2016-07-14', '2016-07-15', '2016-07-16', '2016-07-17', '2016-07-18',
         '2016-12-06', '2016-12-07', '2016-12-08', '2016-12-09', '2016-12-10', '2016-12-11', '2016-12-12',
         '2016-07-14', '2016-07-15', '2016-07-16', '2016-07-17', '2016-07-18', '2016-07-19', '2016-07-20',
         '2016-07-25', '2016-07-26', '2016-07-28', '2016-07-29', '2016-07-30', '2016-07-31', '2016-08-01',
         '2016-07-27', '2016-07-28', '2016-07-29', '2016-07-30', '2016-07-31', '2016-08-01', '2016-08-02',
         '2016-10-12', '2016-10-13', '2016-10-14', '2016-10-15', '2016-10-16', '2016-10-17', '2016-10-18',
         '2016-10-25', '2016-10-26', '2016-10-27', '2016-10-28', '2016-10-29', '2016-10-30', '2016-10-31',
         '2016-10-04', '2016-10-05', '2016-10-06', '2016-10-07', '2016-10-07', '2016-10-08', '2016-10-09',
         '2016-10-10', '2016-11-30', '2016-12-01', '2016-12-02', '2016-12-03', '2016-12-05', '2016-12-06',
         '2016-08-25', '2016-08-26', '2016-08-27', '2016-08-28', '2016-08-29', '2016-08-30', '2016-08-31',
         '2016-08-17', '2016-08-18', '2016-08-19', '2016-08-20', '2016-08-21', '2016-08-22', '2016-08-23',
         '2016-12-09', '2016-12-10', '2016-12-11', '2016-12-12', '2016-12-13', '2016-12-14', '2016-12-15',
         '2017-02-07', '2017-02-08', '2017-02-09', '2017-02-10', '2017-02-11', '2017-02-12', '2017-02-13',
         '2016-09-20', '2016-09-21', '2016-09-22', '2016-09-23', '2016-09-24', '2016-09-25', '2016-09-26',
         '2017-01-17', '2017-01-18', '2017-01-19', '2017-01-20', '2017-01-21', '2017-01-22', '2017-01-23',
         '2017-02-17', '2017-02-18', '2017-02-19', '2017-02-20', '2017-02-21', '2017-02-22', '2017-02-23',
         '2017-03-23', '2017-03-24', '2017-03-25', '2017-03-26', '2017-03-27', '2017-03-28', '2017-03-29',
         '2017-04-18', '2017-04-19', '2017-04-20', '2017-04-21', '2017-04-22', '2017-04-23', '2017-04-24',
         '2017-03-15', '2017-03-16', '2017-03-18', '2017-03-18', '2017-03-19', '2017-03-20', '2017-03-21',
         '2017-06-09', '2017-06-10', '2017-06-11', '2017-06-12', '2017-06-13', '2017-06-14', '2017-06-15',
         '2017-04-07', '2017-04-08', '2017-04-09', '2017-04-10', '2017-04-11', '2017-04-12', '2017-04-13',
         '2017-05-03', '2017-05-04', '2017-05-05', '2017-05-06', '2017-05-07', '2017-05-08', '2017-05-09',
         '2017-05-26', '2017-05-27', '2017-05-28', '2017-05-29', '2017-05-30', '2017-05-31', '2017-06-01',
         '2018-02-20', '2018-02-21', '2018-02-22', '2018-02-23', '2018-02-24', '2018-02-25', '2018-02-26',
         '2017-11-27', '2017-11-28','2017-11-29', '2017-11-30', '2017-12-01', '2017-12-02', '2017-12-03', '2017-12-04',
    '2017-12-05', '2017-12-06', '2018-01-05', '2018-01-04', '2018-01-06', '2018-01-07', '2018-01-08',
    '2018-01-09', '2018-01-10', '2018-01-15', '2018-01-16', '2018-01-17', '2018-01-18', '2018-01-19',
    '2018-01-20', '2018-01-21', '2018-02-05', '2018-02-06', '2018-02-07', '2018-02-08', '2018-02-09',
    '2018-02-10', '2018-02-11', '2018-01-24', '2018-01-25', '2018-01-26', '2018-01-27', '2018-01-28',
    '2018-01-29', '2018-01-30', '2017-07-12', '2017-07-13', '2017-07-14', '2017-07-15', '2017-07-16',
    '2017-07-17', '2017-07-18', '2017-07-11', '2017-07-12', '2017-07-13', '2017-07-14', '2017-07-15',
    '2017-07-16', '2017-08-21', '2017-08-22', '2017-08-23', '2017-08-24', '2017-08-25', '2017-08-26',
    '2017-08-27', '2017-09-18', '2017-09-19', '2017-09-20', '2017-09-21', '2017-09-22', '2017-09-23',
    '2017-09-24', '2017-11-02', '2017-11-03', '2017-11-04', '2017-11-05', '2017-11-06', '2017-11-07',
    '2018-11-08', '2017-09-27', '2017-09-28', '2017-09-29', '2017-09-30', '2017-10-01', '2017-10-02',
    '2017-10-03', '2017-09-05', '2017-09-06', '2017-09-07', '2017-09-08', '2017-09-09', '2017-09-10',
    '2017-09-11', '2017-10-19', '2017-10-20', '2017-10-21', '2017-10-22', '2017-10-23', '2017-10-24',
    '2017-10-25', '2017-08-11', '2017-08-12', '2017-08-13', '2017-08-14', '2017-08-15', '2017-08-16',
    '2017-08-17', '2017-11-11', '2017-11-12', '2017-11-13', '2017-11-14', '2017-11-15', '2017-11-16',
    '2017-11-17', '2017-12-20', '2017-12-21', '2017-12-22', '2017-12-23', '2017-12-24', '2017-12-25',
    '2017-12-26', '2018-01-22', '2018-01-23', '2018-01-24', '2018-01-25', '2018-01-26', '2018-01-27',
    '2018-01-28', '2018-01-17', '2018-01-18', '2018-01-19', '2018-01-20', '2018-01-21', '2018-01-22',
    '2018-01-23', '2018-01-04', '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08', '2018-01-09',
    '2018-01-10', '2018-02-12', '2018-02-13', '2018-02-14', '2018-02-15', '2018-02-16', '2018-02-17',
    '2018-02-18', '2018-03-09', '2018-03-10', '2018-03-11', '2018-03-12', '2018-03-13', '2018-03-14',
    '2018-03-15', '2018-03-01', '2018-03-02', '2018-03-03', '2018-03-04', '2018-03-05', '2018-03-06',
    '2018-03-07', '2018-03-06', '2018-03-07', '2018-03-08', '2018-03-09', '2018-03-10', '2018-03-11',
    '2018-03-12', '2018-02-13', '2018-02-14', '2018-02-15', '2018-02-16', '2018-02-17', '2018-02-18',
    '2018-02-19', '2018-04-04', '2018-04-05', '2018-04-06', '2018-04-07', '2018-04-08', '2018-04-09',
    '2018-04-10', '2018-03-12', '2018-03-13', '2018-03-14', '2018-03-15', '2018-03-16', '2018-03-17',
    '2018-03-18']

# Specify the time ranges
time_ranges = {
    '7-12': {'start': '07:00:00', 'end': '11:00:00'},
    '11-13': {'start': '11:00:00', 'end': '13:00:00'},
    '13-17': {'start': '13:00:00', 'end': '17:00:00'},
    '17-19': {'start': '17:00:00', 'end': '19:00:00'},
    '19-0': {'start': '19:00:00', 'end': '23:59:59'},
}

# Create a DataFrame to store the results
results_df = pd.DataFrame(columns=['HomeID', 'Date', 'TimeRange', 'CO2', 'NO2'])

# Iterate over dates and home IDs
for date in dates:
    for file_name in os.listdir(folder_path):
        if file_name.startswith('DataTable') and file_name.endswith('.csv'):
            home_id = extract_home_id(file_name)

            # Read the CSV file
            # Read the CSV file
            file_path = os.path.join(folder_path, file_name)
            data = pd.read_csv(file_path)

            # Filter data based on the specified date
            date_mask = data['Time'].str.contains(date)
            filtered_data = data[date_mask]

            # Iterate over time ranges
            for time_range, times in time_ranges.items():
                start_time = datetime.strptime(times['start'], '%H:%M:%S').time()
                end_time = datetime.strptime(times['end'], '%H:%M:%S').time()

                # Calculate average concentrations for the specified time range
                avg_co2, avg_no2 = calculate_average_concentrations(filtered_data, start_time, end_time)

                # Append the results to the DataFrame
                results_df = results_df.append({
                    'HomeID': home_id,
                    'Date': date,
                    'TimeRange': time_range,
                    'CO2': avg_co2,
                    'NO2': avg_no2
                }, ignore_index=True)


# Save the results to CSV file
results_df.to_csv('/content/drive/MyDrive/Yazan/average_concentrations_results.csv', index=False)

Mounted at /content/drive


  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = resul

ParserError: ignored

In [None]:
#dividing the datapoints into two datasets; first set is seen below.
#this is because dataset took too long to run the first time

# Function to extract home ID from the file name
def extract_home_id(file_name):
    return file_name.replace('DataTable', '').replace('.csv', '')

# Function to calculate average concentrations for a given date, time range, and home ID
def calculate_average_concentrations(data, start_time, end_time):
    # Check if the required columns exist
    required_columns = ['Time', 'CO2_ETC_IN1', 'NO2_AQL_IN1']
    if not all(column in data.columns for column in required_columns):
        return None, None

    # Filter data based on the specified time range
    time_mask = (data['Time'].dt.time >= start_time) & (data['Time'].dt.time <= end_time)
    filtered_data = data[time_mask]

    # Calculate average concentrations for CO2 and NO2
    avg_co2 = filtered_data['CO2_ETC_IN1'].mean()
    avg_no2 = filtered_data['NO2_AQL_IN1'].mean()

    return avg_co2, avg_no2

# Specify the path to your IAQ_Monitoring folder in Google Drive
folder_path = '/content/drive/MyDrive/Yazan/data/'

# Specify multiple dates
dates = ['2017-05-12', '2017-05-14', '2017-05-15', '2017-05-16', '2017-05-17', '2017-05-18',
         '2016-07-12', '2016-07-13', '2016-07-14', '2016-07-15', '2016-07-16', '2016-07-17', '2016-07-18',
         '2016-12-06', '2016-12-07', '2016-12-08', '2016-12-09', '2016-12-10', '2016-12-11', '2016-12-12',
         '2016-07-14', '2016-07-15', '2016-07-16', '2016-07-17', '2016-07-18', '2016-07-19', '2016-07-20',
         '2016-07-25', '2016-07-26', '2016-07-28', '2016-07-29', '2016-07-30', '2016-07-31', '2016-08-01',
         '2016-07-27', '2016-07-28', '2016-07-29', '2016-07-30', '2016-07-31', '2016-08-01', '2016-08-02',
         '2016-10-12', '2016-10-13', '2016-10-14', '2016-10-15', '2016-10-16', '2016-10-17', '2016-10-18',
         '2016-10-25', '2016-10-26', '2016-10-27', '2016-10-28', '2016-10-29', '2016-10-30', '2016-10-31',
         '2016-10-04', '2016-10-05', '2016-10-06', '2016-10-07', '2016-10-07', '2016-10-08', '2016-10-09',
         '2016-10-10', '2016-11-30', '2016-12-01', '2016-12-02', '2016-12-03', '2016-12-05', '2016-12-06',
         '2016-08-25', '2016-08-26', '2016-08-27', '2016-08-28', '2016-08-29', '2016-08-30', '2016-08-31',
         '2016-08-17', '2016-08-18', '2016-08-19', '2016-08-20', '2016-08-21', '2016-08-22', '2016-08-23',
         '2016-12-09', '2016-12-10', '2016-12-11', '2016-12-12', '2016-12-13', '2016-12-14', '2016-12-15',
         '2017-02-07', '2017-02-08', '2017-02-09', '2017-02-10', '2017-02-11', '2017-02-12', '2017-02-13',
         '2016-09-20', '2016-09-21', '2016-09-22', '2016-09-23', '2016-09-24', '2016-09-25', '2016-09-26',
         '2017-01-17', '2017-01-18', '2017-01-19', '2017-01-20', '2017-01-21', '2017-01-22', '2017-01-23',
         '2017-02-17', '2017-02-18', '2017-02-19', '2017-02-20', '2017-02-21', '2017-02-22', '2017-02-23',
         '2017-03-23', '2017-03-24', '2017-03-25', '2017-03-26', '2017-03-27', '2017-03-28', '2017-03-29',
         '2017-04-18', '2017-04-19', '2017-04-20', '2017-04-21', '2017-04-22', '2017-04-23', '2017-04-24',
         '2017-03-15', '2017-03-16', '2017-03-18', '2017-03-19', '2017-03-20', '2017-03-21',]


# Specify the time ranges
time_ranges = {
    '7-11': {'start': '07:00', 'end': '11:00'},
    '11-13': {'start': '11:00', 'end': '13:00'},
    '13-17': {'start': '13:00', 'end': '17:00'},
    '17-21': {'start': '17:00', 'end': '21:00'},
    '21-0': {'start': '21:00', 'end': '23:59'},
}

# Create a DataFrame to store the results
results_df = pd.DataFrame(columns=['HomeID', 'Date', 'TimeRange', 'CO2', 'NO2'])

# Iterate over dates and home IDs
for date in dates:
    for file_name in os.listdir(folder_path):
        if file_name.startswith('DataTable') and file_name.endswith('.csv'):
            home_id = extract_home_id(file_name)

            # Read the CSV file
            file_path = os.path.join(folder_path, file_name)

            # Add a check to ensure the file exists before reading
            if not os.path.exists(file_path):
                continue

            data = pd.read_csv(file_path)

            # Convert 'Time' column to datetime format with errors='coerce'
            data['Time'] = pd.to_datetime(data['Time'], errors='coerce')

            # Filter data based on the specified date and non-null 'Time' values
            date_mask = data['Time'].dt.strftime('%Y-%m-%d').str.contains(date) & data['Time'].notna()
            filtered_data = data[date_mask]

            if not filtered_data.empty:
                # Iterate over time ranges
                for time_range, times in time_ranges.items():
                    start_time = datetime.strptime(times['start'], '%H:%M').time()
                    end_time = datetime.strptime(times['end'], '%H:%M').time()

                    # Calculate average concentrations for the specified time range
                    avg_co2, avg_no2 = calculate_average_concentrations(filtered_data, start_time, end_time)

                    # Append the results to the DataFrame
                    results_df = results_df.append({
                        'HomeID': home_id,
                        'Date': date,
                        'TimeRange': time_range,
                        'CO2': avg_co2,
                        'NO2': avg_no2
                    }, ignore_index=True)

# Save the results to a CSV file
results_df.to_csv('/content/drive/MyDrive/Yazan/average_concentrations_results_1.csv', index=False)



  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = resul

In [None]:
#this is second portion of the dataset
#this is consistently because the complete list function takes long time to run

# Function to extract home ID from the file name
def extract_home_id(file_name):
    return file_name.replace('DataTable', '').replace('.csv', '')

# Function to calculate average concentrations for a given date, time range, and home ID
def calculate_average_concentrations(data, start_time, end_time):
    # Check if the required columns exist
    required_columns = ['Time', 'CO2_ETC_IN1', 'NO2_AQL_IN1']
    if not all(column in data.columns for column in required_columns):
        return None, None

    # Filter data based on the specified time range
    time_mask = (data['Time'].dt.time >= start_time) & (data['Time'].dt.time <= end_time)
    filtered_data = data[time_mask]

    # Calculate average concentrations for CO2 and NO2
    avg_co2 = filtered_data['CO2_ETC_IN1'].mean()
    avg_no2 = filtered_data['NO2_AQL_IN1'].mean()

    return avg_co2, avg_no2

# Specify the path to your IAQ_Monitoring folder in Google Drive
folder_path = '/content/drive/MyDrive/Yazan/data/'

# Specify multiple dates
dates = ['2017-06-09', '2017-06-10', '2017-06-11', '2017-06-12', '2017-06-13', '2017-06-14', '2017-06-15',
         '2017-04-07', '2017-04-08', '2017-04-09', '2017-04-10', '2017-04-11', '2017-04-12', '2017-04-13',
         '2017-05-03', '2017-05-04', '2017-05-05', '2017-05-06', '2017-05-07', '2017-05-08', '2017-05-09',
         '2017-05-26', '2017-05-27', '2017-05-28', '2017-05-29', '2017-05-30', '2017-05-31', '2017-06-01',
         '2018-02-20', '2018-02-21', '2018-02-22', '2018-02-23', '2018-02-24', '2018-02-25', '2018-02-26',
         '2017-11-27', '2017-11-28','2017-11-29', '2017-11-30', '2017-12-01', '2017-12-02', '2017-12-03', '2017-12-04',
    '2017-12-05', '2017-12-06', '2018-01-05', '2018-01-04', '2018-01-06', '2018-01-07', '2018-01-08',
    '2018-01-09', '2018-01-10', '2018-01-15', '2018-01-16', '2018-01-17', '2018-01-18', '2018-01-19',
    '2018-01-20', '2018-01-21', '2018-02-05', '2018-02-06', '2018-02-07', '2018-02-08', '2018-02-09',
    '2018-02-10', '2018-02-11', '2018-01-24', '2018-01-25', '2018-01-26', '2018-01-27', '2018-01-28',
    '2018-01-29', '2018-01-30', '2017-07-12', '2017-07-13', '2017-07-14', '2017-07-15', '2017-07-16',
    '2017-07-17', '2017-07-18', '2017-07-11', '2017-07-12', '2017-07-13', '2017-07-14', '2017-07-15',
    '2017-07-16', '2017-08-21', '2017-08-22', '2017-08-23', '2017-08-24', '2017-08-25', '2017-08-26',
    '2017-08-27', '2017-09-18', '2017-09-19', '2017-09-20', '2017-09-21', '2017-09-22', '2017-09-23',
    '2017-09-24', '2017-11-02', '2017-11-03', '2017-11-04', '2017-11-05', '2017-11-06', '2017-11-07',
    '2018-11-08', '2017-09-27', '2017-09-28', '2017-09-29', '2017-09-30', '2017-10-01', '2017-10-02',
    '2017-10-03', '2017-09-05', '2017-09-06', '2017-09-07', '2017-09-08', '2017-09-09', '2017-09-10',
    '2017-09-11', '2017-10-19', '2017-10-20', '2017-10-21', '2017-10-22', '2017-10-23', '2017-10-24',
    '2017-10-25', '2017-08-11', '2017-08-12', '2017-08-13', '2017-08-14', '2017-08-15', '2017-08-16',
    '2017-08-17', '2017-11-11', '2017-11-12', '2017-11-13', '2017-11-14', '2017-11-15', '2017-11-16',
    '2017-11-17', '2017-12-20', '2017-12-21', '2017-12-22', '2017-12-23', '2017-12-24', '2017-12-25',
    '2017-12-26', '2018-01-22', '2018-01-23', '2018-01-24', '2018-01-25', '2018-01-26', '2018-01-27',
    '2018-01-28', '2018-01-17', '2018-01-18', '2018-01-19', '2018-01-20', '2018-01-21', '2018-01-22',
    '2018-01-23', '2018-01-04', '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08', '2018-01-09',
    '2018-01-10', '2018-02-12', '2018-02-13', '2018-02-14', '2018-02-15', '2018-02-16', '2018-02-17',
    '2018-02-18', '2018-03-09', '2018-03-10', '2018-03-11', '2018-03-12', '2018-03-13', '2018-03-14',
    '2018-03-15', '2018-03-01', '2018-03-02', '2018-03-03', '2018-03-04', '2018-03-05', '2018-03-06',
    '2018-03-07', '2018-03-06', '2018-03-07', '2018-03-08', '2018-03-09', '2018-03-10', '2018-03-11',
    '2018-03-12', '2018-02-13', '2018-02-14', '2018-02-15', '2018-02-16', '2018-02-17', '2018-02-18',
    '2018-02-19', '2018-04-04', '2018-04-05', '2018-04-06', '2018-04-07', '2018-04-08', '2018-04-09',
    '2018-04-10', '2018-03-12', '2018-03-13', '2018-03-14', '2018-03-15', '2018-03-16', '2018-03-17',
    '2018-03-18']


# Specify the time ranges
time_ranges = {
    '7-11': {'start': '07:00', 'end': '11:00'},
    '11-13': {'start': '11:00', 'end': '13:00'},
    '13-17': {'start': '13:00', 'end': '17:00'},
    '17-21': {'start': '17:00', 'end': '21:00'},
    '21-0': {'start': '21:00', 'end': '23:59'},
}

# Create a DataFrame to store the results
results_df = pd.DataFrame(columns=['HomeID', 'Date', 'TimeRange', 'CO2', 'NO2'])


# Iterate over dates and home IDs
for date in dates:
    for file_name in os.listdir(folder_path):
        if file_name.startswith('DataTable') and file_name.endswith('.csv'):
            home_id = extract_home_id(file_name)

            # Read the CSV file
            file_path = os.path.join(folder_path, file_name)
            data = pd.read_csv(file_path)

            # Convert 'Time' column to datetime format with errors='coerce'
            data['Time'] = pd.to_datetime(data['Time'], errors='coerce')

            # Filter data based on the specified date and non-null 'Time' values
            date_mask = data['Time'].dt.strftime('%Y-%m-%d').str.contains(date) & data['Time'].notna()
            filtered_data = data[date_mask]

            if not filtered_data.empty:
                # Iterate over time ranges
                for time_range, times in time_ranges.items():
                    start_time = datetime.strptime(times['start'], '%H:%M').time()
                    end_time = datetime.strptime(times['end'], '%H:%M').time()

                    # Calculate average concentrations for the specified time range
                    avg_co2, avg_no2 = calculate_average_concentrations(filtered_data, start_time, end_time)

                    # Append the results to the DataFrame
                    results_df = results_df.append({
                        'HomeID': home_id,
                        'Date': date,
                        'TimeRange': time_range,
                        'CO2': avg_co2,
                        'NO2': avg_no2
                    }, ignore_index=True)

#to a CSV file
results_df.to_csv('/content/drive/MyDrive/Yazan/average_concentrations_results_2.csv', index=False)

  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = resul

In [None]:
# this is the complete list to analyze
#this is to compute the average PM2.5 concentrations for the entire dataset

# Function to extract home ID from the file name
def extract_home_id(file_name):
    return file_name.replace('DataTable', '').replace('.csv', '')

# Function to calculate average concentrations for a given date, time range, and home ID
def calculate_average_concentrations(data, start_time, end_time):
    # Check if the required columns exist
    required_columns = ['Time', 'PM_MET_IN1']
    if not all(column in data.columns for column in required_columns):
        return None, None

    # Filter data based on the specified date
    data['Time'] = pd.to_datetime(data['Time'], format='%m/%d/%Y %H:%M')

    # Extract date from the 'Time' column
    data['Date'] = data['Time'].dt.date

    # Filter data based on the specified time range
    time_mask = (data['Time'].dt.time >= start_time) & (data['Time'].dt.time <= end_time)
    filtered_data = data[time_mask]

    # Calculate average concentrations for CO2 and NO2
    avg_PM_in = filtered_data['PM_MET_IN1'].mean()

    return avg_PM_in

# Specify the path to your IAQ_Monitoring folder in Google Drive
folder_path = '/content/drive/MyDrive/Yazan/data/'

# Specify multiple dates
dates = ['2017-05-12', '2017-05-14', '2017-05-15', '2017-05-16', '2017-05-17', '2017-05-18',
         '2016-07-12', '2016-07-13', '2016-07-14', '2016-07-15', '2016-07-16', '2016-07-17', '2016-07-18',
         '2016-12-06', '2016-12-07', '2016-12-08', '2016-12-09', '2016-12-10', '2016-12-11', '2016-12-12',
         '2016-07-14', '2016-07-15', '2016-07-16', '2016-07-17', '2016-07-18', '2016-07-19', '2016-07-20',
         '2016-07-25', '2016-07-26', '2016-07-28', '2016-07-29', '2016-07-30', '2016-07-31', '2016-08-01',
         '2016-07-27', '2016-07-28', '2016-07-29', '2016-07-30', '2016-07-31', '2016-08-01', '2016-08-02',
         '2016-10-12', '2016-10-13', '2016-10-14', '2016-10-15', '2016-10-16', '2016-10-17', '2016-10-18',
         '2016-10-25', '2016-10-26', '2016-10-27', '2016-10-28', '2016-10-29', '2016-10-30', '2016-10-31',
         '2016-10-04', '2016-10-05', '2016-10-06', '2016-10-07', '2016-10-07', '2016-10-08', '2016-10-09',
         '2016-10-10', '2016-11-30', '2016-12-01', '2016-12-02', '2016-12-03', '2016-12-05', '2016-12-06',
         '2016-08-25', '2016-08-26', '2016-08-27', '2016-08-28', '2016-08-29', '2016-08-30', '2016-08-31',
         '2016-08-17', '2016-08-18', '2016-08-19', '2016-08-20', '2016-08-21', '2016-08-22', '2016-08-23',
         '2016-12-09', '2016-12-10', '2016-12-11', '2016-12-12', '2016-12-13', '2016-12-14', '2016-12-15',
         '2017-02-07', '2017-02-08', '2017-02-09', '2017-02-10', '2017-02-11', '2017-02-12', '2017-02-13',
         '2016-09-20', '2016-09-21', '2016-09-22', '2016-09-23', '2016-09-24', '2016-09-25', '2016-09-26',
         '2017-01-17', '2017-01-18', '2017-01-19', '2017-01-20', '2017-01-21', '2017-01-22', '2017-01-23',
         '2017-02-17', '2017-02-18', '2017-02-19', '2017-02-20', '2017-02-21', '2017-02-22', '2017-02-23',
         '2017-03-23', '2017-03-24', '2017-03-25', '2017-03-26', '2017-03-27', '2017-03-28', '2017-03-29',
         '2017-04-18', '2017-04-19', '2017-04-20', '2017-04-21', '2017-04-22', '2017-04-23', '2017-04-24',
         '2017-03-15', '2017-03-16', '2017-03-18', '2017-03-18', '2017-03-19', '2017-03-20', '2017-03-21',
         '2017-06-09', '2017-06-10', '2017-06-11', '2017-06-12', '2017-06-13', '2017-06-14', '2017-06-15',
         '2017-04-07', '2017-04-08', '2017-04-09', '2017-04-10', '2017-04-11', '2017-04-12', '2017-04-13',
         '2017-05-03', '2017-05-04', '2017-05-05', '2017-05-06', '2017-05-07', '2017-05-08', '2017-05-09',
         '2017-05-26', '2017-05-27', '2017-05-28', '2017-05-29', '2017-05-30', '2017-05-31', '2017-06-01',
         '2018-02-20', '2018-02-21', '2018-02-22', '2018-02-23', '2018-02-24', '2018-02-25', '2018-02-26',
         '2017-11-27', '2017-11-28','2017-11-29', '2017-11-30', '2017-12-01', '2017-12-02', '2017-12-03', '2017-12-04',
    '2017-12-05', '2017-12-06', '2018-01-05', '2018-01-04', '2018-01-06', '2018-01-07', '2018-01-08',
    '2018-01-09', '2018-01-10', '2018-01-15', '2018-01-16', '2018-01-17', '2018-01-18', '2018-01-19',
    '2018-01-20', '2018-01-21', '2018-02-05', '2018-02-06', '2018-02-07', '2018-02-08', '2018-02-09',
    '2018-02-10', '2018-02-11', '2018-01-24', '2018-01-25', '2018-01-26', '2018-01-27', '2018-01-28',
    '2018-01-29', '2018-01-30', '2017-07-12', '2017-07-13', '2017-07-14', '2017-07-15', '2017-07-16',
    '2017-07-17', '2017-07-18', '2017-07-11', '2017-07-12', '2017-07-13', '2017-07-14', '2017-07-15',
    '2017-07-16', '2017-08-21', '2017-08-22', '2017-08-23', '2017-08-24', '2017-08-25', '2017-08-26',
    '2017-08-27', '2017-09-18', '2017-09-19', '2017-09-20', '2017-09-21', '2017-09-22', '2017-09-23',
    '2017-09-24', '2017-11-02', '2017-11-03', '2017-11-04', '2017-11-05', '2017-11-06', '2017-11-07',
    '2018-11-08', '2017-09-27', '2017-09-28', '2017-09-29', '2017-09-30', '2017-10-01', '2017-10-02',
    '2017-10-03', '2017-09-05', '2017-09-06', '2017-09-07', '2017-09-08', '2017-09-09', '2017-09-10',
    '2017-09-11', '2017-10-19', '2017-10-20', '2017-10-21', '2017-10-22', '2017-10-23', '2017-10-24',
    '2017-10-25', '2017-08-11', '2017-08-12', '2017-08-13', '2017-08-14', '2017-08-15', '2017-08-16',
    '2017-08-17', '2017-11-11', '2017-11-12', '2017-11-13', '2017-11-14', '2017-11-15', '2017-11-16',
    '2017-11-17', '2017-12-20', '2017-12-21', '2017-12-22', '2017-12-23', '2017-12-24', '2017-12-25',
    '2017-12-26', '2018-01-22', '2018-01-23', '2018-01-24', '2018-01-25', '2018-01-26', '2018-01-27',
    '2018-01-28', '2018-01-17', '2018-01-18', '2018-01-19', '2018-01-20', '2018-01-21', '2018-01-22',
    '2018-01-23', '2018-01-04', '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08', '2018-01-09',
    '2018-01-10', '2018-02-12', '2018-02-13', '2018-02-14', '2018-02-15', '2018-02-16', '2018-02-17',
    '2018-02-18', '2018-03-09', '2018-03-10', '2018-03-11', '2018-03-12', '2018-03-13', '2018-03-14',
    '2018-03-15', '2018-03-01', '2018-03-02', '2018-03-03', '2018-03-04', '2018-03-05', '2018-03-06',
    '2018-03-07', '2018-03-06', '2018-03-07', '2018-03-08', '2018-03-09', '2018-03-10', '2018-03-11',
    '2018-03-12', '2018-02-13', '2018-02-14', '2018-02-15', '2018-02-16', '2018-02-17', '2018-02-18',
    '2018-02-19', '2018-04-04', '2018-04-05', '2018-04-06', '2018-04-07', '2018-04-08', '2018-04-09',
    '2018-04-10', '2018-03-12', '2018-03-13', '2018-03-14', '2018-03-15', '2018-03-16', '2018-03-17',
    '2018-03-18']

# Specify the time ranges
time_ranges = {
    '7-12': {'start': '07:00:00', 'end': '11:00:00'},
    '11-13': {'start': '11:00:00', 'end': '13:00:00'},
    '13-17': {'start': '13:00:00', 'end': '17:00:00'},
    '17-19': {'start': '17:00:00', 'end': '19:00:00'},
    '19-0': {'start': '19:00:00', 'end': '23:59:59'},
}

# Create a DataFrame to store the results
results_df = pd.DataFrame(columns=['HomeID', 'Date', 'TimeRange', 'PM in'])

for date in dates:
    for file_name in os.listdir(folder_path):
        if file_name.startswith('DataTable') and file_name.endswith('.csv'):
            home_id = extract_home_id(file_name)

            # Read the CSV file
            file_path = os.path.join(folder_path, file_name)
            data = pd.read_csv(file_path)

            # Convert 'Time' column to datetime
            data['Time'] = pd.to_datetime(data['Time'], format='%m/%d/%Y %H:%M', errors='coerce')


            # Filter data based on the specified date
            date_mask = data['Time'].dt.date == datetime.strptime(date, '%Y-%m-%d').date()
            filtered_data = data[date_mask]

            # Iterate over time ranges
            for time_range, times in time_ranges.items():
                start_time = datetime.strptime(times['start'], '%H:%M:%S').time()
                end_time = datetime.strptime(times['end'], '%H:%M:%S').time()

                # Calculate average concentrations for the specified time range
                avg_PM_in = calculate_average_concentrations(filtered_data, start_time, end_time)

                # Append the results to the DataFrame
                results_df = results_df.append({
                    'HomeID': home_id,
                    'Date': date,
                    'TimeRange': time_range,
                    'PM-in': avg_PM_in
                }, ignore_index=True)

# Save the results to a CSV file
results_df.to_csv('/content/drive/MyDrive/Yazan/average_concentrations_PM2.5.csv', index=False)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df.append({
  results_df = results_df

KeyboardInterrupt: ignored