# Table to Measurement Converter
__authors__: ufukbombar@gmail.com

__date__: 2024-11-18

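__goal__: This notebook parses the table names produced by `table_downloader` into structured measurement records (measurement UUID, agent UUID, table type, cleaned flag) and saves the result under `../data/measurements`.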

In [2]:
# Required packages
%pip install pandas requests python-dotenv httpx aiofiles dataclasses tqdm pyarrow

# External imports
import pandas as pd 
from dataclasses import dataclass
import json
from datetime import datetime
from tqdm.asyncio import tqdm
import os

# Internal imports
from config import config
import common
import file as ff

Note: you may need to restart the kernel to use updated packages.


In [3]:
# Path of the feather file produced by table_downloader; main() loads it with
# pd.read_feather.
# NOTE(review): the download timestamp is hard-coded in the file name, so this
# must be edited by hand whenever a newer table dump should be converted.
input_tables_name = "../data/tables/tables-2024-11-18 20:10:39.feather"

In [8]:
@dataclass
class MeasurementTable:
    """Record describing one database table, derived from the table's name.

    The declaration order of the fields matters: it is the column order of the
    DataFrame built from a list of these records.
    """

    # Identifier of the measurement the table belongs to.
    measurement_uuid: str
    # Kind of table, i.e. the name prefix without the "cleaned_" marker.
    type: str
    # Whether the name prefix marks the table as a cleaned one.
    cleaned: bool
    # Name prefix exactly as it appears in the table name.
    prefix: str
    # Identifier of the agent the table belongs to.
    agent_uuid: str
    # Full, unmodified table name.
    table_name: str
    # Timestamp carried over from the input row's creation date.
    retrieved: datetime
    # Timestamp at which this record was built.
    processed: datetime

async def main():
    """Parse the downloaded table names into one record per measurement table.

    Loads the feather file at ``input_tables_name`` and reads its
    ``table_name`` and ``creation_date`` columns. Each table name is split on
    ``"__"``; only names with exactly three parts
    (``prefix__measurement_uuid__agent_uuid``) are kept. Rows whose table name
    is not a string (e.g. a missing value) are skipped as well.

    The function is declared ``async`` only so that existing ``await main()``
    callers keep working; nothing inside it is awaited.

    Returns:
        pandas.DataFrame: one row per parsed table name, with one column per
        ``MeasurementTable`` field in declaration order. The columns are
        present even when no table name could be parsed.
    """
    # Local import: only needed here, to pin the output schema.
    from dataclasses import fields

    # Marker that distinguishes cleaned tables; `cleaned` and `type` are both
    # derived from this single check so they can never disagree.
    cleaned_marker = "cleaned_"

    input_tables_df = pd.read_feather(input_tables_name)

    measurement_tables = []

    # Walk the two needed columns directly (iterrows() would build a Series
    # per row) and tell tqdm the length so it can show a percentage and ETA.
    name_date_pairs = zip(
        input_tables_df["table_name"], input_tables_df["creation_date"]
    )
    for table_name, creation_date in tqdm(name_date_pairs, total=len(input_tables_df)):
        # Missing or non-text names cannot be split; skip instead of crashing.
        if not isinstance(table_name, str):
            continue

        split_array = table_name.split("__")
        if len(split_array) != 3:
            continue

        prefix, measurement_uuid, agent_uuid = split_array

        is_cleaned = prefix.startswith(cleaned_marker)
        table_type = prefix[len(cleaned_marker):] if is_cleaned else prefix

        measurement_tables.append(MeasurementTable(
            measurement_uuid=measurement_uuid,
            agent_uuid=agent_uuid,
            prefix=prefix,
            cleaned=is_cleaned,
            table_name=table_name,
            type=table_type,
            retrieved=creation_date,
            processed=datetime.now()
        ))

    # Passing the columns explicitly keeps the schema stable when the list is
    # empty; otherwise the result would be a frame with no columns at all.
    return pd.DataFrame(
        measurement_tables,
        columns=[field.name for field in fields(MeasurementTable)],
    )

# Run the conversion; main() is a coroutine, so it is awaited at the top level
# of the cell. The bare name on the last line makes the resulting frame the
# cell's displayed output.
measurement_tables_df = await main()
measurement_tables_df

0it [00:00, ?it/s]

100555it [00:01, 76402.11it/s]


Unnamed: 0,measurement_uuid,type,cleaned,prefix,agent_uuid,table_name,retrieved,processed
0,007046a9_518e_46cb_8e70_e598b8bce831,links,True,cleaned_links,400a3c9b_57ed_4315_9489_917e601f3604,cleaned_links__007046a9_518e_46cb_8e70_e598b8b...,2024-11-18 20:10:38.865040,2024-11-18 20:36:54.249173
1,007046a9_518e_46cb_8e70_e598b8bce831,links,True,cleaned_links,4f6137c8_dfc5_4043_8005_d617407b7be3,cleaned_links__007046a9_518e_46cb_8e70_e598b8b...,2024-11-18 20:10:38.865040,2024-11-18 20:36:54.249229
2,007046a9_518e_46cb_8e70_e598b8bce831,links,True,cleaned_links,51fb56f4_5217_4108_a212_2d05806b16dc,cleaned_links__007046a9_518e_46cb_8e70_e598b8b...,2024-11-18 20:10:38.865040,2024-11-18 20:36:54.249250
3,007046a9_518e_46cb_8e70_e598b8bce831,links,True,cleaned_links,5bdab212_3d82_4eca_8c5a_39e1c8ded842,cleaned_links__007046a9_518e_46cb_8e70_e598b8b...,2024-11-18 20:10:38.865040,2024-11-18 20:36:54.249270
4,007046a9_518e_46cb_8e70_e598b8bce831,links,True,cleaned_links,70ce196e_9cb7_4467_b1a8_c40400f6f5df,cleaned_links__007046a9_518e_46cb_8e70_e598b8b...,2024-11-18 20:10:38.865040,2024-11-18 20:36:54.249287
...,...,...,...,...,...,...,...,...
100548,ffd2d86c_05c1_4607_a5b1_4ab04b77ee41,results,False,results,e05ddb8f_c572_4d28_9ffb_8e24bbf2027a,results__ffd2d86c_05c1_4607_a5b1_4ab04b77ee41_...,2024-11-18 20:10:38.865040,2024-11-18 20:36:55.489539
100549,ffd9d901_2127_4048_8f9a_805e3fa4930c,results,False,results,ddd8541d_b4f5_42ce_b163_e3e9bfcd0a47,results__ffd9d901_2127_4048_8f9a_805e3fa4930c_...,2024-11-18 20:10:38.865040,2024-11-18 20:36:55.489550
100550,ffebc346_5c58_4d73_8866_1746dedadc50,results,False,results,ddd8541d_b4f5_42ce_b163_e3e9bfcd0a47,results__ffebc346_5c58_4d73_8866_1746dedadc50_...,2024-11-18 20:10:38.865040,2024-11-18 20:36:55.489562
100551,ffec2848_65c7_44d1_8f15_8bd08e2d41f0,results,False,results,ddd8541d_b4f5_42ce_b163_e3e9bfcd0a47,results__ffec2848_65c7_44d1_8f15_8bd08e2d41f0_...,2024-11-18 20:10:38.865040,2024-11-18 20:36:55.489573


In [10]:
# Persist the converted frame through the project helper in `file` (imported
# as ff).
# NOTE(review): presumably writes a feather file using "../data/measurements"
# as the target path or prefix — confirm against save_dataframe_feather.
ff.save_dataframe_feather(measurement_tables_df, "../data/measurements")
print("Done")

Done
