In [30]:
import pandas as pd
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
import os
import pandas as pd
from datetime import datetime

def e7_to_decimal(e7_value):
    """Convert a coordinate in E7 format to decimal degrees.

    Args:
        e7_value: Coordinate scaled by 1e7 (for example ``404751750``),
            or None when the coordinate is missing.

    Returns:
        The coordinate divided by 1e7 as a float, or None if ``e7_value``
        is None.
    """
    if e7_value is None:
        return None
    # Divide by 1e7 instead of multiplying by 1e-7: 1e7 is exactly
    # representable as a binary float while 1e-7 is not, so the division
    # returns the correctly rounded value (404751750 -> 40.475175) where the
    # multiplication can be off by one unit in the last place.
    return e7_value / 1e7

def format_coordinates(latitude, longitude):
    """Render a latitude/longitude pair as a single ``"lat, lon"`` string.

    Each value is written with seven decimal places (dot as the decimal
    mark) and the two values are separated by a comma and a space.

    Args:
        latitude: Latitude in decimal degrees, or None.
        longitude: Longitude in decimal degrees, or None.

    Returns:
        The formatted string, or None when either value is None.
    """
    have_both = latitude is not None and longitude is not None
    return f"{latitude:.7f}, {longitude:.7f}" if have_both else None

def iso8601_to_datetime(iso8601_string):
    """Parse a ``Z``-suffixed ISO 8601 timestamp into a naive datetime.

    Two layouts are accepted: with fractional seconds
    (``2022-05-01T10:29:39.080Z``) and without (``2022-05-01T11:49:38Z``).

    Args:
        iso8601_string: The timestamp text, or None.

    Returns:
        A ``datetime`` without tzinfo, or None when the input is None.

    Raises:
        ValueError: If the text matches neither layout.
    """
    if iso8601_string is None:
        return None

    with_fraction = '%Y-%m-%dT%H:%M:%S.%fZ'
    whole_seconds = '%Y-%m-%dT%H:%M:%SZ'
    try:
        parsed = datetime.strptime(iso8601_string, with_fraction)
    except ValueError:
        # No fractional part: retry with the whole-second layout. A failure
        # here propagates to the caller.
        parsed = datetime.strptime(iso8601_string, whole_seconds)
    return parsed

def format_duration(start_timestamp, end_timestamp):
    """Format the time between two timestamps as ``"DD:HH:MM:SS"``.

    Fractions of a second are truncated. When ``end_timestamp`` is earlier
    than ``start_timestamp`` the magnitude is formatted and prefixed with
    ``-`` (for example ``"-00:00:00:10"``), instead of the misleading
    ``"-1:23:59:50"`` that floor division on a negative number of seconds
    would produce.

    Args:
        start_timestamp: Start of the interval (datetime-like), or a missing
            value (None / NaT).
        end_timestamp: End of the interval (datetime-like), or a missing
            value (None / NaT).

    Returns:
        The formatted duration, or None when either timestamp is missing.
    """
    # pd.isna covers None as well as NaT, which is how pandas represents a
    # missing timestamp once the values live in a DataFrame column.
    if pd.isna(start_timestamp) or pd.isna(end_timestamp):
        return None

    elapsed = (end_timestamp - start_timestamp).total_seconds()

    # Work on whole, non-negative seconds so divmod splits cleanly.
    whole_seconds = int(abs(elapsed))
    days, remainder = divmod(whole_seconds, 24 * 3600)
    hours, remainder = divmod(remainder, 3600)
    minutes, seconds = divmod(remainder, 60)

    # Only show the sign when something non-zero is left after truncation.
    sign = "-" if elapsed < 0 and whole_seconds else ""
    return f"{sign}{days:02d}:{hours:02d}:{minutes:02d}:{seconds:02d}"

def timestamp_to_moment(timestamp):
    """Classify a timestamp into a part of the day by its hour.

    Hours 5-11 map to "mañana", 12-15 to "medio día", 16-20 to "tarde" and
    everything else (21-4) to "noche".

    Args:
        timestamp: Any object exposing an ``hour`` attribute (datetime,
            pandas Timestamp), or a missing value (None / NaT).

    Returns:
        One of the four labels above, or None when the timestamp is missing.
    """
    # A missing timestamp in a DataFrame column is NaT, not None. NaT.hour is
    # NaN, which fails every comparison below and would otherwise be
    # labelled "noche"; pd.isna catches both None and NaT.
    if pd.isna(timestamp):
        return None

    hour = timestamp.hour
    if 5 <= hour < 12:
        return "mañana"
    if 12 <= hour < 16:
        return "medio día"
    if 16 <= hour < 21:
        return "tarde"
    return "noche"

def group_distance(distance):
    """Bucket a distance into a coarse, human-readable range label.

    The thresholds are 1000, 10000 and 100000; the labels express them as
    1 km, 10 km and 100 km.

    Args:
        distance: Numeric distance, or a missing value (None / NaN).

    Returns:
        "desconocido" for a missing value, otherwise the label of the range
        that contains ``distance``.
    """
    if pd.isna(distance):
        return "desconocido"
    # Each guard only runs when the previous one failed, so the lower bound
    # of every range is implied.
    if distance < 1000:
        return "menos de 1km"
    if distance < 10000:
        return "de 1km a 10km"
    if distance < 100000:
        return "de 10km a 100km"
    return "más de 100km"

  # Carpeta raíz que contiene las carpetas "2014" hasta "2023"
root_folder = '/Users/valentincortespuya/Downloads/Takeout/Historial de ubicaciones/Semantic Location History'

# One DataFrame of activity segments per JSON file that contains any.
all_dfs = []

# Walk the year folders 2021 through 2023 (the upper bound of range() is
# exclusive); years without a folder are skipped.
for year in range(2021, 2024):
    year_folder = os.path.join(root_folder, str(year))
    if not os.path.isdir(year_folder):
        continue

    for dirpath, dirnames, filenames in os.walk(year_folder):
        # os.walk yields entries in arbitrary order; sorting in place makes
        # the traversal, and therefore the row order of the exported CSV,
        # reproducible between runs.
        dirnames.sort()
        for filename in sorted(filenames):
            if not filename.endswith('.json'):
                continue

            file_path = os.path.join(dirpath, filename)
            df = pd.read_json(file_path)
            if df.empty:
                continue

            # Expand each 'timelineObjects' entry (a dict) into columns.
            df2 = df['timelineObjects'].apply(pd.Series)

            # A file may hold no activity segments at all; without these
            # guards dropna(subset=...) or the column lookups below raise
            # KeyError.
            if 'activitySegment' not in df2.columns:
                continue
            df2_cleaned = df2.dropna(subset=['activitySegment'])
            if df2_cleaned.empty:
                continue

            # NOTE(review): extract_data is not defined in this cell; it must
            # already exist in the kernel. It presumably returns a pd.Series
            # per segment with at least 'start_timestamp', 'distance' and
            # 'activity_type', given the column accesses below - confirm.
            df_extracted = df2_cleaned['activitySegment'].apply(extract_data)
            df_extracted['momento_del_dia'] = df_extracted['start_timestamp'].apply(timestamp_to_moment)
            df_extracted['distance_group'] = df_extracted['distance'].apply(group_distance)

            # Put the extracted columns next to the remaining timeline
            # columns, then drop the raw nested 'activitySegment' payload.
            df_final = pd.concat([df2_cleaned, df_extracted], axis=1)
            df_final = df_final.drop(columns=['activitySegment'])

            all_dfs.append(df_final)

# pd.concat([]) fails with an opaque "No objects to concatenate"; report what
# was searched instead.
if not all_dfs:
    raise ValueError(
        f"No activity segments found in the 2021-2023 folders under {root_folder!r}"
    )

df_combined = pd.concat(all_dfs, ignore_index=True)

# Labels that replace the raw values of the 'activity_type' column; values
# without an entry here are left unchanged by replace().
mapping = {
    'IN_PASSENGER_VEHICLE': 'COCHE',
    'IN_BUS': 'BUS',
    'IN_TRAIN': 'TREN',
    'WALKING': 'ANDANDO',
    'IN_SUBWAY': 'METRO',
    'UNKNOWN_ACTIVITY_TYPE': 'OTROS',
    'FLYING': 'AVION',
    'CYCLING': 'BICI',
    'IN_FERRY': 'BARCO'
}

df_combined['activity_type'] = df_combined['activity_type'].replace(mapping)

# Export the combined frame to CSV without the positional index.
df_combined.to_csv("ubicaciones_historicas.csv", index=False)


In [25]:
!pip install folium



In [26]:
import folium

# Base map centred on a fixed point.
map_center = [40.468159, -3.875562]
m = folium.Map(location=map_center, zoom_start=10)

# One marker per row whose start latitude and longitude are both present;
# rows missing either value are dropped before iterating.
start_points = df_combined[['start_latitude', 'start_longitude']].dropna()
for start_lat, start_lon in start_points.itertuples(index=False, name=None):
    folium.Marker([start_lat, start_lon]).add_to(m)

# Write the map to a standalone HTML file.
m.save('mapa.html')


In [27]:
import folium

# Base map centred on a fixed point.
map_center = [40.468159, -3.875562]
m = folium.Map(location=map_center, zoom_start=10)

# Only rows with all four coordinates present are drawn: a marker at the
# start, a marker at the end, and a blue straight line joining them.
coordinate_columns = ['start_latitude', 'start_longitude', 'end_latitude', 'end_longitude']
complete_trips = df_combined[coordinate_columns].dropna()
for start_lat, start_lon, end_lat, end_lon in complete_trips.itertuples(index=False, name=None):
    folium.Marker([start_lat, start_lon]).add_to(m)
    folium.Marker([end_lat, end_lon]).add_to(m)
    folium.PolyLine([(start_lat, start_lon), (end_lat, end_lon)], color="blue").add_to(m)

# Write the map to a standalone HTML file.
m.save('mapa_con_lineas_filtro-trasnporte.html')


In [28]:
# Show a bounded preview: with display.max_rows set to None, a bare
# `df_combined` would render every row of the location history into the
# notebook output.
df_combined.head(10)


Unnamed: 0,placeVisit,start_latitude,start_longitude,end_latitude,end_longitude,start_timestamp,end_timestamp,duration_formatted,distance,activity_type,confidence,momento_del_dia,distance_group
0,,40.475175,-3.880849,40.452713,-3.881988,2022-05-01 10:29:39.080,2022-05-01 10:45:52.101,00:00:16:13,4742.0,COCHE,HIGH,mañana,de 1km a 10km
1,,40.452594,-3.882202,40.434205,-3.800727,2022-05-01 11:15:59.053,2022-05-01 11:32:25.803,00:00:16:26,8296.0,COCHE,HIGH,mañana,de 1km a 10km
2,,40.434375,-3.797649,40.425776,-3.778815,2022-05-01 11:49:38.000,2022-05-01 11:54:53.000,00:00:05:15,2137.0,COCHE,HIGH,mañana,de 1km a 10km
3,,40.426226,-3.779403,40.455316,-3.860894,2022-05-01 17:32:03.000,2022-05-01 17:45:08.236,00:00:13:05,9366.0,COCHE,HIGH,tarde,de 1km a 10km
4,,40.455526,-3.859239,40.475243,-3.880705,2022-05-01 17:53:59.000,2022-05-01 18:00:09.000,00:00:06:10,3146.0,COCHE,HIGH,tarde,de 1km a 10km
5,,40.474498,-3.88034,40.502655,-3.8889,2022-05-02 11:41:39.207,2022-05-02 11:49:29.341,00:00:07:50,3563.0,COCHE,HIGH,mañana,de 1km a 10km
6,,40.502577,-3.889105,40.474849,-3.880375,2022-05-02 13:35:15.000,2022-05-02 13:43:38.247,00:00:08:23,4271.0,COCHE,HIGH,medio día,de 1km a 10km
7,,40.474846,-3.880455,40.454474,-3.873495,2022-05-03 04:53:24.548,2022-05-03 04:59:52.000,00:00:06:27,2411.0,COCHE,HIGH,noche,de 1km a 10km
8,,40.455025,-3.87325,40.475057,-3.845368,2022-05-03 06:06:07.743,2022-05-03 06:18:48.243,00:00:12:40,3433.0,BUS,LOW,mañana,de 1km a 10km
9,,40.47413,-3.845639,40.400304,-3.692619,2022-05-03 06:27:54.000,2022-05-03 06:48:28.000,00:00:20:34,15631.0,TREN,MEDIUM,mañana,de 10km a 100km
