In [88]:
import polars as pl
import pandas as pd
import json

In [89]:
df_data = pl.read_json('data.json')
df2 = pd.read_json('data.json')
df_old_data = pl.read_excel(r'Backup\Lojas Assaí.xlsx')

In [90]:
def start_pipeline(dataf):
    return dataf.clone()

def drop_columns_new(dataf):
    dataf = dataf.drop('url', 'subRegiao', 'subRegiaoTid', 'telefone', 'televendas', 'horario', 'email', 'ico_sust', 'voce_encontra', 'destaques', 'eslug', 'eid', 'e', 'tid', 'whatsapp', 'mapa', 'complemento', 'nid', 'cep', 'loja_id')
    return dataf

def drop_columns_old(dataf):
    dataf = dataf.drop('Unnamed: 4', 'Código Município')
    return dataf

def strip_chars_new(dataf):
    dataf = dataf.with_columns(pl.col("n").str.strip_chars())
    return dataf

def strip_chars_old(dataf):
    dataf = dataf.with_columns(pl.col("Unidade").str.strip_chars())
    return dataf

def rename_columns(dataf):
    dataf = dataf.rename({'n':'Unidade', 'c':'Município', 'uf':'UF', 'lat':'LAT', 'lon':'LONG', 'logradouro':'Endereço'})
    dataf = dataf.cast({"LAT": pl.Float64, "LONG": pl.Float64})
    return dataf

def sort_columns(dataf):
    dataf = dataf.select(['Unidade','Endereço','Município','UF','LAT','LONG'])
    return dataf




In [91]:
df = (df_data
 .pipe(start_pipeline)
 .pipe(drop_columns_new)
 .pipe(strip_chars_new)
 .pipe(rename_columns)
 .pipe(sort_columns))

df_old = (df_old_data
 .pipe(start_pipeline)
 .pipe(strip_chars_old)
 .pipe(drop_columns_old))

In [92]:

df_lower = df
df_old_lower = df_old
# Values in 'column_a' from df1 but not in 'column_b' from df2
only_in_df1 = df.filter(~pl.col("Unidade").str.to_lowercase().is_in(df_old['Unidade'].str.to_lowercase())).with_columns(pl.lit("Nova").alias("status"))

# Values in 'column_b' from df2 but not in 'column_a' from df1
only_in_df2 = df_old.filter(~pl.col('Unidade').str.to_lowercase().is_in(df["Unidade"].str.to_lowercase())).with_columns(pl.lit("Fechou").alias("status"))

# Common values in both dataframes with 'Antigas' status
common_in_both = df.filter(pl.col("Unidade").str.to_lowercase().is_in(df_old['Unidade'].str.to_lowercase())).with_columns(pl.lit("Antigas").alias("status"))

# Combine the results into one DataFrame
df_diff = pl.concat([
    only_in_df1.rename({"Unidade": "value"}),
    only_in_df2.rename({"Unidade": "value"}),
    common_in_both.rename({"Unidade": "value"})
])



In [93]:
df_diff

value,Endereço,Município,UF,LAT,LONG,status
str,str,str,str,f64,f64,str
"""Assaí Zona Norte""","""Rua Tancredo Neves, 528""","""Macapá""","""AP""",0.06762,-51.05688,"""Nova"""
"""Assaí Manaus Bola da Suframa""","""Rua Francisco Pereira da Silva…","""Manaus""","""AM""",-3.131969,-59.985704,"""Nova"""
"""Assaí Salvador Paralela""","""Avenida Governador Luis Viana …","""Salvador""","""BA""",-12.96494,-38.43848,"""Nova"""
"""Assaí Cais do Porto""","""Av. José Sabóia, 521""","""Fortaleza""","""CE""",-3.71718,-38.46671,"""Nova"""
"""Assaí Montese""","""Avenida Dos Expedicionários, 4…","""Fortaleza""","""CE""",-3.753169,-38.537952,"""Nova"""
…,…,…,…,…,…,…
"""Assaí Taboão da Serra""","""Rodovia Regis Bittencourt, 340""","""Taboão da Serra""","""SP""",-23.613221,-46.781066,"""Antigas"""
"""Assaí Marginal Tietê - Tatuapé""","""Rua Ulisses Cruz, nº 993""","""Tatuapé""","""SP""",-23.529796,-46.578482,"""Antigas"""
"""Assaí Taubaté""","""Avenida Dom Pedro I, 630 E,""","""Taubaté""","""SP""",-23.024431,-45.55644,"""Antigas"""
"""Assaí Palmas""","""Avenida Joaquim Teotônio Segur…","""Palmas""","""TO""",-10.250802,-48.333348,"""Antigas"""


In [94]:
# df_diff = df.filter(~pl.col("column_a").is_in(pl.col("column_b")))
df.write_excel("lojasAssai.xlsx", worksheet='lojasSite')
df_diff.write_excel("lojasAssaiDiff.xlsx", worksheet='lojasSite')


<xlsxwriter.workbook.Workbook at 0x19be8c220f0>

In [95]:
import polars as pl
from geopy.distance import great_circle

# Create lists for the new columns, initialized with None for all rows
closest_values = [None] * df_diff.height
closest_distances = [None] * df_diff.height


# Filter for "Antigo" values
nova_df = df_diff.filter(pl.col("status") == "Nova")
antigo_df = df_diff.filter(pl.col("status") == "Antigas")

# Iterate through each "Antigo" row
for index, row in enumerate(nova_df.iter_rows(named=True)):
    current_coords = (row["LAT"], row["LONG"])
    # Calculate distances to other "Antigo" values
    distances_old = [
        (other_row["value"], great_circle(current_coords, (other_row["LAT"], other_row["LONG"])).kilometers)
        for other_row in antigo_df.iter_rows(named=True)
        if other_row["value"] != row["value"]  # Skip itself
    ]
    print(distances_old)
    # Find the closest "Antigo" value
    if distances_old:
        closest_value, min_distance = min(distances_old, key=lambda x: x[1])
        closest_values[index] = closest_value  # Set in the original position
        closest_distances[index] = min_distance
    # No need to append since we already initialized with None

# Add the new columns to the original DataFrame
df_diff = df_diff.with_columns([
    pl.Series("closest_value", closest_values),
    pl.Series("closest_distance_km", closest_distances)
])


df_diff.write_excel("lojasAssaiMenorDistancia.xlsx", worksheet='lojasSite')



[('Assaí Rio Branco', 2167.838172712017), ('Assaí Arapiraca', 1934.0558411385166), ('Assaí Maceió Farol', 2008.128259472093), ('Assaí Maceió Mangabeiras', 2012.2861650282905), ('Assaí Menino Marcelo', 2010.0733390610835), ('Assaí Tabuleiro', 2002.3679277710562), ('Assaí Macapá', 2483.0173507267386), ('Assaí Manaus Grande Circular', 1046.1891021710117), ('Assaí Manaus II', 1053.9416228373277), ('Assaí Manaus Torquato', 1055.4509147505837), ('Assaí Barreiras', 1514.1729420484412), ('Assaí Camaçari', 1997.938756900823), ('Assaí Feira de Santana', 1910.086031826536), ('Assaí Tomba', 1911.2117762654468), ('Assaí Guanambi', 1830.7572420625204), ('Assaí Ilhéus', 2121.91480862281), ('Assaí Itapetinga', 2482.9092090048625), ('Assaí Jequié', 1964.0213135233091), ('Assaí Juazeiro da Bahia', 1574.231328328615), ('Assaí Lauro de Freitas', 2011.7215569306104), ('Assaí Paulo Afonso', 1766.651900182989), ('Assaí Barris', 2004.5072170320898), ('Assaí Cabula', 2006.1146540270872), ('Assaí Cidade Baixa',

<xlsxwriter.workbook.Workbook at 0x19be88f72c0>

In [96]:
import pandas as pd
import plotly.express as px

df_diff = df_diff.with_columns(
    dummy_column_for_size = 1.
)
# Create the scatter mapbox
fig = px.scatter_mapbox(df_diff, 
                        lat="LAT", 
                        lon="LONG", 
                        hover_name="value",  # Shows when hovering over points
                        zoom=4,  # Zoom level
                        height=600,
                        
                        color='status',
                        size='dummy_column_for_size',
                        size_max=10,)

# Set mapbox style and access token (you can use 'open-street-map' without a token)
fig.update_layout(mapbox_style="open-street-map")
fig.update_layout(mapbox_style="carto-positron")

# Make sure the map takes up the full width of the output area
fig.update_layout(
    autosize=True, # Set this to your preferred width (can be omitted if autosize works)
    height=1080,  # Set this to your preferred height
    margin={"r":0,"t":0,"l":0,"b":0}
)

# Show the interactive map
fig.show()
# Export the map as an HTML file
fig.write_html("localizacoesAssai.html")

# Optionally, export as an image (png, jpg, svg)
# fig.write_image("brazil_map_image.png", scale=3)