In [2]:
import os
import re
from dataclasses import dataclass
from datetime import date
from pathlib import Path
from typing import Dict, List, Tuple, Iterator, Literal, Union, Optional

import pandas as pd

# Constantes

In [3]:
# Constants holding the names of regex capture groups
G_EST: str = 'est'
G_VAL_571: str = 'val_571'
G_VAL_572: str = 'val_572'
G_VAL_573: str = 'val_573'
G_VAL_574: str = 'val_574'
G_VAL_83: str = 'val_83'

# Divisor applied to a raw reading, keyed by the one-digit code captured right
# before the signed value (pts_ini divides `med_83` by SCALE[code83]).
# NOTE(review): presumably the GSI unit/decimal-place digit -- confirm.
SCALE: Dict[str, float] = {
    '6': 1e4,
    '8': 1e5,
    '0': 1e3,
}

# Paths y REGEX

In [26]:
# Root data directory. It defaults to the original workstation location, but it
# can be redirected without editing the notebook by setting the
# L5_GSI_DATA_DIR environment variable before the kernel is started.
PATH = Path(os.environ.get(
    "L5_GSI_DATA_DIR",
    "C:\\Users\\samuel.calderon\\Documents\\CodigoAjuste\\Python\\data\\L5_GSI_1",
))
# Directory that holds the raw files.
L5Crudos = PATH / 'CRUDOS_L5'
# Text file located under Orden_L5 (named after line start / line end).
L5Orden = PATH / 'Orden_L5' / 'L5_Principio_Linea;Fin_linea.txt'
# Regular files directly inside the raw directory. Sorted so the list is
# deterministic and follows the same order read_lines() uses.
# Raises FileNotFoundError if the directory does not exist.
crudos = sorted(p for p in L5Crudos.iterdir() if p.is_file())

# RE_estacion_ini: matches a string that starts with six digits and '+', followed
# by a point identifier (named group `id_v_ini`) and, later on, a word-83 block.
#   * `id_v_ini` is either
#       - an 8-digit number terminated by whitespace/end of string that contains
#         no run of five consecutive zeros, or
#       - a 3-8 character token of letters/digits/hyphens terminated by
#         whitespace/end of string that contains at least one letter.
#   * word 83 is "83.." + an optional digit (`corr_83_ini`) + one digit
#     (`esc_83_ini`) + a sign and eight digits (`med_83_ini`).
# The pattern is not anchored at the end, so text after the 83 block is allowed.
# NOTE(review): the inline pattern comments (Spanish) say "without '0000'" and
# "exactly 15 characters", but the pattern rejects five zeros and, because
# `corr_83_ini` is optional, accepts a 14- or 15-character block. The pattern
# text is left untouched because it is part of the compiled regex.
RE_estacion_ini = re.compile(r''' ^(\d{6})\+           # 1: seis dígitos y '+'
(?P<id_v_ini>                                       # 2: token después del '+'
    (?:                                            #    alternancia excluyente
        (?=\d{8}(?=\s|$))(?!\d{0,5}00000)\d{8}      #    numérico: 8 dígitos y sin '0000'
      | (?=[A-Za-z0-9-]{3,8}(?=\s|$))              #    alfanumérico: 3–8, termina en espacio/fin
        (?=[A-Za-z0-9-]*[A-Za-z])                  #    contiene ≥1 letra
        [A-Za-z0-9-]{3,8}
    )
)
.*?                                                # separador
# ---- Bloque 83 (exactamente 15 caracteres) ----
(83\.\.(?P<corr_83_ini>\d)?(?P<esc_83_ini>\d)(?P<med_83_ini>[+-]\d{8}))
''', re.VERBOSE)

# RE_FINAL_2: matches a record that has no "573.." / "574.." word and that ends
# with a word-83 block.
#   * The leading negative lookahead rejects the whole string as soon as
#     "573.." or "574.." appears anywhere after a word boundary.
#   * `id_v` is either 8 digits (followed by whitespace/end of string and not
#     ending in "00000") or a 3-8 character letters/digits/hyphen token that
#     contains at least one letter (captured first as `tok`).
#   * The 83 block is "83.." + '2' or '5' + one digit (`code83`) + a sign and
#     eight digits (`med_83`); `$` anchors it at the end of the string.
# re.DOTALL lets '.' match newlines; re.MULTILINE is not set.
RE_FINAL_2 = re.compile(r"""
^
(?!.*\b57(?:3|4)\.\.)                   # ← basta con que aparezca '573..' o '574..' para invalidar
(\d{6}\+)
(?P<id_v>
    (?:\d{8}(?=\s|$)(?<!00000))|
    (?=(?P<tok>[A-Za-z0-9-]{3,8})(?=\s|$))(?=[A-Za-z0-9-]*[A-Za-z])(?P=tok)
)
.*?
(83\.\.[25](?P<code83>\d)(?P<med_83>[+-]\d{8}))
$
""", re.VERBOSE | re.DOTALL)

# RE_FINAL: matches a record that carries a 573 word, a 574 word and a final
# word-83 block, in that order.
#   * `id_v` follows the six digits and '+': either 8 digits (followed by
#     whitespace/end of string and not ending in "00000") or a 3-8 character
#     letters/digits/hyphen token containing at least one letter (`tok`).
#   * The two lookaheads require that both a 573 and a 574 word exist somewhere
#     after the id before the consuming part of the pattern is tried.
#   * Each 57x word is "57x.." + one of 0/6/8 (`esc_573` / `esc_574`) + a sign
#     and eight digits (`med_573` / `med_574`).
#   * The 83 block is "83.." + '2' or '5' + one digit (`code83`) + a sign and
#     eight digits (`med_83`); `$` anchors it at the end of the string.
# re.DOTALL is not set here, so '.' does not match newlines.
RE_FINAL = re.compile(r"""
^
(\d{6}\+)
(?P<id_v>
    (?:\d{8}(?=\s|$)(?<!00000))|
    (?=(?P<tok>[A-Za-z0-9-]{3,8})(?=\s|$))(?=[A-Za-z0-9-]*[A-Za-z])(?P=tok)
)
(?=.*\b573\.\.[068][+-]\d{8})(?=.*\b574\.\.[068][+-]\d{8})   # debe haber 573 y 574
.*?
(573\.\.(?P<esc_573>[068])(?P<med_573>[+-]\d{8}))
.*?
(574\.\.(?P<esc_574>[068])(?P<med_574>[+-]\d{8}))
.*?
(83\.\.[25](?P<code83>\d)(?P<med_83>[+-]\d{8}))
$
""", re.VERBOSE)

# VERT: pattern used to standardize vertex codes into the form XXXX-XXXX-09.
# It splits a whole string into three groups: a leading run of uppercase
# letters/digits, then 2-4 uppercase letters, then 1-2 trailing digits.
# vert_std() upper-cases the code and strips hyphens/spaces before matching.
VERT_RE = re.compile(r'^([A-Z0-9]+)([A-Z]{2,4})(\d{1,2})$')

In [15]:
@dataclass
class vert:
    """Data record describing one vertex.

    Attributes:
        nom: Vertex name; must be the standardized name.
        tipo: Start or end marker (original note: "Inicio o Fin").
        fecha: Extracted date, stored as a string.
        dif_dist: Accumulated difference between the backsight ("v.atras")
            and foresight ("v.adelante") distances.
        dist_tot: Accumulated total distance.
        obs: Height value obtained.
    """

    # Required fields
    nom: str
    tipo: str
    fecha: str

    # Accumulators / observation, all starting at zero
    dif_dist: float = 0.0
    dist_tot: float = 0.0
    obs: float = 0.0

# Estandarización de la Nomenclatura

In [9]:
# Function that standardizes vertex nomenclature
def vert_std(codigo: str) -> str:
    """Return `codigo` rewritten as three hyphen-separated parts.

    Hyphens and spaces are removed and the text is upper-cased before it is
    matched against VERT_RE. On a match, the three captured groups are joined
    with '-'. Otherwise the input is returned exactly as received (not the
    cleaned-up copy).

    Args:
        codigo: Raw vertex code.

    Returns:
        The standardized code, or `codigo` unchanged when it does not match.
    """
    # Drop separators first, then upper-case.
    sin_separadores = codigo.translate({ord("-"): None, ord(" "): None})
    partes = VERT_RE.match(sin_separadores.upper())
    if partes is None:
        return codigo
    return "-".join(partes.groups())

In [27]:
def pts_ini(linea : str):
    """Print the standardized point id and the scaled word-83 value of a line.

    The line is stripped and searched with RE_FINAL_2. On a match, the point id
    (leading zeros removed, then normalized with vert_std) and the `med_83`
    reading divided by the SCALE entry for `code83` are printed, separated by a
    space. Lines that do not match are ignored silently.

    Args:
        linea: One line of a raw file.

    Returns:
        None.

    Raises:
        ValueError: If the matched `code83` digit has no entry in SCALE.
    """
    m_ini = RE_FINAL_2.search(linea.strip())
    if m_ini is None:
        return

    code83 = m_ini.group('code83')
    divisor = SCALE.get(code83)
    if divisor is None:
        # RE_FINAL_2 accepts any digit in this position but SCALE only knows a
        # few of them; dividing by None used to fail with an opaque TypeError.
        raise ValueError(
            f"Unsupported code {code83!r} in word 83 of line {linea.strip()!r}; "
            f"known codes: {sorted(SCALE)}"
        )

    print(vert_std(m_ini.group('id_v').lstrip('0')), float(m_ini.group('med_83')) / divisor)
def read_lines(crudos : Path):
    """Feed every line of every regular file in a directory to pts_ini.

    Directory entries are visited in sorted order and anything that is not a
    regular file is skipped. Each file is opened in text mode with the default
    encoding and read completely before its lines are passed, one by one and
    with their line endings intact, to pts_ini.

    Args:
        crudos: Directory that contains the raw files.

    Returns:
        None.
    """
    for entrada in sorted(crudos.iterdir()):
        if not entrada.is_file():
            continue
        with open(entrada, "r") as archivo:
            contenido = archivo.readlines()
            for texto in contenido:
                pts_ini(texto)

# Run the extraction over every raw file in L5Crudos; pts_ini prints one
# "<point id> <value>" line for each record that matches RE_FINAL_2.
read_lines(L5Crudos)

6-CB-3 0.0
5-CB-3 0.0
4-CB-3 0.0
A3-CB-3 0.0
A2-CB-3 0.0
23660003 0.0
B70-CW-7 0.0
23660003 0.0
A2-CB-3 0.0
A3-CB-3 0.0
4-CB-3 0.0
5-CB-3 0.0
19-CB-3 0.0
23189005 0.0
18-CB-3 0.0
23189004 0.0
A16-CB-3 0.0
23189003 0.0
6-CB-3 0.0
23189001 0.0
8-CB-3 0.0
9-CB-3 0.0
10-CB-3 0.0
23189002 0.0
12-CB-3 0.0
13-CB-3 0.0
12-CB-3 0.0
23189002 0.0
10-CB-3 0.0
9-CB-3 0.0
8-CB-3 0.0
23189001 0.0
13-CB-3 0.0
23189003 0.0
A16-CB-3 0.0
23189004 0.0
18-CB-3 0.0
23189005 0.0
19-CB-3 0.0
23162002 0.0
23162001 0.0
28-CB-3 0.0
27-CB-3 0.0
A26-CB-3 0.0
A25-CB-3 0.0
15-TE-2 0.0
A20-CB-3 0.0
A21-CB-3 0.0
A22-CB-3 0.0
23-CB-3 0.0
23189006 0.0
23189006 0.0
23-CB-3 0.0
A22-CB-3 0.0
A21-CB-3 0.0
A20-CB-3 0.0
15-TE-2 0.0
A25-CB-3 0.0
A26-CB-3 0.0
27-CB-3 0.0
28-CB-3 0.0
23162001 0.0
23162002 0.0
44-CB-3 0.0
23001003 0.0
A42-CB-3 2.2618
23001002 0.0
A40-CB-3 0.0
23001001 0.0
23162003 0.0
23162004 0.0
23162005 0.0
23162006 0.0
A38-CB-3 0.0
39-CB-3 0.0
23001001 0.0
23001005 0.0
23001004 0.0
39-CB-3 0.0
A38-CB-3 0.0
23