In [14]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
# Fetch the NLTK resources this notebook relies on: 'punkt' backs
# word_tokenize and 'stopwords' provides the lists read via stopwords.words().
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\uma_i\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\uma_i\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [15]:
import pandas as pd
import os
import numpy as np
from unidecode import unidecode
import json

def create_dataframe(path, public=False):
    """Load every ``*.json`` file found one level below ``path`` into one DataFrame.

    ``path`` is scanned for sub-folders; each JSON file inside a sub-folder is
    read with ``pd.read_json`` and tagged with two extra columns:

    * ``provenienteDe`` -- name of the sub-folder the file was found in.
    * ``actor`` -- ``"servidor publico"`` when ``public`` is true, otherwise
      ``"particular"``.

    Args:
        path: Root directory to scan.
        public: Selects the value written to the ``actor`` column.

    Returns:
        The row-wise concatenation of all files (per-file indices are kept),
        or an empty DataFrame when no JSON file is found.
    """
    frames = []
    for folder in os.listdir(path):
        folder_path = os.path.join(path, folder)
        # Stray files next to the sub-folders (README, .DS_Store, ...) would
        # make os.listdir raise NotADirectoryError; skip them.
        if not os.path.isdir(folder_path):
            continue
        for file in os.listdir(folder_path):
            if file.endswith(".json"):
                df = pd.read_json(os.path.join(folder_path, file))
                df['provenienteDe'] = folder
                df['actor'] = "servidor publico" if public else "particular"
                frames.append(df)
    # Concatenate once: growing a DataFrame inside the loop re-copies all the
    # rows accumulated so far on every iteration.
    return pd.concat(frames) if frames else pd.DataFrame()


def get_nested(data, keys):
    """Walk ``keys`` through nested dicts and return the value found, else None.

    ``data`` is first converted with ``dict(...)``. None is returned when any
    intermediate value is not a dict, when the final key is absent, or when
    ``keys`` is empty.
    """
    current = dict(data)
    leaf = keys[-1] if keys else None
    for step in keys[:-1]:
        if not isinstance(current, dict):
            return None
        current = current.get(step)
    if isinstance(current, dict) and leaf in current:
        return current[leaf]
    return None

def get_resolution_value(resolution):
    """Map a resolution's free-text "sentido" to a ``(label, score)`` tuple.

    The text is lower-cased, transliterated with ``unidecode`` and stripped of
    surrounding whitespace before matching.

    Args:
        resolution: Resolution text. Anything that is not a string (None,
            float NaN, ...) is treated as "not provided".

    Returns:
        One of the ``(label, score)`` tuples in ``options``; unrecognised text
        maps to ``("dato no proporcionado", 0)``.
    """
    options = {
        0: ("dato no proporcionado",0),
        1: ("absolutoria",0),
        2: ("sancionatoria",5),
        3: ("sancionatoria con multa",7),
        4: ("sancionatoria con multa e inhabilitacion",10),
        5: ("sentencia condenatoria",10),
        6: ("sancionatoria economica",4)
    }
    # Missing values can be None or a float NaN; neither has ``.lower()``.
    if not isinstance(resolution, str):
        return options[0]
    # Strip so that padded values such as "sancionatoria " still match.
    resolution = unidecode(resolution.lower()).strip()
    if resolution == "" or resolution == "dato no proporcionado":
        return options[0]
    if resolution == "absolutoria":
        return options[1]
    if resolution == "sancionatoria":
        return options[2]
    # Substring checks run from the highest score to the lowest.
    if resolution.find("inhabilitac") != -1:
        return options[4]
    if resolution.find("multa") != -1:
        return options[3]
    if resolution.find("economica") != -1:
        # Previously returned options[3], which left options[6] unreachable.
        return options[6]
    if resolution == "sentencia condenatoria":
        return options[5]
    return options[0]
    
    
def get_act_value(act):
    """Translate an act description into its ``(label, score)`` tuple.

    None and float inputs, as well as text that is not in the table after
    lower-casing and ``unidecode`` transliteration, yield
    ``("dato no proporcionado", 0)``.
    """
    not_provided = ("dato no proporcionado",0)
    known_acts = {
        "dato no proporcionado": not_provided,
        "delito cometido por particular": ("delito cometido por particular",8.1),
        "denuncia": ("denuncia",3.6),
        "licitacion publica": ("licitacion publica",6.4),
        "soborno": ("soborno",4.9),
        "NOPR": not_provided,
    }
    if act is None or isinstance(act, float):
        return not_provided
    normalized = unidecode(act.lower())
    if normalized in known_acts:
        return known_acts[normalized]
    return not_provided

def get_sanctions_values(sanctions):
    """Summarise a list of sanction entries as ``(labels, mean_score)``.

    Args:
        sanctions: Iterable of dicts carrying a ``"clave"`` code. None, a float
            (NaN) or an empty collection mean "not provided".

    Returns:
        A tuple ``(label, score)``: the labels of all sanctions joined with
        ``", "`` and the arithmetic mean of their scores. Unknown codes count
        as ``("dato no proporcionado", 0)``. The "not provided" case returns
        that same tuple shape.
    """
    options = {
        "I": ("inhabilitado",10),
        "M": ("multado",3.6),
        "S": ("suspension del empleo, cargo o comision",4.9),
        "D": ("destitucion del empleo, cargo o comision",8.1),
        "O": ("otro",2.5),
        "IRSC": ("indeminizacion resarcitoria",6.4),
        "SE": ("sancion economica",4.9),
        "NOPR": ("dato no proporcionado",0),
    }

    if sanctions is None or isinstance(sanctions, float) or len(sanctions) == 0:
        # Return the bare tuple, not a one-element list: callers index the
        # result with [0]/[1], and [1] on the list raised IndexError.
        return options["NOPR"]

    labels = []
    scores = []
    for sanction in sanctions:
        # Entries that are not dicts cannot carry a code; count them as unknown.
        clave = sanction.get("clave") if isinstance(sanction, dict) else None
        label, score = options.get(clave, options["NOPR"])
        labels.append(label)
        scores.append(score)
    return (', '.join(labels), float(np.mean(scores)))

def scalar_inh_days(days):
    """Convert a disqualification length in days to a score between 0 and 10.

    The score is piecewise linear:

    * 0 .. 190 days           -> 0   .. 2.5
    * 191 .. 365 days         -> 2.5 .. 5
    * 366 .. 36500 days       -> 5   .. 7.5
    * 36501 .. 365000 days    -> 7.5 (flat)
    * 365001 .. 723000 days   -> 7.5 .. 10
    * above 723000 days       -> 10

    Negative or NaN input scores 0. Fractional values that fall between two
    integer bounds (for example 190.5) take the score at the start of the next
    segment instead of dropping to 0.
    """
    # ``not days >= 0`` is true for negatives and for NaN alike.
    if not days >= 0:
        return 0
    if days <= 190:
        return 2.5 * (days / 190)
    # max(..., 0) pins values in the gap below each segment's first integer day
    # to that segment's starting score.
    if days <= 365:
        return 2.5 + (5 - 2.5) * (max(days - 191, 0) / (365 - 191))
    if days <= 36500:
        return 5 + (7.5 - 5) * (max(days - 366, 0) / (36500 - 366))
    if days <= 365000:
        return 7.5  # fixed value across this range
    if days <= 723000:
        return 7.5 + (10 - 7.5) * (max(days - 365001, 0) / (723000 - 365001))
    return 10


def get_inh_days_value(inhabilitacion):
    """Return ``(days, score)`` for a disqualification record.

    Decision table:

    * Input that is not a dict (None, NaN, free text) -> ``(0, 0)``.
    * ``fechaInicial``, ``fechaFinal`` and ``plazo`` all present: the length is
      ``fechaFinal - fechaInicial`` in days. Placeholder dates (``"-"`` or
      ``" "``) and unparseable or missing dates give ``(0, 0)``.
    * Otherwise a finite numeric ``plazo`` is used directly as a day count.
    * Anything else -> ``(0, 0)``.

    The score comes from ``scalar_inh_days``.
    """
    # Guard first: ``'plazo' in "some text"`` is a substring test and the
    # subsequent ``text['plazo']`` would raise TypeError.
    if not isinstance(inhabilitacion, dict):
        return (0, 0)

    placeholders = ("-", " ")
    if all(key in inhabilitacion for key in ('fechaInicial', 'fechaFinal', 'plazo')):
        start_raw = inhabilitacion['fechaInicial']
        end_raw = inhabilitacion['fechaFinal']
        if start_raw in placeholders or end_raw in placeholders:
            return (0, 0)
        try:
            # NOTE(review): the date format is inferred by pandas; confirm the
            # day/month order used by the source data.
            fechaInicial = pd.to_datetime(start_raw)
            fechaFinal = pd.to_datetime(end_raw)
            duracion = (fechaFinal - fechaInicial).days
        except Exception:
            # Best effort: dates that cannot be parsed or subtracted count as
            # "no data" rather than aborting the whole run.
            return (0, 0)
        # NaT arithmetic yields a float NaN instead of an int day count.
        if isinstance(duracion, float):
            return (0, 0)
        return (duracion, scalar_inh_days(duracion))

    plazo = inhabilitacion.get('plazo')
    # Accept ints as well as floats (JSON ``90`` parses as int), but not bools.
    is_number = isinstance(plazo, (int, float)) and not isinstance(plazo, bool)
    # NaN/inf must not leak out: the final export serialises with
    # allow_nan=False and would fail on them.
    if is_number and plazo == plazo and plazo not in (float("inf"), float("-inf")):
        return (plazo, scalar_inh_days(plazo))
    return (0, 0)
    
def get_fine_value(fine):
    """Return ``(amount, score)`` for a fine record.

    Args:
        fine: Dict expected to carry a numeric ``"monto"``. Any other input
            (None, NaN, free text) means "no fine".

    Returns:
        ``(monto, score)`` where ``score = monto / 100000`` clamped to the
        range 0..10, or ``(0, 0)`` when the amount is missing, not a number,
        NaN or infinite.
    """
    # ``'monto' in "some text"`` is a substring test and indexing the text
    # afterwards would raise, so require a dict up front.
    if not isinstance(fine, dict):
        return (0, 0)
    monto = fine.get('monto')
    is_number = isinstance(monto, (int, float)) and not isinstance(monto, bool)
    # NaN/inf would break the later JSON export done with allow_nan=False.
    if not is_number or monto != monto or monto in (float("inf"), float("-inf")):
        return (0, 0)
    # Clamp on both sides so a negative amount cannot lower the total score.
    return (monto, max(0, min(monto / 100000, 10)))


def get_faults_value(fault):
    """Look up a fault code and return its ``(label, score)`` tuple.

    None, float inputs and codes missing from the table all resolve to the
    ``"NOPR"`` entry, ``("dato no proporcionado", 0)``.
    """
    catalogue = {
        "ABCI": ("actuacion bajo conflicto de intereses",4.9),
        "AFN": ("abuso de funciones",6.4),
        "AG": ("administrativa grave",6.4),
        "ANG": ("administrativa no grave",0.9),
        "ASEX": ("cometer o tolerar conductas de acoso sexual",8.1),
        "AUT": ("abuso de autoridad",6.4),
        "CEX": ("cohecho o extorsion",10),
        "DRP": ("desvio de recursos publicos",10),
        "DSCT": ("desacato",2.5),
        "HSEX": ("cometer o tolerar conductas de hostigamiento sexual",9.2),
        "IDSP": ("incumplimiento en declaracion de situacion patrimonial",3.6),
        "NAD": ("negligencia administrativa",0.9),
        "OJUST": ("obstruccion de la justicia",10),
        "OTRO": ("otro",0.4),
        "PEC": ("peculado",10),
        "VDH": ("violacion a los derechos humanos",10),
        "VLNP": ("violacion leyes y normatividad presupuestal",6.4),
        "VPC": ("violacion procedimientos de contratacion",4.9),
        "DCSP": ("delito cometido por servidores publicos",8.1),
        "EIFM": ("ejercicio indebido de sus funciones en materia migratoria",6.4),
        "UII": ("utilizacion indebida de informacion",6.4),
        "CIND": ("contratacion indebida",6.4),
        "EOCI": ("enriquecimiento oculto u ocultamiento de conflicto de interes",8.1),
        "TINF": ("trafico de influencias",8.1),
        "ENCB": ("encubrimiento",4.9),
        "NOPR": ("dato no proporcionado",0),
    }
    fallback = catalogue["NOPR"]
    missing = fault is None or isinstance(fault, float)
    return fallback if missing else catalogue.get(fault, fallback)




def _spanish_stop_words():
    """Return the NLTK Spanish stop-word set, loading the corpus only once."""
    cached = getattr(_spanish_stop_words, "_cache", None)
    if cached is None:
        cached = frozenset(stopwords.words('spanish'))
        _spanish_stop_words._cache = cached
    return cached


def get_charge(charge):
    """Rate a job title by the keyword stems it contains.

    The title is lower-cased and tokenised; non-alphabetic tokens and Spanish
    stop words are dropped and the rest transliterated with ``unidecode``.
    Every stem in ``charges`` that occurs inside a token contributes its
    rating, and the result is the mean of all contributions.

    Args:
        charge: Job title text. None or a float (NaN) means "not provided".

    Returns:
        ``("dato no proporcionado", 0)`` for missing input, ``(charge, 0)``
        when no stem matches, otherwise ``(charge, mean_rating)``.
    """
    charges = {
    "auxiliar": 1,
    "jef": 5,
    "policia": 4,
    "agente": 4,
    "general": 3,
    "director": 8,
    "administrativ": 3,
    "tecnic": 2,
    "operativ": 2,
    "especialista": 6,
    "president": 10,
    "medic": 4,
    "secretari": 7,
    "subdirector": 9,
    "fiscal": 10,
    "oficinist":2,
    "supervisor":3,
    "investigador":2,
    "asesor":3,
    "analist":4,
    }
    if charge is None or isinstance(charge,float):
        return ("dato no proporcionado",0)
    # Cached: reading the stop-word corpus on every call is needlessly slow
    # when this runs once per row.
    stop_words = _spanish_stop_words()
    words = word_tokenize(charge.lower())
    filtered_words = [unidecode(word) for word in words if word.isalpha() and word not in stop_words]
    ratings = []
    for word in filtered_words:
        matched = [key for key in charges if key in word]
        # Drop stems nested inside a longer matching stem, so that
        # "subdirector" rates 9 instead of averaging with "director" (8).
        ratings.extend(
            charges[key]
            for key in matched
            if not any(key != other and key in other for other in matched)
        )
    if len(ratings) == 0:
        return (charge,0)

    return (charge, float(np.mean(ratings)))


## S3P

In [16]:
# Load the "s3p" tree; public=False tags every row's actor as "particular".
s3p_df = create_dataframe(path="s3p", public=False)

In [17]:
# Aggregate the S3P rows per sanctioned name: one entry per name holding the
# per-report tuples, a report counter and the running weighted score.
s3p_new_data = {}
for _, row in s3p_df.iterrows():
    name_sanctioned = row['particularSancionado']['nombreRazonSocial']

    resolucion = get_resolution_value(get_nested(row, ['resolucion', 'sentido']))
    acto = get_act_value(get_nested(row, ['acto']))
    sancion = get_sanctions_values(get_nested(row, ['tipoSancion']))
    dias_inhabilitado = get_inh_days_value(get_nested(row, ['inhabilitacion']))
    multa = get_fine_value(get_nested(row, ['multa']))

    # Weighted score of this single report (the sanction tuple is stored but
    # carries no weight here).
    suma_total = (
        0.5 * resolucion[1] +
        0.3 * acto[1] +
        0.1 * multa[1] +
        0.1 * dias_inhabilitado[1]
    )

    record = s3p_new_data.get(name_sanctioned)
    if record is None:
        # First report for this name: create its entry.
        s3p_new_data[name_sanctioned] = {
            'resoluciones': [resolucion],
            'actos': [acto],
            'sanciones': [sancion],
            'diasInhabilitado': [dias_inhabilitado],
            'multas': [multa],
            'actor': row['actor'],
            'provenienteDe': row['provenienteDe'],
            'sumaTotal': suma_total,
            'cantidadReportes': 1,
            'nombre':name_sanctioned
        }
        continue

    # Further report for a known name: extend the lists and add its score.
    record['resoluciones'].append(resolucion)
    record['actos'].append(acto)
    record['sanciones'].append(sancion)
    record['diasInhabilitado'].append(dias_inhabilitado)
    record['multas'].append(multa)
    record['cantidadReportes'] += 1
    record['sumaTotal'] += suma_total
    

### Validación

In [18]:
# Tally, over the first ten aggregated S3P entries, how many reports lack each
# field ("dato no proporcionado" labels or zero amounts), printing each entry.
actos = 0
resoluciones = 0
multas = 0
inhn = 0
total = 0
values = list(s3p_new_data.items())[:10]
for value in values:
    print(value)
    total += 1
    record = value[1]
    actos += sum(1 for acto in record['actos'] if acto[0] == 'dato no proporcionado')
    resoluciones += sum(1 for resolucion in record['resoluciones'] if resolucion[0] == 'dato no proporcionado')
    multas += sum(1 for multa in record['multas'] if multa[0] == 0)
    inhn += sum(1 for dias in record['diasInhabilitado'] if dias[0] == 0)

('ABRAHAM ERNESTO COSIO TORRES', {'resoluciones': [('sentencia condenatoria', 10)], 'actos': [('delito cometido por particular', 8.1)], 'sanciones': [('inhabilitado, multado, sancion economica', 6.166666666666667)], 'diasInhabilitado': [(5540, 5.357973100127304)], 'multas': [(3047, 0.03047)], 'actor': 'particular', 'provenienteDe': 'BAJA CALIFORNIA SUR', 'sumaTotal': 7.96884431001273, 'cantidadReportes': 1, 'nombre': 'ABRAHAM ERNESTO COSIO TORRES'})
('ALVARO CAMPOS OROZCO', {'resoluciones': [('sentencia condenatoria', 10)], 'actos': [('delito cometido por particular', 8.1)], 'sanciones': [('inhabilitado, multado, sancion economica', 6.166666666666667)], 'diasInhabilitado': [(1461, 5.0757596723307685)], 'multas': [(19244, 0.19244)], 'actor': 'particular', 'provenienteDe': 'BAJA CALIFORNIA SUR', 'sumaTotal': 7.956819967233076, 'cantidadReportes': 1, 'nombre': 'ALVARO CAMPOS OROZCO'})
('CRISTIAN SALVADOR AGUILLON OSUNA', {'resoluciones': [('sentencia condenatoria', 10)], 'actos': [('delit

## S3S

In [19]:
# Load the "s3s" tree; public=True tags every row's actor as "servidor publico".
s3s_df = create_dataframe(path="s3s", public=True)

In [20]:
# Aggregate the S3S rows per sanctioned public servant: one entry per full
# name holding the per-report tuples, a report counter and the running
# weighted score.
s3s_new_data = {}
peso_sanciones = 0.4
peso_puesto = 0.2
peso_faltas = 0.2
peso_multas = 0.1
peso_dias = 0.1

for index, row in s3s_df.iterrows():
    punished = row['servidorPublicoSancionado']
    # Build the full name from the parts that are actually present. Joining
    # with "N/A" defaults crashed on null parts and produced keys such as
    # "JUAN PEREZ N/A" or "JUAN PEREZ " that cannot match a plain full name.
    name_parts = []
    for field in ("nombres", "primerApellido", "segundoApellido"):
        part = punished.get(field)
        if isinstance(part, str) and part.strip():
            name_parts.append(part.strip())
    name_sanctioned = " ".join(name_parts) or "N/A"

    faltas = get_faults_value(get_nested(row, ['tipoFalta', 'clave']))
    sanciones = get_sanctions_values(get_nested(row, ['tipoSancion']))
    puesto = get_charge(get_nested(punished, ['puesto']))
    dias_inhabilitado = get_inh_days_value(get_nested(row, ['inhabilitacion']))
    multa = get_fine_value(get_nested(row, ['multa']))
    # Weighted score of this single report.
    suma_total = (
        peso_sanciones * sanciones[1] +
        peso_puesto * puesto[1] +
        peso_faltas * faltas[1] +
        peso_multas * multa[1] +
        peso_dias * dias_inhabilitado[1]
    )

    record = s3s_new_data.get(name_sanctioned)
    if record is not None:
        record['faltas'].append(faltas)
        record['sanciones'].append(sanciones)
        # Only the most recent job title is kept (replaced, not appended).
        record['puesto'] = [puesto]
        record['diasInhabilitado'].append(dias_inhabilitado)
        record['multas'].append(multa)
        record['cantidadReportes'] += 1
        # suma_total already is the weighted score of the values appended
        # above, so there is no need to read them back from the lists.
        record['sumaTotal'] += suma_total
    else:
        s3s_new_data[name_sanctioned] = {
            'faltas': [faltas],
            'sanciones': [sanciones],
            'puesto': [puesto],
            'diasInhabilitado': [dias_inhabilitado],
            'multas': [multa],
            'actor': row['actor'],
            'provenienteDe': row['provenienteDe'],
            'sumaTotal': suma_total,
            'cantidadReportes': 1,
            'nombre': name_sanctioned,
        }


  fechaInicial = pd.to_datetime(inhabilitacion['fechaInicial'])
  fechaFinal = pd.to_datetime(inhabilitacion['fechaFinal'])


In [21]:
# Tally, over all aggregated S3S entries, how many reports lack each field
# ("dato no proporcionado" labels, zero amounts or unrated job titles).
faltas = 0
sanciones = 0
multas = 0
inhn = 0
total = 0
puestos = 0
values = list(s3s_new_data.items())
for value in values:
    total += 1
    record = value[1]
    faltas += sum(1 for falta in record['faltas'] if falta[0] == 'dato no proporcionado')
    sanciones += sum(1 for sancion in record['sanciones'] if sancion[0] == 'dato no proporcionado')
    multas += sum(1 for multa in record['multas'] if multa[0] == 0)
    inhn += sum(1 for dias in record['diasInhabilitado'] if dias[0] == 0)
    puestos += sum(1 for puesto in record['puesto'] if puesto[1] == 0)
print(total)
print(faltas)
print(multas)
print(sanciones)
print(inhn)
print(puestos)

6360
2999
7028
1205
1604
2132


In [22]:
# Merge both name-keyed maps. Entries are shared references, not copies, and
# an S3P entry replaces an S3S entry that has the same name.
appended_map = {**s3s_new_data, **s3p_new_data}

## Verificar colisiones con jueces

In [23]:
# Flag every sanctioned entry whose key equals a candidate's full name and
# print the names that matched.
df_aspirantes = pd.read_json("aspirantes.json")
for nombre_completo in df_aspirantes['nombreCompleto']:
    if nombre_completo not in appended_map:
        continue
    appended_map[nombre_completo]['buscaCargo'] = True
    print(nombre_completo)

JOSE RAMON ANGUIANO GOMEZ
JUAN CARLOS REYES SANCHEZ
LEONILA MENDOZA MONTIEL
MARIA TERESA RAMIREZ HERNANDEZ
PABLO CAMPOS RAMIREZ
RICARDO JIMENEZ SANCHEZ
ROBERTO GACHUZ HERNANDEZ
VERONICA GUTIERREZ FUENTES
VICTOR MANUEL PEREZ CABRERA
CORINA ELIZABETH CASTELLANOS DIAZ
JUAN MANUEL SERRATOS GARCIA
SALVADOR PEREZ GOMEZ
ALBERTO JAVIER GOMEZ PINTO
EDGAR RAMON COSIO MARIA
JORGE HERNANDEZ MARTINEZ
FERNANDO LETIPICHIA TORRES
GERARDO MARTINEZ APORTELA
HAHNEMANN VALDOVINOS GONZALEZ
JORGE MENDOZA RUIZ
JORGE LUIS GONZALEZ ESPINOSA
JOSE LUIS MARISCAL OLIVAREZ


In [24]:
# Spot-check the merged record of one of the names matched above.
appended_map['CORINA ELIZABETH CASTELLANOS DIAZ']

{'faltas': [('incumplimiento en declaracion de situacion patrimonial', 3.6),
  ('incumplimiento en declaracion de situacion patrimonial', 3.6),
  ('incumplimiento en declaracion de situacion patrimonial', 3.6)],
 'sanciones': [('inhabilitado', 10.0),
  ('dato no proporcionado', 0.0),
  ('inhabilitado', 10.0)],
 'puesto': [('CONSEJERO DISTRITAL', 0)],
 'diasInhabilitado': [(61, 0.8026315789473684),
  (0, 0),
  (184, 2.4210526315789473)],
 'multas': [(0, 0), (0, 0), (0, 0)],
 'actor': 'servidor publico',
 'provenienteDe': 'JALISCO',
 'sumaTotal': 10.48236842105263,
 'cantidadReportes': 3,
 'nombre': 'CORINA ELIZABETH CASTELLANOS DIAZ',
 'buscaCargo': True}

In [25]:
# Flatten the name-keyed map into a plain list of records for export.
final_data = list(appended_map.values())

In [26]:
# Serialise before opening the file: allow_nan=False makes json.dumps raise on
# NaN/Infinity, and doing it first keeps a failed run from truncating the file.
json_appended_map = json.dumps(final_data, allow_nan=False)
with open("sancionometro.json", mode="w", encoding="UTF-8") as out_file:
    out_file.write(json_appended_map)