In [1]:
import re
import math

from data_io.oss.obs_client import ObsLib
from data_io.mongo.mongo_client import MongoPoolDao
from data_io.mysql.mysql_client import MysqlPoolDao

%load_ext autoreload
%autoreload 2

from tqdm.notebook import tqdm
from tqdm.contrib.concurrent import thread_map 

import pandas as pd
from collections import defaultdict

from prefect import task, Flow, Parameter
from prefect.executors import LocalDaskExecutor
from prefect.schedules import IntervalSchedule
from datetime import timedelta, datetime
import pendulum

In [2]:
# Module-level constants for Open Location Code encoding (used by
# NewLocationCode.encode).

# A separator used to break the code into two parts to aid memorability.
SEPARATOR_ = '+'

# The number of characters to place before the separator.
SEPARATOR_POSITION_ = 8

# The character used to pad codes.
PADDING_CHARACTER_ = '0'

# The character set used to encode the values.
CODE_ALPHABET_ = '23456789CFGHJMPQRVWX'

# The base to use to convert numbers to/from.
ENCODING_BASE_ = len(CODE_ALPHABET_)

# The maximum value for latitude in degrees.
LATITUDE_MAX_ = 90

# The maximum value for longitude in degrees.
LONGITUDE_MAX_ = 180

# The max number of digits to process in a plus code.
MAX_DIGIT_COUNT_ = 15

# Maximum code length using lat/lng pair encoding. The area of such a
# code is approximately 13x13 meters (at the equator), and should be suitable
# for identifying buildings. This excludes prefix and separator characters.
PAIR_CODE_LENGTH_ = 10

# First place value of the pairs (if the last pair value is 1).
# Floor division keeps the exponent -- and therefore this constant -- an int;
# true division would silently turn it into a float.
PAIR_FIRST_PLACE_VALUE_ = ENCODING_BASE_**(PAIR_CODE_LENGTH_ // 2 - 1)

# Inverse of the precision of the pair section of the code.
PAIR_PRECISION_ = ENCODING_BASE_**3

# The resolution values in degrees for each position in the lat/lng pair
# encoding. These give the place value of each position, and therefore the
# dimensions of the resulting area.
PAIR_RESOLUTIONS_ = [20.0, 1.0, .05, .0025, .000125]

# Number of digits in the grid precision part of the code.
GRID_CODE_LENGTH_ = MAX_DIGIT_COUNT_ - PAIR_CODE_LENGTH_

# Number of columns in the grid refinement method.
GRID_COLUMNS_ = 4

# Number of rows in the grid refinement method.
GRID_ROWS_ = 5

# First place value of the latitude grid (if the last place is 1).
GRID_LAT_FIRST_PLACE_VALUE_ = GRID_ROWS_**(GRID_CODE_LENGTH_ - 1)

# First place value of the longitude grid (if the last place is 1).
GRID_LNG_FIRST_PLACE_VALUE_ = GRID_COLUMNS_**(GRID_CODE_LENGTH_ - 1)

# Multiply latitude by this much to make it a multiple of the finest
# precision.
FINAL_LAT_PRECISION_ = PAIR_PRECISION_ * GRID_ROWS_**GRID_CODE_LENGTH_

# Multiply longitude by this much to make it a multiple of the finest
# precision.
FINAL_LNG_PRECISION_ = PAIR_PRECISION_ * GRID_COLUMNS_**GRID_CODE_LENGTH_

# Minimum length of a code that can be shortened.
MIN_TRIMMABLE_CODE_LEN_ = 6

# Same value as the last (finest) entry of PAIR_RESOLUTIONS_.
# NOTE(review): not referenced anywhere in the visible notebook -- confirm
# whether it is still needed.
GRID_SIZE_DEGREES_ = 0.000125


In [3]:
class NewLocationCode:
    """Encoder for Open Location Codes ("plus codes").

    Only encoding is implemented. Every tuning value (alphabet, code lengths,
    grid dimensions, precisions) is read from the module-level constants whose
    names end in an underscore.
    """

    @classmethod
    def encode(cls, latitude, longitude, codeLength=PAIR_CODE_LENGTH_):
        """Encode a latitude/longitude pair as an Open Location Code string.

        Args:
            latitude: Latitude in degrees. Values outside [-90, 90] are
                clipped to that range.
            longitude: Longitude in degrees. Normalized into [-180, 180).
            codeLength: Number of code digits wanted, not counting the
                separator or padding. Values above MAX_DIGIT_COUNT_ are
                capped to it.

        Returns:
            The code string. SEPARATOR_ is inserted after SEPARATOR_POSITION_
            characters; codes with fewer digits than that are right-padded
            with PADDING_CHARACTER_ up to the separator.

        Raises:
            ValueError: If ``codeLength`` is below 2, or is odd and below
                PAIR_CODE_LENGTH_, or if ``longitude`` is NaN or infinite.
        """
        if codeLength < 2 or (codeLength < PAIR_CODE_LENGTH_ and
                              codeLength % 2 == 1):
            raise ValueError('Invalid Open Location Code length - ' +
                             str(codeLength))
        # An infinite longitude would never leave the normalization loops
        # below, and NaN cannot be converted to an integer further down, so
        # reject both up front with the same exception type.
        if not math.isfinite(longitude):
            raise ValueError('Invalid longitude - ' + str(longitude))
        codeLength = min(codeLength, MAX_DIGIT_COUNT_)
        # Ensure that latitude and longitude are valid.
        latitude = cls.clipLatitude(latitude)
        longitude = cls.normalizeLongitude(longitude)
        # Latitude 90 needs to be adjusted to be just less, so the returned code
        # can also be decoded.
        if latitude == 90:
            latitude = latitude - cls.computeLatitudePrecision(codeLength)
        code = ''

        # Compute the code.
        # This approach converts each value to an integer after multiplying it by
        # the final precision. This allows us to use only integer operations, so
        # avoiding any accumulation of floating point representation errors.

        # Multiply values by their precision and convert to positive.
        # Force to integers so the division operations will have integer results.
        # Note: Python requires rounding before truncating to ensure precision!
        latVal = int(round((latitude + LATITUDE_MAX_) * FINAL_LAT_PRECISION_, 6))
        lngVal = int(round((longitude + LONGITUDE_MAX_) * FINAL_LNG_PRECISION_, 6))

        # Compute the grid part of the code if necessary.
        if codeLength > PAIR_CODE_LENGTH_:
            # Digits are produced least-significant first, hence the prepend.
            for _ in range(GRID_CODE_LENGTH_):
                latDigit = latVal % GRID_ROWS_
                lngDigit = lngVal % GRID_COLUMNS_
                ndx = latDigit * GRID_COLUMNS_ + lngDigit
                code = CODE_ALPHABET_[ndx] + code
                latVal //= GRID_ROWS_
                lngVal //= GRID_COLUMNS_
        else:
            # No grid digits wanted: drop the grid precision in one step.
            latVal //= pow(GRID_ROWS_, GRID_CODE_LENGTH_)
            lngVal //= pow(GRID_COLUMNS_, GRID_CODE_LENGTH_)
        # Compute the pair section of the code (latitude digit first in each
        # pair, again built from the least-significant end).
        for _ in range(PAIR_CODE_LENGTH_ // 2):
            code = CODE_ALPHABET_[lngVal % ENCODING_BASE_] + code
            code = CODE_ALPHABET_[latVal % ENCODING_BASE_] + code
            latVal //= ENCODING_BASE_
            lngVal //= ENCODING_BASE_

        # Add the separator character.
        code = code[:SEPARATOR_POSITION_] + SEPARATOR_ + code[SEPARATOR_POSITION_:]

        # If we don't need to pad the code, return the requested section.
        if codeLength >= SEPARATOR_POSITION_:
            return code[0:codeLength + 1]

        # Pad and return the code: short codes are filled with the padding
        # character up to the separator position.
        padding = PADDING_CHARACTER_ * (SEPARATOR_POSITION_ - codeLength)
        return code[0:codeLength] + padding + SEPARATOR_

    @classmethod
    def clipLatitude(cls, latitude):
        """Return ``latitude`` limited to the closed range [-90, 90]."""
        return min(90, max(-90, latitude))

    @classmethod
    def computeLatitudePrecision(cls, codeLength):
        """Return the latitude precision, in degrees, for ``codeLength`` digits.

        ``encode`` subtracts this from a latitude of exactly 90 so the
        encoded point lies just below the pole.
        """
        if codeLength <= 10:
            return pow(20, math.floor((codeLength / -2) + 2))
        return pow(20, -3) / pow(GRID_ROWS_, codeLength - 10)

    @classmethod
    def normalizeLongitude(cls, longitude):
        """Return ``longitude`` shifted by multiples of 360 into [-180, 180)."""
        while longitude < -180:
            longitude = longitude + 360
        while longitude >= 180:
            longitude = longitude - 360
        return longitude

    

In [4]:
# Module-level list that PlusCodeCluster.get_everyday_pluscode_cluster fills
# with one DataFrame per qualifying cluster.
# NOTE(review): this is shared mutable state; its contents outlive a single
# call for as long as the kernel/process stays alive.
frames = []

In [5]:
class PlusCodeCluster:
    """Group recent orders by plus-code prefix and report the crowded prefixes."""

    # Arguments handed to MysqlPoolDao, one DAO per entry.
    # NOTE(review): presumably one database per app -- confirm.
    _ORDER_DB_IDS = (202102, 202103, 202104)

    # Orders joined to repay_plan whose apply_time falls within the last
    # 2 days and whose check_status is 7. The same statement is run against
    # every entry of _ORDER_DB_IDS.
    _RECENT_ORDERS_SQL = '''
        select
            repay_plan.order_id,
            check_status,
            FROM_UNIXTIME(orders.apply_time/1000, "%Y-%m-%d") apply_time,
            is_reloan
        from orders inner join repay_plan on orders.id=repay_plan.order_id
        where orders.apply_time > UNIX_TIMESTAMP((NOW()-INTERVAL 2 DAY))*1000 and check_status = 7
        '''

    # Rows sharing this many leading plus-code characters form one group.
    _CLUSTER_PREFIX_LEN = 7

    # A group is reported only when it has MORE rows than this.
    _CLUSTER_SIZE_THRESHOLD = 5

    # Columns of the (empty) frame returned when nothing qualifies.
    _RESULT_COLUMNS = ('order_id', 'check_status', 'apply_time', 'is_reloan',
                       'plus_code', 'lat', 'lon', 'cluster_name')

    # Plus-code clustering over the last 48 hours (not split by app).
    @classmethod
    def get_everyday_pluscode_cluster(cls):
        """Return the recent orders that fall into a crowded plus-code prefix.

        Rows are fetched from every database in ``_ORDER_DB_IDS``, annotated
        with a plus code by ``get_plus_code`` (10 worker threads), grouped by
        the first ``_CLUSTER_PREFIX_LEN`` characters of that code, and kept
        when their group has more than ``_CLUSTER_SIZE_THRESHOLD`` rows.

        Returns:
            A DataFrame of the kept rows with an added ``cluster_name`` column
            holding the shared prefix, de-duplicated on ``order_id``. When no
            group qualifies (or there are no rows at all) an empty DataFrame
            with the ``_RESULT_COLUMNS`` columns is returned instead of
            raising.

        Side effects:
            The module-level ``frames`` list is overwritten in place with the
            per-cluster DataFrames of this run, so clusters from an earlier
            call or retry in the same process never leak into the result.
        """
        global frames

        everyday_apply = []
        for db_id in cls._ORDER_DB_IDS:
            rows = MysqlPoolDao(db_id).get_many(cls._RECENT_ORDERS_SQL)
            everyday_apply.extend(rows or [])

        # get_plus_code mutates each row dict in place; the return values are
        # not needed.
        thread_map(cls.get_plus_code, everyday_apply, max_workers=10)

        df_orders = pd.DataFrame(everyday_apply)
        frames[:] = cls._split_into_clusters(df_orders)
        if not frames:
            # pd.concat refuses an empty list, so hand back an empty frame.
            return pd.DataFrame(columns=list(cls._RESULT_COLUMNS))

        # NOTE(review): de-duplication happens after the size check, so
        # repeated rows for one order count toward the threshold -- confirm
        # that this is intended.
        return pd.concat(frames).drop_duplicates(subset='order_id', keep='first')

    @classmethod
    def _split_into_clusters(cls, df_orders):
        """Return one DataFrame per plus-code prefix that exceeds the threshold."""
        # With no rows at all the frame has no 'plus_code' column to filter on.
        if 'plus_code' not in df_orders.columns:
            return []
        located = df_orders.dropna(axis=0, subset=['plus_code'])
        if located.empty:
            return []
        prefixes = located['plus_code'].str[:cls._CLUSTER_PREFIX_LEN]
        # assign() returns a new frame, so no group slice is written to.
        return [
            group.assign(cluster_name=prefix)
            for prefix, group in located.groupby(prefixes)
            if len(group) > cls._CLUSTER_SIZE_THRESHOLD
        ]

    # GPS
    @classmethod
    def get_plus_code(cls, bx):
        """Annotate one order row, in place, with the plus code of its GPS fix.

        The order's ``file_key`` is looked up in the ``mexico.origin_data``
        Mongo collection, the referenced object is loaded with
        ``ObsLib.obs_get`` and ``['user_auth']['base']`` of it supplies
        ``lat`` / ``lon``.

        Args:
            bx: Mutable mapping with an ``order_id`` key. ``plus_code`` is
                always set on it: the encoded value, or ``None`` when the
                order has no Mongo record, a falsy coordinate, or a
                coordinate that cannot be encoded. ``lat`` and ``lon`` are
                copied over (unconverted) only when a code was produced.

        Returns:
            None.
        """
        bx['plus_code'] = None

        mongo_client = MongoPoolDao().get_mongo_client()
        origin_dao = mongo_client['mexico']['origin_data']
        bean = origin_dao.find_one({'order_id': bx['order_id']}, {'file_key': 1})
        if not bean:
            return

        base = ObsLib.obs_get(bean['file_key'])['user_auth']['base']
        lat, lon = base['lat'], base['lon']
        if not (lon and lat):
            return

        try:
            plus_code = NewLocationCode.encode(float(lat), float(lon))
        except (TypeError, ValueError):
            # One unparseable coordinate must not abort the whole batch;
            # treat it like a missing GPS fix.
            return

        bx['plus_code'] = plus_code
        bx['lat'] = lat
        bx['lon'] = lon


In [6]:
from sqlalchemy import create_engine

In [7]:
def save_to_sql(df, table_name):
    """Write ``df`` to the ``risk_center`` MySQL database as ``table_name``.

    Args:
        df: Object providing pandas' ``to_sql`` method (a DataFrame).
        table_name: Destination table. An existing table of that name is
            replaced (``if_exists='replace'``); the index is not written.

    The connection URL is taken from the ``RISK_CENTER_DB_URL`` environment
    variable when it is set, otherwise the built-in URL is used. The engine
    is disposed once the write finishes or fails, so each call releases its
    connection pool.
    """
    import os  # local import keeps this cell runnable on its own

    print(f">>> saving to sql {table_name}...")
    # SECURITY: the fallback URL embeds a live credential in source control.
    # It is kept only so existing deployments keep working; set
    # RISK_CENTER_DB_URL, rotate the password, then remove the literal.
    db_url = os.environ.get(
        'RISK_CENTER_DB_URL',
        'mysql+pymysql://mexico_risk:BupQ$H4UFNgvy5!#@10.10.1.153:3306/risk_center',
    )
    engine = create_engine(db_url, pool_pre_ping=True)
    try:
        df.to_sql(table_name, engine, index=False, if_exists='replace')
    finally:
        engine.dispose()

In [8]:
@task(max_retries=3, retry_delay=timedelta(seconds=10), log_stdout=True)
def result_output():
    """Prefect task: compute the plus-code clusters and store them.

    Whatever ``PlusCodeCluster.get_everyday_pluscode_cluster()`` returns is
    passed to ``save_to_sql`` for the ``everyday_pluscode_cluster`` table.
    Per the decorator, a failed run is retried up to 3 times, 10 seconds
    apart, and stdout is forwarded to the task log.
    """
    save_to_sql(
        PlusCodeCluster.get_everyday_pluscode_cluster(),
        'everyday_pluscode_cluster',
    )

In [9]:
# result_output()

In [10]:
# Executor for the flow: Prefect's LocalDaskExecutor with the "threads" scheduler.
executor = LocalDaskExecutor(scheduler="threads")

# Daily schedule whose first slot is three seconds after this cell runs
# (current time taken in UTC).
schedule = IntervalSchedule(
    interval=timedelta(days=1),
    start_date=pendulum.now("UTC").add(seconds=3),
)

# The flow consists of the single result_output task.
with Flow(
    name="mx_everyday_pluscode_cluster",
    schedule=schedule,
    executor=executor,
) as flow:
    result_output()

# Register with the 'mx-server' project. For a one-off local run, call
# flow.run() instead.
flow.register(project_name='mx-server')

Flow URL: https://cloud.prefect.io/risk-doowintech-com-s-account/flow/4718c833-cb51-401c-aba7-888adb3dcc24
 └── ID: 010368e9-30c9-4960-a3c4-466d8052513d
 └── Project: mx-server
 └── Labels: ['ecs-64b9']


'010368e9-30c9-4960-a3c4-466d8052513d'

In [11]:
# df_code

In [12]:
# df_code[(df_code['apply_cnt']>10) & (df_code['rate']>.5)]

In [13]:
# df_75GQG