In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import geopandas as gpd
from typing import Optional
from dataclasses import dataclass, field
from tqdm import tqdm
tqdm.pandas()

In [None]:
class Generate_data:
    """Generate a synthetic event table: one random date and one map location per row.

    Dates are drawn from ``start_date`` (inclusive) to ``end_date`` (exclusive)
    with weights that grow with the day offset. Locations are centroids of the
    features in the DataV China GeoJSON, shifted by a random offset.
    """

    def __init__(self, start_date: str = '2020-03-01', end_date: str = '2022-06-06', big_N: int = 50000):
        """Store the date window (``'%Y-%m-%d'`` strings) and the number of rows to generate."""
        self.start_date = start_date
        self.end_date = end_date
        self.big_N = big_N

    def faker_data(self, x_gap=10, y_gap=10):
        """Build the synthetic table (downloads the map GeoJSON over the network).

        Args:
            x_gap: Currently unused; kept so existing callers keep working.
            y_gap: Currently unused; kept so existing callers keep working.

        Returns:
            pandas.DataFrame with columns ``date``, ``lon`` and ``lat`` and
            ``big_N`` rows, sorted by ``date`` with a fresh 0..N-1 index.

        Raises:
            ValueError: If ``end_date`` is not later than ``start_date``.
        """
        # part 1: candidate dates, end_date itself excluded.
        first_day = datetime.strptime(self.start_date, '%Y-%m-%d')
        last_day = datetime.strptime(self.end_date, '%Y-%m-%d')
        n_days = (last_day - first_day).days
        if n_days <= 0:
            raise ValueError(
                f"end_date ({self.end_date}) must be later than start_date ({self.start_date})")
        date_list = [(first_day + timedelta(days=i)).date() for i in range(n_days)]

        # Sampling weights log(i**3 + 20) + 20, normalised to sum to 1, so
        # later days are somewhat more likely than earlier ones.
        value = np.arange(n_days)
        value = np.log(value ** 3 + 20) + 20
        value = value / np.sum(value)

        generate_date = np.random.choice(date_list, self.big_N, p=value)

        # part 2: locations.
        # https://geo.datav.aliyun.com/areas_v3/bound/100000_full.json
        chinamap = gpd.read_file(
            "https://geo.datav.aliyun.com/areas_v3/bound/geojson?code=100000_full")
        centroids = chinamap.centroid  # computed once and reused for both axes
        # Features without a usable centroid are dropped so they cannot
        # inject NaN coordinates into the sample.
        centers = pd.DataFrame({'lon': centroids.x, 'lat': centroids.y}).dropna()

        # One vectorised draw with replacement replaces the per-row
        # ``sample(n=1)`` loop; every centroid is still equally likely.
        locations = centers.sample(n=self.big_N, replace=True).reset_index(drop=True)
        # Shift every point by up to 5 units along each axis (positive direction only).
        locations['lon'] += np.random.random(size=locations.shape[0]) * 5
        locations['lat'] += np.random.random(size=locations.shape[0]) * 5

        faker_data = pd.concat(
            [pd.DataFrame({'date': generate_date}), locations], axis=1
        ).sort_values(by=['date']).reset_index(drop=True)

        return faker_data


# Build the synthetic dataset with the default date window and row count.
fk = Generate_data()
faker_data = fk.faker_data()
# Last expression of the cell: display the generated frame.
faker_data


In [None]:
@dataclass
class PlotParam:
    """Geometry and timing settings read by the plotting and animation cells."""

    # --- first plot: extent of the canvas the random scatter is drawn on ---
    plot_x_range_low: int = field(default=0, metadata={'help': 'default0'})
    plot_x_range_high: int = field(default=1000, metadata={'help': 'default 1000'})
    plot_y_range_low: int = field(default=0, metadata={'help': 'default0'})
    plot_y_range_high: int = field(default=1000, metadata={'help': 'default 1000'})

    # --- second plot: share of the canvas covered by the per-day histogram ---
    plot_sec_start_prop_x: float = field(default=0.2, metadata={'help': ''})
    plot_sec_end_prop_x: float = field(default=0.8, metadata={'help': ''})

    plot_sec_start_prop_y: float = field(default=0.1, metadata={'help': ''})
    plot_sec_end_prop_y: float = field(default=0.4, metadata={'help': ''})

    # --- animation timing ---
    # Number of frames pre-computed for each transition.
    epoch_num: int = field(default=1000, metadata={'help': ''})
    # Share of the epochs over which the points' start times are spread.
    end_run_num_prop: float = field(default=0.8, metadata={'help': ''})
    # Number of epochs a single point needs to travel to its target.
    trans_length: int = field(default=50, metadata={'help': ''})

    # --- third plot: vertical share of the canvas covered by the map ---
    plot_3th_start_prop_y: float = field(default=0.2, metadata={'help': ''})
    plot_3th_end_prop_y: float = field(default=0.8, metadata={'help': ''})

# Shared plotting configuration, using the dataclass defaults.
plotparm = PlotParam()

# get data: work on a copy of the generated table and tag each row with its index.
rawdata = faker_data.assign(unique_id=faker_data.index)
rawdata


In [None]:
# plot 1: place every record at a uniformly random integer position on the canvas.
plot_init_df = pd.DataFrame({
    'x': np.random.randint(plotparm.plot_x_range_low, plotparm.plot_x_range_high,
                           size=rawdata.shape[0]),
    # y is drawn from the y range; the x range was previously used here by
    # mistake, which only looked right because both ranges default to 0..1000.
    'y': np.random.randint(plotparm.plot_y_range_low, plotparm.plot_y_range_high,
                           size=rawdata.shape[0]),
})
plot_init_df['unique_id'] = rawdata['unique_id']

fig, ax = plt.subplots(figsize=(20, 10))
ax.scatter(plot_init_df['x'], plot_init_df['y'], s=1, color='black')

In [None]:
plot_init_df

In [None]:
# Number of records per day; the counts become the bar heights of the second plot.
test = (
    rawdata
    .groupby(['date'])
    .agg(value=('unique_id', 'count'))
    .reset_index()
)

# Horizontal position of each day: its rank spread linearly over the
# plot_sec_start_prop_x..plot_sec_end_prop_x share of the canvas width.
test['sec_x_value'] = (
    test.index / np.max(test.index)
    * (plotparm.plot_x_range_high - plotparm.plot_x_range_low)
    * (plotparm.plot_sec_end_prop_x - plotparm.plot_sec_start_prop_x)
    + (plotparm.plot_x_range_high - plotparm.plot_x_range_low) * plotparm.plot_sec_start_prop_x
)

# Top of each day's bar: the count relative to the busiest day, mapped onto
# the plot_sec_start_prop_y..plot_sec_end_prop_y share of the canvas height.
test['sec_y_value'] = (
    test['value'] / np.max(test['value'])
    * (plotparm.plot_y_range_high - plotparm.plot_y_range_low)
    * (plotparm.plot_sec_end_prop_y - plotparm.plot_sec_start_prop_y)
    + (plotparm.plot_y_range_high - plotparm.plot_y_range_low) * plotparm.plot_sec_start_prop_y
)

def split_data(x):
    """Expand one per-day row into ``x['value']`` evenly spaced heights.

    The heights run from the bottom of the second plot up to
    ``x['sec_y_value'] + 0.01``. The result has one row per height with
    columns ``sec_y_value2`` and ``date`` (the day repeated on every row).
    """
    baseline = (plotparm.plot_y_range_high - plotparm.plot_y_range_low) * plotparm.plot_sec_start_prop_y
    heights = np.linspace(baseline, x['sec_y_value'] + 0.01, x['value'])
    return pd.DataFrame({'sec_y_value2': heights}).assign(date=x['date'])

# One row per record: expand every day into its stack of heights, then bring
# the per-day columns (sec_x_value among them) back in by date.
test2 = pd.concat(test.apply(split_data, axis=1).tolist()).merge(
    test, how='left', on=['date']
)

# Preview of the second layout.
fig, ax = plt.subplots(figsize=(20, 10))
ax.scatter(test2['sec_x_value'], test2['sec_y_value2'], s=1, color='black')
ax.set_xlim(0, 1000)
ax.set_ylim(0, 1000)

In [None]:
# plot_init_df

In [None]:
test2['sec_y_value2'].max()

In [None]:
# Put each record's random start position next to its slot in the histogram.
data_1_2 = pd.concat(
    [
        plot_init_df,
        test2[['sec_x_value', 'sec_y_value2']].rename(columns={'sec_y_value2': 'sec_y_value'}),
    ],
    axis=1,
)
# Stagger the departures: rows start in index order, spread over the first
# end_run_num_prop share of the epochs (truncated to int32).
data_1_2['start_run_time'] = np.int32(
    data_1_2.index / data_1_2.shape[0] * plotparm.epoch_num * plotparm.end_run_num_prop
)
data_1_2

In [None]:
# Map features, fetched from the DataV GeoJSON endpoint.
chinamap = gpd.read_file(
    "https://geo.datav.aliyun.com/areas_v3/bound/geojson?code=100000_full"
)

# Overall bounding box: the extremes over the per-feature boundary boxes.
boundary = chinamap.boundary.bounds
boundary_minx = boundary['minx'].min()
boundary_miny = boundary['miny'].min()
boundary_maxx = boundary['maxx'].max()
boundary_maxy = boundary['maxy'].max()


In [None]:
# Extent of the map's bounding box, in map coordinates.
width = boundary_maxx - boundary_minx
height = boundary_maxy - boundary_miny

# The map covers plot_3th_start_prop_y..plot_3th_end_prop_y of the canvas
# height; its width follows from the bounding-box aspect ratio and is
# centred horizontally on 0.5.
plot_3th_half_width_prop = (
    (plotparm.plot_3th_end_prop_y - plotparm.plot_3th_start_prop_y) * width / height / 2
)
plot_3th_start_x_prop = 0.5 - plot_3th_half_width_prop
plot_3th_start_y_prop = plotparm.plot_3th_start_prop_y

# Lower-left corner of the map rectangle on the canvas.
map_lp = (
    plot_3th_start_x_prop * plotparm.plot_x_range_high,
    plot_3th_start_y_prop * plotparm.plot_y_range_high,
)
map_lp

# Upper-right corner of the map rectangle on the canvas.
map_rh = (
    (0.5 + plot_3th_half_width_prop) * plotparm.plot_x_range_high,
    plotparm.plot_3th_end_prop_y * plotparm.plot_y_range_high,
)
map_rh

In [None]:
np.array([map_lp])

In [None]:
np.array([map_rh])
rawdata

In [None]:

# Rescale lon/lat from the map's bounding box into the canvas rectangle
# spanned by map_lp (lower-left) and map_rh (upper-right).
cal_data3 = pd.DataFrame(
    (
        (rawdata[['lon', 'lat']].values - np.array([[boundary_minx, boundary_miny]]))
        / (np.array([[boundary_maxx, boundary_maxy]]) - np.array([[boundary_minx, boundary_miny]]))
        * (np.array([map_rh]) - np.array([map_lp]))
        + np.array([map_lp])
    ).reshape(-1, 2),
    columns=['3th_x', '3th_y'],
)

# Append the map positions to the stage 1/2 table (aligned on the row index).
data_2_3 = pd.concat([data_1_2, cal_data3], axis=1)
data_2_3

In [None]:
# Preview of the third layout: every record at its map position.
fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(data_2_3['3th_x'], data_2_3['3th_y'], s=1, color='black')
ax.set_xlim(0, 1000)
ax.set_ylim(0, 1000)

In [None]:
# Speed-up: vectorised computation of one frame of the first transition.
def trans2(x, cur_epoch):
    """Return every point's position at ``cur_epoch`` on its way from the scatter to the histogram.

    Args:
        x: DataFrame with columns ``x``, ``y`` (start position),
            ``sec_x_value``, ``sec_y_value`` (target position) and
            ``start_run_time`` (epoch at which the point starts moving).
        cur_epoch: Epoch to render.

    Returns:
        A float DataFrame with columns ``x`` and ``y`` on the same index as
        ``x``. A point stays at its start position until ``start_run_time``,
        moves linearly for ``plotparm.trans_length`` epochs, then rests at its
        target. The input frame is not modified.
    """
    start = x['start_run_time'].to_numpy()
    # Work in float from the start: the start positions are integers, and
    # writing interpolated floats into integer columns relies on a silent
    # upcast that recent pandas versions deprecate.
    origin = x[['x', 'y']].to_numpy(dtype=float)
    target = x[['sec_x_value', 'sec_y_value']].to_numpy(dtype=float)

    finished = start < cur_epoch - plotparm.trans_length
    moving = (cur_epoch - plotparm.trans_length <= start) & (start <= cur_epoch)

    # Points that have not started yet keep their start position.
    positions = origin.copy()
    positions[finished] = target[finished]

    # Weight of the start position: 1 when the point sets off, 0 on arrival.
    weight = (1 - (cur_epoch - start[moving]) / plotparm.trans_length).reshape(-1, 1)
    positions[moving] = (origin[moving] - target[moving]) * weight + target[moving]

    return pd.DataFrame(positions, columns=['x', 'y'], index=x.index)



# Sanity check: positions at epoch 600 of the first transition.
temp_trans_df = trans2(data_1_2, 600)

fig, ax = plt.subplots(figsize=(20, 10))
ax.scatter(temp_trans_df['x'], temp_trans_df['y'], s=1, color='black')
ax.set_xlim(0, 1000)
ax.set_ylim(0, 1000)

In [None]:
# render_data = {i:data_1_2.progress_apply(lambda x: trans(x, i), axis=1).rename(columns={0:'x', 1:'y'}) for i in range(epoch_num)}
from joblib import delayed, Parallel
def render_data_funcTion(i):
    """Compute the frame of the first transition for epoch ``i`` from the global ``data_1_2``."""
    return trans2(x=data_1_2, cur_epoch=i)

# Pre-compute every frame of the first transition in parallel workers.
render_data = Parallel(n_jobs=-1, verbose=20)(
    delayed(render_data_funcTion)(i) for i in range(plotparm.epoch_num)
)

In [None]:
data_2_3

In [None]:
# Speed-up, stage 3: vectorised computation of one frame of the second transition.
def trans3(x, cur_epoch):
    """Return every point's position at ``cur_epoch`` on its way from the histogram to the map.

    ``x`` supplies ``sec_x_value``/``sec_y_value`` (start position),
    ``3th_x``/``3th_y`` (target position) and ``start_run_time``. The result
    is a DataFrame with columns ``x`` and ``y`` on the same index: a point
    stays at its start position until ``start_run_time``, moves linearly for
    ``plotparm.trans_length`` epochs, then rests at its target.
    """
    start = x['start_run_time'].to_numpy()
    origin = x[['sec_x_value', 'sec_y_value']].to_numpy(dtype=float)
    target = x[['3th_x', '3th_y']].to_numpy(dtype=float)

    finished = start < cur_epoch - plotparm.trans_length
    moving = (cur_epoch - plotparm.trans_length <= start) & (start <= cur_epoch)

    # Points that have not started yet keep their start position.
    positions = origin.copy()
    positions[finished] = target[finished]

    # Weight of the start position: 1 when the point sets off, 0 on arrival.
    weight = (1 - (cur_epoch - start[moving]) / plotparm.trans_length).reshape(-1, 1)
    positions[moving] = (origin[moving] - target[moving]) * weight + target[moving]

    return pd.DataFrame(positions, columns=['x', 'y'], index=x.index)



# Sanity check: positions at epoch 100 of the second transition.
temp_trans_df = trans3(data_2_3, 100)

fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(temp_trans_df['x'], temp_trans_df['y'], s=1, color='black')
ax.set_xlim(0, 1000)
ax.set_ylim(0, 1000)

In [None]:
def render_data_funcTion_3(i):
    """Compute the frame of the second transition for epoch ``i`` from the global ``data_2_3``."""
    return trans3(x=data_2_3, cur_epoch=i)

# Pre-compute every frame of the second transition in parallel workers.
render_data3 = Parallel(n_jobs=-1, verbose=20)(
    delayed(render_data_funcTion_3)(i) for i in range(plotparm.epoch_num)
)

In [None]:
from shapely.geometry import Polygon, MultiPolygon

def geometry2normal(index):
    """Return the outline of map feature ``index`` rescaled into canvas coordinates.

    Args:
        index: Position of the feature in the global ``chinamap``.

    Returns:
        DataFrame with columns ``x`` and ``y``: the exterior ring of the
        feature, mapped from the map's bounding box into the canvas rectangle
        spanned by ``map_lp`` and ``map_rh``. For a MultiPolygon only the
        first member polygon is used.

    Raises:
        TypeError: If the geometry is neither a Polygon nor a MultiPolygon.
    """
    # Positional lookup, matching callers that pass range(len(chinamap)).
    shape = chinamap.geometry.iloc[index]

    if isinstance(shape, MultiPolygon):
        # Multi-part geometries are not iterable in Shapely 2.x; their
        # members are reached through ``.geoms``.
        ring = shape.geoms[0].exterior
    elif isinstance(shape, Polygon):
        ring = shape.exterior
    else:
        raise TypeError(
            f"feature {index} has unsupported geometry type {type(shape).__name__}")

    xs, ys = ring.coords.xy
    outline = np.column_stack([np.asarray(xs), np.asarray(ys)])

    # Same rescaling as for the data points: bounding box -> canvas rectangle.
    box_min = np.array([boundary_minx, boundary_miny])
    box_max = np.array([boundary_maxx, boundary_maxy])
    canvas_min = np.array(map_lp)
    canvas_max = np.array(map_rh)
    scaled = (outline - box_min) / (box_max - box_min) * (canvas_max - canvas_min) + canvas_min

    return pd.DataFrame(scaled.reshape(-1, 2), columns=['x', 'y'])


# Canvas-space outline of every map feature (reused by the animation cell).
map_list_trans = [geometry2normal(index=index) for index in range(chinamap.shape[0])]


fig, ax = plt.subplots(figsize=(10, 10))

# Draw the outlines computed above instead of recomputing each one, and cover
# every feature in the map rather than a hard-coded count of 35.
for temp_map_df in map_list_trans:
    ax.plot(temp_map_df['x'], temp_map_df['y'], color='black')

ax.set_xlim([0, 1000])
ax.set_ylim([0, 1000])



In [None]:
# chinamap

In [None]:
# %matplotlib
from matplotlib.animation import FuncAnimation
from matplotlib import animation

# High-resolution canvas for the final animation.
fig, ax = plt.subplots(figsize=(10, 10), dpi=300)
xdata = []
ydata = []
# A single marker-only line artist holds all points; its data is replaced on every frame.
ln = plt.plot([], [], '.', ms=1, color='gray')[0]

# lines = []
# for index in range(len(map_list_trans)):

#     map_line, = plt.plot([], [], color='red')
#     lines.append(map_line)

def init():
    """Prepare the axes for the animation.

    Sets the axis limits to the canvas extent configured in ``plotparm``
    (previously hard-coded to 0..1000, which matches the defaults) and
    returns the artists to redraw as a one-element tuple.
    """
    ax.set_xlim(plotparm.plot_x_range_low, plotparm.plot_x_range_high)
    ax.set_ylim(plotparm.plot_y_range_low, plotparm.plot_y_range_high)

    return ln,


def update(frame, map_reveal_offset=300):
    """Draw animation frame ``frame``.

    Frames ``0 .. len(render_data) - 1`` show the first transition
    (``render_data``); later frames show the second one (``render_data3``).
    The switch point is taken from the pre-computed data instead of a
    hard-coded 1000, so the function follows ``plotparm.epoch_num``.

    Args:
        frame: Zero-based frame number.
        map_reveal_offset: Number of frames into the second transition at
            which the map outlines are drawn in red. The default of 300
            reproduces the previous behaviour (frame 1300 with 1000 epochs).

    Returns:
        One-element tuple holding the point artist ``ln``.
    """
    print(f"{frame}")

    n_stage2 = len(render_data)
    if frame < n_stage2:
        temp_value = render_data[frame]
    else:
        temp_value = render_data3[frame - n_stage2]

    # The outlines are added once and stay on the axes from then on.
    if frame == n_stage2 + map_reveal_offset:
        for outline in map_list_trans:
            ax.plot(outline['x'], outline['y'], c='red')

    ln.set_data(temp_value['x'], temp_value['y'])
    return ln,


# Two stages of epoch_num frames each: scatter -> histogram, then histogram -> map.
ani = FuncAnimation(
    fig,
    update,
    frames=range(plotparm.epoch_num * 2),
    init_func=init,
    blit=True,
)

# Encode the frames to an MP4 file through ffmpeg at 200 frames per second.
writevideo = animation.FFMpegFileWriter(fps=200)
ani.save("test_big_map.mp4", writer=writevideo)
