In [37]:
import os
import pandas as pd
import re
from tqdm import tqdm

def build_data(raw_path='./csv/'):
    raw_files = os.listdir(raw_path)
    if not raw_path:
        return
    data = []
    print('reading from '+str(raw_path))
    for raw_file_name in tqdm(raw_files):
        raw_file = pd.read_csv(os.path.join(raw_path, raw_file_name), header = None)
        parking_name = raw_file_name[:-4]
        
        loc_str = raw_file[1][2]
        start_co = loc_str.rindex('：')
        longitude, latitude = loc_str[start_co+1:].split(',') # 经纬度
        total_space = int(raw_file[1][3])
        monthly_fee = try_get_monthly_fee(raw_file[1][5])
        
        building_type = raw_file[1][6]
        data.append([parking_name, float(longitude), float(latitude), int(total_space), int(monthly_fee), building_type])
    df = pd.DataFrame(data, columns=['parking_name', 'latitude', 'longitude', 'total_space','monthly_fee','building_type'])
    print("finished build_data!")
    return df
    

def try_get_monthly_fee(fee_str):
    monthly_fee = 300  # 300 is monthly mean
    fee_str = str(fee_str)
    if fee_str:
        fee_str = re.findall('\d+', fee_str)
    if fee_str:
        monthly_fee = int(fee_str[0])
    if monthly_fee < 50: # daily fee convert to monthly fee, 300 is monthly mean, 10 is hourly mean
        monthly_fee = 300*monthly_fee/10
    return monthly_fee


def xlsx2csv(xlsx_path='./xlsx'):
    raw_files = os.listdir(xlsx_path)
    if not xlsx_path:
        return
    save_path = './csv/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    data = []
    print(raw_files)
    for raw_file in tqdm(raw_files):
        new_file = save_path+os.path.splitext(raw_file)[0]+'.csv'
        if os.path.exists(new_file):
            continue
        print(raw_file)
        raw_excel = pd.read_excel(os.path.join(xlsx_path, raw_file), header = None).drop(0, 1)
        raw_excel.to_csv(new_file,encoding='utf-8', index=False, header=False)
    print("finished xlsx2csv!")

In [38]:
pl_df = build_data()

  2%|█▋                                                                                | 2/100 [00:00<00:09, 10.01it/s]

reading from ./csv/


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:09<00:00, 10.36it/s]

finished build_data!





In [39]:
# import folium

# inital location
latitude = 22.573682
longitude = 114.134991

# Create map for Luohu and display it
luohu_map = folium.Map(location=[latitude, longitude], zoom_start=12)

# Instantiate a feature group for the parkings in the dataframe
parkings = folium.map.FeatureGroup()

# Loop through and add each to the parkings feature group
for lat, lng, name in zip(pl_df.latitude, pl_df.longitude, pl_df.parking_name):
    parkings.add_child(
        folium.CircleMarker(
            [lat, lng],
            radius=5, # define how big you want the circle markers to be
            color='yellow',
            fill=True,
            fill_color='red',
            fill_opacity=0.6,
            popup=name,
            encode='uft-8'
        )
    )

# Add parkings to map
luohu_map = folium.Map(location=[latitude, longitude], zoom_start=13)
luohu_map.add_child(parkings)

In [40]:
left_top = pl_df.loc[pl_df["parking_name"] =='宝丽大厦'].iloc[0]
right_bottom = pl_df.loc[pl_df["parking_name"] =='合作金融大厦'].iloc[0]

In [43]:
pl_df.loc[(pl_df["parking_name"] =='宝丽大厦') | (pl_df["parking_name"] =='合作金融大厦')]

Unnamed: 0,parking_name,latitude,longitude,total_space,monthly_fee,building_type
29,合作金融大厦,22.54983,114.123911,26,900,商业
39,宝丽大厦,22.553417,114.11695,73,110,住宅


In [44]:
pl_df.loc[(pl_df["latitude"] <=left_top.latitude)&(pl_df["longitude"] >=left_top.longitude)&
          (pl_df["latitude"] >=right_bottom.latitude)&(pl_df["longitude"] <=right_bottom.longitude)]

Unnamed: 0,parking_name,latitude,longitude,total_space,monthly_fee,building_type
22,半岛大厦,22.552334,114.123596,30,450,写字楼
24,华安国际大酒店,22.550429,114.117016,135,500,写字楼
29,合作金融大厦,22.54983,114.123911,26,900,商业
39,宝丽大厦,22.553417,114.11695,73,110,住宅
42,工人文化宫,22.553094,114.122586,150,300,公共
45,振业大厦,22.551051,114.11769,90,500,写字楼
50,桂花大厦,22.553402,114.119239,93,110,写字楼
54,永新商业城,22.550572,114.123378,48,1200,商业
63,电影大厦,22.551416,114.120013,58,250,住宅
67,红围坊停车场,22.551483,114.119224,103,110,住宅


In [49]:
from geopy import distance

coords_1 = (left_top.latitude, left_top.longitude)
coords_2 = (right_bottom.latitude, right_bottom.longitude)

print(distance.distance(coords_1, coords_2).km)

0.8187975024701322


In [104]:
int(re.findall('\d+', excel_data[2][5])[0])

350

In [105]:
excel_data[2][6]

'住宅'

In [26]:
start_co = excel_data[2][2].rindex('：')

In [34]:
latitude, longitude = excel_data[2][2][start_co+1:].split(',')

In [61]:
excel_dat

0                               NaN
1                            停车场名称:
2                             地理位置：
3                         规划的停车位数量：
4                           是否对外开放：
                    ...            
69960    2016-12-25 22:49:47.656000
69961    2016-12-25 23:17:01.656000
69962    2016-12-25 18:27:18.656000
69963    2016-12-25 23:31:42.656000
69964    2016-12-25 23:19:27.656000
Name: 1, Length: 69965, dtype: object

In [22]:
excel_data[2][2]

'丰园酒店停车场地址：深圳市罗湖区禾塘路32-2号附近 坐标：114.130616,22.552875'

In [None]:
all_files = []

In [4]:
import os
import pandas as pd
import re

def build_data_from_excel(raw_path):
    raw_files = os.listdir(raw_path)
    if not raw_path:
        return
    data = []
    for raw_file in raw_files:
        raw_excel = pd.read_excel(os.path.join(raw_path, raw_file), header = None)
        parking_name = raw_file[:-5]
        
        loc_str = raw_excel[2][2]
        start_co = loc_str.rindex('：')
        latitude, longitude = loc_str[start_co+1:].split(',')
        
        total_space = int(raw_excel[2][3])
        
        monthly_fee = int(re.findall('\d+', raw_excel[2][5])[0])
        if monthly_fee < 50: # daily fee convert to monthly fee
            monthly_fee = 300*monthly_fee/10
        
        building_type = raw_excel[2][6]
        data.append([parking_name, latitude, longitude, total_space, monthly_fee, building_type])
        print(parking_name, latitude, longitude, total_space, monthly_fee, building_type)
    df = pd.DataFrame(data, columns=['parking_name', 'latitude', 'longitude', 'total_space','monthly_fee','building_type'])
    return df

In [123]:
pl_df.loc[pl_df["monthly_fee"] >50]['monthly_fee'].mean()

308.06451612903226

In [None]:
# todo: 重现别人的GCN+LSTM，or build your graph