In [5]:
# -*- coding: utf-8 -*-
import json
import requests
import math

key = 'your key here'  # 这里填写你的百度开放平台的key
x_pi = 3.14159265358979324 * 3000.0 / 180.0
pi = 3.1415926535897932384626  # π
a = 6378245.0  # 长半轴
ee = 0.00669342162296594323  # 扁率


def dist(lat1, lng1, lat2, lng2):
    dx = lng1 - lng2
    dy = lat1 - lat2
    b = (lat1 + lat2) / 2.0
    Lx = dx * (pi / 180.0) * 6367000.0 * math.cos( b * (pi / 180.0))
    Ly = 6367000.0 * dy * (pi / 180.0)
    return math.sqrt(Lx*Lx+Ly*Ly)

def geocode(address):
    """
    利用百度geocoding服务解析地址获取位置坐标
    :param address:需要解析的地址
    :return:
    """
    geocoding = {'s': 'rsv3',
                 'key': key,
                 'city': '全国',
                 'address': address}
    res = requests.get(
        "http://restapi.amap.com/v3/geocode/geo", params=geocoding)
    if res.status_code == 200:
        json = res.json()
        status = json.get('status')
        count = json.get('count')
        if status == '1' and int(count) >= 1:
            geocodes = json.get('geocodes')[0]
            lng = float(geocodes.get('location').split(',')[0])
            lat = float(geocodes.get('location').split(',')[1])
            return [lng, lat]
        else:
            return None
    else:
        return None

def gcj02tobd09(lng, lat):
    """
    火星坐标系(GCJ-02)转百度坐标系(BD-09)
    谷歌、高德——>百度
    :param lng:火星坐标经度
    :param lat:火星坐标纬度
    :return:
    """
    z = math.sqrt(lng * lng + lat * lat) + 0.00002 * math.sin(lat * x_pi)
    theta = math.atan2(lat, lng) + 0.000003 * math.cos(lng * x_pi)
    bd_lng = z * math.cos(theta) + 0.0065
    bd_lat = z * math.sin(theta) + 0.006
    return [bd_lng, bd_lat]

def bd09togcj02(bd_lon, bd_lat):
    """
    百度坐标系(BD-09)转火星坐标系(GCJ-02)
    百度——>谷歌、高德
    :param bd_lat:百度坐标纬度
    :param bd_lon:百度坐标经度
    :return:转换后的坐标列表形式
    """
    x = bd_lon - 0.0065
    y = bd_lat - 0.006
    z = math.sqrt(x * x + y * y) - 0.00002 * math.sin(y * x_pi)
    theta = math.atan2(y, x) - 0.000003 * math.cos(x * x_pi)
    gg_lng = z * math.cos(theta)
    gg_lat = z * math.sin(theta)
    return [gg_lng, gg_lat]

def wgs84togcj02(lng, lat):
    """
    WGS84转GCJ02(火星坐标系)
    :param lng:WGS84坐标系的经度
    :param lat:WGS84坐标系的纬度
    :return:
    """
    if out_of_china(lng, lat):  # 判断是否在国内
        return lng, lat
    dlat = transformlat(lng - 105.0, lat - 35.0)
    dlng = transformlng(lng - 105.0, lat - 35.0)
    radlat = lat / 180.0 * pi
    magic = math.sin(radlat)
    magic = 1 - ee * magic * magic
    sqrtmagic = math.sqrt(magic)
    dlat = (dlat * 180.0) / ((a * (1 - ee)) / (magic * sqrtmagic) * pi)
    dlng = (dlng * 180.0) / (a / sqrtmagic * math.cos(radlat) * pi)
    mglat = lat + dlat
    mglng = lng + dlng
    return [mglng, mglat]

def gcj02towgs84(lng, lat):
    """
    GCJ02(火星坐标系)转GPS84
    :param lng:火星坐标系的经度
    :param lat:火星坐标系纬度
    :return:
    """
    if out_of_china(lng, lat):
        return lng, lat
    dlat = transformlat(lng - 105.0, lat - 35.0)
    dlng = transformlng(lng - 105.0, lat - 35.0)
    radlat = lat / 180.0 * pi
    magic = math.sin(radlat)
    magic = 1 - ee * magic * magic
    sqrtmagic = math.sqrt(magic)
    dlat = (dlat * 180.0) / ((a * (1 - ee)) / (magic * sqrtmagic) * pi)
    dlng = (dlng * 180.0) / (a / sqrtmagic * math.cos(radlat) * pi)
    mglat = lat + dlat
    mglng = lng + dlng
    return [lng * 2 - mglng, lat * 2 - mglat]

def transformlat(lng, lat):
    ret = -100.0 + 2.0 * lng + 3.0 * lat + 0.2 * lat * lat + \
        0.1 * lng * lat + 0.2 * math.sqrt(math.fabs(lng))
    ret += (20.0 * math.sin(6.0 * lng * pi) + 20.0 *
            math.sin(2.0 * lng * pi)) * 2.0 / 3.0
    ret += (20.0 * math.sin(lat * pi) + 40.0 *
            math.sin(lat / 3.0 * pi)) * 2.0 / 3.0
    ret += (160.0 * math.sin(lat / 12.0 * pi) + 320 *
            math.sin(lat * pi / 30.0)) * 2.0 / 3.0
    return ret

def transformlng(lng, lat):
    ret = 300.0 + lng + 2.0 * lat + 0.1 * lng * lng + \
        0.1 * lng * lat + 0.1 * math.sqrt(math.fabs(lng))
    ret += (20.0 * math.sin(6.0 * lng * pi) + 20.0 *
            math.sin(2.0 * lng * pi)) * 2.0 / 3.0
    ret += (20.0 * math.sin(lng * pi) + 40.0 *
            math.sin(lng / 3.0 * pi)) * 2.0 / 3.0
    ret += (150.0 * math.sin(lng / 12.0 * pi) + 300.0 *
            math.sin(lng / 30.0 * pi)) * 2.0 / 3.0
    return ret

def out_of_china(lng, lat):
    """
    判断是否在国内，不在国内不做偏移
    :param lng:
    :param lat:
    :return:
    """
    if lng < 72.004 or lng > 137.8347:
        return True
    if lat < 0.8293 or lat > 55.8271:
        return True
    return False

In [6]:
#coding=utf8
import sys

import pandas as pd

obj=pd.read_csv(r"E://businfo.txt",encoding="utf-8",header=0)

obj = obj.sort_values(by=['timestamps']).where(obj['to_next_dist']==-1).dropna()

obj['timestamps'] = [ str(item)[8:10]+":"+str(item)[10:12]+":"+str(item)[12:14] for item in obj['timestamps'] ]

last = None
obj = obj.sort_values(by=['busid','timestamps'])
drops = []
for idx, row in obj.iterrows():
    if last is not None and row["busid"] == last["busid"] and row["direction"] == last["direction"] and row["next_idx"] == last["next_idx"]:
        dt1 = ( 25 if row["timestamps"][0:2] == "00" else int(row["timestamps"][0:2]) ) * 3600 +  int(row["timestamps"][3:5]) * 60
        dt2 = ( 25 if last["timestamps"][0:2] == "00" else int(last["timestamps"][0:2]) ) * 3600 +  int(last["timestamps"][3:5]) * 60
        if dt1 - dt2 < 1800: drops.append(idx)
    last = row 
obj = obj.drop(drops).dropna()

obj['pos'] = list(map(wgs84togcj02,obj['lng'],obj['lat']))
obj['lat'] = [ item[1] for item in obj['pos'] ]
obj['lng'] = [ item[0] for item in obj['pos'] ]
obj = obj.drop(["pos","to_next_dist"],axis=1)

station = {}
for line in open(r"e:\\lastBus\\StationTransAccCode.csv","r",encoding="utf-8"):
    items = line.strip().split(",")
    station[items[2]] = items[1]

check = {"300内":"B000", "运通201":"B001"}
obj["bc"] = [ check[filter(lambda x: x in item, check).__next__()] for item in obj['name'] ]
obj["pointid"] = [ "%05d"%item for item in obj['pointid']]
obj["busid"] = [ "%d"%item for item in obj['busid']]
obj["next_idx"] = [ "%d"%item for item in obj['next_idx']]
obj["direction"] = [ "%d"%item for item in obj['direction']]
obj["bsc"] = list(map(lambda x,y: x+y ,obj["bc"], obj["pointid"]))
obj = obj.drop(["bc","pointid"],axis=1)
obj["stationname"] = [ station[item] for item in obj['bsc']]
obj.to_csv(r"e://businfo.csv")
obj


Unnamed: 0,name,direction,busid,timestamps,lng,lat,next_idx,bsc,stationname
84551,运通201(来广营北-六里桥长途站),1,9239,07:20:03,116.461245,40.023219,1,B00100019,来广营北
84841,运通201(来广营北-六里桥长途站),1,9239,07:22:03,116.464288,40.020516,2,B00100055,来广营路口西
85421,运通201(来广营北-六里桥长途站),1,9239,07:25:47,116.467054,40.013724,3,B00100141,广顺桥南
85915,运通201(来广营北-六里桥长途站),1,9239,07:28:47,116.467447,40.006666,4,B00100202,望京花园西区
86469,运通201(来广营北-六里桥长途站),1,9239,07:32:17,116.462811,39.999442,5,B00100292,南湖东园
86763,运通201(来广营北-六里桥长途站),1,9239,07:34:03,116.462745,39.996200,6,B00100324,南湖南路北口
87028,运通201(来广营北-六里桥长途站),1,9239,07:35:47,116.462989,39.991002,7,B00100370,南湖南路
87394,运通201(来广营北-六里桥长途站),1,9239,07:38:02,116.457991,39.988952,8,B00100414,南湖西里
88297,运通201(来广营北-六里桥长途站),1,9239,07:43:47,116.451889,39.983199,9,B00100526,望京桥西
88499,运通201(来广营北-六里桥长途站),1,9239,07:45:03,116.447222,39.986090,10,B00100570,望和桥东


In [7]:
import datetime
obj['id'] = [ "%d"%(int(item) - 1) for item in obj['next_idx'] ] 
test1 = pd.merge(obj,obj,how='left',left_on=['next_idx','busid'],right_on = ['id','busid']).dropna()

test1['delta'] = (pd.to_datetime(test1['timestamps_y']) - pd.to_datetime(test1['timestamps_x'])).dt.total_seconds()
test1 = test1[ (test1['delta'] < 1800.0) & (test1['delta'] > 0.0) ]
test1['dist'] = [ int(item) for item in map(dist,test1['lat_x'],test1['lng_x'],test1['lat_y'],test1['lng_y'])  ]
detail = test1[["bsc_x","bsc_y","timestamps_x","timestamps_y"]]
test1 = test1[["bsc_x","bsc_y","dist"]].drop_duplicates()
test2 = test1[["dist"]]
test2["bsc_x"] = test1["bsc_y"]
test2["bsc_y"] = test1["bsc_x"]
test3 = pd.concat([test1,test2])
test3.to_csv(r"e:\\lastBus\\distances.csv",index=False)


detail["timestamps_x"] = [ item.strftime("%H:%M:%S") for item in (pd.to_datetime(detail['timestamps_x']) + datetime.timedelta(seconds=30)) ]
detail["timestamps_z"] = [ item.strftime("%H:%M:%S") for item in (pd.to_datetime(detail['timestamps_y']) + datetime.timedelta(seconds=30)) ]
detail["default"] = -1

detail = detail[["bsc_x","bsc_y","timestamps_x","timestamps_z","default","timestamps_y"]]
detail.to_csv(r"e:\\lastBus\\weekday_timetable_allstation0627s.csv",index=False)
detail

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  del sys.path[0]


Unnamed: 0,bsc_x,bsc_y,timestamps_x,timestamps_z,default,timestamps_y
0,B00100019,B00100055,07:20:33,07:22:33,-1,07:22:03
6,B00100055,B00100141,07:22:33,07:26:17,-1,07:25:47
12,B00100141,B00100202,07:26:17,07:29:17,-1,07:28:47
18,B00100202,B00100292,07:29:17,07:32:47,-1,07:32:17
24,B00100292,B00100324,07:32:47,07:34:33,-1,07:34:03
30,B00100324,B00100370,07:34:33,07:36:17,-1,07:35:47
36,B00100370,B00100414,07:36:17,07:38:32,-1,07:38:02
42,B00100414,B00100526,07:38:32,07:44:17,-1,07:43:47
48,B00100526,B00100570,07:44:17,07:45:33,-1,07:45:03
53,B00100570,B00100653,07:45:33,07:47:47,-1,07:47:17


In [8]:
bsc=obj[["bsc","lat","lng"]].drop_duplicates()
bsc['value'] = 1
dup = pd.merge(bsc,bsc,how='left',on='value')
dup['dist'] = list(map(dist,dup['lat_x'],dup['lng_x'],dup['lat_y'],dup['lng_y']))

dup = dup[(dup['dist'] < 500.0) & (dup['dist'] > 0.01) & (dup['bsc_x'] > dup['bsc_y'])].dropna().drop(["lat_x","lat_y","lng_x","lng_y","value"],axis=1).drop_duplicates()
dup

Unnamed: 0,bsc_x,bsc_y,dist
115,B00100055,B00100019,396.584540
579,B00100324,B00100292,360.285155
811,B00100414,B00100370,482.709713
1275,B00100668,B00100653,166.995508
1507,B00100809,B00100760,479.833435
1951,B00100978,B00003793,397.279075
1971,B00101001,B00100978,292.056698
2066,B00101001,B00003793,109.398930
2180,B00101083,B00003723,51.246634
2294,B00101157,B00003648,17.394937


In [2]:
#coding=utf8

info = {}
nid = 0
for line in open(r"E://businfo.csv","r",encoding='utf8'):
    items = line.strip().split(",")
    if len(items[8]) < 5: continue
    info[items[8]] = (items[9],items[5],items[6], "B%05d"%(nid), items[1],items[2])
    nid += 1

with open(r"E://lastBus//StationTransAccCode.csv","w",encoding="utf8") as f:
    f.write("id,name,acc,lng,lat\n")
    for item in info: f.write(info[item][3]+","+info[item][0]+","+item+","+info[item][1]+","+info[item][2]+"\n")

dic = {"300内(和平东桥-和平东桥)1":"L300_0", "运通201(来广营北-六里桥长途站)1":"L201_1", "运通201(六里桥长途站-来广营北)0": "L201_0"}
with open(r"E://lastBus//StationTransAccCode_new.csv","w",encoding="utf8") as f:
    f.write("id,line,acc\n")
    for item in info: f.write(info[item][3]+","+dic[info[item][4]+info[item][5]]+","+item+"\n")
        
with open(r"E://lastBus//EntryExitTime.csv","w",encoding="utf8") as f:
    for item in info: 
        f.write(item+","+info[item][0]+",90,90,90,进站,高峰,工作日\n")
        f.write(item+","+info[item][0]+",90,90,90,进站,平峰,工作日\n")
        f.write(item+","+info[item][0]+",90,90,90,进站,全天,双休日\n")
        f.write(item+","+info[item][0]+",90,90,90,出站,全天,双休日\n")
        f.write(item+","+info[item][0]+",90,90,90,出站,高峰,工作日\n")
        f.write(item+","+info[item][0]+",90,90,90,出站,平峰,工作日\n")


In [9]:
# 公交地铁换乘
import sys

import pandas as pd

tb=pd.read_csv(r"E://StationTransAccCode.txt",encoding="utf-8",header=0)

obj['value']=1
tb['value']=1

prod = pd.merge(obj,tb,how='left',on='value')
prod['dist'] = list(map(dist,prod['lat_x'],prod['lng_x'],prod['lat_y'],prod['lng_y']))

prod = prod.where(prod['dist'] < 500.0).dropna().drop(["lat_x","lat_y","lng_x","lng_y","value"],axis=1)
prod["acc"] = [ "%d"%item for item in prod['acc']]
#prod.to_csv(r"E://bus_metro.csv")

prod["time"] = [ int(x) for x in prod["dist"] ]
prod["dist"] = prod["time"]
prod = prod[["bsc","acc","time","dist"]].drop_duplicates()
prod

Unnamed: 0,bsc,acc,time,dist
611,B00100055,150998619,230,230
987,B00100141,150998617,340,340
1364,B00100202,150998617,444,444
5728,B00100920,150996261,261,261
5833,B00100920,150997525,261,261
6106,B00100978,150996263,185,185
6483,B00101001,150996263,457,457
7313,B00101157,150997023,370,370
7690,B00101158,150997023,382,382
9239,B00101445,150998295,351,351


In [10]:
# 公交换乘 + 公交地铁换乘
tb=pd.read_csv(r"E://lastBus//StationTransAccCode.csv",encoding="utf-8",header=0)
tb1=pd.read_csv(r"E://lastBus//StationTransAccCode_new.csv",encoding="utf-8",header=0)

prod1 = pd.merge(tb,tb1,how='inner',on='acc')[["acc","name","line","lat","lng"]]
prod1["lineno"] = [ item.split("_")[0] for item in prod1["line"]]
prod1["value"] = 1
prod2 = pd.merge(prod1,prod1,how="left",on="value")

prod2 = prod2[(prod2['name_x'] != prod2['name_y']) & (prod2['lineno_x'] != prod2['lineno_y'])].dropna()

prod2['dist'] = list(map(dist,prod2['lat_x'],prod2['lng_x'],prod2['lat_y'],prod2['lng_y']))

prod2 = prod2[(prod2['dist'] < 500.0) & (prod2['dist'] > 0.01)].dropna().drop(["lat_x","lat_y","lng_x","lng_y","value","lineno_y","lineno_x"],axis=1)
prod2["time"] = [ int(x) for x in prod2["dist"] ]
prod2["dist"] = prod2["time"]

prod2 = prod2[["acc_x","acc_y","time","dist"]].drop_duplicates().rename(index=str, columns={"acc_x": "bsc", "acc_y": "acc"})
prod3 = prod.rename(index=str, columns={"bsc": "acc", "acc": "bsc"})

trans = pd.concat([prod,prod3,prod2])
trans.to_csv(r"E://lastBus//tansdist.csv",index=False)
trans

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




Unnamed: 0,acc,bsc,dist,time
611,150998619,B00100055,230,230
987,150998617,B00100141,340,340
1364,150998617,B00100202,444,444
5728,150996261,B00100920,261,261
5833,150997525,B00100920,261,261
6106,150996263,B00100978,185,185
6483,150996263,B00101001,457,457
7313,150997023,B00101157,370,370
7690,150997023,B00101158,382,382
9239,150998295,B00101445,351,351


In [2]:
# coding: utf8

f = open(r"C://Users//user//Desktop//competition//pr.csv","w")
for line in open(r"C://Users//user//Desktop//competition//pr20180901.csv","r"):
    items = line.strip().split(",")
    lat, lng = wgs84togcj02(float(items[1]),float(items[0]))
    #print("%.4f,%.4f,2018-09-01 %02d:00:00,"%(float(lat),float(lng),int(items[2])))
    f.write("%.4f,%.4f,2018-09-01 %02d:00:00,%.4f,%s\n"%(float(lat),float(lng),int(items[2]),float(items[3]),items[-1]))
f.close()

In [8]:
# coding: utf8
import collections
import json
check = collections.defaultdict(lambda : {"home":[0.0,0.0],"work":[0.0,0.0]})
for line in open(r"E://BaiduNetdiskDownload//homework.csv","r"):
    items = line.strip().split(",")
    lat0,lng0,types,mobileno = items
    lng, lat = wgs84togcj02(float(lng0),float(lat0))
    check[mobileno][types] = ["%.4f"%(float(lat)),"%.4f"%(float(lng))]
#    f.write("%.4f,%.4f,%s\n"%(float(lat),float(lng),types))
#f.close()

result = collections.defaultdict(lambda : 0)
f = open(r"C://Users//user//Desktop//speech//homework1.csv","w")
for line in check:
    if check[line]["home"][0] + check[line]["home"][1] == 0.0 or check[line]["work"][0] + check[line]["work"][1] == 0.0: continue
    #f.write(line+","+json.dumps(check[line])+"\n")
    name = check[line]["home"][0]+","+check[line]["home"][1]+","+check[line]["work"][0]+","+check[line]["work"][1]
    result[name] += 1
for line in result:
    f.write(line+","+str(result[line])+"\n")
f.close()