In [1]:
# -*- coding: utf-8 -*-
import json
import requests
import math

key = 'your key here'  # 这里填写你的百度开放平台的key
x_pi = 3.14159265358979324 * 3000.0 / 180.0
pi = 3.1415926535897932384626  # π
a = 6378245.0  # 长半轴
ee = 0.00669342162296594323  # 扁率


def dist(lat1, lng1, lat2, lng2):
    dx = lng1 - lng2
    dy = lat1 - lat2
    b = (lat1 + lat2) / 2.0
    Lx = dx * (pi / 180.0) * 6367000.0 * math.cos( b * (pi / 180.0))
    Ly = 6367000.0 * dy * (pi / 180.0)
    return math.sqrt(Lx*Lx+Ly*Ly)

def geocode(address):
    """
    利用百度geocoding服务解析地址获取位置坐标
    :param address:需要解析的地址
    :return:
    """
    geocoding = {'s': 'rsv3',
                 'key': key,
                 'city': '全国',
                 'address': address}
    res = requests.get(
        "http://restapi.amap.com/v3/geocode/geo", params=geocoding)
    if res.status_code == 200:
        json = res.json()
        status = json.get('status')
        count = json.get('count')
        if status == '1' and int(count) >= 1:
            geocodes = json.get('geocodes')[0]
            lng = float(geocodes.get('location').split(',')[0])
            lat = float(geocodes.get('location').split(',')[1])
            return [lng, lat]
        else:
            return None
    else:
        return None

def gcj02tobd09(lng, lat):
    """
    火星坐标系(GCJ-02)转百度坐标系(BD-09)
    谷歌、高德——>百度
    :param lng:火星坐标经度
    :param lat:火星坐标纬度
    :return:
    """
    z = math.sqrt(lng * lng + lat * lat) + 0.00002 * math.sin(lat * x_pi)
    theta = math.atan2(lat, lng) + 0.000003 * math.cos(lng * x_pi)
    bd_lng = z * math.cos(theta) + 0.0065
    bd_lat = z * math.sin(theta) + 0.006
    return [bd_lng, bd_lat]

def bd09togcj02(bd_lon, bd_lat):
    """
    百度坐标系(BD-09)转火星坐标系(GCJ-02)
    百度——>谷歌、高德
    :param bd_lat:百度坐标纬度
    :param bd_lon:百度坐标经度
    :return:转换后的坐标列表形式
    """
    x = bd_lon - 0.0065
    y = bd_lat - 0.006
    z = math.sqrt(x * x + y * y) - 0.00002 * math.sin(y * x_pi)
    theta = math.atan2(y, x) - 0.000003 * math.cos(x * x_pi)
    gg_lng = z * math.cos(theta)
    gg_lat = z * math.sin(theta)
    return [gg_lng, gg_lat]

def wgs84togcj02(lng, lat):
    """
    WGS84转GCJ02(火星坐标系)
    :param lng:WGS84坐标系的经度
    :param lat:WGS84坐标系的纬度
    :return:
    """
    if out_of_china(lng, lat):  # 判断是否在国内
        return lng, lat
    dlat = transformlat(lng - 105.0, lat - 35.0)
    dlng = transformlng(lng - 105.0, lat - 35.0)
    radlat = lat / 180.0 * pi
    magic = math.sin(radlat)
    magic = 1 - ee * magic * magic
    sqrtmagic = math.sqrt(magic)
    dlat = (dlat * 180.0) / ((a * (1 - ee)) / (magic * sqrtmagic) * pi)
    dlng = (dlng * 180.0) / (a / sqrtmagic * math.cos(radlat) * pi)
    mglat = lat + dlat
    mglng = lng + dlng
    return [mglng, mglat]

def gcj02towgs84(lng, lat):
    """
    GCJ02(火星坐标系)转GPS84
    :param lng:火星坐标系的经度
    :param lat:火星坐标系纬度
    :return:
    """
    if out_of_china(lng, lat):
        return lng, lat
    dlat = transformlat(lng - 105.0, lat - 35.0)
    dlng = transformlng(lng - 105.0, lat - 35.0)
    radlat = lat / 180.0 * pi
    magic = math.sin(radlat)
    magic = 1 - ee * magic * magic
    sqrtmagic = math.sqrt(magic)
    dlat = (dlat * 180.0) / ((a * (1 - ee)) / (magic * sqrtmagic) * pi)
    dlng = (dlng * 180.0) / (a / sqrtmagic * math.cos(radlat) * pi)
    mglat = lat + dlat
    mglng = lng + dlng
    return [lng * 2 - mglng, lat * 2 - mglat]

def transformlat(lng, lat):
    ret = -100.0 + 2.0 * lng + 3.0 * lat + 0.2 * lat * lat + \
        0.1 * lng * lat + 0.2 * math.sqrt(math.fabs(lng))
    ret += (20.0 * math.sin(6.0 * lng * pi) + 20.0 *
            math.sin(2.0 * lng * pi)) * 2.0 / 3.0
    ret += (20.0 * math.sin(lat * pi) + 40.0 *
            math.sin(lat / 3.0 * pi)) * 2.0 / 3.0
    ret += (160.0 * math.sin(lat / 12.0 * pi) + 320 *
            math.sin(lat * pi / 30.0)) * 2.0 / 3.0
    return ret

def transformlng(lng, lat):
    ret = 300.0 + lng + 2.0 * lat + 0.1 * lng * lng + \
        0.1 * lng * lat + 0.1 * math.sqrt(math.fabs(lng))
    ret += (20.0 * math.sin(6.0 * lng * pi) + 20.0 *
            math.sin(2.0 * lng * pi)) * 2.0 / 3.0
    ret += (20.0 * math.sin(lng * pi) + 40.0 *
            math.sin(lng / 3.0 * pi)) * 2.0 / 3.0
    ret += (150.0 * math.sin(lng / 12.0 * pi) + 300.0 *
            math.sin(lng / 30.0 * pi)) * 2.0 / 3.0
    return ret

def out_of_china(lng, lat):
    """
    判断是否在国内，不在国内不做偏移
    :param lng:
    :param lat:
    :return:
    """
    if lng < 72.004 or lng > 137.8347:
        return True
    if lat < 0.8293 or lat > 55.8271:
        return True
    return False

In [7]:
#coding=utf8
import sys

import pandas as pd

obj=pd.read_csv(r"E://businfo.txt",encoding="utf-8",header=0)

obj = obj.sort_values(by=['timestamps']).where(obj['to_next_dist']==-1).dropna()

obj['timestamps'] = [ str(item)[8:10]+":"+str(item)[10:12]+":"+str(item)[12:14] for item in obj['timestamps'] ]

last = None
obj = obj.sort_values(by=['busid','timestamps'])
drops = []
for idx, row in obj.iterrows():
    if last is not None and row["busid"] == last["busid"] and row["direction"] == last["direction"] and row["next_idx"] == last["next_idx"]:
        dt1 = ( 25 if row["timestamps"][0:2] == "00" else int(row["timestamps"][0:2]) ) * 3600 +  int(row["timestamps"][3:5]) * 60
        dt2 = ( 25 if last["timestamps"][0:2] == "00" else int(last["timestamps"][0:2]) ) * 3600 +  int(last["timestamps"][3:5]) * 60
        if dt1 - dt2 < 1800: drops.append(idx)
    last = row 
obj = obj.drop(drops).dropna()

obj['pos'] = list(map(wgs84togcj02,obj['lng'],obj['lat']))
obj['lat'] = [ item[1] for item in obj['pos'] ]
obj['lng'] = [ item[0] for item in obj['pos'] ]
obj = obj.drop(["pos","to_next_dist"],axis=1)

station = {}
for line in open(r"e:\\lastBus\\StationTransAccCode.csv","r",encoding="utf-8"):
    items = line.strip().split(",")
    station[items[2]] = items[1]

check = {"300内":"B000", "运通201":"B001"}
obj["bc"] = [ check[filter(lambda x: x in item, check).__next__()] for item in obj['name'] ]
obj["pointid"] = [ "%05d"%item for item in obj['pointid']]
obj["busid"] = [ "%d"%item for item in obj['busid']]
obj["next_idx"] = [ "%d"%item for item in obj['next_idx']]
obj["direction"] = [ "%d"%item for item in obj['direction']]
obj["bsc"] = list(map(lambda x,y: x+y ,obj["bc"], obj["pointid"]))
obj = obj.drop(["bc","pointid"],axis=1)
obj["stationname"] = [ station[item] for item in obj['bsc']]
obj.to_csv(r"e://businfo.csv")
obj


Unnamed: 0,name,direction,busid,timestamps,lng,lat,next_idx,bsc,stationname
84551,运通201(来广营北-六里桥长途站),1,9239,07:20:03,116.461245,40.023219,1,B00100019,来广营北
84841,运通201(来广营北-六里桥长途站),1,9239,07:22:03,116.464288,40.020516,2,B00100055,来广营路口西
85421,运通201(来广营北-六里桥长途站),1,9239,07:25:47,116.467054,40.013724,3,B00100141,广顺桥南
85915,运通201(来广营北-六里桥长途站),1,9239,07:28:47,116.467447,40.006666,4,B00100202,望京花园西区
86469,运通201(来广营北-六里桥长途站),1,9239,07:32:17,116.462811,39.999442,5,B00100292,南湖东园
86763,运通201(来广营北-六里桥长途站),1,9239,07:34:03,116.462745,39.996200,6,B00100324,南湖南路北口
87028,运通201(来广营北-六里桥长途站),1,9239,07:35:47,116.462989,39.991002,7,B00100370,南湖南路
87394,运通201(来广营北-六里桥长途站),1,9239,07:38:02,116.457991,39.988952,8,B00100414,南湖西里
88297,运通201(来广营北-六里桥长途站),1,9239,07:43:47,116.451889,39.983199,9,B00100526,望京桥西
88499,运通201(来广营北-六里桥长途站),1,9239,07:45:03,116.447222,39.986090,10,B00100570,望和桥东


In [13]:
import datetime
obj['id'] = [ "%d"%(int(item) - 1) for item in obj['next_idx'] ] 
test1 = pd.merge(obj,obj,how='left',left_on=['next_idx','busid'],right_on = ['id','busid']).dropna()

test1['delta'] = (pd.to_datetime(test1['timestamps_y']) - pd.to_datetime(test1['timestamps_x'])).dt.total_seconds()
test1 = test1[ (test1['delta'] < 1800.0) & (test1['delta'] > 0.0) ]
test1['dist'] = [ int(item) for item in map(dist,test1['lat_x'],test1['lng_x'],test1['lat_y'],test1['lng_y'])  ]
detail = test1[["bsc_x","bsc_y","timestamps_x","timestamps_y"]]
test1 = test1[["bsc_x","bsc_y","dist"]].drop_duplicates()
test2 = test1[["dist"]]
test2["bsc_x"] = test1["bsc_y"]
test2["bsc_y"] = test1["bsc_x"]
test3 = pd.concat([test1,test2])
test3.to_csv(r"e:\\lastBus\\distances.csv",index=False)


detail["timestamps_x"] = [ item.strftime("%H:%M:%S") for item in (pd.to_datetime(detail['timestamps_x']) + datetime.timedelta(seconds=30)) ]
detail["timestamps_z"] = [ item.strftime("%H:%M:%S") for item in (pd.to_datetime(detail['timestamps_y']) + datetime.timedelta(seconds=30)) ]
detail["default"] = -1

detail = detail[["bsc_x","bsc_y","timestamps_x","timestamps_z","default","timestamps_y"]]
detail.to_csv(r"e:\\lastBus\\weekday_timetable_allstation0627s.csv",index=False)
detail

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  del sys.path[0]


Unnamed: 0,bsc_x,bsc_y,timestamps_x,timestamps_z,default,timestamps_y
0,B00100019,B00100055,07:20:33,07:22:33,-1,07:22:03
6,B00100055,B00100141,07:22:33,07:26:17,-1,07:25:47
12,B00100141,B00100202,07:26:17,07:29:17,-1,07:28:47
18,B00100202,B00100292,07:29:17,07:32:47,-1,07:32:17
24,B00100292,B00100324,07:32:47,07:34:33,-1,07:34:03
30,B00100324,B00100370,07:34:33,07:36:17,-1,07:35:47
36,B00100370,B00100414,07:36:17,07:38:32,-1,07:38:02
42,B00100414,B00100526,07:38:32,07:44:17,-1,07:43:47
48,B00100526,B00100570,07:44:17,07:45:33,-1,07:45:03
53,B00100570,B00100653,07:45:33,07:47:47,-1,07:47:17


In [89]:
bsc=obj[["bsc","lat","lng"]].drop_duplicates()
bsc['value'] = 1
dup = pd.merge(bsc,bsc,how='left',on='value')
dup['dist'] = list(map(dist,dup['lat_x'],dup['lng_x'],dup['lat_y'],dup['lng_y']))

dup = dup[(dup['dist'] < 500.0) & (dup['dist'] > 0.01) & (dup['bsc_x'] > dup['bsc_y'])].dropna().drop(["lat_x","lat_y","lng_x","lng_y","value"],axis=1).drop_duplicates()
dup

Unnamed: 0,bsc_x,bsc_y,dist
115,B00100055,B00100019,396.584540
579,B00100324,B00100292,360.285155
811,B00100414,B00100370,482.709713
1275,B00100668,B00100653,166.995508
1507,B00100809,B00100760,479.833435
1951,B00100978,B00003793,397.279075
1971,B00101001,B00100978,292.056698
2066,B00101001,B00003793,109.398930
2180,B00101083,B00003723,51.246634
2294,B00101157,B00003648,17.394937


In [90]:
#coding=utf8

info = {}
nid = 0
for line in open(r"E://businfo.csv","r",encoding='utf8'):
    items = line.strip().split(",")
    if len(items[8]) < 5: continue
    print(items)
    info[items[8]] = (items[9],items[5],items[6], "B%05d"%(nid), items[1],items[2])
    nid += 1

with open(r"E://lastBus//StationTransAccCode.csv","w",encoding="utf8") as f:
    f.write("id,name,acc,lng,lat\n")
    for item in info: f.write(info[item][3]+","+info[item][0]+","+item+","+info[item][1]+","+info[item][2]+"\n")

dic = {"300内(和平东桥-和平东桥)1":"L300_0", "运通201(来广营北-六里桥长途站)1":"L201_1", "运通201(六里桥长途站-来广营北)0": "L201_0"}
with open(r"E://lastBus//StationTransAccCode_new.csv","w",encoding="utf8") as f:
    f.write("id,line,acc\n")
    for item in info: f.write(info[item][3]+","+dic[info[item][4]+info[item][5]]+","+item+"\n")
        
with open(r"E://lastBus//EntryExitTime.csv","w",encoding="utf8") as f:
    for item in info: 
        f.write(item+","+info[item][0]+",90,90,90,进站,高峰,工作日\n")
        f.write(item+","+info[item][0]+",90,90,90,进站,平峰,工作日\n")
        f.write(item+","+info[item][0]+",90,90,90,进站,全天,双休日\n")
        f.write(item+","+info[item][0]+",90,90,90,出站,全天,双休日\n")
        f.write(item+","+info[item][0]+",90,90,90,出站,高峰,工作日\n")
        f.write(item+","+info[item][0]+",90,90,90,出站,平峰,工作日\n")

print(info)


['84551', '运通201(来广营北-六里桥长途站)', '1', '9239', '07:20:03', '116.46124524277408', '40.02321887430823', '1', 'B00100019', '来广营北']
['84841', '运通201(来广营北-六里桥长途站)', '1', '9239', '07:22:03', '116.46428776467268', '40.02051559903892', '2', 'B00100055', '来广营路口西']
['85421', '运通201(来广营北-六里桥长途站)', '1', '9239', '07:25:47', '116.4670537675165', '40.01372444597477', '3', 'B00100141', '广顺桥南']
['85915', '运通201(来广营北-六里桥长途站)', '1', '9239', '07:28:47', '116.46744706058386', '40.00666614657366', '4', 'B00100202', '望京花园西区']
['86469', '运通201(来广营北-六里桥长途站)', '1', '9239', '07:32:17', '116.46281050303983', '39.99944194728712', '5', 'B00100292', '南湖东园']
['86763', '运通201(来广营北-六里桥长途站)', '1', '9239', '07:34:03', '116.46274511204972', '39.99620017649198', '6', 'B00100324', '南湖南路北口']
['87028', '运通201(来广营北-六里桥长途站)', '1', '9239', '07:35:47', '116.46298912607668', '39.991002075811075', '7', 'B00100370', '南湖南路']
['87394', '运通201(来广营北-六里桥长途站)', '1', '9239', '07:38:02', '116.45799059111472', '39.98895204277853', '8', 'B00100

['33481', '运通201(六里桥长途站-来广营北)', '0', '9243', '17:39:36', '116.43699576190475', '39.988906455328866', '29', 'B00101882', '望和桥']
['33967', '运通201(六里桥长途站-来广营北)', '0', '9243', '17:42:05', '116.44755315437216', '39.98535545337916', '30', 'B00101975', '望和桥东']
['34209', '运通201(六里桥长途站-来广营北)', '0', '9243', '17:43:20', '116.45225004405451', '39.982405011846474', '31', 'B00102014', '望京桥西']
['35173', '运通201(六里桥长途站-来广营北)', '0', '9243', '17:48:21', '116.45794364810318', '39.988946110841056', '32', 'B00102127', '南湖西里']
['35712', '运通201(六里桥长途站-来广营北)', '0', '9243', '17:51:05', '116.46310330738956', '39.990818569323615', '33', 'B00102171', '南湖南路']
['36042', '运通201(六里桥长途站-来广营北)', '0', '9243', '17:52:50', '116.46290154705966', '39.99528083404576', '34', 'B00102211', '南湖南路北口']
['36472', '运通201(六里桥长途站-来广营北)', '0', '9243', '17:55:05', '116.46289720888377', '39.99944077124525', '35', 'B00102250', '南湖东园']
['37252', '运通201(六里桥长途站-来广营北)', '0', '9243', '17:59:25', '116.46776598533127', '40.00649379075296', '36', 

['82140', '运通201(来广营北-六里桥长途站)', '1', '9245', '07:03:10', '116.3489368219336', '39.967861472904545', '25', 'B00101445', '蓟门桥西']
['82313', '运通201(来广营北-六里桥长途站)', '1', '9245', '07:04:25', '116.34472291143608', '39.96781974298145', '26', 'B00101472', '地铁大钟寺站']
['82549', '运通201(来广营北-六里桥长途站)', '1', '9245', '07:06:10', '116.3375063414877', '39.96776429413776', '27', 'B00101516', '大钟寺']
['83157', '运通201(来广营北-六里桥长途站)', '1', '9245', '07:10:25', '116.32335553438197', '39.96731782691385', '28', 'B00101609', '四通桥东']
['83749', '运通201(来广营北-六里桥长途站)', '1', '9245', '07:14:25', '116.31959128873137', '39.9666636006081', '29', 'B00101638', '四通桥西']
['83948', '运通201(来广营北-六里桥长途站)', '1', '9245', '07:15:55', '116.31411829035157', '39.964671839597315', '30', 'B00101678', '三义庙']
['84299', '运通201(来广营北-六里桥长途站)', '1', '9245', '07:18:25', '116.3085106846964', '39.958781382300515', '31', 'B00101763', '苏州桥南']
['84634', '运通201(来广营北-六里桥长途站)', '1', '9245', '07:20:40', '116.30909982167262', '39.954751678999216', '32', 'B001

['54162', '运通201(六里桥长途站-来广营北)', '0', '9247', '19:27:04', '116.37266129143624', '39.967694656459706', '18', 'B00101268', '马甸桥西']
['54722', '运通201(六里桥长途站-来广营北)', '0', '9247', '19:30:19', '116.38535542173534', '39.968064873287126', '19', 'B00101357', '马甸桥东']
['54937', '运通201(六里桥长途站-来广营北)', '0', '9247', '19:31:34', '116.39060561316296', '39.96820056434268', '20', 'B00101392', '安华桥西']
['55333', '运通201(六里桥长途站-来广营北)', '0', '9247', '19:34:04', '116.40088265525921', '39.96851166037775', '21', 'B00101461', '安贞桥西']
['55856', '运通201(六里桥长途站-来广营北)', '0', '9247', '19:37:19', '116.41264672399295', '39.96895075565202', '22', 'B00101541', '安贞桥东']
['56020', '运通201(六里桥长途站-来广营北)', '0', '9247', '19:38:19', '116.41799971257637', '39.97016282523233', '23', 'B00101589', '和平西桥北']
['56218', '运通201(六里桥长途站-来广营北)', '0', '9247', '19:39:34', '116.41780443374039', '39.9745064575944', '24', 'B00101627', '樱花园西街']
['56719', '运通201(六里桥长途站-来广营北)', '0', '9247', '19:42:49', '116.41753617634292', '39.98206759796555', '25', 'B

['28286', '运通201(来广营北-六里桥长途站)', '1', '9250', '17:09:12', '116.45799059111472', '39.98895204277853', '8', 'B00100414', '南湖西里']
['29048', '运通201(来广营北-六里桥长途站)', '1', '9250', '17:13:42', '116.45188895947389', '39.98319922209448', '9', 'B00100526', '望京桥西']
['29247', '运通201(来广营北-六里桥长途站)', '1', '9250', '17:14:57', '116.44722219217756', '39.98609006406184', '10', 'B00100570', '望和桥东']
['29632', '运通201(来广营北-六里桥长途站)', '1', '9250', '17:17:27', '116.43785049403877', '39.98936778335244', '11', 'B00100653', '望和桥']
['29667', '运通201(来广营北-六里桥长途站)', '1', '9250', '17:17:42', '116.43589492022166', '39.989483757782615', '12', 'B00100668', '育慧里']
['30241', '运通201(来广营北-六里桥长途站)', '1', '9250', '17:21:12', '116.42407585886332', '39.98767238149227', '13', 'B00100760', '惠新东桥南']
['30665', '运通201(来广营北-六里桥长途站)', '1', '9250', '17:23:42', '116.42276022437322', '39.98347373992585', '14', 'B00100809', '对外经贸大学']
['31391', '运通201(来广营北-六里桥长途站)', '1', '9250', '17:27:57', '116.41729890713411', '39.98165217204218', '15', 'B001

['116705', '运通201(六里桥长途站-来广营北)', '0', '9254', '10:35:21', '116.37266129143624', '39.967694656459706', '18', 'B00101268', '马甸桥西']
['117104', '运通201(六里桥长途站-来广营北)', '0', '9254', '10:38:05', '116.38535542173534', '39.968064873287126', '19', 'B00101357', '马甸桥东']
['117272', '运通201(六里桥长途站-来广营北)', '0', '9254', '10:39:05', '116.39060561316296', '39.96820056434268', '20', 'B00101392', '安华桥西']
['117676', '运通201(六里桥长途站-来广营北)', '0', '9254', '10:41:51', '116.40088265525921', '39.96851166037775', '21', 'B00101461', '安贞桥西']
['118051', '运通201(六里桥长途站-来广营北)', '0', '9254', '10:44:20', '116.41264672399295', '39.96895075565202', '22', 'B00101541', '安贞桥东']
['118221', '运通201(六里桥长途站-来广营北)', '0', '9254', '10:45:35', '116.41799971257637', '39.97016282523233', '23', 'B00101589', '和平西桥北']
['118394', '运通201(六里桥长途站-来广营北)', '0', '9254', '10:46:51', '116.41780443374039', '39.9745064575944', '24', 'B00101627', '樱花园西街']
['118833', '运通201(六里桥长途站-来广营北)', '0', '9254', '10:49:51', '116.41753617634292', '39.98206759796555', 

['97030', '运通201(六里桥长途站-来广营北)', '0', '9257', '08:33:05', '116.3100682974153', '39.94844395392241', '8', 'B00100708', '万寿寺']
['97480', '运通201(六里桥长途站-来广营北)', '0', '9257', '08:35:36', '116.30952705586154', '39.95468316425629', '9', 'B00100763', '为公桥']
['98032', '运通201(六里桥长途站-来广营北)', '0', '9257', '08:38:35', '116.3088095480562', '39.95994740655355', '10', 'B00100811', '苏州桥南']
['98390', '运通201(六里桥长途站-来广营北)', '0', '9257', '08:40:35', '116.31294272694451', '39.963748324103065', '11', 'B00100866', '三义庙']
['99008', '运通201(六里桥长途站-来广营北)', '0', '9257', '08:44:05', '116.32361640009418', '39.96689828833889', '12', 'B00100943', '四通桥东']
['99827', '运通201(六里桥长途站-来广营北)', '0', '9257', '08:48:51', '116.33764024869768', '39.96737095455672', '13', 'B00101036', '大钟寺']
['100376', '运通201(六里桥长途站-来广营北)', '0', '9257', '08:51:52', '116.34469608983986', '39.967414106801144', '14', 'B00101082', '地铁大钟寺站']
['100659', '运通201(六里桥长途站-来广营北)', '0', '9257', '08:53:37', '116.34977439137964', '39.96744860632516', '15', 'B00101

['110548', '运通201(来广营北-六里桥长途站)', '1', '9261', '09:54:21', '116.3675343514958', '39.96801900319591', '23', 'B00101311', '北太平桥西']
['110781', '运通201(来广营北-六里桥长途站)', '1', '9261', '09:55:51', '116.35943365836467', '39.967863238685375', '24', 'B00101363', '蓟门桥东']
['111064', '运通201(来广营北-六里桥长途站)', '1', '9261', '09:57:36', '116.3489368219336', '39.967861472904545', '25', 'B00101445', '蓟门桥西']
['111243', '运通201(来广营北-六里桥长途站)', '1', '9261', '09:58:51', '116.34472291143608', '39.96781974298145', '26', 'B00101472', '地铁大钟寺站']
['111469', '运通201(来广营北-六里桥长途站)', '1', '9261', '10:00:21', '116.3375063414877', '39.96776429413776', '27', 'B00101516', '大钟寺']
['112295', '运通201(来广营北-六里桥长途站)', '1', '9261', '10:05:36', '116.32335553438197', '39.96731782691385', '28', 'B00101609', '四通桥东']
['112448', '运通201(来广营北-六里桥长途站)', '1', '9261', '10:06:36', '116.31959128873137', '39.9666636006081', '29', 'B00101638', '四通桥西']
['112712', '运通201(来广营北-六里桥长途站)', '1', '9261', '10:08:06', '116.31411829035157', '39.964671839597315', '3

['39344', '运通201(六里桥长途站-来广营北)', '0', '9264', '18:09:47', '116.37266129143624', '39.967694656459706', '18', 'B00101268', '马甸桥西']
['42692', '运通201(六里桥长途站-来广营北)', '0', '9264', '18:27:17', '116.38535542173534', '39.968064873287126', '19', 'B00101357', '马甸桥东']
['43565', '运通201(六里桥长途站-来广营北)', '0', '9264', '18:31:48', '116.39060561316296', '39.96820056434268', '20', 'B00101392', '安华桥西']
['44192', '运通201(六里桥长途站-来广营北)', '0', '9264', '18:35:03', '116.40088265525921', '39.96851166037775', '21', 'B00101461', '安贞桥西']
['46081', '运通201(六里桥长途站-来广营北)', '0', '9264', '18:44:18', '116.41799971257637', '39.97016282523233', '23', 'B00101589', '和平西桥北']
['46330', '运通201(六里桥长途站-来广营北)', '0', '9264', '18:45:33', '116.41780443374039', '39.9745064575944', '24', 'B00101627', '樱花园西街']
['47143', '运通201(六里桥长途站-来广营北)', '0', '9264', '18:49:33', '116.41753617634292', '39.98206759796555', '25', 'B00101693', '慧新苑']
['47718', '运通201(六里桥长途站-来广营北)', '0', '9264', '18:52:33', '116.42125380738341', '39.98861978998388', '26', 'B0

['23639', '运通201(来广营北-六里桥长途站)', '1', '9267', '16:41:19', '116.4008778600314', '39.96895347624063', '19', 'B00101083', '安贞桥西']
['23975', '运通201(来广营北-六里桥长途站)', '1', '9267', '16:43:19', '116.39029947118512', '39.968483573280004', '20', 'B00101157', '安华桥西']
['24409', '运通201(来广营北-六里桥长途站)', '1', '9267', '16:46:04', '116.37660341956867', '39.968267577567', '22', 'B00101250', '马甸桥西']
['24760', '运通201(来广营北-六里桥长途站)', '1', '9267', '16:48:19', '116.3675343514958', '39.96801900319591', '23', 'B00101311', '北太平桥西']
['25064', '运通201(来广营北-六里桥长途站)', '1', '9267', '16:50:16', '116.35943365836467', '39.967863238685375', '24', 'B00101363', '蓟门桥东']
['25358', '运通201(来广营北-六里桥长途站)', '1', '9267', '16:52:01', '116.3489368219336', '39.967861472904545', '25', 'B00101445', '蓟门桥西']
['25617', '运通201(来广营北-六里桥长途站)', '1', '9267', '16:53:31', '116.34472291143608', '39.96781974298145', '26', 'B00101472', '地铁大钟寺站']
['25940', '运通201(来广营北-六里桥长途站)', '1', '9267', '16:55:16', '116.3375063414877', '39.96776429413776', '27', 'B001

['81710', '300内(和平东桥-和平东桥)', '1', '15022', '07:00:01', '116.4436034318584', '39.9648481856714', '3', 'B00000155', '静安庄']
['82205', '300内(和平东桥-和平东桥)', '1', '15022', '07:03:28', '116.45693537284677', '39.956057157728104', '4', 'B00000280', '三元桥']
['82513', '300内(和平东桥-和平东桥)', '1', '15022', '07:05:46', '116.46166696263626', '39.94435206264969', '5', 'B00000401', '亮马桥']
['83862', '300内(和平东桥-和平东桥)', '1', '15022', '07:15:19', '116.4613981831897', '39.89647365423946', '6', 'B00000842', '双井桥北']
['84058', '300内(和平东桥-和平东桥)', '1', '15022', '07:16:34', '116.46128927078922', '39.889492456797385', '7', 'B00000904', '双井桥南']
['84394', '300内(和平东桥-和平东桥)', '1', '15022', '07:18:58', '116.46111097954491', '39.87843052605122', '8', 'B00001007', '潘家园桥北']
['84759', '300内(和平东桥-和平东桥)', '1', '15022', '07:21:34', '116.46074839698609', '39.86860011131977', '9', 'B00001096', '十里河桥北']
['85356', '300内(和平东桥-和平东桥)', '1', '15022', '07:25:19', '116.44366872462847', '39.858974781823946', '10', 'B00001254', '方庄桥东']
['85653'

['1429', '300内(和平东桥-和平东桥)', '1', '15032', '14:04:58', '116.31103716221223', '39.881211145221066', '22', 'B00002385', '六里桥南']
['1530', '300内(和平东桥-和平东桥)', '1', '15032', '14:06:36', '116.31043891504405', '39.89058642522689', '23', 'B00002471', '六里桥北里']
['1995', '300内(和平东桥-和平东桥)', '1', '15032', '14:10:36', '116.31040339666202', '39.90278776116995', '24', 'B00002578', '公主坟南']
['2757', '300内(和平东桥-和平东桥)', '1', '15032', '14:16:54', '116.31019613546407', '39.924001102678886', '25', 'B00002762', '航天桥']
['3157', '300内(和平东桥-和平东桥)', '1', '15032', '14:20:24', '116.31020947567521', '39.93540184703448', '26', 'B00002861', '花园桥北']
['3380', '300内(和平东桥-和平东桥)', '1', '15032', '14:21:51', '116.3102350140837', '39.94162902800898', '27', 'B00002915', '紫竹桥']
['3541', '300内(和平东桥-和平东桥)', '1', '15032', '14:23:12', '116.30995410752953', '39.9473611680378', '28', 'B00002970', '万寿寺']
['4742', '300内(和平东桥-和平东桥)', '1', '15032', '14:32:46', '116.34950602788382', '39.96757140194019', '30', 'B00003378', '蓟门桥西']
['4862', '

['34647', '300内(和平东桥-和平东桥)', '1', '24574', '17:45:31', '116.31020947567521', '39.93540184703448', '26', 'B00002861', '花园桥北']
['34960', '300内(和平东桥-和平东桥)', '1', '24574', '17:47:02', '116.3102350140837', '39.94162902800898', '27', 'B00002915', '紫竹桥']
['35423', '300内(和平东桥-和平东桥)', '1', '24574', '17:49:08', '116.30995410752953', '39.9473611680378', '28', 'B00002970', '万寿寺']
['36075', '300内(和平东桥-和平东桥)', '1', '24574', '17:52:47', '116.3163489492008', '39.96516099258407', '29', 'B00003156', '四通桥西']
['37078', '300内(和平东桥-和平东桥)', '1', '24574', '17:57:53', '116.34950602788382', '39.96757140194019', '30', 'B00003378', '蓟门桥西']
['37746', '300内(和平东桥-和平东桥)', '1', '24574', '18:01:18', '116.36627631639648', '39.96770973700944', '31', 'B00003486', '北太平桥西']
['39458', '300内(和平东桥-和平东桥)', '1', '24574', '18:10:03', '116.38366864585127', '39.96815507069914', '32', 'B00003602', '马甸桥东']
['40444', '300内(和平东桥-和平东桥)', '1', '24574', '18:15:10', '116.390335142988', '39.96832944443238', '33', 'B00003648', '安华桥西']
['4114

['58088', '300内(和平东桥-和平东桥)', '1', '24581', '19:52:33', '116.31040339666202', '39.90278776116995', '24', 'B00002578', '公主坟南']
['58664', '300内(和平东桥-和平东桥)', '1', '24581', '19:56:33', '116.31019613546407', '39.924001102678886', '25', 'B00002762', '航天桥']
['58938', '300内(和平东桥-和平东桥)', '1', '24581', '19:58:33', '116.31020947567521', '39.93540184703448', '26', 'B00002861', '花园桥北']
['59119', '300内(和平东桥-和平东桥)', '1', '24581', '20:00:03', '116.3102350140837', '39.94162902800898', '27', 'B00002915', '紫竹桥']
['59273', '300内(和平东桥-和平东桥)', '1', '24581', '20:01:03', '116.30995410752953', '39.9473611680378', '28', 'B00002970', '万寿寺']
['59703', '300内(和平东桥-和平东桥)', '1', '24581', '20:04:34', '116.3163489492008', '39.96516099258407', '29', 'B00003156', '四通桥西']
['60405', '300内(和平东桥-和平东桥)', '1', '24581', '20:09:49', '116.34950602788382', '39.96757140194019', '30', 'B00003378', '蓟门桥西']
['60680', '300内(和平东桥-和平东桥)', '1', '24581', '20:12:04', '116.36627631639648', '39.96770973700944', '31', 'B00003486', '北太平桥西']
['61

['75506', '300内(和平东桥-和平东桥)', '1', '24586', '05:36:51', '116.4115120970729', '39.968911707840796', '35', 'B00003793', '安贞桥东']
['75542', '300内(和平东桥-和平东桥)', '1', '24586', '05:38:39', '116.42226424699872', '39.969077795510344', '36', 'B00003864', '和平东桥']
['75555', '300内(和平东桥-和平东桥)', '1', '24586', '05:39:21', '116.42347655963464', '39.96910733373253', '1', 'B00000002', '和平东桥']
['75592', '300内(和平东桥-和平东桥)', '1', '24586', '05:41:06', '116.43613296764381', '39.96931081073993', '2', 'B00000092', '西坝河']
['75633', '300内(和平东桥-和平东桥)', '1', '24586', '05:42:54', '116.4436034318584', '39.9648481856714', '3', 'B00000155', '静安庄']
['75695', '300内(和平东桥-和平东桥)', '1', '24586', '05:45:40', '116.45693537284677', '39.956057157728104', '4', 'B00000280', '三元桥']
['75752', '300内(和平东桥-和平东桥)', '1', '24586', '05:48:10', '116.46166696263626', '39.94435206264969', '5', 'B00000401', '亮马桥']
['75973', '300内(和平东桥-和平东桥)', '1', '24586', '05:55:41', '116.4613981831897', '39.89647365423946', '6', 'B00000842', '双井桥北']
['76030', '

['46472', '300内(和平东桥-和平东桥)', '1', '24615', '18:46:10', '116.38283418425812', '39.85716946472369', '16', 'B00001682', '洋桥西']
['47225', '300内(和平东桥-和平东桥)', '1', '24615', '18:49:55', '116.35965413120763', '39.8535593573031', '17', 'B00001838', '草桥']
['47906', '300内(和平东桥-和平东桥)', '1', '24615', '18:53:23', '116.33842704253796', '39.848982874592096', '18', 'B00001988', '玉泉营桥西']
['48141', '300内(和平东桥-和平东桥)', '1', '24615', '18:54:41', '116.326581912608', '39.84974415295869', '19', 'B00002068', '夏家胡同']
['48883', '300内(和平东桥-和平东桥)', '1', '24615', '18:58:38', '116.31258788099778', '39.86737860110798', '20', 'B00002262', '丽泽桥']
['49227', '300内(和平东桥-和平东桥)', '1', '24615', '19:00:38', '116.31177726184711', '39.874639276817', '21', 'B00002328', '西局']
['49419', '300内(和平东桥-和平东桥)', '1', '24615', '19:01:42', '116.31103716221223', '39.881211145221066', '22', 'B00002385', '六里桥南']
['49807', '300内(和平东桥-和平东桥)', '1', '24615', '19:03:42', '116.31043891504405', '39.89058642522689', '23', 'B00002471', '六里桥北里']
['50406

In [91]:
# 公交地铁换乘
import sys

import pandas as pd

tb=pd.read_csv(r"E://StationTransAccCode.txt",encoding="utf-8",header=0)

obj['value']=1
tb['value']=1

prod = pd.merge(obj,tb,how='left',on='value')
prod['dist'] = list(map(dist,prod['lat_x'],prod['lng_x'],prod['lat_y'],prod['lng_y']))

prod = prod.where(prod['dist'] < 500.0).dropna().drop(["lat_x","lat_y","lng_x","lng_y","value"],axis=1)
prod["acc"] = [ "%d"%item for item in prod['acc']]
#prod.to_csv(r"E://bus_metro.csv")

prod["time"] = [ int(x) for x in prod["dist"] ]
prod["dist"] = prod["time"]
prod = prod[["bsc","acc","time","dist"]].drop_duplicates()
prod


KeyboardInterrupt



In [85]:
# 公交换乘 + 公交地铁换乘
tb=pd.read_csv(r"E://lastBus//StationTransAccCode.csv",encoding="utf-8",header=0)
tb1=pd.read_csv(r"E://lastBus//StationTransAccCode_new.csv",encoding="utf-8",header=0)

prod1 = pd.merge(tb,tb1,how='inner',on='acc')[["acc","name","line","lat","lng"]]
prod1["lineno"] = [ item.split("_")[0] for item in prod1["line"]]
prod1["value"] = 1
prod2 = pd.merge(prod1,prod1,how="left",on="value")

prod2 = prod2[(prod2['name_x'] != prod2['name_y']) & (prod2['lineno_x'] != prod2['lineno_y'])].dropna()

prod2['dist'] = list(map(dist,prod2['lat_x'],prod2['lng_x'],prod2['lat_y'],prod2['lng_y']))

prod2 = prod2[(prod2['dist'] < 500.0) & (prod2['dist'] > 0.01)].dropna().drop(["lat_x","lat_y","lng_x","lng_y","value","lineno_y","lineno_x"],axis=1)
prod2["time"] = [ int(x) for x in prod2["dist"] ]
prod2["dist"] = prod2["time"]

prod2 = prod2[["acc_x","acc_y","time","dist"]].drop_duplicates().rename(index=str, columns={"acc_x": "bsc", "acc_y": "acc"})
prod3 = prod.rename(index=str, columns={"bsc": "acc", "acc": "bsc"})

trans = pd.concat([prod,prod3,prod2])
trans.to_csv(r"E://lastBus//tansdist.csv",index=False)
trans

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




Unnamed: 0,acc,bsc,dist,time
611,150998619,B00100055,230,230
987,150998617,B00100141,340,340
1364,150998617,B00100202,444,444
5728,150996261,B00100920,261,261
5833,150997525,B00100920,261,261
6106,150996263,B00100978,185,185
6483,150996263,B00101001,457,457
7313,150997023,B00101157,370,370
7690,150997023,B00101158,382,382
9239,150998295,B00101445,351,351


In [2]:
# coding: utf8

f = open(r"C://Users//user//Desktop//competition//pr.csv","w")
for line in open(r"C://Users//user//Desktop//competition//pr20180901.csv","r"):
    items = line.strip().split(",")
    lat, lng = wgs84togcj02(float(items[1]),float(items[0]))
    #print("%.4f,%.4f,2018-09-01 %02d:00:00,"%(float(lat),float(lng),int(items[2])))
    f.write("%.4f,%.4f,2018-09-01 %02d:00:00,%.4f,%s\n"%(float(lat),float(lng),int(items[2]),float(items[3]),items[-1]))
f.close()

In [8]:
# coding: utf8
import collections
import json
check = collections.defaultdict(lambda : {"home":[0.0,0.0],"work":[0.0,0.0]})
for line in open(r"E://BaiduNetdiskDownload//homework.csv","r"):
    items = line.strip().split(",")
    lat0,lng0,types,mobileno = items
    lng, lat = wgs84togcj02(float(lng0),float(lat0))
    check[mobileno][types] = ["%.4f"%(float(lat)),"%.4f"%(float(lng))]
#    f.write("%.4f,%.4f,%s\n"%(float(lat),float(lng),types))
#f.close()

result = collections.defaultdict(lambda : 0)
f = open(r"C://Users//user//Desktop//speech//homework1.csv","w")
for line in check:
    if check[line]["home"][0] + check[line]["home"][1] == 0.0 or check[line]["work"][0] + check[line]["work"][1] == 0.0: continue
    #f.write(line+","+json.dumps(check[line])+"\n")
    name = check[line]["home"][0]+","+check[line]["home"][1]+","+check[line]["work"][0]+","+check[line]["work"][1]
    result[name] += 1
for line in result:
    f.write(line+","+str(result[line])+"\n")
f.close()