In [98]:
import loadexcel as lc
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
from data_convert import*
from tqdm import tqdm
import feedback_option
import loaddb as ld
import os

class UnMatchError(Exception):
    """Error signalling that some rows could not be matched.

    Parameters
    ----------
    ErrorInfo
        Human-readable description of the problem. It is kept on
        ``self.errorinfo`` and is also what ``str(exc)`` returns.
    """
    def __init__(self,ErrorInfo):
        # Hand the message (not the exception object itself) to the base class
        # so that ``args`` holds the message and ``repr()`` does not recurse.
        super().__init__(ErrorInfo)
        self.errorinfo=ErrorInfo
    def __str__(self):
        # __str__ must return a str even if a non-string payload was passed in.
        return str(self.errorinfo)

class DataManager():
    """Load the summary/detail sheets found under a path and validate car ids.

    The frames are produced by the ``lc`` (loadexcel) helpers and validated
    against the vehicle table returned by ``ld.get_car_info()``.  After a
    successful ``load()`` the frames are available through ``get_Sum()`` and
    ``get_detail()``; after a failed (or not yet performed) load both getters
    return an empty list.
    """

    def __init__(self,date,path):
        """Remember ``date``/``path`` and read the vehicle table.

        No Excel file is read here; call ``load()`` or ``load_path()`` for that.
        """
        self._date=date
        self._path=path
        # Start in the same "nothing loaded" state that a failed load() leaves,
        # so the getters are usable before the first load.
        self._pd_sum=[]
        self._pd_detail=[]
        self.update_p_index()
        # Vehicle information used to validate car ids.
        self._car_value=ld.get_car_info()

    def getPath(self):
        """Return the path currently used as the data source."""
        return self._path

    def get_data(self):
        """Return ``self._data``.

        NOTE(review): nothing in this class assigns ``_data``; unless a caller
        sets it, this raises AttributeError. Confirm whether it is still used.
        """
        return self._data

    def update_path(self,new_path):
        """Replace the source path and recompute ``p_index`` (does not load)."""
        self._path=new_path
        self.update_p_index()

    def update_p_index(self):
        """Recompute ``p_index`` from the current path.

        The value is the number of ``"//"``-separated parts of the path's
        parent directory, plus one.  It is forwarded to the ``lc`` sheet
        readers; what they do with it is defined there.
        """
        self.p_index=len(os.path.split(self._path)[0].split("//"))+1

    def load_path(self,path):
        """Switch to ``path`` and load it immediately."""
        self.update_path(path)
        self.load()

    def get_Sum(self):
        """Return the concatenated summary frame, or ``[]`` if nothing is loaded."""
        return self._pd_sum

    def get_detail(self):
        """Return the concatenated detail frame, or ``[]`` if nothing is loaded."""
        return self._pd_detail

    def load(self):
        """Read every summary/detail sheet and keep them only if all ids match.

        If either frame contains car ids that cannot be matched against the
        vehicle table, the load is considered failed and both results are
        reset to ``[]`` (the offending rows are printed by ``clean_data``).
        """
        # Drop any previous result first; it is only replaced on success.
        self._pd_sum=[]
        self._pd_detail=[]
        result_=lc.excel2Dataframe(self._path,target_by="车号",date_=self._date)
        sum_,detail_=[],[]
        for item in result_:
            sum_+=item[0]
            detail_+=item[1]
        sum_frames=[
            lc.excel2Dataframe_Sum(item["filepath"],self.p_index,item["sheetname"],"车号")
            for item in tqdm(sum_)
        ]
        detail_frames=[
            lc.excel2Dataframe_Detail(item["filepath"],self.p_index,item["sheetname"],"车号")
            for item in tqdm(detail_)
        ]
        pd_sum=pd.concat(sum_frames)
        pd_detail=pd.concat(detail_frames)
        # `or` short-circuits: the detail frame is only checked when the
        # summary frame passed.
        if self.clean_data(pd_sum)==-1 or self.clean_data(pd_detail)==-1:
            return
        self._pd_sum=pd_sum
        self._pd_detail=pd_detail

    def get_dismatchs(self,df_source):
        """Return the rows of ``df_source`` whose car id is not a known vehicle.

        ``df_source`` is left-joined to the vehicle table on
        ``car_id == sub_car_id``; rows without a partner are returned with the
        columns ``route_x``, ``team`` and ``car_id_x``.  The returned index is
        the index of the joined frame.
        """
        # Join the data with the vehicle information.
        result_data=pd.merge(df_source,self._car_value,left_on='car_id',right_on='sub_car_id',how='left')
        # Rows that found no vehicle have a null sub_car_id.
        outer_result=result_data[pd.isnull(result_data['sub_car_id'])]
        # Independent copy so callers can modify it without pandas warnings.
        return outer_result[["route_x","team","car_id_x"]].copy()

    def filter_data(self,df_source,dismacths):
        """Resolve abbreviated car ids that are unambiguous within their route.

        For every unmatched row, the vehicles of the same route are searched
        for a ``sub_car_id`` that contains the given id as a substring.  When
        exactly one vehicle qualifies, the id in column 0 of ``df_source`` is
        replaced by that vehicle's ``sub_car_id``.  ``df_source`` is modified
        in place and returned.

        Assumes the index of ``dismacths`` equals row positions in
        ``df_source`` (true when ``df_source`` has a 0..n-1 index and the
        vehicle table has unique ``sub_car_id`` values) -- TODO confirm.
        """
        if len(dismacths)==0:
            return df_source
        resolved=dismacths["car_id_x"].copy()
        for label,car_id_,route_ in zip(dismacths.index,dismacths["car_id_x"],dismacths["route_x"]):
            same_route=self._car_value[self._car_value["route"]==route_]
            # Literal substring search: the id is data, not a regex; vehicles
            # without a sub_car_id never count as a hit.
            hits=same_route[same_route["sub_car_id"].str.contains(car_id_,regex=False,na=False)]
            if len(hits)==1:
                resolved.loc[label]=hits["sub_car_id"].iloc[0]
        # Write the resolved ids straight into the source frame.
        df_source.iloc[list(dismacths.index),0]=resolved.values
        return df_source

    @staticmethod
    def _normalize_car_id(value):
        """Return ``value`` as a cleaned-up car id string."""
        text=str(value)
        # Keep only what follows a route ("路"), line ("线") or slash prefix.
        for separator in ("路","线","/"):
            text=text.split(separator)[-1].strip()
        # Full-width "F" (U+FF26) -> ASCII "F".
        text=text.replace("\uff26","F")
        # Ids ending in F or D are left-padded with zeros to 6 characters.
        if text.endswith(("F","D")):
            text=text.zfill(6)
        return text

    def clean_data(self,df_source):
        """Normalise the ids in column 0 of ``df_source`` and validate them.

        ``df_source`` is modified in place: column 0 is normalised, the index
        is reset to 0..n-1 and ids that ``filter_data`` can resolve are
        replaced.  Remaining unmatched rows are printed.

        Returns
        -------
        int
            ``0`` when every row matches a vehicle, ``-1`` otherwise.
        """
        first_col=df_source.columns[0]
        # Assign by label so the whole column is replaced, whatever its dtype.
        df_source[first_col]=df_source.iloc[:,0].apply(self._normalize_car_id)
        df_source.index=range(len(df_source))
        dismatchs=self.get_dismatchs(df_source)
        fd=self.filter_data(df_source,dismatchs)
        dismatchs=self.get_dismatchs(fd)
        if len(dismatchs)==0:
            return 0
        print("有数据无法匹配：")
        for route_,car_id_ in zip(dismatchs['route_x'],dismatchs['car_id_x']):
            print(str(route_)+"线路下的"+str(car_id_)+"无法识别")
        # Summary is reported once, after all offending rows.
        print("合计%d个项无法识别，请修正后重试"%(len(dismatchs)))
        return -1
            
        
        
        

In [99]:
import datetime

In [100]:
# Create the manager for the given date and source folder. The constructor only
# stores these, computes p_index and reads the vehicle table; no Excel is loaded yet.
dm=DataManager(datetime.date(2019,8,1),"example_data//feedback//8月原始")

In [101]:
# Point the manager at the folder (same path as passed to the constructor) and
# load it; the two tqdm bars in the output are the summary and detail sheet loops.
dm.load_path("example_data//feedback//8月原始")

100%|██████████████████████████████████████████████████████████████████████████████████| 53/53 [00:04<00:00, 11.15it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 58/58 [00:05<00:00, 11.46it/s]


In [108]:
# Row count of the loaded summary frame (0 if the load was rejected, since
# the manager then holds an empty list).
len(dm.get_Sum())

657

In [117]:
# Left-join summary to detail on (car_id, route): every summary row is kept and
# summary rows with no detail counterpart get NaN in the detail columns.
result_data=pd.merge(dm.get_Sum(),dm.get_detail(),on=['car_id','route'],how='left')

In [118]:
# Show the joined frame.
result_data

Unnamed: 0,car_id,mileage,oil_cost,maintain,follow,inspection,route,team_x,fix_days,stop_days,work_days,engage_mileage,public_mileage,shunt_mileage,fault_times,fault_minutes,team_y
0,B723,6273.6,995.71,5.0,0.0,0.0,501,1,0.0,2.0,29.0,0.0,0.0,371.6,0.0,0.0,1
1,B728,3829.6,591.58,5.0,0.0,0.0,501,1,0.0,14.0,17.0,0.0,0.0,228.6,1.0,104.0,1
2,B835,6925.2,980.76,0.0,0.0,0.0,501,1,0.0,1.0,30.0,0.0,0.0,386.2,1.0,36.0,1
3,B972,5940.9,863.56,0.0,0.0,0.0,501,1,0.0,0.0,31.0,0.0,0.0,1689.9,1.0,93.0,1
4,00519F,5857.1,1868.83,0.0,0.0,0.0,k2,1,0.0,4.0,27.0,0.0,0.0,357.1,5.0,1821.0,1
5,00568F,6683.4,1833.99,5.0,0.0,0.0,k2,1,0.0,2.0,29.0,0.0,0.0,563.4,1.0,227.0,1
6,01100F,5768.6,1560.72,0.0,0.0,0.0,k2,1,0.0,3.0,28.0,0.0,0.0,568.6,1.0,198.0,1
7,01110F,4612.8,1499.16,0.0,0.0,0.0,k2,1,0.0,1.0,30.0,0.0,0.0,212.8,1.0,82.0,1
8,01153F,7398.3,2322.19,0.0,0.0,0.0,k2,1,0.0,0.0,31.0,0.0,0.0,318.3,0.0,0.0,1
9,01180F,7033.9,2066.94,0.0,0.0,0.0,k2,1,0.0,1.0,30.0,0.0,0.0,373.9,2.0,349.0,1


In [120]:
# Rows whose work_days is null after the left join. Presumably work_days comes
# from the detail frame, so these are summary rows without a detail row -- verify.
outer_result=result_data[pd.isnull(result_data['work_days'])]

In [121]:
# Show the rows selected by the null-work_days filter.
outer_result

Unnamed: 0,car_id,mileage,oil_cost,maintain,follow,inspection,route,team_x,fix_days,stop_days,work_days,engage_mileage,public_mileage,shunt_mileage,fault_times,fault_minutes,team_y
436,9822,0.0,232.0,0.0,232.0,0.0,90,4,,,,,,,,,
