In [16]:
import loadexcel as lc
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
from data_convert import*
from tqdm import tqdm
import feedback_option
import loaddb as ld
import os

class UnMatchError(Exception):
    def __init__(self,ErrorInfo):
        super().__init__(self) #初始化父类
        self.errorinfo=ErrorInfo
    def __str__(self):
        return self.errorinfo

class DataManager():
    
    def __init__(self,date,path):
        self._date=date
        self._path=path
        self.update_p_index()
        #读取车辆信息
        self._car_value=ld.get_car_info()
    def getPath(self):
        return self._path
    
    def get_data(self):
        return self._data
    
    def update_path(self,new_path):
        self._path=new_path
        self.update_p_index()
        
    def update_p_index(self):
        self.p_index=len(os.path.split(self._path)[0].split("//"))+1
        
    def load_path(self,path):
        self._path=path
        self.update_p_index()
        self.load()
    
    def get_Sum(self):
        return self._pd_sum
    
    def get_detail(self):
        return self._pd_detail
    
    #加载数据，如果有异常项，判断加载失败并重置    
    def load(self):
        result_=lc.excel2Dataframe(self._path,target_by="车号",date_=self._date)
        sum_,detail_=[],[]
        self._pd_sum=[]
        self._pd_detail=[]
        for item in result_:
            sum_+=item[0]
            detail_+=item[1]
        for item in tqdm(sum_):
            self._pd_sum.append(lc.excel2Dataframe_Sum(item["filepath"],self.p_index,item["sheetname"],"车号"))
        for item in tqdm(detail_):
            self._pd_detail.append(lc.excel2Dataframe_Detail(item["filepath"],self.p_index,item["sheetname"],"车号"))
        self._pd_sum=pd.concat(self._pd_sum)
        self._pd_detail=pd.concat(self._pd_detail)
        if self.clean_data(self._pd_sum)==-1:
            self.reset_data()
            return
        if self.clean_data(self._pd_detail)==-1:
            self.reset_data()
            return
        self.join_data()
        
    #获取不匹配的数据    
    def get_dismatchs(self,df_source):
        #连接数据与车辆信息
        result_data=pd.merge(df_source,self._car_value,left_on='car_id',right_on='sub_car_id',how='left')
        #找出无法匹配的车辆
        outer_result=result_data[pd.isnull(result_data['sub_car_id'])]
        dismacths=outer_result[["route_x","team","car_id_x"]]
        return dismacths
    
    #过滤一些系统能够识别的数据
    def filter_data(self,df_source,dismacths):
        for i in range(len(dismacths)) :
            _car=dismacths.iloc[i]
            car_id_=_car["car_id_x"]
            route_=_car["route_x"]
            _r=self._car_value[self._car_value["route"]==route_]
            result_r=_r[_r["sub_car_id"].str.contains(car_id_)]
            if len(result_r)==1:
                dismacths.iloc[i]["car_id_x"]=result_r["sub_car_id"].iloc[0]
        df_source.iloc[dismacths.index,0]=dismacths['car_id_x']
        return df_source
    
    #重置数据
    def reset_data(self):
        self._pd_sum=[]
        self._pd_detail=[]
        
    #清理
    def clean_data(self,df_source):
        #去掉路
        remove_route=lambda x:x.split("路")[-1].strip()
        #去掉线
        remove_line=lambda x:x.split("线")[-1].strip()
        #去掉斜杆
        remove_diagonal=lambda x:x.split("/")[-1].strip()
        #将F改成正常拼写
        replaceF=lambda x:x.replace("Ｆ","F")
        #如果以F和D结尾，则补齐6位
        zfill_ = lambda x: x.zfill(6) if x.endswith("F")|x.endswith("D") else x
        #将所选列转为字符
        df_source.iloc[:,0]=df_source.iloc[:,0].apply(str)
        df_source.iloc[:,0]=df_source.iloc[:,0].apply(remove_route)
        df_source.iloc[:,0]=df_source.iloc[:,0].apply(remove_line)
        df_source.iloc[:,0]=df_source.iloc[:,0].apply(remove_diagonal)
        #将所选列转为剔除空格
        df_source.iloc[:,0]=df_source.iloc[:,0].str.strip()
        df_source.iloc[:,0]=df_source.iloc[:,0].apply(replaceF)
        df_source.iloc[:,0]=df_source.iloc[:,0].apply(zfill_)
        df_source.index=range(len(df_source))
        dismatchs=self.get_dismatchs(df_source)
        fd=self.filter_data(df_source,dismatchs)
        dismatchs=self.get_dismatchs(fd)
        if len(dismatchs)>0:
            try:
                raise UnMatchError("有数据无法匹配：")
            except UnMatchError as e:
                print(e)
                for i in range(len(dismatchs)):
                    item=dismatchs.iloc[i]
                    print(item['route_x']+"线路下的"+item['car_id_x']+"无法识别")
                print(f"合计%d个项无法识别，请修正后重试"%(len(dismatchs)))
            return -1
        if len(dismatchs)==0:
            
            return 0
    
    def print_dismatchs(self,dismatchs,dsc):
        print("有数据无法匹配：")
            for i in range(len(dismatchs)):
                item=dismatchs.iloc[i]
                print(f"%s线路下的%s缺汇%s项"%(item['route'],item['car_id'],dsc))
            print(f"合计%d个项无法识别，请修正后重试"%(len(dismatchs)))
        
    def join_data(self):
        result_data=pd.merge(self.get_Sum(),self.get_detail(),on=['car_id','route'],how='outer')
        dismatchs=result_data[pd.isnull(result_data['work_days'])]
        if len(dismatchs):
            print_dismatchs(dismatchs,"汇总")
        dismatchs=result_data[pd.isnull(result_data['mileage'])]
        if len(dismatchs):
            try:
                raise UnMatchError("有数据无法匹配：")
            except UnMatchError as e:
                print(e)
                for i in range(len(dismatchs)):
                    item=dismatchs.iloc[i]
                    print(item['route']+"线路下的"+item['car_id']+"缺统计表项")
                    print(f"合计%d个项无法识别，请修正后重试"%(len(dismatchs)))
            #self.reset_data()
            return -1
            
        
        
        

IndentationError: unexpected indent (<ipython-input-16-d5dd928c2f77>, line 141)

In [14]:
import datetime

In [8]:
dm=DataManager(datetime.date(2019,8,1),"example_data//feedback//8月原始")

In [9]:
dm.load_path("example_data//feedback//8月原始")

100%|██████████████████████████████████████████████████████████████████████████████████| 53/53 [00:06<00:00,  6.66it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 58/58 [00:06<00:00,  8.40it/s]


有数据无法匹配：
90线路下的9822缺汇总表项
合计1个项无法识别，请修正后重试
有数据无法匹配：
35线路下的08859F缺统计表项
合计1个项无法识别，请修正后重试


In [5]:
result_data=pd.merge(dm.get_Sum(),dm.get_detail(),on=['car_id','route'],how='outer')
outer_result=result_data[pd.isnull(result_data['mileage'])]
outer_result

Unnamed: 0,car_id,mileage,oil_cost,maintain,follow,inspection,route,team_x,fix_days,stop_days,work_days,engage_mileage,public_mileage,shunt_mileage,fault_times,fault_minutes,team_y
656,08859F,,,,,,35,,0.0,9.0,22.0,0.0,0.0,396.6,0.0,0.0,4


In [12]:
print(f"%d hello%s world"%(10,"else"))

10 helloelse world
