# imports

In [None]:
from fastai.vision.all import *

In [None]:
from fastai.callback.progress import *

In [None]:
from fastai.callback.tensorboard import *

In [None]:
import torchvision

In [None]:
# export
#from FLAI.detect_symbol.exp import databunch as databunch_detsym
from FLAI.detect_symbol.exp import resnet_ssd as resnet_ssd_detsym
from FLAI.detect_symbol.exp import anchors_loss_metrics as anchors_loss_metrics_detsym
from FLAI.detect_symbol.exp import optimizer as optimizer_detsym
#from FLAI.detect_symbol.exp import init_model as init_model_detsym
#from FLAI.detect_symbol.exp import tensorboard_callback
#from FLAI.detect_symbol.exp import scheduling_train

In [None]:
sys.path.append('../sick_tree_detection')
from exp import anchors_loss_metrics as anchors_loss_metrics_sicktree
from exp import resnet_ssd as resnet_ssd_sicktree

# functions

## 应对无目标的情况

In [None]:
def bb_pad_intlbl(samples, pad_idx=0):
    """Pad the bboxes and labels of all `samples` to one common length.

    Every sample is first rebuilt as `(s[0], *clip_remove_empty(*s[1:]))`.
    Each resulting `(img, bbox, lbl)` is then padded so that `bbox` has
    `max_len` rows (zero rows appended) and `lbl` has `max_len` entries
    (`pad_idx` appended), where `max_len` is the largest label count in the
    batch. Samples without any target (empty `lbl`) are handled explicitly.

    Returns the list of padded `(img, bbox, lbl)` tuples.
    """
    cleaned = [(s[0], *clip_remove_empty(*s[1:])) for s in samples]
    max_len = max(len(c[2]) for c in cleaned)

    def _pad_sample(img, bbox, lbl):
        n_box_pad = max_len - bbox.shape[0]
        bbox = torch.cat([bbox, bbox.new_zeros(n_box_pad, 4)])
        if lbl.shape[0] == 0:
            # No target in this sample. Per the original author's note,
            # concatenating padding onto the empty `lbl` still produced
            # floating-point labels (even with dtype=torch.int on the padding),
            # which later failed when the labels were used as indices, so the
            # all-padding label tensor is built directly as an integer tensor.
            return img, bbox, lbl.new_zeros(max_len, dtype=torch.int) + pad_idx
        lbl_pad = lbl.new_zeros(max_len - lbl.shape[0]) + pad_idx
        return img, bbox, torch.cat([lbl, lbl_pad])

    return [_pad_sample(*c) for c in cleaned]

# Bounding-box TransformBlock whose batches are padded by `bb_pad_intlbl`
# (passed to the dataloaders as the `before_batch` step).
# NOTE(review): this rebinds the name `BBoxBlock`, which presumably is also
# provided by the `fastai.vision.all` star import -- confirm the shadowing is
# intended.
BBoxBlock = TransformBlock(type_tfms=TensorBBox.create, item_tfms=PointScaler, dls_kwargs = {'before_batch': bb_pad_intlbl})

## 获取BBox和label  
BBox和label是分开获取的。另外，fastai v2中BBox的坐标顺序改成了先x后y，使用v1版fastai的数据集时需要转换顺序。

In [None]:
#export
# Module-level regexes for parsing the annotation table.
pat_coord = re.compile(r'\d+')  # runs of digits
pat_clas = re.compile(r'\w+')  # runs of word characters
# Image key of the form "<word>/<digits>.<ext>" anchored at the end of a path.
# NOTE: the second assignment immediately replaces the first, so only the
# `.jpg` pattern is in effect; the `.png` variant is dead.
pat_imgName = re.compile(r'(\w+/\d+\.png)$')
pat_imgName = re.compile(r'(\w+/\d+\.jpg)$')
def get_label_from_df(fn, df, pat_imgName, box_col, cat_col):
    """Return the category names stored in `df` for the image `fn`.

    Args:
        fn: image path (anything `str()` can convert).
        df: DataFrame indexed by the image key that `pat_imgName` extracts.
        pat_imgName: compiled regex; its first match in `str(fn)` is used as
            the row label in `df`.
        box_col: not used here; kept so the signature matches
            `get_boxes_from_df`.
        cat_col: name of the column holding the category string.

    Returns:
        List of the runs of word characters found in `df.loc[key, cat_col]`.

    Raises:
        IndexError: `pat_imgName` does not match `fn`.
        KeyError: the extracted key is not in `df`'s index.
    """
    # Use the locally compiled pattern instead of silently depending on a
    # module-level global, so the function is self-contained.
    pat_cat = re.compile(r'\w+')
    img_key = pat_imgName.findall(str(fn))[0]
    return pat_cat.findall(df.loc[img_key, cat_col])

def get_boxes_from_df(fn, df, pat_imgName, box_col, cat_col):
    """Return the bounding boxes stored in `df` for the image `fn`.

    Args:
        fn: image path (anything `str()` can convert).
        df: DataFrame indexed by the image key that `pat_imgName` extracts.
        pat_imgName: compiled regex; its first match in `str(fn)` is used as
            the row label in `df`.
        box_col: name of the column holding the box coordinates as text; every
            run of digits is one coordinate, four coordinates per box.
        cat_col: name of the column holding the category string; only used to
            check that there is exactly one category per box.

    Returns:
        List of 4-element lists of ints. Within each box the stored order
        `[c0, c1, c2, c3]` is returned as `[c1, c0, c3, c2]`.

    Raises:
        IndexError: `pat_imgName` does not match `fn`.
        KeyError: the extracted key is not in `df`'s index.
        ValueError: the number of coordinates is not a multiple of 4.
        AssertionError: the number of boxes and categories differ.
    """
    pat_num = re.compile(r'\d+')
    pat_cat = re.compile(r'\w+')
    img_key = pat_imgName.findall(str(fn))[0]

    coords = [int(tok) for tok in pat_num.findall(df.loc[img_key, box_col])]
    boxes = np.array(coords, dtype=np.int32).reshape(-1, 4)

    # fastai v2 expects x before y, while this dataset still uses the v1
    # y-first order, so swap each coordinate pair.
    boxes = boxes[..., [1, 0, 3, 2]].tolist()

    cats = pat_cat.findall(df.loc[img_key, cat_col])
    # Raised explicitly (rather than via `assert`) so the consistency check is
    # not stripped when Python runs with -O; the exception type is unchanged.
    if len(boxes) != len(cats):
        raise AssertionError('length of bounding boxes and categories not equeal.')

    return boxes

## 生成DataBlock
作用相当于之前的DataBunch  
item_tfms=Resize(128) 作用类似v1里面的after_open，可以对图片进行一些处理，但是这个处理无法作用在y上,如果需要改变图片尺寸连带y一起改变，应该在aug_transforms里面指定size参数

In [None]:
def get_db():
    """Build the DataBlock for images with bounding boxes and box labels.

    Reads the module-level `df` and `pat_imgName`; boxes come from
    `get_boxes_from_df` and labels from `get_label_from_df`, both looking at
    the 'box' and 'cls' columns. The first block (the image) is the only
    model input (`n_inp=1`).
    """
    lookup_kwargs = dict(df=df, pat_imgName=pat_imgName, box_col='box', cat_col='cls')
    box_getter = partial(get_boxes_from_df, **lookup_kwargs)
    label_getter = partial(get_label_from_df, **lookup_kwargs)

    # item_tfms=Resize(128) and batch_tfms=aug_transforms(size=(128,128)) are
    # deliberately left disabled here.
    return DataBlock(
        blocks=(ImageBlock, BBoxBlock, BBoxLblBlock),
        get_items=get_image_files,
        splitter=RandomSplitter(),
        get_y=[box_getter, label_getter],
        n_inp=1,
    )

## callback

In [None]:
# Extra metric entry (placeholder).
# Author's notes: it is invoked at the end of every batch with the pred and y
# of the current validation data; it cannot be deferred to the end of the
# epoch.
def ext_met(pred, yb0, yb1):
    """Return the constant dummy metric value 1.0, ignoring all arguments."""
    dummy_value = 1.0
    return dummy_value


In [None]:
# Computation over the extra validation set(s); default `ext_cal_func` of
# ExtValidCal_met. Users put their own computation here.
def ext_valid_cal(learn, ext_valids):
    """Return the placeholder result 3.14, ignoring `learn` and `ext_valids`."""
    placeholder = 3.14
    return placeholder

In [None]:
# Computation over several extra validation sets; default `ext_cal_func` of
# ExtValidCal_cb. `ext_valids` is the list of validation sets and the return
# value must be a list of the same length. Users put their own computation
# here.
def ext_multi_valid_cal(learn, ext_valids):
    """Return one placeholder value per entry of `ext_valids`.

    The k-th entry (1-based) yields `k * 3.14`. Raises AssertionError when
    `ext_valids` is not a list.
    """
    assert isinstance(ext_valids, list)
    return [position * 3.14 for position, _ in enumerate(ext_valids, start=1)]

In [None]:
class ExtValidCal_cb(TrainEvalCallback):
    '''
    Callback that adds extra, user-computed columns to the training log.

    In `before_fit` the column titles are spliced into
    `learn.recorder.metric_names` at index `POS`; in `after_validate` the
    values returned by `ext_cal_func(learn, ext_valids)` are spliced into
    `learn.recorder.log` at the same index. Several custom columns can be
    added at once.

    Args:
        ext_valids: non-empty list of extra validation sets, one log column
            each; passed unchanged to `ext_cal_func`.
        ext_titles: column titles, one per entry of `ext_valids`. When empty
            or None, 'ext_valid_1', 'ext_valid_2', ... are generated.
        ext_cal_func: callable `(learn, ext_valids)` returning one value per
            entry of `ext_valids`.
        flag: stored on the instance; not otherwise used by this class.

    Raises:
        AssertionError: `ext_valids` is empty, or `ext_titles` is non-empty
            with a length different from `ext_valids`.

    NOTE(review): this subclasses TrainEvalCallback, so it presumably also
    inherits that callback's own event handlers -- confirm that registering it
    next to the Learner's default TrainEvalCallback does not run that
    bookkeeping twice.
    '''
    run_before = ProgressCallback
    # Index at which the new columns are inserted. By default the columns in
    # front are epoch, train_loss, valid_loss; change this if the layout
    # differs.
    POS = 3

    def __init__(self, ext_valids = None, ext_titles = None
                 , ext_cal_func = ext_multi_valid_cal
                 , flag = 'callback__after_validate'):
        # None instead of mutable `[]` defaults: the generated titles used to
        # be appended to the shared default list (or to the caller's list),
        # leaking state into later instances.
        ext_valids = [] if ext_valids is None else ext_valids
        ext_titles = [] if ext_titles is None else list(ext_titles)
        assert len(ext_valids) > 0
        assert len(ext_titles) == 0 or len(ext_titles) == len(ext_valids)
        if not ext_titles:
            ext_titles = ['ext_valid_%d' % (i + 1) for i in range(len(ext_valids))]

        self.ext_cal_func = ext_cal_func
        self.flag = flag
        self.ext_valids = ext_valids
        self.ext_titles = ext_titles

    def before_fit(self, *args):
        "Insert the extra column titles into the recorder's header."
        names = self.learn.recorder.metric_names
        self.learn.recorder.metric_names = \
            names[:self.POS] + self.ext_titles + names[self.POS:]

    def after_validate(self, *args):
        "Compute the extra values and insert them into the current log row."
        # list() lets `ext_cal_func` return any sequence, not only a list.
        extra = list(self.ext_cal_func(self.learn, self.ext_valids))
        log = self.learn.recorder.log
        self.learn.recorder.log = log[:self.POS] + extra + log[self.POS:]
        
    

In [None]:
class ExtValidCal_met(TrainEvalCallback):
    '''
    Object meant to be used both as a metric (passed via `metrics=[...]`) and
    as a callback (added with `learn.add_cb`), writing one custom validation
    result into column POS of `learn.recorder.log`.

    Author's notes on where a custom validation can be hooked in:

    1. In after_validate, with this object running after Recorder (the
       default order), because at that point recorder.log does not yet
       contain the validation results. To be able to change the column
       header, set
           self.run_before = ProgressCallback
       i.e. run after Recorder and before ProgressCallback.

    2. It can also be placed in after_epoch. The notebook's training-result
       display is updated in Recorder's after_epoch, so the custom result
       must be added before that, hence run_before=Recorder.
       This way, however, the header of the displayed table
       (learn.recorder.metric_names) cannot be changed. When the learner is
       created with metrics=[xx], only a function can serve as the header
       name (the function returned when 'func' is fetched through getattr;
       its name is shown as the header).

    3. Any other position gets overwritten, because in after_batch every
       metric given at Learner creation is called and its return value
       overwrites the entry -- and that happens on every batch.
    '''
    def __init__(self, ext_cal_func = ext_valid_cal, ext_valids = None
                 , ext_title = None, flag = 'metric__after_validate'):
        # Only the two metric modes are accepted.
        assert flag in ['metric__after_validate', 'metric__after_epoch'] \
                        , '无效flag:' + flag
        self.ext_cal_func = ext_cal_func
        self.flag = flag
        self.ext_valids = ext_valids
        if ext_title != None:
            assert isinstance(ext_title, str), 'title必须是字符串'
        # When no title is given, fall back to the name of `ext_valid`.
        self.ext_title = ext_title if ext_title is not None else self.ext_valid.__name__
        # Set to True once the 'func' attribute has been requested (see
        # __getattr__); checked in __call__ on 'before_epoch'.
        self.attr_called = False

        if flag == 'metric__after_epoch':
            self.run_before = Recorder
            # The header cannot be changed in this mode.
            assert ext_title is None, '无法修改title'
        else:
            self.run_before = ProgressCallback

    def __getattr__(self, k):
        # Only reached when normal attribute lookup fails. Every such lookup
        # (except the three listed names) is printed for debugging, and
        # unknown attributes resolve to None instead of raising
        # AttributeError.
        if k not in ['toward_end', 'run_before', 'run_after']:
            print('getattr', k)
        # Author's note: fastai/learner.py(371) name() fetches 'func' to get
        # the metric's display name.
        if 'func' == k:
            self.attr_called = True
            return self.ext_valid
        return None

    def __call__(self, p2, *args):
        # Dual-purpose entry point: a string first argument is treated as a
        # callback event name; anything else is treated as the per-batch
        # metric call.
        # Index of the added column. By default the columns in front are
        # epoch, train_loss, valid_loss; change this if the layout differs.
        POS = 3
        if isinstance(p2, str):
            #print(p2)
            if p2 == 'after_epoch' and self.flag == 'metric__after_epoch':
                assert str(self.run_before) == str(Recorder), '必须指定run_before=Recorder'
                self.learn.recorder.log[POS] = self.ext_valid()

            if p2 == 'after_validate' and self.flag == 'metric__after_validate':
                assert str(self.run_before) == str(ProgressCallback), '必须指定run_before=ProgressCallback'
                self.learn.recorder.log[POS] = self.ext_valid()

            # Only the metric__after_validate mode can change the header.
            if p2 == 'before_fit':
                if self.flag == 'metric__after_validate':
                    self.learn.recorder.metric_names[POS] = self.ext_title
                # NOTE(review): __init__ rejects this flag value, so this
                # branch only runs if `flag` is reassigned after construction.
                elif self.flag == 'callback__after_validate':
                    self.learn.recorder.metric_names  = self.learn.recorder.metric_names[:POS] \
                     + [self.ext_title] + self.learn.recorder.metric_names[POS:]

            if p2 == 'before_epoch':
                # 'func' must have been requested by now, i.e. the object was
                # passed through the learner's metrics argument.
                assert self.attr_called, '需要在创建learner的时候用metric参数指定'

            return 'fake_ret'
        else:
            # Per-batch call made for every entry in the metrics list; the
            # arguments are the same as for ext_met.
            pass
        # Author's note: without learn.add_cb this object would behave just
        # like a standalone metric function (ext_met), hence the check.
        assert self.learn is not None, '需要调用learn.add_cb'
        return -1

    def ext_valid(self):
        # Delegate to the user-supplied computation.
        #return 'ext_valids_ret'
        return self.ext_cal_func(self.learn, self.ext_valids)

# test

In [None]:
src_path = '/home/dev/jupyter/detect_symbol/data/ds_20200429/'

In [None]:
path = src_path + 'images'

In [None]:
path = Path(path)

In [None]:
path.ls()

In [None]:
# Load the annotation table and re-index it by the 'image' column so rows can
# be looked up by image key via `df.loc[...]`.
df = pd.read_csv(src_path + 'gends.csv',index_col=0)
df = df.set_index('image')
df.head()

In [None]:
syms = get_db()

在docker中如果启动容器时没有设置`--shm-size`，又没有指定num_workers=0，DataLoader会使用_MultiProcessingDataLoaderIter（多进程加载），导致如下错误：  
Unable to write to file </torch_18692_1954506624>
https://discuss.pytorch.org/t/unable-to-write-to-file-torch-18692-1954506624/9990

在fastai v1中对应的错误是内存溢出。

In [None]:
dls = syms.dataloaders(path, bs = 16, num_workers = 0)

In [None]:
dls.show_batch(max_n = 4)

In [None]:
#syms.summary(path)

In [None]:
#dts = syms.datasets(path)

In [None]:
#dts[1]

In [None]:
#df.loc['images/02364.jpg']

In [None]:
# Prefer the GPU, but fall back to the CPU so the notebook also runs on
# machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
#device = torch.device('cpu')

## 模型和训练-符号检测

In [None]:
model = resnet_ssd_detsym.get_resnet34_1ssd()

In [None]:
# Build the grid/anchor helper passed to the loss. Of the six values returned
# by get_ga666() only the 1st and 4th (bound to gvs and avs) are used.
gvs,_,_,avs,_,_ = anchors_loss_metrics_detsym.get_ga666()
gaf = anchors_loss_metrics_detsym.GridAnchor_Funcs(gvs,avs,device)

In [None]:
loss_func = partial(anchors_loss_metrics_detsym.yolo_L, gaf=gaf, conf_th=1, clas_weights=None, lambda_nconf=10)

In [None]:
#learn = cnn_learner(dls, model, pretrained=False)
learn = Learner(dls, model, loss_func = loss_func, device = device)

### 添加辅助训练的callbacks

#### CSVLogger

In [None]:
logger = CSVLogger('logger.csv')

In [None]:
learn.add_cb(logger)

#### SaveModelCallback
v1中的name参数名换成了fname

In [None]:
i = 0
# fastai v2's SaveModelCallback takes no `mode`/`every` keywords (those are the
# v1 names): "lower is better" is expressed with comp=np.less, and saving only
# on improvement is the default behaviour (every_epoch=False).
autoSave = SaveModelCallback(monitor='valid_loss', comp=np.less, fname=f'run_{i}')

In [None]:
learn.add_cb(autoSave)

#### TensorBoardCallback  
v2中自带了这个类，不需要自己创建。

log_preds参数表示是否记录预测结果。目前网络输出的预测结果与数据集的格式不同，导致在log_preds的过程中出现异常。

异常情况：  
b_out是由网络输出得到的结构(TensorImage,Tensor(bs,ac,2),Tensor(bs,ac,1),Tensor(bs,ac,17),Tensor(bs,ac,2))(其中ac是anchors数量)，被当作和数据集一致的结构(TensorImage, TensorBBox, TensorMultiCategory)处理。  
/root/miniconda3/envs/fastai-v2/lib/python3.8/site-packages/fastai/data/core.py(107)show_results()=>
x1,y1,outs = self.show_batch(b_out, max_n=max_n, show=False)

In [None]:
# NOTE(review): the notes above this cell say that logging predictions
# currently raises because the network output does not have the dataset's
# structure, yet log_preds is enabled here -- confirm whether this is meant to
# reproduce the exception or should be log_preds = False.
tbcb = TensorBoardCallback(log_preds = True)

In [None]:
learn.add_cb(tbcb)

### 训练

In [None]:
learn.fit(1)

## 单独验证集的计算

### Callback的方式增加多余的字段  
这是最合适的实现方式，可以添加多列，列名可以自定义，在必要的时候才进行计算。

In [None]:
# Demo of the callback approach: two placeholder "validation sets" (1 and 2)
# with custom column titles, attached to a freshly created Learner.
evc = ExtValidCal_cb(ext_valids = [1,2], ext_titles = ['一个', '两个'])
learn = Learner(dls, model, loss_func = loss_func, device = device)
learn.add_cb(evc)
learn.fit(1)

### metric添加多余的字段  
每个batch之后都会被调用到，会造成重复计算

In [None]:
learn = Learner(dls, model, loss_func = loss_func, device = device, metrics=[ext_met])
learn.fit(1)

### metric添加多余字段，另一种方式，可以修改字段题头  
虽然没有每个batch之后都有的重复计算，但是只能添加一个字段。

In [None]:
# Alternative (disabled): the after_epoch mode, which cannot change the column
# header.
#evc = ExtValidCal_met(flag = 'metric__after_epoch')
evc = ExtValidCal_met(ext_title = 'mytitle')
# The same object is registered twice on purpose: as a metric (so that a
# column is reserved for it) and as a callback (so that it receives events).
#learn = Learner(dls, model, loss_func = loss_func, device = device)
learn = Learner(dls, model, loss_func = loss_func, device = device, metrics=[evc])
learn.add_cb(evc)
learn.fit(1)

### 用病树检测的模型试一下

In [None]:
model = resnet_ssd_sicktree.get_resnet18_1ssd(num_classes = 17)

In [None]:
#model.load_state_dict(torch.load('../sick_tree_detection/models/pretrained_res18_1ssd_detsym17clas.pth'))

In [None]:
!ls ../sick_tree_detection/models

In [None]:
# Grid/anchor helper from the sick-tree project, configured with a 776x776
# figure size and a single 49x49 grid; gvs/avs are read back from it.
gaf = anchors_loss_metrics_sicktree.GridAnchor_Funcs(fig_hw = (776,776)
                         , grids = [(49,49)]
                         , device = device)
gvs, avs = gaf.gvs, gaf.avs

In [None]:
# Per-class sample counts (17 entries) turned into class weights and moved to
# `device` as a float tensor.
# NOTE(review): the loss_func built in the next cell passes clas_weights=None,
# so `weights` appears to be unused -- confirm whether it should be passed.
clas_cnts = [11191, 712, 1362, 224, 8710, 1212, 1139, 8686, 857, 2176, 6175, 1869, 14794, 1435, 13628, 9618, 1462]
weights = anchors_loss_metrics_detsym.get_clasWeights(clas_cnts,10)
weights = tensor(weights).float().to(device)

In [None]:
loss_func = partial(anchors_loss_metrics_sicktree.yolo_L, gaf=gaf, conf_th=1, clas_weights=None, lambda_nconf=10)

In [None]:
learn = Learner(dls, model, loss_func = loss_func, device = device)

In [None]:
learn.lr_find()

In [None]:
learn.fit(10)

## 实验

In [None]:
type(dls)

In [None]:
def dbg():
    """Break into pdb, then build the dataloaders so the call can be stepped through."""
    import pdb
    pdb.set_trace()
    dls = syms.dataloaders(path, bs=16, num_workers=0)
# dbg()


In [None]:
# Debug hook: breaks into pdb whenever `loss_func.decodes(preds)` is called
# and returns None.
# NOTE(review): presumably fastai looks up `decodes` on the loss function when
# decoding predictions -- confirm which code path triggers it.
def lossfunc_decodes(preds):
    import pdb;pdb.set_trace()

# Attach the hook as an attribute of the loss-function object.
loss_func.decodes = lossfunc_decodes

In [None]:
def dbg():
    """Break into pdb, then run one epoch of training so it can be stepped through."""
    import pdb
    pdb.set_trace()
    learn.fit(1)
# dbg()

In [None]:
bb1 = torch.rand(2, 2)
bb1

In [None]:
bb2 = torch.rand(2, 2)
bb2

In [None]:
torch.cat([bb1, bb2], dim = 1)

In [None]:
bb = torch.rand(16, 64601, 2)

In [None]:
tbb = TensorBBox(0)
dir(tbb)