In [101]:
import torch
from torch.utils.data import Dataset, DataLoader
import pickle
from torch.nn.utils.rnn import pad_sequence

In [102]:
class NERset(Dataset):
    def __init__(self):
        with open("textData.pkl", "rb") as f:
            self.data = pickle.load(f)

    def __getitem__(self, index):
        name = self.data[index]['name']
        char_tokens_tensors = self.data[index]['char_input']
        word_tokens_tensors = self.data[index]['word_input']
        label_ids = self.data[index]['char_tag']
        segments_tensor = torch.tensor([0]*char_tokens_tensors.shape[0])
        return (name,char_tokens_tensors,word_tokens_tensors,segments_tensor,label_ids)

    def __len__(self):
        return len(self.data)
    def create_mini_batch(self,samples):
        # sample[0]:name
        # sample[1]:char
        # sample[2]:word
        # sample[3]:segments
        # sample[4]:label
        
        name = [s[0] for s in samples]
        char_tokens_tensors = [s[1] for s in samples]
        word_tokens_tensors = [s[2] for s in samples]
        segments_tensors = [s[3] for s in samples]
        label_tensors= [s[4] for s in samples]
        
        before_pad_length=[]
        for i in range(len(char_tokens_tensors)):
            before_pad_length.append(char_tokens_tensors[i].shape[0])
        
        char_tokens_tensors = pad_sequence(char_tokens_tensors,batch_first=True)
        word_tokens_tensors = pad_sequence(word_tokens_tensors,batch_first=True)
        segments_tensors = pad_sequence(segments_tensors,batch_first=True)
        max_pad_length = char_tokens_tensors[0].shape[0]
        masks_tensors = torch.zeros(char_tokens_tensors.shape,dtype=torch.long)
        masks_tensors = masks_tensors.masked_fill(char_tokens_tensors != 0, 1)
        other_label = torch.tensor([1]*1+[0]*20,dtype=torch.float)
        other_label = torch.unsqueeze(other_label,0)
        
        for i in range(len(before_pad_length)):
            for j in range(max_pad_length-before_pad_length[i]):
                label_tensors[i] = torch.cat((label_tensors[i],other_label),0)
        r_label_tensors=torch.stack([i for i in (label_tensors)])
            
        return name,char_tokens_tensors,word_tokens_tensors,masks_tensors,r_label_tensors

In [106]:
if __name__ == "__main__":
    dataset = NERset()
    dataloader = DataLoader(dataset, batch_size=3, shuffle=False,collate_fn=dataset.create_mini_batch)
    print(len(dataloader))
    for i,data in enumerate(dataloader):
        print(data)
        

1486
(['300351441.pdf.xlsx', '300351441.pdf.xlsx', '300351441.pdf.xlsx'], tensor([[   2,  164, 1137,  240,  683,    3,  357,    6,   14,   87,   39,   95,
          659,  607,  544,  164, 1137,    8,  359,   16,   53,   46,   10,    3,
            0,    0,    0,    0],
        [   2,  164, 1137,  240,  683,    3,  275,  147,    1,    1,   25,    1,
           57,    1,    1,   48,    3,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0],
        [   2,  164, 1137,  240,  683,    3,  576,  169,   91,  271,  233,   76,
          484, 1164,  411,  779,  230,   28,   34,  352,  425, 1328,  214,  587,
          777,  201,  464,    3]]), tensor([[    2, 17175, 17175,     1,     1,     3,   288,     5,  5029,  5029,
          5029,   654,   654,  5552,  5552, 17175, 17175,     7, 20950, 20950,
          2610,  2610,     8,     3,     0,     0,     0,     0],
        [    2, 17175, 17175,     1,     1,     3,   593,   593,     1,     1,
            19,     1,     1,    

(['300366036.pdf.xlsx', '300366036.pdf.xlsx', '300366036.pdf.xlsx'], tensor([[   2,    1,    1,  164, 1137,  170,  329,   74,    3,    1,    1,    1,
          164, 1137,  462,  266,  108,   22,    1,   48,  107,  595,  232,  132,
          194,  730,  255,    1,   25,    1,   57,    1,   48,    1,  761,    1,
            1,    1,  107,    1,    1,  155,  646,  594,  238,  646,  594,  152,
          791,  217,  297,  791,  217,  328,  173,  380, 1008,  215,    1,    1,
          320,    1,    1,  346,   62,  791,  196,   21,   64,   13,  140,   29,
            1,    1,  778,  576,  169,   91,  271,  233,   76,  484, 1164,  411,
          779,  230,   28,   34,  352,  425, 1328,  214,  587,  777,  201,  464,
          977,  315,  451,  113,   27,    1,  393,  279,    6, 1228,   16,  586,
          176,  462,  266,  108,  209,    6,  393,  279,   15, 1377,  907,   46,
           11,  141,   12,    7,  730,  255,    1,   25,    1,   57,    1,    1,
           48,    1,  352,    1,    1,  

(['300366257.pdf.xlsx', '300366257.pdf.xlsx', '300366257.pdf.xlsx'], tensor([[   2,   22,  213,  607,  544,  164, 1137,    8,  359,   46,   11,  149,
         1003,    3,   33,   22,   35,  573,  143,    1,  730,  255,    1,   25,
          253,   58,  230,   28,   50,   67,   85,   38,   92,   47,   65, 1244,
         1139,  134,  248,  187,  681,  602,  548,  155, 1341,  384,    1,  234,
           73,  157,  128,   83,  400,    1,   87,   54,  232,  165,  155,  421,
          304, 1286,  583,  294,  114,   88,   47,  144,   85,  207,  160,  244,
          602,  548,  155, 1341,  500,  285,    1,  228,  305,  228,  216,  228,
          259,  204,  703,  242,    1,   59,    6,   25,  146,  397,  722,  867,
          364,    3],
        [   2,   22,  213,  607,  544,  164, 1137,    8,  359,   46,   11,  149,
         1003,    3,   33,   27,   35,  719,  419,    1,  119,   13,  211,   65,
           82,   49,   56,  125,   13,  119,   40,  129,   21,   84,   82,   65,
           82,   4

(['300365618.pdf.xlsx', '300365618.pdf.xlsx', '300365618.pdf.xlsx'], tensor([[   2,    1,    1,  607,  544,  393,  279,  587,  477,    3,    1,    1,
            1,   77,  973,  233,   76,   17,   36,   11,   44,   14,   10,    3,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0],
        [   2,    1,    1,  607,  544,  393,  279,  587,  477,    3,    1,    1,
            1,  174,  192,    7,   80, 1588,   12,  271,  483,  218,  613,  201,
          218,  378,   32,   31,  721,  718,  352,  416,  359,    6,  904,  519,
         1588,   12,  867,  364,    8,  613,   11,  335,  143,  904,  519,  378,
            6,   91,  271,  847,    3],
        [   2,    1,    1,  607,  544,  393,  279,  587,  477,    3,  155,   15,
          287,   79,   18,   19,   24,   19,   44,   14,   10,    3,    0,    0,
            0,    0,    0,    0,    0,   

(['300359897.pdf.xlsx', '300359897.pdf.xlsx', '300359897.pdf.xlsx'], tensor([[   2,    1,    1,   78,    6,  412,    3,  570,  573,    8,  843,  498,
           16,    9,  141,    6,  600,  102,   16,    9,  164, 1137,  314,   12,
          442,  833,   14,   46,   11,   10,    3,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0],
        [   2,    1,    1,   78,    6,  412,    3,    1,    1,    1,  867,  364,
          314,  121,  147,    6,  306, 1141,  306,   10,    3,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
      

(['300366186.pdf.xlsx', '300366186.pdf.xlsx', '300366186.pdf.xlsx'], tensor([[   2,   70,  213,  867,  364,    6,  240,  219,    8,  613,   11,  816,
          366,  149, 1003,    3,  364,   15,   46,   11,  132,  114,    8,   12,
            7,  191, 1508,  233,   76,  209,    6,  435,  713,  642,    6,  471,
         1041,    7,  191, 1508,  233,   76,   14,    6,  146,    6,  308,  450,
          378,    6,  471, 1041,    8,   94,   19,   18,    3],
        [   2,   70,  213,  867,  364,    6,  240,  219,    8,  613,   11,  816,
          366,  149, 1003,    3,  614,  470,   15,  240,  159,   46,   11,   24,
           98,    6,  308,  277,   15,  353,   68,   11,   14,   37,   23,   18,
           19,   11,   14,   44,  390,   17,   46,   10,    3,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0],
        [   2,   70,  213,  867,  364,    6,  240,  219,    8,  613,   11,  816,
          366,  149, 1003,    3,   44,   23,    8,  399,  750,   74,    7,

(['300367181.pdf.xlsx', '300367181.pdf.xlsx', '300367181.pdf.xlsx'], tensor([[   2,    1,    1,  867,  364,    6,  240,  219,    8,  613,   11,  816,
          366,  149, 1003,    3,   24,   87,    7,  786,  573,  209,    6,  532,
         1137,  890,   16,   66,   12,  532, 1380, 1588,   12,  867,  364,    6,
         1097,  294,   15,   42,   30,   18,  134,  366,   37,   23,    9,   42,
            6,   14,  176,   24,   46,   10,    3,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,  

(['300351343.pdf.xlsx', '300351343.pdf.xlsx', '300351343.pdf.xlsx'], tensor([[   2,    1,    1,  607,  544,    8,  393,  279,   46,   11,  141,    8,
          623,  306,   24,  587,  477,    8,  218,   46,   11,  149, 1003,    3,
          141,   14,   16,   18,    6,  951,  448,   15,  355,   18,   19,   11,
          141, 1588,   12,  134,  233,  177,   22,   70,  570,    6,   27,  177,
           22, 1003,    6,  543,  153,    8,  399,  750,   74,  291,  153,  543,
          768,    3,    0,    0,    0],
        [   2,    1,    1,  607,  544,    8,  393,  279,   46,   11,  141,    8,
          623,  306,   24,  587,  477,    8,  218,   46,   11,  149, 1003,    3,
          345,  343,  149,  166,  141,   14,   16,   18,    6, 1500,  102,   15,
           91,   30,   18,   19,   11,  141,   17,   36,   11,   44,   14,   10,
            3,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0],
        [   2,    1,    1,  607,  544,   

(['300351404.pdf.xlsx', '300351404.pdf.xlsx', '300351404.pdf.xlsx'], tensor([[   2,    1,    1,   78,    6,  412,    3,    1,    1,    1,  164, 1137,
            6,  442,  833,   77,  240,  683,    8,  514,   16,    9,  607,  544,
          393,  279,  587,  477,    6,   24,   19,  141,    6,  600,  102,   16,
            9,  164, 1137,  314,  595,  232,  164, 1137,    8,  218,   46,   11,
            3],
        [   2,    1,    1,   78,    6,  412,    3,  570,  573,    8,  843,  498,
           16,    9,  141,    6,  600,  102,   16,    9,  164, 1137,  314,   12,
          442,  833,   14,   46,   11,   10,    3,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0],
        [   2,    1,    1,   78,    6,  412,    3,    1,    1,    1,  867,  364,
          314,  121,  147,    6,  306, 1141,  306,   10,    3,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
        

(['300360248.pdf.xlsx', '300360248.pdf.xlsx', '300360248.pdf.xlsx'], tensor([[   2,  164, 1137,  240,  683,    3,  867,  364,  671,  191,  440,  587,
          777,  455, 1830,   77,  113,  158,  931,  319,  555, 1548,    3,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,  

(['300359894.pdf.xlsx', '300359894.pdf.xlsx', '300359894.pdf.xlsx'], tensor([[   2,    1,    1,  607,  544,  393,  279,  587,  477,    3,   15,  252,
          535,   16,  570,  573,   15,  790,    9,   46,  141,   17,   36,   11,
           44,   14,   10,    3,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,  

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



(['300367212.pdf.xlsx', '300367212.pdf.xlsx', '300367212.pdf.xlsx'], tensor([[   2,    1,    1,   78,    6,  412,  623,  306,   24,  149, 1003,    3,
            1,    1,    1,  867,  364,  314,  121,  147,    6,  306, 1141,  306,
            3,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0],
        [   2,    1,    1,   78,    6,  412,  623,  306,   24,  149, 1003,    3,
            1,    1,    1,  633, 1137,  141,    6,  368,  153,  179,  233,  551,
          153,  682,  477,    6,  292,  553,    6, 1130,  827,  172,   17,  351,
          833,   24,  164, 1137,   15,   91,   30,    9,  141,    6,   51,  148,
            7,  205,    3],
        [   2,    1,    1,   78,    6,  412,  623,  306,   24,  149, 1003,    3,
           42,  734,   19,  682,  477,   15,   42,   30,   18,  164, 1137,   16,
            9,  141,   15,  633, 1137,  141,   14,   46,   11,   

(['300365970.pdf.xlsx', '300365970.pdf.xlsx', '300365970.pdf.xlsx'], tensor([[   2,    1,    1,  607,  544,    8,  393,  279,   46,   11,  141,    8,
          623,  306,   24,  587,  477,    8,  218,   46,   11,  149, 1003,    3,
            1,    1,    1,  174,  192,    7,   80, 1588,   12,  271,  483,  218,
          613,  201,  218,  378,   32,   31,  721,  718,  352,  416,  359,    6,
          904,  519, 1588,   12,  867,  364,    8,  613,   11,  335,  143,  904,
          519,  378,    6,   91,  271,  847,  155,   15,  287,   79,   18,   19,
           24,   19,   44,   14,   10,    3,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,  

(['300365622.pdf.xlsx', '300365622.pdf.xlsx', '300365622.pdf.xlsx'], tensor([[   2, 1612,  867,  364,    6,  240,  219,    8,  613,   11,  816,  366,
          149, 1003,    3, 1588,   12,  867,  364,    6, 1097,  294,   15,   91,
           30,   18,   19,    9,  127,   66,   54,   51,  710,  267,  421,   14,
          710,  593,  239,   15,   87, 1207,   19,   19,    9,   16,   53,   46,
           10,    3,    0,    0,    0,    0,    0,    0,    0],
        [   2, 1612,  867,  364,    6,  240,  219,    8,  613,   11,  816,  366,
          149, 1003,    3,   24,   87,    7,  786,  573,  209,    6,  532, 1137,
          890,   16,   66,   12,  532, 1380, 1588,   12,  867,  364,    6, 1097,
          294,   15,   42,   30,   18,  134,  366,   37,   23,    9,   42,    6,
           14,  176,   24,   37,  178,   18,   19,    9,    3],
        [   2, 1612,  867,  364,    6,  240,  219,    8,  613,   11,  816,  366,
          149, 1003,    3,  127,   74,   53,   46,    6,   17,    7,  628,

(['300366500.pdf.xlsx', '300366500.pdf.xlsx', '300366500.pdf.xlsx'], tensor([[   2,    1,    1,  867,  364,  570, 1003,   15,  514,   46,  132,  194,
          378,    3,    1,    1,    1,  867,  364,  570, 1003,   15,  514,   46,
          132,  194,  595,  232,  517,   19,  114,  106,  178,  509, 2663,   22,
           26,   60,  135,   26,   26,   26,   22,  175,  312,  349,  689,  297,
         1788,  272,  492,  380, 1008,  215,    1,    1,  320,    1,  346, 1788,
          272,  492,  221,   40,   21,  140,   29,   96,   82,   21,   92,  309,
         1719,    1,    1,  778,  576,  169,   91,  271,  233,   76,  484, 1164,
          411,  779,  230,   28,   34,  352,  425, 1328,  214,  587,  777,  201,
          464,  472,  293,  113,  472,  293,  997,  671,  191,    1, 1675, 1054,
            7, 2247,  596,  204,   73,  157,   26,   52,  135,   70,   71,   60,
           63,  135,   63,   26,   27,   26,  280,   83,  858,   26,   52,  135,
           70,   71,   60,   63,  135,  

(['300351396.pdf.xlsx', '300351396.pdf.xlsx', '300351396.pdf.xlsx'], tensor([[   2,    1,    1,   78,    6,  412,    3,    1,    1,    1, 1161,  738,
           12,  164, 1137,  462,  266,  314,    8,   54,   11,   10,    3,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0],
        [   2,    1,    1,   78,    6,  412,    3,    1,    1,    1,   77,  149,
          166,    8,  613,   11,  867,  364, 1097,  294,   12,    7,  201,  464,
           20,  275,  147,    1,    1,   25,  253,   80,  189,  455, 1830,  484,
         1164,  552,  267,  378,  166,  293,   15,  287, 1424,   46,   11,   44,
           14,    3],
        [   2,    1,    1,   78,    6,  412,    3,   15,  570,  573,   14,   16,
           53,   46,   10,    3,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   

(['300366034.pdf.xlsx', '300366034.pdf.xlsx', '300366034.pdf.xlsx'], tensor([[   2, 1612,  867,  364,    6,  240,  219,    8,  613,   11,  816,  366,
          149, 1003,    3,    1,    1,    1,  240,  219,   46,   11,  614,  470,
          118,  231,    8, 1508,  191,   46,   11,  867,  364,  509,    8,   94,
           19,   18,    7,  867,  364,  628,   14,    8,    7,  214,  290,  440,
          293,  378,    6,  143,  426,  595,  232,  224,  548,    7,  867,  364,
         1097,  294,   48,    7,  867,  364,  509,    6,  143,  426,    7,  867,
          364,  352, 1102,  378,   14,  894,  178,    7,  357,    8, 1020,  326,
           11,  614,  470,   15,  240,  219,   16,   53,   46,   10,   22,    1,
          191,  201,  464,    6,  440,  284,  303,  647,  141,  595,  232,  997,
          158,  433,  191,  642,  186,  118,  303,  647,  141,    1,  191,  201,
          464,    1,    1,    1,    6,   76,  224,    7,  642,  143,  595,  232,
          191,  201,  464,    8,   87,  

(['300366235.pdf.xlsx', '300366235.pdf.xlsx', '300366235.pdf.xlsx'], tensor([[   2,    1,    1,   78,    6,  412,  623,  306,   24,  149, 1003,    3,
            9,  164, 1137,   12,  442,  833,   14,   46,   11,   10,    3,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0],
        [   2,    1,    1,   78,    6,  412,  623,  306,   24,  149, 1003,    3,
            1,    1,    1,  867,  364,  314,  121,  147,    6,  306, 1141,  306,
            3,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0],
        [   2,    1,    1,   78,    6,  412,  623,  306,   24,  149, 1003,    3,
            1,    1,    1,  633, 1137,  141,    6,  368,  153,  179,  233,  551,
          153,  682,  477,    6,  292,  553,    6, 11

(['300360246.pdf.xlsx', '300360246.pdf.xlsx', '300360246.pdf.xlsx'], tensor([[   2,    1,    1,  607,  544,    8,  393,  279,   46,   11,  141,    8,
          623,  306,   24,  587,  477,    8,  218,   46,   11,  149, 1003,    3,
          271,  847,  155,   15,  287,   79,   18,   19,   24,   19,   44,   14,
           10,    3,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0],
        [   2,    1,    1,  607,  544,    8,  393,  279,   46,   11,  141,    8,
          623,  306,   24,  587,  477,    8,  218,   46,   11,  149, 1003,    3,
            1,    1,    1,  345,  343,  149,  166,  233,  177,    1,  570,    6,
            1,    6,  543,  153,    8,  399,  750,   66,  247,  356,  345,  343,
          149,  166,    6,  951,  448,   15,  355,   18,   19,   11,  141,    1,
          186,  183,   58,  345,    3],
        [   2,    1,    1,  607,  544,   

(['300365616.pdf.xlsx', '300365616.pdf.xlsx', '300365616.pdf.xlsx'], tensor([[   2,    1,    1,  607,  544,  393,  279,  587,  477,    3,  164, 1137,
          393,  279,  587,  477,   12,    7,  357,    6,  570,  573,   15,  790,
            9,   46,  141,   17,   36,   11,   44,   14,   10,    3,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0],
        [   2,    1,    1,  607,  544,  393,  279,  587,  477,    3,    1,    1,
            1,  201,  464,    6,   58,  607,  544,  393,  279,  141,    6,  587,
          477,    8,  218,   46,   11,  240,  514,   59,    6,   58,    1,    1,
          607,  544,    8,  393,  279,   46,   11,   44,   14,   20,   17,   74,
           24,   19,  141,   59,    3],
        [   2,    1,    1,  607,  544,  393,  279,  587,  477,    3,    8, 1508,
          191,   16,   24,   19,  141,   17,   36,   11,   44,   14,   10,    3,
            0,    0,    0,    0,    0,   

(['300359909.pdf.xlsx', '300359909.pdf.xlsx', '300359909.pdf.xlsx'], tensor([[   2, 1612,  867,  364,    6,  240,  219,    8,  613,   11,  816,  366,
          149, 1003,    3,  147,    1,    1,   25,    1,    1,   57,    1,   48,
         1111,  339,  368,  153,    1,    8,   87,   19,   18,    7,  576,  169,
           91,  271,  233,   76,   14,   95,  153,    6,  218,  613,   15,  351,
           46,   11,  233,   76,   14,  867,  364,   15,   46,    3],
        [   2, 1612,  867,  364,    6,  240,  219,    8,  613,   11,  816,  366,
          149, 1003,    3,   11,  132,  114,    8,   12,    7,  191, 1508,  233,
           76,  209,    6,  435,  713,  642,    6,  471, 1041,    7,  191, 1508,
          233,   76,   14,    6,  146,    6,  308,  450,  378,    6,  471, 1041,
            8,   94,   19,   18,  614,  470,   15,  240,  159,    3],
        [   2, 1612,  867,  364,    6,  240,  219,    8,  613,   11,  816,  366,
          149, 1003,    3,   46,   11,   24,   98,    6,  308,

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)

