In [1]:
import pickle
import os
import numpy as np
import pandas as pd
from collections import defaultdict


# Directories that load_data() scans for pickled samples (training set and Test-A set).
train_path, testA_path = './Train', './Test_A'

def load_data(path,test=False):
    """Load every pickled sample found in ``path`` into a list of dicts.

    Each file must unpickle to a ``(feat, metadata)`` pair. ``feat`` is indexed
    as ``feat[:, i]``, so it must be at least 2-D with one column per name in
    ``feats_cols``; ``metadata`` is indexed by ``'mileage'`` (and ``'label'``
    for training data).

    Args:
        path: Directory containing one pickle file per sample.
        test: If True, the ``'label'`` entry is not read from the metadata.

    Returns:
        One dict per file, in sorted file-name order. Each dict maps every
        feature name to its column of ``feat`` and every metadata name to the
        corresponding metadata value.
    """
    feats_cols=['volt','current','soc','max_single_volt','min_single_volt','max_temp','min_temp','timestamp']
    metadata_cols=['mileage']
    if not test:
        metadata_cols.append('label')

    res=[]
    # os.listdir() returns entries in arbitrary order; sorting makes the row
    # order (and therefore any positional alignment with labels or
    # predictions) reproducible across runs and machines.
    for fname in sorted(os.listdir(path)):
        with open(os.path.join(path,fname),'rb') as f:
            # SECURITY: pickle.load can execute arbitrary code. Only point this
            # at data files from a trusted source.
            feat,metadata=pickle.load(f)
        feats=dict()
        for i,col in enumerate(feats_cols):
            feats[col]=feat[:,i]
        for col in metadata_cols:
            feats[col]=metadata[col]
        res.append(feats)
    return res
    
# Load both splits, then wrap each list of per-sample dicts in a DataFrame
# (one row per sample file, one column per feature/metadata name).
train = load_data(train_path)
testA = load_data(testA_path, test=True)
train_df, test_df = pd.DataFrame(train), pd.DataFrame(testA)


In [6]:
#!pip install cupy

In [7]:
# Demo: repeat a (3, 1) column vector side by side to get a (3, 2) matrix.
# np.tile(arr, (1, 2)) gives the same result as numpy.matlib.repmat(arr, 1, 2)
# without importing the deprecated numpy.matlib module.
arr = np.array([1, 2, 3]).reshape(3, 1)
tiled = np.tile(arr, (1, 2))
tiled

array([[1, 1],
       [2, 2],
       [3, 3]])

In [3]:
# Cast the raw labels to integers (the printed values are 0 and 10).
train_df['label'] = train_df['label'].astype(int)
print(train_df['label'])

# Map each label to label / 10 + 1 (0 -> 1, 10 -> 2). The float result is
# truncated by the int cast, exactly as the per-element version did.
# NOTE: once the mapped values are written back into train_df['label'],
# re-running this cell maps them again (1 -> 1, 2 -> 1).
a = (train_df['label'] / 10 + 1).astype(int)
print(a)

0         0
1        10
2         0
3        10
4         0
         ..
28384     0
28385     0
28386     0
28387     0
28388     0
Name: label, Length: 28389, dtype: int64
0        1
1        2
2        1
3        2
4        1
        ..
28384    1
28385    1
28386    1
28387    1
28388    1
Name: label, Length: 28389, dtype: int64


In [4]:
# Replace the label column with the mapped values computed in `a`.
train_df = train_df.assign(label=a)
print(train_df['label'])

0        1
1        2
2        1
3        2
4        1
        ..
28384    1
28385    1
28386    1
28387    1
28388    1
Name: label, Length: 28389, dtype: int64


# 有三种逐行迭代（循环堆叠）生成矩阵的方法：numpy、torch.tensor、cupy

In [13]:
#import cupy as cp
import torch

# Build the (n_samples, n_timesteps) timestamp matrix in a single stack.
#
# The previous version grew the result with torch.vstack one row at a time,
# which re-copies all accumulated rows on every iteration, and it stopped at a
# hard-coded row index (28388), so it raised IndexError for smaller datasets
# and silently truncated larger ones. The module-level `global` statements
# were no-ops and have been removed.
#
# np.stack needs every sample to have the same number of timesteps; the
# row-by-row vstack had the same requirement.
timestamp = torch.from_numpy(np.stack(train_df['timestamp'].to_numpy()))

# The old loop only re-assembled the rows of `timestamp`, so arr1 is that matrix.
arr1 = timestamp
print(arr1)

tensor([[3.7610e+03, 3.7620e+03, 3.7630e+03,  ..., 4.2350e+03, 4.2360e+03,
         4.2370e+03],
        [3.7200e+03, 3.7210e+03, 3.7230e+03,  ..., 4.5300e+03, 4.5310e+03,
         4.5320e+03],
        [0.0000e+00, 1.1000e+01, 2.1000e+01,  ..., 2.5990e+03, 2.6090e+03,
         2.6200e+03],
        ...,
        [6.7530e+03, 6.7540e+03, 6.7550e+03,  ..., 7.4250e+03, 7.4260e+03,
         7.4270e+03],
        [1.5267e+04, 1.5277e+04, 1.5288e+04,  ..., 1.5967e+04, 1.5968e+04,
         1.5969e+04],
        [4.1514e+04, 4.1515e+04, 4.1516e+04,  ..., 4.2328e+04, 4.2330e+04,
         4.2331e+04]], dtype=torch.float64)


In [9]:
import cupy as cp

timestamp = train_df['timestamp']

# Stack the per-sample arrays once with NumPy and convert the result to a
# CuPy array in one call.
#
# The previous version called cp.vstack once per row, re-copying the
# accumulated rows each time, and stopped at a hard-coded row index (28388),
# so it only worked for exactly 28389 samples. The module-level `global`
# statements were no-ops and have been removed.
#
# np.stack needs every sample to have the same number of timesteps; the
# row-by-row vstack had the same requirement.
arr1 = cp.asarray(np.stack(timestamp.to_numpy()))
print(arr1)

[[3.7610e+03 3.7620e+03 3.7630e+03 ... 4.2350e+03 4.2360e+03 4.2370e+03]
 [3.7200e+03 3.7210e+03 3.7230e+03 ... 4.5300e+03 4.5310e+03 4.5320e+03]
 [0.0000e+00 1.1000e+01 2.1000e+01 ... 2.5990e+03 2.6090e+03 2.6200e+03]
 ...
 [6.7530e+03 6.7540e+03 6.7550e+03 ... 7.4250e+03 7.4260e+03 7.4270e+03]
 [1.5267e+04 1.5277e+04 1.5288e+04 ... 1.5967e+04 1.5968e+04 1.5969e+04]
 [4.1514e+04 4.1515e+04 4.1516e+04 ... 4.2328e+04 4.2330e+04 4.2331e+04]]


# 复制列向量，然后横向复制成矩阵，直接矩阵作差，就不需要迭代

In [11]:
# Column vector holding the first timestamp of every sample. The row count is
# inferred with -1 instead of the hard-coded 28389.
arr2 = arr1[..., 0].reshape(-1, 1)

# Repeat that column once per timestep so arr_win has the same shape as arr1.
# np.tile replaces numpy.matlib.repmat (deprecated module), and the repeat
# count comes from arr1 instead of the hard-coded 256.
# arr2.get() copies the CuPy array to a NumPy array on the host.
arr_win = np.tile(arr2.get(), (1, arr1.shape[1]))
print(arr_win)
print("数据类型",type(arr_win))           # type of the array object
print("数组元素数据类型：",arr_win.dtype) # element dtype
print("数组元素总数：",arr_win.size)      # total number of elements
print("数组形状：",arr_win.shape)         # array shape
print("数组的维度数目",arr_win.ndim)      # number of dimensions


[[ 3761.  3761.  3761. ...  3761.  3761.  3761.]
 [ 3720.  3720.  3720. ...  3720.  3720.  3720.]
 [    0.     0.     0. ...     0.     0.     0.]
 ...
 [ 6753.  6753.  6753. ...  6753.  6753.  6753.]
 [15267. 15267. 15267. ... 15267. 15267. 15267.]
 [41514. 41514. 41514. ... 41514. 41514. 41514.]]
数据类型 <class 'numpy.ndarray'>
数组元素数据类型： float64
数组元素总数： 7267584
数组形状： (28389, 256)
数组的维度数目 2


In [14]:
# Subtract each sample's first timestamp from its whole row, giving times
# relative to the start of the sample (first column becomes 0).
time_arr_winer = np.subtract(arr1.get(), arr_win)
print(time_arr_winer)

[[0.000e+00 1.000e+00 2.000e+00 ... 4.740e+02 4.750e+02 4.760e+02]
 [0.000e+00 1.000e+00 3.000e+00 ... 8.100e+02 8.110e+02 8.120e+02]
 [0.000e+00 1.100e+01 2.100e+01 ... 2.599e+03 2.609e+03 2.620e+03]
 ...
 [0.000e+00 1.000e+00 2.000e+00 ... 6.720e+02 6.730e+02 6.740e+02]
 [0.000e+00 1.000e+01 2.100e+01 ... 7.000e+02 7.010e+02 7.020e+02]
 [0.000e+00 1.000e+00 2.000e+00 ... 8.140e+02 8.160e+02 8.170e+02]]


In [56]:
# Print the DataFrame's truncated text representation for a quick look at the loaded columns.
print(train_df)

                                                    volt  \
0      [164.3, 164.3, 164.3, 164.3, 164.3, 164.3, 164...   
1      [147.9, 147.9, 148.0, 147.9, 148.0, 148.0, 148...   
2      [148.0, 148.1, 148.2, 148.2, 148.3, 148.3, 148...   
3      [164.5, 164.5, 164.5, 164.5, 164.5, 164.5, 164...   
4      [165.4, 165.4, 165.4, 165.4, 165.4, 165.4, 165...   
...                                                  ...   
28384  [147.7, 147.7, 147.7, 147.7, 147.7, 147.7, 147...   
28385  [149.2, 149.3, 149.3, 149.5, 149.6, 149.6, 149...   
28386  [149.8, 149.8, 149.8, 149.8, 149.8, 149.8, 149...   
28387  [160.1, 160.1, 160.1, 160.1, 160.1, 160.1, 160...   
28388  [166.8, 166.8, 166.8, 166.8, 166.8, 166.8, 166...   

                                                 current  \
0      [-46.0, -46.0, -46.0, -46.0, -46.0, -46.0, -46...   
1      [-5.5, -5.5, -5.5, -5.5, -5.5, -5.5, -5.5, -5....   
2      [-1.8, -3.9, -5.0, -4.9, -5.0, -5.0, -5.0, -5....   
3      [-3.8, -3.8, -3.8, -3.8, -3.8, -

In [7]:
# Keep the first eight columns of train_df (positions 0-7), which are the
# per-timestep feature columns produced by load_data().
df = train_df.iloc[:, :8]
print(df)

                                                    volt  \
0      [164.3, 164.3, 164.3, 164.3, 164.3, 164.3, 164...   
1      [147.9, 147.9, 148.0, 147.9, 148.0, 148.0, 148...   
2      [148.0, 148.1, 148.2, 148.2, 148.3, 148.3, 148...   
3      [164.5, 164.5, 164.5, 164.5, 164.5, 164.5, 164...   
4      [165.4, 165.4, 165.4, 165.4, 165.4, 165.4, 165...   
...                                                  ...   
28384  [147.7, 147.7, 147.7, 147.7, 147.7, 147.7, 147...   
28385  [149.2, 149.3, 149.3, 149.5, 149.6, 149.6, 149...   
28386  [149.8, 149.8, 149.8, 149.8, 149.8, 149.8, 149...   
28387  [160.1, 160.1, 160.1, 160.1, 160.1, 160.1, 160...   
28388  [166.8, 166.8, 166.8, 166.8, 166.8, 166.8, 166...   

                                                 current  \
0      [-46.0, -46.0, -46.0, -46.0, -46.0, -46.0, -46...   
1      [-5.5, -5.5, -5.5, -5.5, -5.5, -5.5, -5.5, -5....   
2      [-1.8, -3.9, -5.0, -4.9, -5.0, -5.0, -5.0, -5....   
3      [-3.8, -3.8, -3.8, -3.8, -3.8, -

In [49]:
# Imports come first: the original cell used `np` one line before its own
# `import numpy as np`, so it only ran if an earlier cell had imported NumPy.
import numpy as np
import scipy.io as io

# Convert the training feature frame to a NumPy array.
array_train = np.array(df)

mat_path = './mat_train.mat'
mat = array_train

# Write the array to a MATLAB .mat file under the variable name 'name'.
io.savemat(mat_path, {'name': mat})
print("win！")

win！


In [62]:
# float_format must be a format string (or callable); the bare integer 3 that
# was passed here is not a format string. '%.3f' writes floats with three
# decimal places — presumably what was intended (TODO confirm).
df.to_csv("./train.csv", index=False, float_format='%.3f')

In [6]:
# Select the label column by name rather than by the magic position 9.
# Position 9 is 'label' in the frame built from load_data() (8 feature
# columns, then 'mileage', then 'label'), so the result is the same, but the
# name-based lookup keeps working if columns are added or reordered.
df2 = train_df['label']
print(df2)

0        1
1        2
2        1
3        2
4        1
        ..
28384    1
28385    1
28386    1
28387    1
28388    1
Name: label, Length: 28389, dtype: int64


In [4]:
# Keep the first eight columns of test_df (positions 0-7), mirroring the
# slice taken from train_df.
df_test = test_df.iloc[:, :8]

In [16]:
!pip install h5py

Collecting h5py
  Using cached h5py-3.7.0-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (4.5 MB)
Installing collected packages: h5py
Successfully installed h5py-3.7.0


In [8]:
import torch

# Demo: element-wise true division of two integer tensors yields a float tensor.
arra1 = torch.tensor([1, 2])
arra2 = torch.tensor([2, 3])
array = torch.div(arra1, arra2)
array

tensor([0.5000, 0.6667])

In [6]:

import numpy as np
import scipy.io as io

# Save the Test-A feature columns to a MATLAB .mat file under the variable
# name 'name'.
mat_path = './mat_testA.mat'
mat = np.array(df_test)
io.savemat(file_name=mat_path, mdict=dict(name=mat))

In [6]:
import numpy as np
import pandas as pd

# Demo: wrap a (4, 1) column vector in a single-column DataFrame named "score".
# The original cell also ran `df = pd.DataFrame("a")`, which raises
# "ValueError: DataFrame constructor not properly called!" and stopped the
# cell before the frame below was built, so that line has been removed.
array = np.array([1, 2, 3, 4]).reshape(4, 1)
score_df = pd.DataFrame(array, columns=["score"])
score_df

Unnamed: 0,score
0,1
1,2
2,3
3,4


In [14]:
# Read the header-less CSV and report its size as a NumPy array.
# NOTE: this rebinds `df`, which an earlier cell bound to the feature columns
# of train_df.
df = pd.read_csv("test_A.csv", header=None)
array = df.to_numpy(copy=True)
print("数组元素总数：", array.size)   # total number of elements
print("数组形状：", array.shape)      # (rows, columns)

数组元素总数： 6234
数组形状： (6234, 1)


In [76]:
# Write arr5 to label2.csv as comma-separated text.
# NOTE(review): `arr5` is not defined in any cell visible in this notebook, so
# this line relies on kernel state from a cell that is not shown — confirm
# where arr5 comes from before relying on a fresh Restart & Run All.
np.savetxt("label2.csv", arr5, delimiter=",")

In [77]:
# Display the per-sample voltage arrays (one array per row).
train_df.loc[:, "volt"]

0        [164.3, 164.3, 164.3, 164.3, 164.3, 164.3, 164...
1        [147.9, 147.9, 148.0, 147.9, 148.0, 148.0, 148...
2        [148.0, 148.1, 148.2, 148.2, 148.3, 148.3, 148...
3        [164.5, 164.5, 164.5, 164.5, 164.5, 164.5, 164...
4        [165.4, 165.4, 165.4, 165.4, 165.4, 165.4, 165...
                               ...                        
28384    [147.7, 147.7, 147.7, 147.7, 147.7, 147.7, 147...
28385    [149.2, 149.3, 149.3, 149.5, 149.6, 149.6, 149...
28386    [149.8, 149.8, 149.8, 149.8, 149.8, 149.8, 149...
28387    [160.1, 160.1, 160.1, 160.1, 160.1, 160.1, 160...
28388    [166.8, 166.8, 166.8, 166.8, 166.8, 166.8, 166...
Name: volt, Length: 28389, dtype: object