In [1]:
import pandas as pd 

In [2]:
# Load the dataset from a CSV file addressed by a relative path.
df = pd.read_csv(filepath_or_buffer="predictive_maintenance.csv")

In [3]:
# Preview the first five rows of the freshly loaded frame.
df.head(n=5)

Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type
0,1,M14860,M,298.1,308.6,1551,42.8,0,0,No Failure
1,2,L47181,L,298.2,308.7,1408,46.3,3,0,No Failure
2,3,L47182,L,298.1,308.5,1498,49.4,5,0,No Failure
3,4,L47183,L,298.2,308.6,1433,39.5,7,0,No Failure
4,5,L47184,L,298.2,308.7,1408,40.0,9,0,No Failure


In [4]:
# Swap the spaced, unit-annotated column headers for short identifiers that are
# easier to type; columns not listed in the mapping keep their current names.
df = df.rename(
    columns={
        "Product ID": "ProductID",
        "Air temperature [K]": "AirTemp",
        "Process temperature [K]": "ProcessTemp",
        "Rotational speed [rpm]": "RotSpeed",
        "Torque [Nm]": "Torque",
        "Tool wear [min]": "ToolWear",
        "Failure Type": "FailType",
    }
)

In [5]:
# Preview the first five rows again to confirm the shortened column names.
df.head(n=5)

Unnamed: 0,UDI,ProductID,Type,AirTemp,ProcessTemp,RotSpeed,Torque,ToolWear,Target,FailType
0,1,M14860,M,298.1,308.6,1551,42.8,0,0,No Failure
1,2,L47181,L,298.2,308.7,1408,46.3,3,0,No Failure
2,3,L47182,L,298.1,308.5,1498,49.4,5,0,No Failure
3,4,L47183,L,298.2,308.6,1433,39.5,7,0,No Failure
4,5,L47184,L,298.2,308.7,1408,40.0,9,0,No Failure


### Представление набора данных в длинном формате

In [16]:
# Reshape to long format: each of the five measurement columns is unpivoted so
# that every reading gets its own row. The original column name goes into
# Measurement_Type, the reading into Value, and the identifier columns are
# repeated on every row.
df_long = df.melt(
    id_vars=["UDI", "ProductID", "Type", "Target", "FailType"],
    value_vars=["AirTemp", "ProcessTemp", "RotSpeed", "Torque", "ToolWear"],
    var_name="Measurement_Type",
    value_name="Value",
)

print(df_long)

         UDI ProductID Type  Target    FailType Measurement_Type  Value
0          1    M14860    M       0  No Failure          AirTemp  298.1
1          2    L47181    L       0  No Failure          AirTemp  298.2
2          3    L47182    L       0  No Failure          AirTemp  298.1
3          4    L47183    L       0  No Failure          AirTemp  298.2
4          5    L47184    L       0  No Failure          AirTemp  298.2
...      ...       ...  ...     ...         ...              ...    ...
49995   9996    M24855    M       0  No Failure         ToolWear   14.0
49996   9997    H39410    H       0  No Failure         ToolWear   17.0
49997   9998    M24857    M       0  No Failure         ToolWear   22.0
49998   9999    H39412    H       0  No Failure         ToolWear   25.0
49999  10000    M24859    M       0  No Failure         ToolWear   30.0

[50000 rows x 7 columns]


### Представление набора данных в широком формате

In [18]:
# Reshape to wide format: one row per UDI, with AirTemp and ProcessTemp each
# spread into one column per value of Type. (UDI, Type) combinations that do
# not occur in the data show up as NaN.
wide_df = pd.pivot_table(
    df,
    values=["AirTemp", "ProcessTemp"],
    index="UDI",
    columns="Type",
    aggfunc="mean",  # the pivot_table default, spelled out for readability
)

print(wide_df)


      AirTemp               ProcessTemp              
Type        H      L      M           H      L      M
UDI                                                  
1         NaN    NaN  298.1         NaN    NaN  308.6
2         NaN  298.2    NaN         NaN  308.7    NaN
3         NaN  298.1    NaN         NaN  308.5    NaN
4         NaN  298.2    NaN         NaN  308.6    NaN
5         NaN  298.2    NaN         NaN  308.7    NaN
...       ...    ...    ...         ...    ...    ...
9996      NaN    NaN  298.8         NaN    NaN  308.4
9997    298.9    NaN    NaN       308.4    NaN    NaN
9998      NaN    NaN  299.0         NaN    NaN  308.6
9999    299.0    NaN    NaN       308.7    NaN    NaN
10000     NaN    NaN  299.0         NaN    NaN  308.7

[10000 rows x 6 columns]


### Представление набора данных в агрегированном формате

In [19]:
# Aggregate view: the mean of every numeric column within each Type group.
# numeric_only=True leaves the non-numeric columns out of the result.
aggregated_df = (
    df
    .groupby(by="Type")
    .mean(numeric_only=True)
)
print(aggregated_df)


              UDI     AirTemp  ProcessTemp     RotSpeed     Torque  \
Type                                                                 
H     4981.918245  299.866999   309.925723  1538.147557  39.838285   
L     4990.747667  300.015833   310.012300  1539.469167  39.996600   
M     5026.242910  300.029263   310.018785  1537.598932  40.017251   

        ToolWear    Target  
Type                        
H     107.419741  0.020937  
L     108.378833  0.039167  
M     107.272272  0.027694  


### Представление в виде сводных таблиц

In [20]:
# Pivot-table summary: the mean of the three selected measurement columns,
# with one row per value of Type.
pivot_df = pd.pivot_table(
    data=df,
    index="Type",
    values=["AirTemp", "ProcessTemp", "RotSpeed"],
    aggfunc="mean",
)
print(pivot_df)


         AirTemp  ProcessTemp     RotSpeed
Type                                      
H     299.866999   309.925723  1538.147557
L     300.015833   310.012300  1539.469167
M     300.029263   310.018785  1537.598932
