In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

In [2]:
df_train = pd.read_csv('./train.csv')
df_test  = pd.read_csv('./test.csv')
df_sub   = pd.read_csv('./gender_submission.csv')

In [3]:
df_train.drop(['Name','Ticket','Cabin'],axis = 1,inplace=True)
df_test.drop(['Name','Ticket','Cabin'],axis = 1,inplace=True)

sex = pd.get_dummies(df_train['Sex'])
embark = pd.get_dummies(df_train['Embarked'])
df_train = pd.concat([df_train,sex,embark],axis = 1)
df_train.drop(['Sex','Embarked'],axis=1,inplace=True)

sex = pd.get_dummies(df_test['Sex'])
embark = pd.get_dummies(df_test['Embarked'])
df_test = pd.concat([df_test,sex,embark],axis=1)
df_test.drop(['Sex','Embarked'],axis=1,inplace=True)

df_train.fillna(df_train.mean(),inplace=True)
df_test.fillna(df_test.mean(),inplace=True)

train_columns = df_train.columns
test_columns  = df_test.columns

In [4]:
Scaler1 = StandardScaler()
df_train = pd.DataFrame(Scaler1.fit_transform(df_train))
df_test  = pd.DataFrame(Scaler1.fit_transform(df_test))
df_train.columns = train_columns
df_test.columns  = test_columns

In [5]:
X_train = df_train.iloc[:,2:].values
y_train = df_train.loc[:, 'Survived'].values
x_test = df_test.iloc[:,1:]

In [6]:
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.autograd import Variable
from sklearn.model_selection import train_test_split
train_x, val_x, train_y, val_y = train_test_split(X_train, y_train, test_size = 0.2) 

In [7]:
trainTorch_x = torch.from_numpy(train_x).type(torch.FloatTensor)
trainTorch_y = torch.from_numpy(train_y).type(torch.LongTensor)


valTorch_x = torch.from_numpy(val_x).type(torch.FloatTensor)
valTorch_y = torch.from_numpy(val_y).type(torch.LongTensor) 

testTorch_x = torch.from_numpy(np.array(x_test)).type(torch.FloatTensor)

In [8]:
class Net(nn.Module):
    def __init__(self):
        super(Net,self).__init__()
        self.fc1 = nn.Linear(10,128)
        self.fc2 = nn.Linear(128,1024)
        self.fc3 = nn.Linear(1024,512)
        self.fc4 = nn.Linear(512,128)
        self.fc5 = nn.Linear(128,2)
        self.dropout = nn.Dropout(0.2)
        
    def forward(self,x):
        x = F.relu(self.fc1(x)) 
        x = self.dropout(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.dropout(self.fc4(x))
        x = self.fc5(x)
        return x
model = Net()

In [9]:
model = Net()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(),lr = 0.01,momentum=0.5)

In [10]:
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
import torch.optim as optim

In [11]:
train = torch.utils.data.TensorDataset(trainTorch_x, trainTorch_y)
val = torch.utils.data.TensorDataset(valTorch_x, valTorch_y)
test = torch.utils.data.TensorDataset(testTorch_x)
train_loader = DataLoader(train, batch_size = 60, shuffle = True)
val_loader = DataLoader(val, batch_size = 60, shuffle = True)
test_loader = DataLoader(test, batch_size = 60, shuffle = True)

In [12]:
# 将单轮循环封装为一个函数

def train_func(epoch):
    running_loss = 0.0
    for batch_idx,data in enumerate(train_loader,0):
        inputs,target = data
        optimizer.zero_grad()
        
        
#         forward and backward and update
        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        if batch_idx % 180 == 179:
            print('[%d,%5d] loss: %.3f' % (epoch + 1,batch_idx + 1,running_loss / 180))
            running_loss =0.0
            

In [13]:
def test_func():
    correct = 0
    total = 0
    with torch.no_grad():
#         执行之后在下面代码就不会执行梯度
        for data in val_loader:
            infor,labels = data
#             拿数据
            outputs = model(infor)
            _, predicted = torch.max(outputs.data,dim = 1)
#           返回 最大值 和 每一行的最大值下标
#           指定沿着维度1（往下 行是第0个维度，向右 列是第一个维度）
            total += labels.size(0)
#             label是一个N 1元组 size 取 0 就是？

            correct += (predicted == labels).sum().item()
    print('Accuracy on test set: %d %%' % (100 * correct / total))

In [19]:
def predict_func():
    with torch.no_grad():
        outputs = model(testTorch_x)
        index, predicted = torch.max(outputs.data,dim = 1)
    ans2 = predicted
    submission_file2 = pd.read_csv('./gender_submission.csv')
    submission_file2.Survived = ans2
#     print(ans2.shape)
    submission_file2.to_csv('submission_nn3.csv', index=False)
    print("saved")

In [244]:
if __name__ == '__main__':
    for epoch in range(1):

        train_func(epoch)
        test_func()
    predict_func()

Accuracy on test set: 83 %
saved


In [245]:
enumerate(train_loader,0)

<enumerate at 0x184ca109228>