In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler

from sklearn.preprocessing import MinMaxScaler    
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

In [3]:
df = pd.read_csv("./coursera.csv")
df = df [["career_level", "available_time_per_week", "competence", "quiz", "Category"]]
df.head()


Unnamed: 0,career_level,available_time_per_week,competence,quiz,Category
0,1,0.25,2,0.5,2
1,2,0.33,2,0.4,2
2,3,0.1,3,0.7,3
3,1,0.45,2,0.6,2
4,2,0.65,2,0.5,2


In [8]:
class MulticlassClassification(nn.Module):
    def __init__(self, num_feature, num_class):
        super(MulticlassClassification, self).__init__()
        
        self.layer_1 = nn.Linear(num_feature, 512)
        self.layer_2 = nn.Linear(512, 128)
        self.layer_3 = nn.Linear(128, 64)
        self.layer_out = nn.Linear(64, num_class) 
        
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.2)
        self.batchnorm1 = nn.BatchNorm1d(512)
        self.batchnorm2 = nn.BatchNorm1d(128)
        self.batchnorm3 = nn.BatchNorm1d(64)
        
    def forward(self, x):
        x = self.layer_1(x)
        x = self.batchnorm1(x)
        x = self.relu(x)
        
        x = self.layer_2(x)
        x = self.batchnorm2(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        x = self.layer_3(x)
        x = self.batchnorm3(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        x = self.layer_out(x)
        
        return x

In [13]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

3# Split into train+val and test
#X = df.iloc[:, 0:-1]
#y = df.iloc[:, -1]

#X_trainval, X_test, y_trainval, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=69)


career_level = 3
available_time_per_week = 0.9
competence = 3
quiz = 0.9


data = [[3,0.45,2,0.4],[career_level,available_time_per_week,competence,quiz]]
# Create the pandas DataFrame 
df = pd.DataFrame(data, columns = ['career_level', 'available_time_per_week','competence', 'quiz'])

X_test = pd.read_csv('test.csv')

X_test = X_test[-4:-1]

#X_test = df

X_test.loc[4] = [career_level,available_time_per_week,competence,quiz]
scaler = MinMaxScaler()
X_test = scaler.fit_transform(X_test)
X_test = np.array(X_test)


class ClassifierDataset(Dataset):
    
    def __init__(self, X_data):
        self.X_data = X_data
        
    def __getitem__(self, index):
        return self.X_data[index]
        
    def __len__ (self):
        return len(self.X_data)



test_dataset = ClassifierDataset(torch.from_numpy(X_test).float())


test_loader = DataLoader(dataset=test_dataset, batch_size=1)


model = MulticlassClassification(num_feature = 4, num_class=4)
model = model.load_state_dict(torch.load('./test2.pth', map_location=torch.device('cpu')))#, map_location=torch.device('cpu'))


ModuleAttributeError: 'MulticlassClassification' object has no attribute 'copy'

In [301]:
y_pred_list = []
with torch.no_grad():
    model.eval()
    for X_batch in test_loader:
        X_batch = X_batch.to(device)
        y_test_pred = model(X_batch)
        y_pred_softmax = torch.log_softmax(y_test_pred, dim = 1)
        _, y_pred_tags = torch.max(y_pred_softmax, dim = 1)
        
        y_pred_list.append(y_pred_tags.cpu().numpy())
y_pred_list = [a.squeeze().tolist() for a in y_pred_list]

In [302]:
y_pred_list

[2, 3, 1, 3]

In [303]:
print(torch.__version__)

1.6.0+cpu


In [304]:
!pip install --upgrade torch

Requirement already up-to-date: torch in c:\users\acham\appdata\local\programs\python\python37\lib\site-packages (1.6.0+cpu)


You should consider upgrading via the 'c:\users\acham\appdata\local\programs\python\python37\python.exe -m pip install --upgrade pip' command.


In [2]:

import csv 
import json 
  
  
# Function to convert a CSV to JSON 
# Takes the file paths as arguments 
def make_json(csvFilePath, jsonFilePath): 
      
    # create a dictionary 
    data = {} 
      
    # Open a csv reader called DictReader 
    with open(csvFilePath, encoding='utf-8') as csvf: 
        csvReader = csv.DictReader(csvf) 
          
        # Convert each row into a dictionary  
        # and add it to data 
        for rows in csvReader: 
            
            # Assuming a column named 'No' to 
            # be the primary key 
            key = rows['\ufeffName'] 
            data[key] = rows 
  
    # Open a json writer, and use the json.dumps()  
    # function to dump data 
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf: 
        jsonf.write(json.dumps(data, indent=4)) 
          
# Driver Code 
  
# Decide the two file paths according to your  
# computer system 
csvFilePath = './coursera.csv'
jsonFilePath = './coursera.json'
  
# Call the make_json function 
make_json(csvFilePath, jsonFilePath)

In [306]:
import json

with open('./coursera.json') as f:
    rec = json.load(f)
    

final = []

for key, value in rec.items():
    #print(value)
    for k,v in value.items():
        if k == "Category" and v == str(y_pred_list[-1]):
            final.append(value)
            
rec1 = json.dumps(final)
print(rec1)


[{"\ufeffName": "Probabilistic Graphical Models 2: Inference", "Url": "https://coursera.org/learn/probabilistic-graphical-models-2-inference", "Rating": "4.6", "Difficulty": "Advanced Level", "Tags": "['Data Science', 'Machine Learning']", "career_level": "3", "available_time_per_week": "0.1", "competence": "3", "quiz": "0.7", "Category": "3"}, {"\ufeffName": "Introduction to Deep Learning", "Url": "https://coursera.org/learn/intro-to-deep-learning", "Rating": "4.6", "Difficulty": "Advanced Level", "Tags": "['Data Science', 'Machine Learning']", "career_level": "3", "available_time_per_week": "0.25", "competence": "3", "quiz": "0.9", "Category": "3"}, {"\ufeffName": "Production Machine Learning Systems", "Url": "https://coursera.org/learn/gcp-production-ml-systems", "Rating": "4.6", "Difficulty": "Advanced Level", "Tags": "['Data Science', 'Machine Learning']", "career_level": "1", "available_time_per_week": "0.25", "competence": "3", "quiz": "0.8", "Category": "3"}, {"\ufeffName": "Pr