In [20]:
import json
import torch
import pandas as pd
from pathlib import Path
from itertools import repeat
from collections import OrderedDict
import pickle

from sklearn.model_selection import train_test_split

In [24]:
# Round-trip demo, step 1: serialize a small list to 'model.pickle'
# in the current working directory (binary mode is required by pickle).
with open('model.pickle', 'wb') as pickle_file:
    a = [1, 2, 3]
    pickle.dump(a, pickle_file)

In [25]:
# Round-trip demo, step 2: read the object back from 'model.pickle'.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open('model.pickle', 'rb') as pickle_file:
    a = pickle.load(pickle_file)

# Printing happens after the file has been closed; the output is unchanged.
print(a)

[1, 2, 3]


In [15]:
# Base mapping: 'a' and 'b' hold lists, 'c' holds a plain integer.
tmp_dict = dict(a=[1, 2, 3], b=[1, 2], c=1234)

# Override values for the 'a' and 'b' keys of the base mapping.
merge_dict = dict(a=1, b=2)

In [16]:
# Build a new dict holding tmp_dict's entries with merge_dict's values
# taking precedence; tmp_dict itself is left untouched (shallow copy).
a = {**tmp_dict, **merge_dict}

In [17]:
# Show the merged dictionary built from the copy of tmp_dict.
print(a)

{'a': 1, 'b': 2, 'c': 1234}


In [18]:
# Display tmp_dict to confirm that updating the copy did not modify the original.
tmp_dict

{'a': [1, 2, 3], 'b': [1, 2], 'c': 1234}

In [12]:
def ensure_dir(dirname):
    """Create ``dirname`` (and any missing parents) if it does not exist yet.

    Args:
        dirname: Directory path as a ``str`` or ``pathlib.Path``.

    Raises:
        FileExistsError: If the path exists but is not a directory.
    """
    # exist_ok=True replaces the former "is_dir() then mkdir()" sequence, which
    # could fail if another process created the directory between the two calls.
    # mkdir still raises FileExistsError when the path is an existing file.
    Path(dirname).mkdir(parents=True, exist_ok=True)

def read_json(fname):
    """Load a JSON file, returning every JSON object as an ``OrderedDict``.

    Args:
        fname: Path of the JSON file (``str`` or ``pathlib.Path``).

    Returns:
        The decoded document; JSON objects at every nesting level are
        ``OrderedDict`` instances that keep the key order found in the file.
    """
    raw_text = Path(fname).read_text()
    return json.loads(raw_text, object_hook=OrderedDict)

def write_json(content, fname):
    """Serialize ``content`` as indented JSON into the file ``fname``.

    Args:
        content: JSON-serializable object to write.
        fname: Destination path (``str`` or ``pathlib.Path``).
    """
    # Accept plain strings as well as Path objects, mirroring read_json;
    # previously a str argument failed with AttributeError on ``.open``.
    fname = Path(fname)
    with fname.open('wt') as handle:
        # sort_keys=False keeps the insertion order of the mapping being written.
        json.dump(content, handle, indent=4, sort_keys=False)

def preprare_device(n_gpu_use):
    """Pick the torch device and the GPU indices to use.

    Args:
        n_gpu_use: Number of GPUs the caller would like to use.

    Returns:
        A ``(device, list_ids)`` tuple: ``device`` is ``cuda:0`` when at least
        one GPU will be used and ``cpu`` otherwise; ``list_ids`` is
        ``[0, ..., k-1]`` where ``k`` is the number of GPUs actually used.
    """
    n_gpu = torch.cuda.device_count()
    if n_gpu_use > 0 and n_gpu == 0:
        print('Warning: There\'s no GPU available on this machine, training will be performed on CPU.')
        n_gpu_use = 0
    if n_gpu_use > n_gpu:
        # Clamp the request: without this, list_ids would name devices that do
        # not exist when more GPUs are configured than are installed.
        print(f"Warning: The number of GPU's configured to use is {n_gpu_use}, "
              f"but only {n_gpu} are available on this machine.")
        n_gpu_use = n_gpu
    device = torch.device('cuda:0' if n_gpu_use > 0 else 'cpu')
    list_ids = list(range(n_gpu_use))
    return device, list_ids


# Correctly spelled alias; the original (misspelled) name is kept for existing callers.
prepare_device = preprare_device

class MetricTracker:
    """Keep running weighted averages for a fixed set of named metrics.

    Every key given to the constructor gets one row in an internal DataFrame
    with the columns ``total`` (sum of ``value * n``), ``counts`` (sum of
    ``n``) and ``average`` (``total / counts``).

    Args:
        *keys: Names of the metrics to track.
        writer: Optional object with an ``add_scalar(key, value)`` method;
            when provided, every ``update`` call is forwarded to it.
    """

    def __init__(self, *keys, writer=None):
        self.writer = writer
        # Float columns from the start, so float updates never force a dtype change.
        self._data = pd.DataFrame(
            0.0, index=list(keys), columns=['total', 'counts', 'average'])
        self.reset()

    def reset(self):
        """Set total, counts and average back to zero for every metric."""
        # Assign whole columns instead of writing through ``.values``, which may
        # be a read-only view or a copy depending on the pandas version.
        for col in self._data.columns:
            self._data[col] = 0.0

    def update(self, key, value, n=1):
        """Add ``value`` with weight ``n`` to the metric ``key``.

        Args:
            key: Name of a metric passed to the constructor.
            value: Scalar observation; converted with ``float()`` for storage.
            n: Weight of the observation (e.g. a batch size).

        Raises:
            KeyError: If ``key`` is not one of the tracked metrics.
        """
        if self.writer is not None:
            self.writer.add_scalar(key, value)
        # Use .loc for a single-step lookup + assignment; chained assignment
        # (``self._data.total[key] += ...``) may silently modify a temporary copy.
        self._data.loc[key, 'total'] += float(value) * n
        self._data.loc[key, 'counts'] += n
        # The original wrote to a misspelled ``everate`` attribute here, which
        # raised AttributeError and left ``average`` unset.
        self._data.loc[key, 'average'] = (
            self._data.loc[key, 'total'] / self._data.loc[key, 'counts'])

    def avg(self, key):
        """Return the current average of the metric ``key``."""
        return self._data.loc[key, 'average']

    def result(self):
        """Return a ``{metric name: current average}`` dictionary."""
        return self._data['average'].to_dict()

In [2]:
# Location of the experiment's JSON configuration; the path is relative,
# so it resolves against the notebook's current working directory.
config_path = '../Graph/1_3.NGCF_NeuralGraphCollaborativeFiltering_explicit_version02/config.json'

In [4]:
# Parse the configuration file with the read_json helper (JSON objects become OrderedDicts).
config = read_json(config_path)

In [5]:
# Display the parsed configuration.
config

OrderedDict([('name', 'NGCF_MovieLens100k_explicit'),
             ('cuda', OrderedDict([('n_gpu', 3), ('device', 'cuda')])),
             ('data',
              OrderedDict([('data_dir', 'data/'),
                           ('file_name', 'ratings.csv')])),
             ('data_loader',
              OrderedDict([('shuffle', True), ('batch_size', 128)])),
             ('preprocessing',
              OrderedDict([('validation_split', 0.1),
                           ('adj_type', ['plain', 'norm', 'mean'])])),
             ('optimizer',
              OrderedDict([('type', 'Adam'),
                           ('lr', [0.01, 0.001, 0.0001]),
                           ('regs', [0.01, 0.001, 0.0001])])),
             ('loss', OrderedDict([('metrics', 'rmse')])),
             ('lr_scheduler',
              OrderedDict([('step_size', 10), ('gamma', [0.1, 0.01, 0.001])])),
             ('model',
              OrderedDict([('node_drop_flag',
                            ['Activate Node Dropout', 'D

In [6]:
# Spot-check a nested lookup: the 'logging_verbosity' entry of the 'train' section.
config['train']['logging_verbosity']

2

In [7]:
# Ratings file to load; the path is relative to the notebook's current working directory.
path = '../Graph/1_2.NGCF_NeuralGraphCollaborativeFiltering_explicit/data/ratings.csv'

In [10]:
# Read the ratings file, supplying the column names explicitly and decoding it as cp949.
df = pd.read_csv(
    path,
    names=['user_id', 'movie_id', 'rating', 'timestamp'],
    encoding='cp949',
)

In [11]:
# Preview the first rows of the loaded ratings frame.
df.head()

Unnamed: 0,user_id,movie_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [31]:
# Hold out 10% of the ratings for testing. The seed is fixed so that the split,
# and every row count derived from it below, is reproducible after a kernel restart.
train, test = train_test_split(df, test_size = 0.1, random_state = 42)

In [32]:
# Report the row count of each split, train first and test second, one per line.
print(len(train), len(test), sep='\n')

90000
10000


In [33]:
def check_train_test_split(train, test):
    """Move test rows with users or movies unseen in train into the train split.

    After the call, every ``user_id`` and every ``movie_id`` that occurs in the
    returned test frame also occurs in the returned train frame. The inputs
    are not modified; new frames are returned.

    Args:
        train: DataFrame with at least ``user_id`` and ``movie_id`` columns.
        test: DataFrame with the same columns as ``train``.

    Returns:
        ``(train, test)`` after the rows have been moved. Moved rows are
        appended to ``train`` with their original index labels.
    """
    # Both "test-only" sets are computed from the frames as passed in, before
    # any rows move. ``unique()`` is already one-dimensional, so no ``flatten()``
    # is needed (it is also unavailable on pandas extension arrays such as
    # nullable Int64 or categorical columns).
    test_only_item = set(test['movie_id'].unique()) - set(train['movie_id'].unique())
    test_only_user = set(test['user_id'].unique()) - set(train['user_id'].unique())

    if test_only_user:
        from_unseen_user = test['user_id'].isin(list(test_only_user))
        train = pd.concat([train, test[from_unseen_user]], axis = 0)
        test = test[~from_unseen_user]

    if test_only_item:
        # Evaluated on the already-reduced test frame, so no row is moved twice.
        of_unseen_item = test['movie_id'].isin(list(test_only_item))
        train = pd.concat([train, test[of_unseen_item]], axis = 0)
        test = test[~of_unseen_item]

    return train, test

In [34]:
# Move test rows whose user or movie never occurs in train over to the train split.
# Note: this rebinds `train` and `test`, replacing the frames produced by the raw split.
train, test = check_train_test_split(train, test)

In [35]:
# Report the row counts again after the split check, train first and test second.
print(len(train), len(test), sep='\n')

90019
9981


In [27]:
import os

In [28]:
# Show the current working directory, against which the notebook's relative paths resolve.
print(os.getcwd())

/home/yooseungwoo/Desktop/Project/Recommendation-System/Jupyter Notebook


In [29]:
# List the entries of the current working directory (os.listdir returns them in arbitrary order).
print(os.listdir())

['Knowledge Graph Attention Network.ipynb', 'my.log', 'NGCF_Movielens 100k.ipynb', 'asdf', 'Movie_Lens_100k to gowalla shape.ipynb', 'practice_NGCF_1_3.ipynb', 'LightGCN_Movielens 100k.ipynb', 'Knowledge Graph Attention Network_practice.ipynb', 'practice_logging.ipynb', '.ipynb_checkpoints', 'logger_config.json', 'model.pickle']


In [30]:
# List the working directory's entries in sorted order.
print(sorted(os.listdir()))

['.ipynb_checkpoints', 'Knowledge Graph Attention Network.ipynb', 'Knowledge Graph Attention Network_practice.ipynb', 'LightGCN_Movielens 100k.ipynb', 'Movie_Lens_100k to gowalla shape.ipynb', 'NGCF_Movielens 100k.ipynb', 'asdf', 'logger_config.json', 'model.pickle', 'my.log', 'practice_NGCF_1_3.ipynb', 'practice_logging.ipynb']


In [31]:
# Print the last entry of the sorted directory listing.
print(sorted(os.listdir())[-1])

practice_logging.ipynb
