In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [None]:
# load the raw table; the two item columns are read as strings and the
# 'dif' column as float
column_dtypes = {'item1': str, 'item2': str, 'dif': float}
data = pd.read_csv('input.csv', dtype=column_dtypes)
print('data size: {}'.format(data.shape[0]))

# model inputs (the two item columns) and the regression target
y = data['dif']
x = data[['item1', 'item2']]

In [None]:
# count feature values
# Pool both item columns so that 'item1' and 'item2' share one vocabulary.
# pd.concat is used instead of Series.append, which was deprecated in
# pandas 1.4 and removed in pandas 2.0; the resulting list is the same.
list_items = pd.concat([x['item1'], x['item2']]).drop_duplicates().tolist()
print("number of unique items: {}".format(len(list_items)))

# feature columns: one indicator column per item key, both built on the
# same vocabulary (list_items)
item1_col, item2_col = (
    tf.feature_column.indicator_column(
        tf.feature_column.categorical_column_with_vocabulary_list(
            key, vocabulary_list=list_items))
    for key in ('item1', 'item2')
)
# numeric column for 'dif' (not referenced by the other cells shown here)
dif_col = tf.feature_column.numeric_column('dif')

In [None]:
# training input function: feeds x / y to the estimator in shuffled
# batches of 100 rows, with num_epochs=1
input_fn_train = tf.estimator.inputs.pandas_input_fn(
    x=x, y=y, batch_size=100, num_epochs=1, shuffle=True)

In [None]:
# build model function
def model_fn(features, labels, mode, params):
    """Model function for the custom estimator.

    Each of the two item features is passed through its own feature
    column, the second encoding is subtracted from the first, and the
    difference is fed to a single bias-free dense unit whose output is
    used as the logits of a regression head.

    Args:
        features: mapping that provides the 'item1' and 'item2' inputs.
        labels: regression targets, forwarded to the head unchanged.
        mode: estimator mode, forwarded to the head unchanged.
        params: dict with
            'feature_columns' - sequence whose first entry is applied to
                'item1' and whose second entry is applied to 'item2';
            'step_size' - value handed to the FTRL optimizer.

    Returns:
        The result of the regression head's ``create_estimator_spec``.
    """
    columns = params['feature_columns']

    # encode each item with its own column (item1 first, then item2)
    item1_encoded = tf.feature_column.input_layer(
        {'item1': features['item1']}, columns[0])
    item2_encoded = tf.feature_column.input_layer(
        {'item2': features['item2']}, columns[1])

    # a single linear unit without bias applied to (item1 - item2)
    difference = tf.subtract(item1_encoded, item2_encoded)
    logits = tf.layers.dense(
        difference,
        units=1,
        use_bias=False,
        kernel_initializer=None)

    # regression head; loss_fn=None keeps the head's default loss
    # (mean_squared_error according to the original note)
    head = tf.contrib.estimator.regression_head(
        label_dimension=1,
        loss_fn=None)

    optimizer = tf.train.FtrlOptimizer(params['step_size'])
    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        optimizer=optimizer,
        logits=logits)

In [None]:
# custom estimator wired to model_fn; params carries the two item
# feature columns and the optimizer step size
regressor = tf.estimator.Estimator(
    model_fn=model_fn,
    params={
        'feature_columns': [item1_col, item2_col],
        'step_size': 0.2,
    },
)

# train estimator (steps=None: no explicit step limit is passed)
regressor.train(input_fn=input_fn_train, steps=None)

In [None]:
# construct prediction table
# Start from every distinct (item1, item2) pair in the input ...
x_pred = x.drop_duplicates()
# ... add the same pairs with the two items swapped ...
x_pred2 = x_pred[['item2', 'item1']]
x_pred2.columns = ['item1', 'item2']
# pd.concat replaces DataFrame.append (removed in pandas 2.0).  The index is
# rebuilt after drop_duplicates() so that it is contiguous again.
x_pred = (
    pd.concat([x_pred, x_pred2], ignore_index=True)
    .drop_duplicates()
    .reset_index(drop=True)
)
# ... and finally append one row of empty strings.  The previous
# `x_pred.loc[len(x_pred)] = ['', '']` could silently overwrite an existing
# pair: drop_duplicates() leaves gaps in the index, so the label
# len(x_pred) may already be in use.  Concatenating always adds a new row.
x_pred = pd.concat(
    [x_pred, pd.DataFrame([['', '']], columns=x_pred.columns)],
    ignore_index=True)

In [None]:
# predict for each row of the prediction table, all rows in one
# unshuffled batch so the output order matches x_pred
predict_input_fn = tf.estimator.inputs.pandas_input_fn(
    x=x_pred, batch_size=len(x_pred), num_epochs=1, shuffle=False)
predictions = list(regressor.predict(input_fn=predict_input_fn))

# output: first entry of each 'predictions' array, stored as column 'shift'
pred = [np.float64(row['predictions'][0]) for row in predictions]
x_pred['shift'] = pred
x_pred.to_csv('predictions.csv', sep=',', index=False)