Source code for recommenders.models.deeprec.models.xDeepFM

# Copyright (c) Recommenders contributors.
# Licensed under the MIT License.

import numpy as np
import tensorflow as tf

from recommenders.models.deeprec.models.base_model import BaseModel


__all__ = ["XDeepFMModel"]


[docs]class XDeepFMModel(BaseModel): """xDeepFM model :Citation: J. Lian, X. Zhou, F. Zhang, Z. Chen, X. Xie, G. Sun, "xDeepFM: Combining Explicit and Implicit Feature Interactions for Recommender Systems", in Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, KDD 2018, London, 2018. """ def _build_graph(self): """The main function to create xdeepfm's logic. Returns: object: The prediction score made by the model. """ hparams = self.hparams self.keep_prob_train = 1 - np.array(hparams.dropout) self.keep_prob_test = np.ones_like(hparams.dropout) with tf.compat.v1.variable_scope("XDeepFM") as scope: # noqa: F841 with tf.compat.v1.variable_scope( "embedding", initializer=self.initializer ) as escope: # noqa: F841 self.embedding = tf.compat.v1.get_variable( name="embedding_layer", shape=[hparams.FEATURE_COUNT, hparams.dim], dtype=tf.float32, ) self.embed_params.append(self.embedding) embed_out, embed_layer_size = self._build_embedding() logit = 0 if hparams.use_Linear_part: print("Add linear part.") logit = logit + self._build_linear() if hparams.use_FM_part: print("Add FM part.") logit = logit + self._build_fm() if hparams.use_CIN_part: print("Add CIN part.") if hparams.fast_CIN_d <= 0: logit = logit + self._build_CIN( embed_out, res=True, direct=False, bias=False, is_masked=True ) else: logit = logit + self._build_fast_CIN( embed_out, res=True, direct=False, bias=False ) if hparams.use_DNN_part: print("Add DNN part.") logit = logit + self._build_dnn(embed_out, embed_layer_size) return logit def _build_embedding(self): """The field embedding layer. MLP requires fixed-length vectors as input. This function makes sum pooling of feature embeddings for each field. Returns: embedding: The result of field embedding layer, with size of #_fields * #_dim. embedding_size: #_fields * #_dim """ hparams = self.hparams fm_sparse_index = tf.SparseTensor( self.iterator.dnn_feat_indices, self.iterator.dnn_feat_values, self.iterator.dnn_feat_shape, ) fm_sparse_weight = tf.SparseTensor( self.iterator.dnn_feat_indices, self.iterator.dnn_feat_weights, self.iterator.dnn_feat_shape, ) w_fm_nn_input_orgin = tf.nn.embedding_lookup_sparse( params=self.embedding, sp_ids=fm_sparse_index, sp_weights=fm_sparse_weight, combiner="sum", ) embedding = tf.reshape( w_fm_nn_input_orgin, [-1, hparams.dim * hparams.FIELD_COUNT] ) embedding_size = hparams.FIELD_COUNT * hparams.dim return embedding, embedding_size def _build_linear(self): """Construct the linear part for the model. This is a linear regression. Returns: object: Prediction score made by linear regression. """ with tf.compat.v1.variable_scope( "linear_part", initializer=self.initializer ) as scope: # noqa: F841 w = tf.compat.v1.get_variable( name="w", shape=[self.hparams.FEATURE_COUNT, 1], dtype=tf.float32 ) b = tf.compat.v1.get_variable( name="b", shape=[1], dtype=tf.float32, initializer=tf.compat.v1.zeros_initializer(), ) x = tf.SparseTensor( self.iterator.fm_feat_indices, self.iterator.fm_feat_values, self.iterator.fm_feat_shape, ) linear_output = tf.add(tf.sparse.sparse_dense_matmul(x, w), b) self.layer_params.append(w) self.layer_params.append(b) tf.compat.v1.summary.histogram("linear_part/w", w) tf.compat.v1.summary.histogram("linear_part/b", b) return linear_output def _build_fm(self): """Construct the factorization machine part for the model. This is a traditional 2-order FM module. Returns: object: Prediction score made by factorization machine. """ with tf.compat.v1.variable_scope("fm_part") as scope: # noqa: F841 x = tf.SparseTensor( self.iterator.fm_feat_indices, self.iterator.fm_feat_values, self.iterator.fm_feat_shape, ) xx = tf.SparseTensor( self.iterator.fm_feat_indices, tf.pow(self.iterator.fm_feat_values, 2), self.iterator.fm_feat_shape, ) fm_output = 0.5 * tf.reduce_sum( input_tensor=tf.pow(tf.sparse.sparse_dense_matmul(x, self.embedding), 2) - tf.sparse.sparse_dense_matmul(xx, tf.pow(self.embedding, 2)), axis=1, keepdims=True, ) return fm_output def _build_CIN( self, nn_input, res=False, direct=False, bias=False, is_masked=False ): """Construct the compressed interaction network. This component provides explicit and vector-wise higher-order feature interactions. Args: nn_input (object): The output of field-embedding layer. This is the input for CIN. res (bool): Whether use residual structure to fuse the results from each layer of CIN. direct (bool): If true, then all hidden units are connected to both next layer and output layer; otherwise, half of hidden units are connected to next layer and the other half will be connected to output layer. bias (bool): Whether to add bias term when calculating the feature maps. is_masked (bool): Controls whether to remove self-interaction in the first layer of CIN. Returns: object: Prediction score made by CIN. """ hparams = self.hparams hidden_nn_layers = [] field_nums = [] final_len = 0 field_num = hparams.FIELD_COUNT nn_input = tf.reshape(nn_input, shape=[-1, int(field_num), hparams.dim]) field_nums.append(int(field_num)) hidden_nn_layers.append(nn_input) final_result = [] split_tensor0 = tf.split(hidden_nn_layers[0], hparams.dim * [1], 2) with tf.compat.v1.variable_scope( "exfm_part", initializer=self.initializer ) as scope: # noqa: F841 for idx, layer_size in enumerate(hparams.cross_layer_sizes): split_tensor = tf.split(hidden_nn_layers[-1], hparams.dim * [1], 2) dot_result_m = tf.matmul( split_tensor0, split_tensor, transpose_b=True ) # shape : (Dim, Batch, FieldNum, HiddenNum), a.k.a (D,B,F,H) dot_result_o = tf.reshape( dot_result_m, shape=[hparams.dim, -1, field_nums[0] * field_nums[-1]], ) # shape: (D,B,FH) dot_result = tf.transpose(a=dot_result_o, perm=[1, 0, 2]) # (B,D,FH) filters = tf.compat.v1.get_variable( name="f_" + str(idx), shape=[1, field_nums[-1] * field_nums[0], layer_size], dtype=tf.float32, ) if is_masked and idx == 0: ones = tf.ones([field_nums[0], field_nums[0]], dtype=tf.float32) mask_matrix = tf.linalg.band_part( ones, 0, -1 ) - tf.linalg.tensor_diag(tf.ones(field_nums[0])) mask_matrix = tf.reshape( mask_matrix, shape=[1, field_nums[0] * field_nums[0]] ) dot_result = tf.multiply(dot_result, mask_matrix) * 2 self.dot_result = dot_result curr_out = tf.nn.conv1d( input=dot_result, filters=filters, stride=1, padding="VALID" ) # shape : (B,D,H`) if bias: b = tf.compat.v1.get_variable( name="f_b" + str(idx), shape=[layer_size], dtype=tf.float32, initializer=tf.compat.v1.zeros_initializer(), ) curr_out = tf.nn.bias_add(curr_out, b) self.cross_params.append(b) if hparams.enable_BN is True: curr_out = tf.compat.v1.layers.batch_normalization( curr_out, momentum=0.95, epsilon=0.0001, training=self.is_train_stage, ) curr_out = self._activate(curr_out, hparams.cross_activation) curr_out = tf.transpose(a=curr_out, perm=[0, 2, 1]) # shape : (B,H,D) if direct: direct_connect = curr_out next_hidden = curr_out final_len += layer_size field_nums.append(int(layer_size)) else: if idx != len(hparams.cross_layer_sizes) - 1: next_hidden, direct_connect = tf.split( curr_out, 2 * [int(layer_size / 2)], 1 ) final_len += int(layer_size / 2) else: direct_connect = curr_out next_hidden = 0 final_len += layer_size field_nums.append(int(layer_size / 2)) final_result.append(direct_connect) hidden_nn_layers.append(next_hidden) self.cross_params.append(filters) result = tf.concat(final_result, axis=1) result = tf.reduce_sum(input_tensor=result, axis=-1) # shape : (B,H) if res: base_score = tf.reduce_sum( input_tensor=result, axis=1, keepdims=True ) # (B,1) else: base_score = 0 w_nn_output = tf.compat.v1.get_variable( name="w_nn_output", shape=[final_len, 1], dtype=tf.float32 ) b_nn_output = tf.compat.v1.get_variable( name="b_nn_output", shape=[1], dtype=tf.float32, initializer=tf.compat.v1.zeros_initializer(), ) self.layer_params.append(w_nn_output) self.layer_params.append(b_nn_output) exFM_out = base_score + tf.compat.v1.nn.xw_plus_b( result, w_nn_output, b_nn_output ) return exFM_out def _build_fast_CIN(self, nn_input, res=False, direct=False, bias=False): """Construct the compressed interaction network with reduced parameters. This component provides explicit and vector-wise higher-order feature interactions. Parameters from the filters are reduced via a matrix decomposition method. Fast CIN is more space and time efficient than CIN. Args: nn_input (object): The output of field-embedding layer. This is the input for CIN. res (bool): Whether use residual structure to fuse the results from each layer of CIN. direct (bool): If true, then all hidden units are connected to both next layer and output layer; otherwise, half of hidden units are connected to next layer and the other half will be connected to output layer. bias (bool): Whether to add bias term when calculating the feature maps. Returns: object: Prediction score made by fast CIN. """ hparams = self.hparams hidden_nn_layers = [] field_nums = [] final_len = 0 field_num = hparams.FIELD_COUNT fast_CIN_d = hparams.fast_CIN_d nn_input = tf.reshape( nn_input, shape=[-1, int(field_num), hparams.dim] ) # (B,F,D) nn_input = tf.transpose(a=nn_input, perm=[0, 2, 1]) # (B,D,F) field_nums.append(int(field_num)) hidden_nn_layers.append(nn_input) final_result = [] with tf.compat.v1.variable_scope( "exfm_part", initializer=self.initializer ) as scope: # noqa: F841 for idx, layer_size in enumerate(hparams.cross_layer_sizes): if idx == 0: fast_w = tf.compat.v1.get_variable( "fast_CIN_w_" + str(idx), shape=[1, field_nums[0], fast_CIN_d * layer_size], dtype=tf.float32, ) self.cross_params.append(fast_w) dot_result_1 = tf.nn.conv1d( input=nn_input, filters=fast_w, stride=1, padding="VALID" ) # shape: (B,D,d*H) dot_result_2 = tf.nn.conv1d( input=tf.pow(nn_input, 2), filters=tf.pow(fast_w, 2), stride=1, padding="VALID", ) # shape: ((B,D,d*H) dot_result = tf.reshape( 0.5 * (dot_result_1 - dot_result_2), shape=[-1, hparams.dim, layer_size, fast_CIN_d], ) curr_out = tf.reduce_sum( input_tensor=dot_result, axis=3, keepdims=False ) # shape: ((B,D,H) else: fast_w = tf.compat.v1.get_variable( "fast_CIN_w_" + str(idx), shape=[1, field_nums[0], fast_CIN_d * layer_size], dtype=tf.float32, ) fast_v = tf.compat.v1.get_variable( "fast_CIN_v_" + str(idx), shape=[1, field_nums[-1], fast_CIN_d * layer_size], dtype=tf.float32, ) self.cross_params.append(fast_w) self.cross_params.append(fast_v) dot_result_1 = tf.nn.conv1d( input=nn_input, filters=fast_w, stride=1, padding="VALID" ) # shape: ((B,D,d*H) dot_result_2 = tf.nn.conv1d( input=hidden_nn_layers[-1], filters=fast_v, stride=1, padding="VALID", ) # shape: ((B,D,d*H) dot_result = tf.reshape( tf.multiply(dot_result_1, dot_result_2), shape=[-1, hparams.dim, layer_size, fast_CIN_d], ) curr_out = tf.reduce_sum( input_tensor=dot_result, axis=3, keepdims=False ) # shape: ((B,D,H) if bias: b = tf.compat.v1.get_variable( name="f_b" + str(idx), shape=[1, 1, layer_size], dtype=tf.float32, initializer=tf.compat.v1.zeros_initializer(), ) curr_out = tf.nn.bias_add(curr_out, b) self.cross_params.append(b) if hparams.enable_BN is True: curr_out = tf.compat.v1.layers.batch_normalization( curr_out, momentum=0.95, epsilon=0.0001, training=self.is_train_stage, ) curr_out = self._activate(curr_out, hparams.cross_activation) if direct: direct_connect = curr_out next_hidden = curr_out final_len += layer_size field_nums.append(int(layer_size)) else: if idx != len(hparams.cross_layer_sizes) - 1: next_hidden, direct_connect = tf.split( curr_out, 2 * [int(layer_size / 2)], 2 ) final_len += int(layer_size / 2) field_nums.append(int(layer_size / 2)) else: direct_connect = curr_out next_hidden = 0 final_len += layer_size field_nums.append(int(layer_size)) final_result.append(direct_connect) hidden_nn_layers.append(next_hidden) result = tf.concat(final_result, axis=2) result = tf.reduce_sum(input_tensor=result, axis=1, keepdims=False) # (B,H) if res: base_score = tf.reduce_sum( input_tensor=result, axis=1, keepdims=True ) # (B,1) else: base_score = 0 w_nn_output = tf.compat.v1.get_variable( name="w_nn_output", shape=[final_len, 1], dtype=tf.float32 ) b_nn_output = tf.compat.v1.get_variable( name="b_nn_output", shape=[1], dtype=tf.float32, initializer=tf.compat.v1.zeros_initializer(), ) self.layer_params.append(w_nn_output) self.layer_params.append(b_nn_output) exFM_out = ( tf.compat.v1.nn.xw_plus_b(result, w_nn_output, b_nn_output) + base_score ) return exFM_out def _build_dnn(self, embed_out, embed_layer_size): """Construct the MLP part for the model. This components provides implicit higher-order feature interactions. Args: embed_out (object): The output of field-embedding layer. This is the input for DNN. embed_layer_size (object): Shape of the embed_out Returns: object: Prediction score made by fast CIN. """ hparams = self.hparams w_fm_nn_input = embed_out last_layer_size = embed_layer_size layer_idx = 0 hidden_nn_layers = [] hidden_nn_layers.append(w_fm_nn_input) with tf.compat.v1.variable_scope( "nn_part", initializer=self.initializer ) as scope: for idx, layer_size in enumerate(hparams.layer_sizes): curr_w_nn_layer = tf.compat.v1.get_variable( name="w_nn_layer" + str(layer_idx), shape=[last_layer_size, layer_size], dtype=tf.float32, ) curr_b_nn_layer = tf.compat.v1.get_variable( name="b_nn_layer" + str(layer_idx), shape=[layer_size], dtype=tf.float32, initializer=tf.compat.v1.zeros_initializer(), ) tf.compat.v1.summary.histogram( "nn_part/" + "w_nn_layer" + str(layer_idx), curr_w_nn_layer ) tf.compat.v1.summary.histogram( "nn_part/" + "b_nn_layer" + str(layer_idx), curr_b_nn_layer ) curr_hidden_nn_layer = tf.compat.v1.nn.xw_plus_b( hidden_nn_layers[layer_idx], curr_w_nn_layer, curr_b_nn_layer ) scope = "nn_part" + str(idx) # noqa: F841 activation = hparams.activation[idx] if hparams.enable_BN is True: curr_hidden_nn_layer = tf.compat.v1.layers.batch_normalization( curr_hidden_nn_layer, momentum=0.95, epsilon=0.0001, training=self.is_train_stage, ) curr_hidden_nn_layer = self._active_layer( logit=curr_hidden_nn_layer, activation=activation, layer_idx=idx ) hidden_nn_layers.append(curr_hidden_nn_layer) layer_idx += 1 last_layer_size = layer_size self.layer_params.append(curr_w_nn_layer) self.layer_params.append(curr_b_nn_layer) w_nn_output = tf.compat.v1.get_variable( name="w_nn_output", shape=[last_layer_size, 1], dtype=tf.float32 ) b_nn_output = tf.compat.v1.get_variable( name="b_nn_output", shape=[1], dtype=tf.float32, initializer=tf.compat.v1.zeros_initializer(), ) tf.compat.v1.summary.histogram( "nn_part/" + "w_nn_output" + str(layer_idx), w_nn_output ) tf.compat.v1.summary.histogram( "nn_part/" + "b_nn_output" + str(layer_idx), b_nn_output ) self.layer_params.append(w_nn_output) self.layer_params.append(b_nn_output) nn_output = tf.compat.v1.nn.xw_plus_b( hidden_nn_layers[-1], w_nn_output, b_nn_output ) return nn_output