# Copyright (c) Recommenders contributors.
# Licensed under the MIT License.
import numpy as np
import tensorflow as tf
from recommenders.models.deeprec.models.base_model import BaseModel
__all__ = ["XDeepFMModel"]
class XDeepFMModel(BaseModel):
"""xDeepFM model
:Citation:
J. Lian, X. Zhou, F. Zhang, Z. Chen, X. Xie, G. Sun, "xDeepFM: Combining Explicit
and Implicit Feature Interactions for Recommender Systems", in Proceedings of the
24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining,
KDD 2018, London, 2018.
"""
def _build_graph(self):
"""The main function to create xdeepfm's logic.
Returns:
object: The prediction score made by the model.
"""
hparams = self.hparams
self.keep_prob_train = 1 - np.array(hparams.dropout)
self.keep_prob_test = np.ones_like(hparams.dropout)
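        # hparams.dropout holds per-layer drop probabilities; convert them to keep
        # probabilities: 1 - dropout during training, 1.0 (keep everything) at test time.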
with tf.compat.v1.variable_scope("XDeepFM") as scope: # noqa: F841
with tf.compat.v1.variable_scope(
"embedding", initializer=self.initializer
) as escope: # noqa: F841
self.embedding = tf.compat.v1.get_variable(
name="embedding_layer",
shape=[hparams.FEATURE_COUNT, hparams.dim],
dtype=tf.float32,
)
self.embed_params.append(self.embedding)
embed_out, embed_layer_size = self._build_embedding()
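            # The final prediction is the sum of the enabled component scores:
            # linear (first order) + FM (second order) + CIN (explicit high-order
            # interactions) + DNN (implicit high-order interactions).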
logit = 0
if hparams.use_Linear_part:
print("Add linear part.")
logit = logit + self._build_linear()
if hparams.use_FM_part:
print("Add FM part.")
logit = logit + self._build_fm()
if hparams.use_CIN_part:
print("Add CIN part.")
if hparams.fast_CIN_d <= 0:
logit = logit + self._build_CIN(
embed_out, res=True, direct=False, bias=False, is_masked=True
)
else:
logit = logit + self._build_fast_CIN(
embed_out, res=True, direct=False, bias=False
)
if hparams.use_DNN_part:
print("Add DNN part.")
logit = logit + self._build_dnn(embed_out, embed_layer_size)
return logit
def _build_embedding(self):
"""The field embedding layer. MLP requires fixed-length vectors as input.
This function makes sum pooling of feature embeddings for each field.
Returns:
embedding: The result of field embedding layer, with size of #_fields * #_dim.
embedding_size: #_fields * #_dim
"""
hparams = self.hparams
fm_sparse_index = tf.SparseTensor(
self.iterator.dnn_feat_indices,
self.iterator.dnn_feat_values,
self.iterator.dnn_feat_shape,
)
fm_sparse_weight = tf.SparseTensor(
self.iterator.dnn_feat_indices,
self.iterator.dnn_feat_weights,
self.iterator.dnn_feat_shape,
)
        w_fm_nn_input_origin = tf.nn.embedding_lookup_sparse(
params=self.embedding,
sp_ids=fm_sparse_index,
sp_weights=fm_sparse_weight,
combiner="sum",
)
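        # embedding_lookup_sparse with combiner="sum" pools the weighted embeddings of
        # all features belonging to the same field into one dim-sized vector per field;
        # the FIELD_COUNT vectors are then flattened into a single fixed-length input.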
embedding = tf.reshape(
            w_fm_nn_input_origin, [-1, hparams.dim * hparams.FIELD_COUNT]
)
embedding_size = hparams.FIELD_COUNT * hparams.dim
return embedding, embedding_size
def _build_linear(self):
"""Construct the linear part for the model.
This is a linear regression.
Returns:
object: Prediction score made by linear regression.
"""
with tf.compat.v1.variable_scope(
"linear_part", initializer=self.initializer
) as scope: # noqa: F841
w = tf.compat.v1.get_variable(
name="w", shape=[self.hparams.FEATURE_COUNT, 1], dtype=tf.float32
)
b = tf.compat.v1.get_variable(
name="b",
shape=[1],
dtype=tf.float32,
initializer=tf.compat.v1.zeros_initializer(),
)
x = tf.SparseTensor(
self.iterator.fm_feat_indices,
self.iterator.fm_feat_values,
self.iterator.fm_feat_shape,
)
linear_output = tf.add(tf.sparse.sparse_dense_matmul(x, w), b)
self.layer_params.append(w)
self.layer_params.append(b)
tf.compat.v1.summary.histogram("linear_part/w", w)
tf.compat.v1.summary.histogram("linear_part/b", b)
return linear_output
def _build_fm(self):
"""Construct the factorization machine part for the model.
This is a traditional 2-order FM module.
Returns:
object: Prediction score made by factorization machine.
"""
with tf.compat.v1.variable_scope("fm_part") as scope: # noqa: F841
x = tf.SparseTensor(
self.iterator.fm_feat_indices,
self.iterator.fm_feat_values,
self.iterator.fm_feat_shape,
)
xx = tf.SparseTensor(
self.iterator.fm_feat_indices,
tf.pow(self.iterator.fm_feat_values, 2),
self.iterator.fm_feat_shape,
)
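            # Second-order FM score via the standard identity
            #   0.5 * sum_k [ (sum_i v_ik * x_i)^2 - sum_i v_ik^2 * x_i^2 ],
            # computed here as 0.5 * sum((x V)^2 - (x^2)(V^2)), which counts every
            # pairwise interaction <v_i, v_j> x_i x_j exactly once.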
fm_output = 0.5 * tf.reduce_sum(
input_tensor=tf.pow(tf.sparse.sparse_dense_matmul(x, self.embedding), 2)
- tf.sparse.sparse_dense_matmul(xx, tf.pow(self.embedding, 2)),
axis=1,
keepdims=True,
)
return fm_output
def _build_CIN(
self, nn_input, res=False, direct=False, bias=False, is_masked=False
):
"""Construct the compressed interaction network.
This component provides explicit and vector-wise higher-order feature interactions.
Args:
nn_input (object): The output of field-embedding layer. This is the input for CIN.
            res (bool): Whether to use a residual structure to fuse the results from each layer of CIN.
            direct (bool): If True, all hidden units are connected to both the next layer and the output layer;
                otherwise, half of the hidden units are connected to the next layer and the other half to the output layer.
bias (bool): Whether to add bias term when calculating the feature maps.
is_masked (bool): Controls whether to remove self-interaction in the first layer of CIN.
Returns:
object: Prediction score made by CIN.
"""
hparams = self.hparams
hidden_nn_layers = []
field_nums = []
final_len = 0
field_num = hparams.FIELD_COUNT
nn_input = tf.reshape(nn_input, shape=[-1, int(field_num), hparams.dim])
field_nums.append(int(field_num))
hidden_nn_layers.append(nn_input)
final_result = []
split_tensor0 = tf.split(hidden_nn_layers[0], hparams.dim * [1], 2)
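        # CIN: for each embedding dimension, take the outer product between the raw
        # field embeddings X^0 and the previous layer's feature maps X^(k-1). Splitting
        # along the embedding axis lets one batched matmul compute all D outer products.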
with tf.compat.v1.variable_scope(
"exfm_part", initializer=self.initializer
) as scope: # noqa: F841
for idx, layer_size in enumerate(hparams.cross_layer_sizes):
split_tensor = tf.split(hidden_nn_layers[-1], hparams.dim * [1], 2)
dot_result_m = tf.matmul(
split_tensor0, split_tensor, transpose_b=True
) # shape : (Dim, Batch, FieldNum, HiddenNum), a.k.a (D,B,F,H)
dot_result_o = tf.reshape(
dot_result_m,
shape=[hparams.dim, -1, field_nums[0] * field_nums[-1]],
) # shape: (D,B,FH)
dot_result = tf.transpose(a=dot_result_o, perm=[1, 0, 2]) # (B,D,FH)
filters = tf.compat.v1.get_variable(
name="f_" + str(idx),
shape=[1, field_nums[-1] * field_nums[0], layer_size],
dtype=tf.float32,
)
if is_masked and idx == 0:
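                    # Keep only the strictly upper-triangular field pairs (dropping the
                    # x_i * x_i self-interactions) and double them to account for the
                    # symmetric lower-triangular pairs removed by the mask.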
ones = tf.ones([field_nums[0], field_nums[0]], dtype=tf.float32)
mask_matrix = tf.linalg.band_part(
ones, 0, -1
) - tf.linalg.tensor_diag(tf.ones(field_nums[0]))
mask_matrix = tf.reshape(
mask_matrix, shape=[1, field_nums[0] * field_nums[0]]
)
dot_result = tf.multiply(dot_result, mask_matrix) * 2
self.dot_result = dot_result
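                # A width-1 1-D convolution over the flattened (field_i, field_j) axis is
                # a per-output-map weighted sum of all pairwise products, producing
                # layer_size new feature maps, each of length dim.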
curr_out = tf.nn.conv1d(
input=dot_result, filters=filters, stride=1, padding="VALID"
) # shape : (B,D,H`)
if bias:
b = tf.compat.v1.get_variable(
name="f_b" + str(idx),
shape=[layer_size],
dtype=tf.float32,
initializer=tf.compat.v1.zeros_initializer(),
)
curr_out = tf.nn.bias_add(curr_out, b)
self.cross_params.append(b)
if hparams.enable_BN is True:
curr_out = tf.compat.v1.layers.batch_normalization(
curr_out,
momentum=0.95,
epsilon=0.0001,
training=self.is_train_stage,
)
curr_out = self._activate(curr_out, hparams.cross_activation)
curr_out = tf.transpose(a=curr_out, perm=[0, 2, 1]) # shape : (B,H,D)
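                # Route the feature maps: with direct=True every map feeds both the output
                # and the next CIN layer; otherwise the maps are split in half, one half
                # feeding the next layer and the other half going to the output.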
if direct:
direct_connect = curr_out
next_hidden = curr_out
final_len += layer_size
field_nums.append(int(layer_size))
else:
if idx != len(hparams.cross_layer_sizes) - 1:
next_hidden, direct_connect = tf.split(
curr_out, 2 * [int(layer_size / 2)], 1
)
final_len += int(layer_size / 2)
else:
direct_connect = curr_out
next_hidden = 0
final_len += layer_size
field_nums.append(int(layer_size / 2))
final_result.append(direct_connect)
hidden_nn_layers.append(next_hidden)
self.cross_params.append(filters)
result = tf.concat(final_result, axis=1)
result = tf.reduce_sum(input_tensor=result, axis=-1) # shape : (B,H)
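            # Sum-pool each feature map over the embedding dimension; with res=True the
            # pooled values are also summed into a residual score that is added to the
            # final linear output below.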
if res:
base_score = tf.reduce_sum(
input_tensor=result, axis=1, keepdims=True
) # (B,1)
else:
base_score = 0
w_nn_output = tf.compat.v1.get_variable(
name="w_nn_output", shape=[final_len, 1], dtype=tf.float32
)
b_nn_output = tf.compat.v1.get_variable(
name="b_nn_output",
shape=[1],
dtype=tf.float32,
initializer=tf.compat.v1.zeros_initializer(),
)
self.layer_params.append(w_nn_output)
self.layer_params.append(b_nn_output)
exFM_out = base_score + tf.compat.v1.nn.xw_plus_b(
result, w_nn_output, b_nn_output
)
return exFM_out
def _build_fast_CIN(self, nn_input, res=False, direct=False, bias=False):
"""Construct the compressed interaction network with reduced parameters.
This component provides explicit and vector-wise higher-order feature interactions.
        The parameters of the filters are reduced via a matrix decomposition method,
        which makes fast CIN more space and time efficient than the standard CIN.
Args:
nn_input (object): The output of field-embedding layer. This is the input for CIN.
            res (bool): Whether to use a residual structure to fuse the results from each layer of CIN.
            direct (bool): If True, all hidden units are connected to both the next layer and the output layer;
                otherwise, half of the hidden units are connected to the next layer and the other half to the output layer.
bias (bool): Whether to add bias term when calculating the feature maps.
Returns:
object: Prediction score made by fast CIN.
"""
hparams = self.hparams
hidden_nn_layers = []
field_nums = []
final_len = 0
field_num = hparams.FIELD_COUNT
fast_CIN_d = hparams.fast_CIN_d
nn_input = tf.reshape(
nn_input, shape=[-1, int(field_num), hparams.dim]
) # (B,F,D)
nn_input = tf.transpose(a=nn_input, perm=[0, 2, 1]) # (B,D,F)
field_nums.append(int(field_num))
hidden_nn_layers.append(nn_input)
final_result = []
with tf.compat.v1.variable_scope(
"exfm_part", initializer=self.initializer
) as scope: # noqa: F841
for idx, layer_size in enumerate(hparams.cross_layer_sizes):
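                # Fast CIN replaces each full CIN filter with a rank-d factorization
                # (d = hparams.fast_CIN_d): the first layer uses a single factor matrix
                # fast_w to interact the input with itself, while deeper layers combine
                # two factor matrices, fast_w for X^0 and fast_v for the previous layer.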
if idx == 0:
fast_w = tf.compat.v1.get_variable(
"fast_CIN_w_" + str(idx),
shape=[1, field_nums[0], fast_CIN_d * layer_size],
dtype=tf.float32,
)
self.cross_params.append(fast_w)
dot_result_1 = tf.nn.conv1d(
input=nn_input, filters=fast_w, stride=1, padding="VALID"
) # shape: (B,D,d*H)
dot_result_2 = tf.nn.conv1d(
input=tf.pow(nn_input, 2),
filters=tf.pow(fast_w, 2),
stride=1,
padding="VALID",
                    )  # shape: (B,D,d*H)
dot_result = tf.reshape(
0.5 * (dot_result_1 - dot_result_2),
shape=[-1, hparams.dim, layer_size, fast_CIN_d],
)
curr_out = tf.reduce_sum(
input_tensor=dot_result, axis=3, keepdims=False
                    )  # shape: (B,D,H)
else:
fast_w = tf.compat.v1.get_variable(
"fast_CIN_w_" + str(idx),
shape=[1, field_nums[0], fast_CIN_d * layer_size],
dtype=tf.float32,
)
fast_v = tf.compat.v1.get_variable(
"fast_CIN_v_" + str(idx),
shape=[1, field_nums[-1], fast_CIN_d * layer_size],
dtype=tf.float32,
)
self.cross_params.append(fast_w)
self.cross_params.append(fast_v)
dot_result_1 = tf.nn.conv1d(
input=nn_input, filters=fast_w, stride=1, padding="VALID"
                    )  # shape: (B,D,d*H)
dot_result_2 = tf.nn.conv1d(
input=hidden_nn_layers[-1],
filters=fast_v,
stride=1,
padding="VALID",
                    )  # shape: (B,D,d*H)
dot_result = tf.reshape(
tf.multiply(dot_result_1, dot_result_2),
shape=[-1, hparams.dim, layer_size, fast_CIN_d],
)
curr_out = tf.reduce_sum(
input_tensor=dot_result, axis=3, keepdims=False
                    )  # shape: (B,D,H)
if bias:
b = tf.compat.v1.get_variable(
name="f_b" + str(idx),
shape=[1, 1, layer_size],
dtype=tf.float32,
initializer=tf.compat.v1.zeros_initializer(),
)
curr_out = tf.nn.bias_add(curr_out, b)
self.cross_params.append(b)
if hparams.enable_BN is True:
curr_out = tf.compat.v1.layers.batch_normalization(
curr_out,
momentum=0.95,
epsilon=0.0001,
training=self.is_train_stage,
)
curr_out = self._activate(curr_out, hparams.cross_activation)
if direct:
direct_connect = curr_out
next_hidden = curr_out
final_len += layer_size
field_nums.append(int(layer_size))
else:
if idx != len(hparams.cross_layer_sizes) - 1:
next_hidden, direct_connect = tf.split(
curr_out, 2 * [int(layer_size / 2)], 2
)
final_len += int(layer_size / 2)
field_nums.append(int(layer_size / 2))
else:
direct_connect = curr_out
next_hidden = 0
final_len += layer_size
field_nums.append(int(layer_size))
final_result.append(direct_connect)
hidden_nn_layers.append(next_hidden)
result = tf.concat(final_result, axis=2)
result = tf.reduce_sum(input_tensor=result, axis=1, keepdims=False) # (B,H)
if res:
base_score = tf.reduce_sum(
input_tensor=result, axis=1, keepdims=True
) # (B,1)
else:
base_score = 0
w_nn_output = tf.compat.v1.get_variable(
name="w_nn_output", shape=[final_len, 1], dtype=tf.float32
)
b_nn_output = tf.compat.v1.get_variable(
name="b_nn_output",
shape=[1],
dtype=tf.float32,
initializer=tf.compat.v1.zeros_initializer(),
)
self.layer_params.append(w_nn_output)
self.layer_params.append(b_nn_output)
exFM_out = (
tf.compat.v1.nn.xw_plus_b(result, w_nn_output, b_nn_output) + base_score
)
return exFM_out
def _build_dnn(self, embed_out, embed_layer_size):
"""Construct the MLP part for the model.
        This component provides implicit higher-order feature interactions.
        Args:
            embed_out (object): The output of the field-embedding layer. This is the input for the DNN.
            embed_layer_size (object): The size of embed_out, i.e. #_fields * #_dim.
        Returns:
            object: Prediction score made by the DNN.
"""
hparams = self.hparams
w_fm_nn_input = embed_out
last_layer_size = embed_layer_size
layer_idx = 0
hidden_nn_layers = []
hidden_nn_layers.append(w_fm_nn_input)
with tf.compat.v1.variable_scope(
"nn_part", initializer=self.initializer
) as scope:
for idx, layer_size in enumerate(hparams.layer_sizes):
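                # Each entry of hparams.layer_sizes adds one fully connected layer,
                # optionally followed by batch normalization and then the per-layer
                # activation from hparams.activation (applied via self._active_layer).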
curr_w_nn_layer = tf.compat.v1.get_variable(
name="w_nn_layer" + str(layer_idx),
shape=[last_layer_size, layer_size],
dtype=tf.float32,
)
curr_b_nn_layer = tf.compat.v1.get_variable(
name="b_nn_layer" + str(layer_idx),
shape=[layer_size],
dtype=tf.float32,
initializer=tf.compat.v1.zeros_initializer(),
)
tf.compat.v1.summary.histogram(
"nn_part/" + "w_nn_layer" + str(layer_idx), curr_w_nn_layer
)
tf.compat.v1.summary.histogram(
"nn_part/" + "b_nn_layer" + str(layer_idx), curr_b_nn_layer
)
curr_hidden_nn_layer = tf.compat.v1.nn.xw_plus_b(
hidden_nn_layers[layer_idx], curr_w_nn_layer, curr_b_nn_layer
)
scope = "nn_part" + str(idx) # noqa: F841
activation = hparams.activation[idx]
if hparams.enable_BN is True:
curr_hidden_nn_layer = tf.compat.v1.layers.batch_normalization(
curr_hidden_nn_layer,
momentum=0.95,
epsilon=0.0001,
training=self.is_train_stage,
)
curr_hidden_nn_layer = self._active_layer(
logit=curr_hidden_nn_layer, activation=activation, layer_idx=idx
)
hidden_nn_layers.append(curr_hidden_nn_layer)
layer_idx += 1
last_layer_size = layer_size
self.layer_params.append(curr_w_nn_layer)
self.layer_params.append(curr_b_nn_layer)
w_nn_output = tf.compat.v1.get_variable(
name="w_nn_output", shape=[last_layer_size, 1], dtype=tf.float32
)
b_nn_output = tf.compat.v1.get_variable(
name="b_nn_output",
shape=[1],
dtype=tf.float32,
initializer=tf.compat.v1.zeros_initializer(),
)
tf.compat.v1.summary.histogram(
"nn_part/" + "w_nn_output" + str(layer_idx), w_nn_output
)
tf.compat.v1.summary.histogram(
"nn_part/" + "b_nn_output" + str(layer_idx), b_nn_output
)
self.layer_params.append(w_nn_output)
self.layer_params.append(b_nn_output)
nn_output = tf.compat.v1.nn.xw_plus_b(
hidden_nn_layers[-1], w_nn_output, b_nn_output
)
return nn_output