Source code for recommenders.models.deeprec.io.dkn_item2item_iterator

# Copyright (c) Recommenders contributors.
# Licensed under the MIT License.


import tensorflow as tf
from recommenders.models.deeprec.io.dkn_iterator import DKNTextIterator


class DKNItem2itemTextIterator(DKNTextIterator):
    def __init__(self, hparams, graph):
        """This iterator is for DKN's item-to-item recommendation version. A tutorial
        can be found `in this notebook
        <https://github.com/microsoft/recommenders/blob/main/examples/07_tutorials/KDD2020-tutorial/step4_run_dkn_item2item.ipynb>`_.
        Compared with user-to-item recommendation, the user behavior module is not
        needed here, so the placeholders are simpler than in the original
        DKNTextIterator.

        Args:
            hparams (object): Global hyper-parameters.
            graph (object): The running graph.
        """
        self.hparams = hparams
        self.graph = graph
        self.neg_num = hparams.neg_num
        self.batch_size = hparams.batch_size * (self.neg_num + 2)
        self.doc_size = hparams.doc_size

        with self.graph.as_default():
            self.candidate_news_index_batch = tf.compat.v1.placeholder(
                tf.int64, [self.batch_size, self.doc_size], name="candidate_news_index"
            )
            self.candidate_news_entity_index_batch = tf.compat.v1.placeholder(
                tf.int64,
                [self.batch_size, self.doc_size],
                name="candidate_news_entity_index",
            )

        self._loading_nessary_files()
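    # Note on the placeholder shape above: the effective batch size is
    # hparams.batch_size * (neg_num + 2). A plausible reading, inferred from
    # the arithmetic and the item-to-item tutorial rather than stated in this
    # file, is that each group of (neg_num + 2) rows carries one query item,
    # one positive item, and neg_num negative items.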
    def _loading_nessary_files(self):
        """Only one feature file is needed: `news_feature_file`. This function
        loads the news article's features into two dictionaries:
        `self.news_word_index` and `self.news_entity_index`.
        """
        hparams = self.hparams
        self.news_word_index = {}
        self.news_entity_index = {}
        with open(hparams.news_feature_file, "r") as rd:
            while True:
                line = rd.readline()
                if not line:
                    break
                newsid, word_index, entity_index = line.strip().split(" ")
                self.news_word_index[newsid] = [
                    int(item) for item in word_index.split(",")
                ]
                self.news_entity_index[newsid] = [
                    int(item) for item in entity_index.split(",")
                ]
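    # Format note (inferred from the parsing above; values are hypothetical):
    # each line of `news_feature_file` holds three space-separated fields, i.e.
    #
    #   <newsid> <comma-separated word indices> <comma-separated entity indices>
    #   N1234 305,17,42,0,0 9,0,0,0,0
    #
    # and each index list presumably has `doc_size` entries, matching the
    # placeholder shapes defined in __init__.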
    def load_data_from_file(self, infile):
        """Yields mini-batches of data with features, by looking up the
        `news_word_index` and `news_entity_index` dictionaries with each news
        article's ID.

        Args:
            infile (str): File path. Each line of `infile` is a news article's ID.

        Yields:
            dict, list, int:
            - A dictionary that maps graph elements to numpy arrays.
            - A list with news articles' IDs.
            - Size of the data in a batch.
        """
        newsid_list = []
        candidate_news_index_batch = []
        candidate_news_entity_index_batch = []
        cnt = 0
        with open(infile, "r") as rd:
            while True:
                line = rd.readline()
                if not line:
                    break
                newsid = line.strip()
                word_index, entity_index = (
                    self.news_word_index[newsid],
                    self.news_entity_index[newsid],
                )
                newsid_list.append(newsid)
                candidate_news_index_batch.append(word_index)
                candidate_news_entity_index_batch.append(entity_index)
                cnt += 1
                if cnt >= self.batch_size:
                    res = self._convert_infer_data(
                        candidate_news_index_batch,
                        candidate_news_entity_index_batch,
                    )
                    data_size = self.batch_size
                    yield self.gen_infer_feed_dict(res), newsid_list, data_size
                    candidate_news_index_batch = []
                    candidate_news_entity_index_batch = []
                    newsid_list = []
                    cnt = 0

            if cnt > 0:
                # Pad the trailing partial batch up to `batch_size` by cycling
                # through the entries read so far; `data_size` keeps the real
                # item count so callers can discard the padded rows.
                data_size = cnt
                while cnt < self.batch_size:
                    candidate_news_index_batch.append(
                        candidate_news_index_batch[cnt % data_size]
                    )
                    candidate_news_entity_index_batch.append(
                        candidate_news_entity_index_batch[cnt % data_size]
                    )
                    cnt += 1
                res = self._convert_infer_data(
                    candidate_news_index_batch,
                    candidate_news_entity_index_batch,
                )
                yield self.gen_infer_feed_dict(res), newsid_list, data_size
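# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the library). It fakes the hyper-parameter
# object with SimpleNamespace instead of recommenders' usual prepare_hparams,
# and writes tiny hypothetical feature/inference files on the fly; the values,
# file contents, and paths below are all illustrative assumptions.
if __name__ == "__main__":
    import tempfile
    from types import SimpleNamespace

    # Toy feature file: <newsid> <word indices> <entity indices>, doc_size=5.
    with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as f:
        f.write("N1 3,7,0,0,0 1,0,0,0,0\n")
        f.write("N2 5,2,9,0,0 0,0,0,0,0\n")
        feature_file = f.name

    # Toy inference file: one news ID per line.
    with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as f:
        f.write("N1\nN2\n")
        infer_file = f.name

    hparams = SimpleNamespace(
        neg_num=0,  # effective batch size becomes batch_size * (neg_num + 2) = 2
        batch_size=1,
        doc_size=5,
        news_feature_file=feature_file,
    )
    iterator = DKNItem2itemTextIterator(hparams, tf.Graph())
    for feed_dict, newsid_list, data_size in iterator.load_data_from_file(infer_file):
        # Only the first `data_size` rows of each batch are real; any
        # remaining rows are padding added by load_data_from_file.
        print(newsid_list, data_size)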