埋め込みを使用してレコメンデーションのためにコンピューターでGitHubプロジェクトを再実行しようとしています。目標は、最初にmovieLensデータセットに存在するユーザーとアイテムを埋め込み、次に統合を完了したときに内積を使用して評価を予測することですすべてのコンポーネントのうち、トレーニングでエラーが発生しました。
コード:
from lightfm.datasets import fetch_movielens
movielens = fetch_movielens()
ratings_train, ratings_test = movielens['train'], movielens['test']
def _binarize(dataset):
dataset = dataset.copy()
dataset.data = (dataset.data >= 0.0).astype(np.float32)
dataset = dataset.tocsr()
dataset.eliminate_zeros()
return dataset.tocoo()
train, test = _binarize(movielens['train']), _binarize(movielens['test'])
class ScaledEmbedding(nn.Embedding):
""" Change the scale from normal to [0,1/embedding_dim] """
def reset_parameters(self):
self.weight.data.normal_(0, 1.0 / self.embedding_dim)
if self.padding_idx is not None:
self.weight.data[self.padding_idx].fill_(0)
class ZeroEmbedding(nn.Embedding):
def reset_parameters(self):
self.weight.data.zero_()
if self.padding_idx is not None:
self.weight.data[self.padding_idx].fill_(0)
class BilinearNet(nn.Module):
def __init__(self, num_users, num_items, embedding_dim, sparse=False):
super().__init__()
self.embedding_dim = embedding_dim
self.user_embeddings = ScaledEmbedding(num_users, embedding_dim,
sparse=sparse)
self.item_embeddings = ScaledEmbedding(num_items, embedding_dim,
sparse=sparse)
self.user_biases = ZeroEmbedding(num_users, 1, sparse=sparse)
self.item_biases = ZeroEmbedding(num_items, 1, sparse=sparse)
def forward(self, user_ids, item_ids):
user_embedding = self.user_embeddings(user_ids)
item_embedding = self.item_embeddings(item_ids)
user_embedding = user_embedding.view(-1, self.embedding_dim)
item_embedding = item_embedding.view(-1, self.embedding_dim)
user_bias = self.user_biases(user_ids).view(-1, 1)
item_bias = self.item_biases(item_ids).view(-1, 1)
dot = (user_embedding * item_embedding).sum(1)
return dot + user_bias + item_bias
def pointwise_loss(net,users, items, ratings, num_items):
negatives = Variable(
torch.from_numpy(np.random.randint(0,
num_items,
len(users))).cuda()
)
positives_loss = (1.0 - torch.sigmoid(net(users, items)))
negatives_loss = torch.sigmoid(net(users, negatives))
return torch.cat([positives_loss, negatives_loss]).mean()
embedding_dim = 128
minibatch_size = 1024
n_iter = 10
l2=0.0
sparse = True
num_users, num_items = train.shape
net = BilinearNet(num_users,
num_items,
embedding_dim,
sparse=sparse).cuda()
optimizer = optim.Adagrad(net.parameters(),
weight_decay=l2)
for Epoch_num in range(n_iter):
users, items, ratings = shuffle(train)
user_ids_tensor = torch.from_numpy(users).cuda()
item_ids_tensor = torch.from_numpy(items).cuda()
ratings_tensor = torch.from_numpy(ratings).cuda()
Epoch_loss = 0.0
for (batch_user,
batch_item,
batch_ratings) in Zip(_minibatch(user_ids_tensor,
minibatch_size),
_minibatch(item_ids_tensor,
minibatch_size),
_minibatch(ratings_tensor,
minibatch_size)):
user_var = Variable(batch_user)
item_var = Variable(batch_item)
ratings_var = Variable(batch_ratings)
optimizer.zero_grad()
loss = pointwise_loss(net,user_var, item_var, ratings_var, num_items)
Epoch_loss += loss.data[0]
loss.backward()
optimizer.step()
print('Epoch {}: loss {}'.format(Epoch_num, Epoch_loss))
エラー:
RuntimeError Traceback (most recent call last) <ipython-input-87-dcd04440363f> in <module>()
22 ratings_var = Variable(batch_ratings)
23 optimizer.zero_grad()
---> 24 loss = pointwise_loss(net,user_var, item_var, ratings_var, num_items)
25 Epoch_loss += loss.data[0]
26 loss.backward()
<ipython-input-86-679e10f637a5> in pointwise_loss(net, users, items, ratings, num_items)
8
9 positives_loss = (1.0 - torch.sigmoid(net(users, items)))
---> 10 negatives_loss = torch.sigmoid(net(users, negatives))
11
12 return torch.cat([positives_loss, negatives_loss]).mean()
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in
__call__(self, *input, **kwargs)
491 result = self._slow_forward(*input, **kwargs)
492 else:
--> 493 result = self.forward(*input, **kwargs)
494 for hook in self._forward_hooks.values():
495 hook_result = hook(self, input, result)
<ipython-input-58-3946abf81d81> in forward(self, user_ids, item_ids)
16
17 user_embedding = self.user_embeddings(user_ids)
---> 18 item_embedding = self.item_embeddings(item_ids)
19
20 user_embedding = user_embedding.view(-1, self.embedding_dim)
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in
__call__(self, *input, **kwargs)
491 result = self._slow_forward(*input, **kwargs)
492 else:
--> 493 result = self.forward(*input, **kwargs)
494 for hook in self._forward_hooks.values():
495 hook_result = hook(self, input, result)
~\Anaconda3\lib\site-packages\torch\nn\modules\sparse.py in forward(self, input)
115 return F.embedding(
116 input, self.weight, self.padding_idx, self.max_norm,
--> 117 self.norm_type, self.scale_grad_by_freq, self.sparse)
118
119 def extra_repr(self):
~\Anaconda3\lib\site-packages\torch\nn\functional.py in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse) 1504 # remove once script supports set_grad_enabled 1505
_no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 1506 return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse) 1507 1508
RuntimeError: Expected tensor for argument #1 'indices' to have scalar type Long; but got CUDAType instead (while checking arguments for embedding)
誰か私を助けてくれますか?
入力タイプをチェックすることをお勧めします。同じ問題がありましたが、入力タイプをint32からint64に変換することで解決しました。(win10で実行)ex:
x = torch.tensor(train).to(torch.int64)