Kerasで可変長入力LSTMを作成するにはどうすればよいですか？

Question

Kerasを使用してLSTMでバニラパターン認識を行い、シーケンスの次の要素を予測しようとしています。

私のデータは次のようになります。

ここで、トレーニングシーケンスのラベルはリストの最後の要素です：X_train['Sequence'][n][-1]。

Sequence列にはシーケンス内の要素の可変数を含めることができるため、使用するのに最適なモデルはRNNであると考えています。以下は、KerasでLSTMを構築する試みです。

# Build the model # A few arbitrary constants... max_features = 20000 out_size = 128 # The max length should be the length of the longest sequence (minus one to account for the label) max_length = X_train['Sequence'].apply(len).max() - 1 # Normal LSTM model construction with sigmoid activation model = Sequential() model.add(Embedding(max_features, out_size, input_length=max_length, dropout=0.2)) model.add(LSTM(128, dropout_W=0.2, dropout_U=0.2)) model.add(Dense(1)) model.add(Activation('sigmoid')) # try using different optimizers and different optimizer configs model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

そして、ここに私が私のモデルを訓練しようとする方法があります：

# Train the model for seq in X_train['Sequence']: print("Length of training is {0}".format(len(seq[:-1]))) print("Training set is {0}".format(seq[:-1])) model.fit(np.array([seq[:-1]]), [seq[-1]])

私の出力はこれです：

Length of training is 13 Training set is [1, 3, 13, 87, 1053, 28576, 2141733, 508147108, 402135275365, 1073376057490373, 9700385489355970183, 298434346895322960005291, 31479360095907908092817694945]

ただし、次のエラーが表示されます。

Exception: Error when checking model input: expected embedding_input_1 to have shape (None, 347) but got array with shape (1, 13)

私のトレーニングステップは正しくセットアップされていると思うので、モデルの構築は間違っているに違いありません。 347はmax_length。

Kerasで可変長入力LSTMを正しく構築するにはどうすればよいですか？データをパディングしないことを希望します。関連するかどうかはわかりませんが、Theanoバックエンドを使用しています。

Van · Answer

埋め込み手順について明確ではありません。しかし、ここでも可変長入力LSTMを実装する方法があります。 LSTMを構築するときは、タイムスパンディメンションを指定しないでください。

import keras.backend as K from keras.layers import LSTM, Input I = Input(shape=(None, 200)) # unknown timespan, fixed feature size lstm = LSTM(20) f = K.function(inputs=[I], outputs=[lstm(I)]) import numpy as np data1 = np.random.random(size=(1, 100, 200)) # batch_size = 1, timespan = 100 print f([data1])[0].shape # (1, 20) data2 = np.random.random(size=(1, 314, 200)) # batch_size = 1, timespan = 314 print f([data2])[0].shape # (1, 20)

Seanny123 · Answer

シーケンスのトレーニングと分類の秘Theは、ステートフルネットワークを使用したマスキングと分類によるトレーニングです。これは、可変長のシーケンスがゼロで始まるかどうかを分類するために作成した例です。

import numpy as np np.random.seed(1) import tensorflow as tf tf.set_random_seed(1) from keras import models from keras.layers import Dense, Masking, LSTM import matplotlib.pyplot as plt def stateful_model(): hidden_units = 256 model = models.Sequential() model.add(LSTM(hidden_units, batch_input_shape=(1, 1, 1), return_sequences=False, stateful=True)) model.add(Dense(1, activation='relu', name='output')) model.compile(loss='binary_crossentropy', optimizer='rmsprop') return model def train_rnn(x_train, y_train, max_len, mask): epochs = 10 batch_size = 200 vec_dims = 1 hidden_units = 256 in_shape = (max_len, vec_dims) model = models.Sequential() model.add(Masking(mask, name="in_layer", input_shape=in_shape,)) model.add(LSTM(hidden_units, return_sequences=False)) model.add(Dense(1, activation='relu', name='output')) model.compile(loss='binary_crossentropy', optimizer='rmsprop') model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.05) return model def gen_train_sig_cls_pair(t_stops, num_examples, mask): x = [] y = [] max_t = int(np.max(t_stops)) for t_stop in t_stops: one_indices = np.random.choice(a=num_examples, size=num_examples // 2, replace=False) sig = np.zeros((num_examples, max_t), dtype=np.int8) sig[one_indices, 0] = 1 sig[:, t_stop:] = mask x.append(sig) cls = np.zeros(num_examples, dtype=np.bool) cls[one_indices] = 1 y.append(cls) return np.concatenate(x, axis=0), np.concatenate(y, axis=0) def gen_test_sig_cls_pair(t_stops, num_examples): x = [] y = [] for t_stop in t_stops: one_indices = np.random.choice(a=num_examples, size=num_examples // 2, replace=False) sig = np.zeros((num_examples, t_stop), dtype=np.bool) sig[one_indices, 0] = 1 x.extend(list(sig)) cls = np.zeros((num_examples, t_stop), dtype=np.bool) cls[one_indices] = 1 y.extend(list(cls)) return x, y if __== '__main__': noise_mag = 0.01 mask_val = -10 signal_lengths = (10, 15, 20) x_in, y_in = gen_train_sig_cls_pair(signal_lengths, 10, mask_val) mod = train_rnn(x_in[:, :, None], y_in, int(np.max(signal_lengths)), mask_val) testing_dat, expected = gen_test_sig_cls_pair(signal_lengths, 3) state_mod = stateful_model() state_mod.set_weights(mod.get_weights()) res = [] for s_i in range(len(testing_dat)): seq_in = list(testing_dat[s_i]) seq_len = len(seq_in) for t_i in range(seq_len): res.extend(state_mod.predict(np.array([[[seq_in[t_i]]]]))) state_mod.reset_states() fig, axes = plt.subplots(2) axes[0].plot(np.concatenate(testing_dat), label="input") axes[1].plot(res, "ro", label="result", alpha=0.2) axes[1].plot(np.concatenate(expected, axis=0), "bo", label="expected", alpha=0.2) axes[1].legend(bbox_to_anchor=(1.1, 1)) plt.show()

PSL · Answer

リカレントネットワークがシーケンスにどのように適用できるか、つまり、各要素が他の要因とは対照的に前のシーケンスにどの程度強く依存しているかはわかりません。そうは言っても（これは少し助けにはなりません）、入力に何らかの悪い値を埋め込みたくない場合は、単一のタイムステップを一度に処理するステートフルモデルが可変長シーケンスの唯一の代替手段です。エンコードに別のアプローチを取ることを気にしない場合：

import numpy as np import keras.models as kem import keras.layers as kel import keras.callbacks as kec import sklearn.preprocessing as skprep X_train, max_features = {'Sequence': [[1, 2, 4, 5, 8, 10, 16], [1, 2, 1, 5, 5, 1, 11, 16, 7]]}, 16 num_mem_units = 64 size_batch = 1 num_timesteps = 1 num_features = 1 num_targets = 1 num_epochs = 1500 model = kem.Sequential() model.add(kel.LSTM(num_mem_units, stateful=True, batch_input_shape=(size_batch, num_timesteps, num_features), return_sequences=True)) model.add(kel.Dense(num_targets, activation='sigmoid')) model.summary() model.compile(loss='binary_crossentropy', optimizer='adam') range_act = (0, 1) # sigmoid range_features = np.array([0, max_features]).reshape(-1, 1) normalizer = skprep.MinMaxScaler(feature_range=range_act) normalizer.fit(range_features) reset_state = kec.LambdaCallback(on_Epoch_end=lambda *_ : model.reset_states()) # training for seq in X_train['Sequence']: X = seq[:-1] y = seq[1:] # predict next element X_norm = normalizer.transform(np.array(X).reshape(-1, 1)).reshape(-1, num_timesteps, num_features) y_norm = normalizer.transform(np.array(y).reshape(-1, 1)).reshape(-1, num_timesteps, num_targets) model.fit(X_norm, y_norm, epochs=num_epochs, batch_size=size_batch, shuffle=False, callbacks=[reset_state]) # prediction for seq in X_train['Sequence']: model.reset_states() for istep in range(len(seq)-1): # input up to not incl last val = seq[istep] X = np.array([val]).reshape(-1, 1) X_norm = normalizer.transform(X).reshape(-1, num_timesteps, num_features) y_norm = model.predict(X_norm) yhat = int(normalizer.inverse_transform(y_norm[0])[0, 0]) y = seq[-1] # last put = '{0} predicts {1:d}, expecting {2:d}'.format(', '.join(str(val) for val in seq[:-1]), yhat, y) print(put)

次のようなsthを生成します。

1, 2, 4, 5, 8, 10 predicts 11, expecting 16 1, 2, 1, 5, 5, 1, 11, 16 predicts 7, expecting 7

とんでもない損失で。