Keras の Sequential モデルにサイズ変更レイヤーを追加する

Question

次のように作成したモデルに、

model = Sequential()

次の呼び出しを使って、

model.add(...)

画像の形状を (160, 320, 3) から (224, 224, 3) に変更するサイズ変更レイヤーを追加するには、どうすればよいですか？

nemo · Accepted Answer

通常、これには Reshape レイヤーを使用します。

# NOTE: shown for illustration only. Reshape cannot change the number of
# elements, and 224*224*3 != 160*320*3, so this particular call does not work.
model.add(Reshape((224, 224, 3), input_shape=(160, 320, 3)))

ただし、ターゲットの次元には入力の次元のデータをすべて収めることができないため（224*224 != 160*320）、これは機能しません。Reshape を使用できるのは、要素の数が変わらない場合のみです。

画像の一部のデータを失うことに問題がない場合は、独自の非可逆変形を指定できます。

# Flatten the spatial axes: (batch, 160, 320, 3) -> (batch, 51200, 3).
# Reshape takes the target shape as a single tuple, and input_shape belongs
# to the layer, not to model.add().
model.add(Reshape((-1, 3), input_shape=(160, 320, 3)))
# Throw away some pixels so that #data = 224^2 (= 50176) per image.
# The slice is applied on the pixel axis; axis 0 is the batch axis.
model.add(Lambda(lambda x: x[:, :50176]))
# Fold the remaining pixels back into an image: (batch, 224, 224, 3).
model.add(Reshape((224, 224, 3)))

とはいえ、こうした変換は多くの場合、データをモデルに入力する前に行われます。トレーニングステップのたびに実行すると、実質的に計算時間の無駄になるためです。

KeithWM · Answer

Tensorflowのresize_imagesレイヤーの使用を検討する必要があると思います。

https://www.tensorflow.org/api_docs/python/tf/image/resize_images

Keras にはこれが含まれていないようです。おそらく、この機能が Theano に存在しないためでしょう。同じことを行うカスタム Keras レイヤーを書きました。簡単なハックなので、場合によってはうまく動作しない可能性があります。

import keras
import keras.backend as K
from keras.utils import conv_utils
from keras.engine import InputSpec
from keras.engine import Layer
from tensorflow import image as tfi


class ResizeImages(Layer):
    """Resize images to a specified spatial size.

    # Arguments
        output_dim: Tuple of 2 integers giving the spatial size of the
            output; compute_output_shape places `output_dim[0]` on the rows
            axis and `output_dim[1]` on the cols axis.
        data_format: A string,
            one of `channels_last` (default) or `channels_first`.
            The ordering of the dimensions in the inputs.
            `channels_last` corresponds to inputs with shape
            `(batch, height, width, channels)` while `channels_first`
            corresponds to inputs with shape
            `(batch, channels, height, width)`.
            It defaults to the `image_data_format` value found in your
            Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be "channels_last".

    # Input shape
        - If `data_format='channels_last'`:
            4D tensor with shape:
            `(batch_size, rows, cols, channels)`
        - If `data_format='channels_first'`:
            4D tensor with shape:
            `(batch_size, channels, rows, cols)`

    # Output shape
        - If `data_format='channels_last'`:
            4D tensor with shape:
            `(batch_size, output_dim[0], output_dim[1], channels)`
        - If `data_format='channels_first'`:
            4D tensor with shape:
            `(batch_size, channels, output_dim[0], output_dim[1])`
    """

    def __init__(self, output_dim=(1, 1), data_format=None, **kwargs):
        super(ResizeImages, self).__init__(**kwargs)
        self.output_dim = conv_utils.normalize_tuple(output_dim, 2, 'output_dim')
        self.data_format = conv_utils.normalize_data_format(data_format)
        self.input_spec = InputSpec(ndim=4)

    def build(self, input_shape):
        # Pin the input spec to the concrete shape seen at build time.
        self.input_spec = [InputSpec(shape=input_shape)]

    def compute_output_shape(self, input_shape):
        """Return the input shape with its two spatial axes set to output_dim."""
        if self.data_format == 'channels_first':
            return (input_shape[0], input_shape[1], self.output_dim[0], self.output_dim[1])
        elif self.data_format == 'channels_last':
            return (input_shape[0], self.output_dim[0], self.output_dim[1], input_shape[3])

    def _resize_fun(self, inputs, data_format):
        # Explicit checks instead of `assert`, which is stripped under `python -O`.
        # As in the original, an unsupported configuration only prints a
        # warning; the resize is still attempted afterwards.
        supported = (keras.backend.backend() == 'tensorflow'
                     and self.data_format == 'channels_last')
        if not supported:
            print("Only tensorflow backend is supported for the resize layer and accordingly 'channels_last' ordering")
        output = tfi.resize_images(inputs, self.output_dim)
        return output

    def call(self, inputs):
        output = self._resize_fun(inputs=inputs, data_format=self.data_format)
        return output

    def get_config(self):
        # Only attributes that __init__ actually sets are serialized; the
        # original also read `self.padding`, which this layer never defines.
        config = {'output_dim': self.output_dim,
                  'data_format': self.data_format}
        base_config = super(ResizeImages, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

mxmlnkn · Answer

受け入れられた回答では Reshape レイヤーを使用しています。これは NumPy の reshape と同じように動作し、4x4 の行列を 2x8 の行列に変形するといった用途に使えますが、画像の局所性（隣接するピクセル同士の位置関係）の情報が失われてしまいます：

0 0 0 0
1 1 1 1    ->    0 0 0 0 1 1 1 1
2 2 2 2          2 2 2 2 3 3 3 3
3 3 3 3

代わりに、画像データは、たとえば TensorFlow の tf.image.resize_images などを使ってリスケール（リサイズ）すべきです。ただし、正しい使い方とバグには注意してください！関連する質問に示されているように、これは Lambda レイヤーで使用できます。

# Resize inside the model by wrapping tf.image.resize_images in a Lambda layer.
# NOTE(review): per the TensorFlow docs, preserve_aspect_ratio=True scales the
# image to fit within `size`, so a 160x320 input presumably does not come out
# as exactly 224x224 -- confirm this is what you want.
model.add(
    keras.layers.Lambda(
        lambda image: tf.image.resize_images(
            image,
            (224, 224),
            method=tf.image.ResizeMethod.BICUBIC,
            align_corners=True,  # possibly important
            preserve_aspect_ratio=True,
        )
    )
)

あなたの場合、160x320の画像があるので、アスペクト比を維持するかどうかも決定する必要があります。事前にトレーニングされたネットワークを使用する場合は、ネットワークがトレーニングされたのと同じ種類のサイズ変更を使用する必要があります。

ch271828n · Answer

@KeithWMの回答を変更し、output_scaleを追加します。 output_scale = 2は、出力が入力形状の2倍であることを意味します:)

class ResizeImages(Layer):
    """Resize images to a fixed size or by a scale factor of the input size.

    https://stackoverflow.com/questions/41903928/add-a-resizing-layer-to-a-keras-sequential-model

    # Arguments
        output_dim: Tuple of 2 integers giving the spatial size of the
            output (rows, cols). Used only when `output_scale` is None.
        output_scale: Scale compared with the input; when given, the output
            spatial size is `output_scale` times the input spatial size
            (e.g. 2 doubles rows and cols) and `output_dim` is ignored.
        data_format: A string,
            one of `channels_last` (default) or `channels_first`.
            The ordering of the dimensions in the inputs.
            `channels_last` corresponds to inputs with shape
            `(batch, height, width, channels)` while `channels_first`
            corresponds to inputs with shape
            `(batch, channels, height, width)`.
            It defaults to the `image_data_format` value found in your
            Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be "channels_last".

    # Input shape
        - If `data_format='channels_last'`:
            4D tensor with shape:
            `(batch_size, rows, cols, channels)`
        - If `data_format='channels_first'`:
            4D tensor with shape:
            `(batch_size, channels, rows, cols)`

    # Output shape
        - If `data_format='channels_last'`:
            4D tensor with shape:
            `(batch_size, output_dim[0], output_dim[1], channels)`
        - If `data_format='channels_first'`:
            4D tensor with shape:
            `(batch_size, channels, output_dim[0], output_dim[1])`
    """

    def __init__(self, output_dim=(1, 1), output_scale=None, data_format=None, **kwargs):
        super(ResizeImages, self).__init__(**kwargs)
        # NOTE(review): normalize_data_format is called unqualified here (the
        # original author remarks that conv_utils "does not have" it) --
        # presumably imported from keras.backend.common; confirm for your
        # Keras version.
        self.data_format = normalize_data_format(data_format)
        # The requested values are kept separately; the effective
        # self.output_dim is only resolved in build(), once the input shape
        # is known.
        self.naive_output_dim = conv_utils.normalize_tuple(output_dim, 2, 'output_dim')
        self.naive_output_scale = output_scale
        self.input_spec = InputSpec(ndim=4)

    def build(self, input_shape):
        self.input_spec = [InputSpec(shape=input_shape)]
        scale = self.naive_output_scale
        if scale is None:
            self.output_dim = self.naive_output_dim
        elif self.data_format == 'channels_first':
            # int() keeps the target size integral for non-integer scales.
            self.output_dim = (int(scale * input_shape[2]),
                               int(scale * input_shape[3]))
        elif self.data_format == 'channels_last':
            self.output_dim = (int(scale * input_shape[1]),
                               int(scale * input_shape[2]))

    def compute_output_shape(self, input_shape):
        """Return the input shape with its two spatial axes set to output_dim."""
        if self.data_format == 'channels_first':
            return (input_shape[0], input_shape[1], self.output_dim[0], self.output_dim[1])
        elif self.data_format == 'channels_last':
            return (input_shape[0], self.output_dim[0], self.output_dim[1], input_shape[3])

    def _resize_fun(self, inputs, data_format):
        # Explicit checks instead of `assert`, which is stripped under `python -O`.
        # As in the original, an unsupported configuration only prints a
        # warning; the resize is still attempted afterwards.
        supported = (keras.backend.backend() == 'tensorflow'
                     and self.data_format == 'channels_last')
        if not supported:
            print("Only tensorflow backend is supported for the resize layer and accordingly 'channels_last' ordering")
        output = tf.image.resize_images(inputs, self.output_dim)
        return output

    def call(self, inputs):
        output = self._resize_fun(inputs=inputs, data_format=self.data_format)
        return output

    def get_config(self):
        # Serialize the constructor arguments rather than the derived
        # self.output_dim (which does not exist before build()), and drop
        # `self.padding`, which this layer never defines.
        config = {'output_dim': self.naive_output_dim,
                  'output_scale': self.naive_output_scale,
                  'data_format': self.data_format}
        base_config = super(ResizeImages, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))