research/ptn/losses.py

# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Defines the various loss functions in use by the PTN model."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

slim = tf.contrib.slim


def add_rotator_image_loss(inputs, outputs, step_size, weight_scale):
  """Builds the image reconstruction loss of the deep rotator model.

  For every recurrent step k in [1, step_size], `tf.nn.l2_loss` is applied to
  `inputs['images_k'] - outputs['images_k']`. The per-step losses are summed
  and divided by `step_size * batch_size`, where the batch size is read from
  the leading dimension of `inputs['images_0']`.

  Args:
    inputs: Input dictionary to the model; must contain `images_0' and
      `images_k' for k = 1..step_size.
    outputs: Output dictionary returned by the model; must contain
      `images_k' for k = 1..step_size.
    step_size: Number of recurrent steps (number of repeated out-of-plane
      rotations) in the deep rotator network (int).
    weight_scale: A reweighting factor applied over the image loss (float).

  Returns:
    A scalar `Tensor' holding the averaged image loss multiplied by
    `weight_scale`. The averaged loss before reweighting is passed to
    `slim.summaries.add_scalar_summary` as `image_loss' (prefix `losses').
  """
  num_examples = tf.shape(inputs['images_0'])[0]

  # `sum` starts at 0 and adds the per-step terms left to right.
  summed_loss = sum(
      tf.nn.l2_loss(inputs['images_%d' % k] - outputs['images_%d' % k])
      for k in range(1, step_size + 1))

  averaged_loss = summed_loss / tf.to_float(step_size * num_examples)
  # The summary records the loss before `weight_scale` is applied.
  slim.summaries.add_scalar_summary(
      averaged_loss, 'image_loss', prefix='losses')
  return averaged_loss * weight_scale


def add_rotator_mask_loss(inputs, outputs, step_size, weight_scale):
  """Builds the mask reconstruction loss of the deep rotator model.

  For every recurrent step k in [1, step_size], `tf.nn.l2_loss` is applied to
  `inputs['masks_k'] - outputs['masks_k']`. The per-step losses are summed
  and divided by `step_size * batch_size`. Note that the batch size is read
  from the leading dimension of `inputs['images_0']`, not from a mask tensor.

  Args:
    inputs: Input dictionary to the model; must contain `images_0' and
      `masks_k' for k = 1..step_size.
    outputs: Output dictionary returned by the model; must contain
      `masks_k' for k = 1..step_size.
    step_size: Number of recurrent steps (number of repeated out-of-plane
      rotations) in the deep rotator network (int).
    weight_scale: A reweighting factor applied over the mask loss (float).

  Returns:
    A `Tensor' holding the averaged mask loss multiplied by `weight_scale`.
    The averaged loss before reweighting is passed to
    `slim.summaries.add_scalar_summary` as `mask_loss' (prefix `losses').
  """
  num_examples = tf.shape(inputs['images_0'])[0]

  # `sum` starts at 0 and adds the per-step terms left to right.
  summed_loss = sum(
      tf.nn.l2_loss(inputs['masks_%d' % k] - outputs['masks_%d' % k])
      for k in range(1, step_size + 1))

  averaged_loss = summed_loss / tf.to_float(step_size * num_examples)
  # The summary records the loss before `weight_scale` is applied.
  slim.summaries.add_scalar_summary(
      averaged_loss, 'mask_loss', prefix='losses')
  return averaged_loss * weight_scale


def add_volume_proj_loss(inputs, outputs, num_views, weight_scale):
  """Builds the projection loss of the voxel generation model.

  For every view k in [1, num_views], `tf.nn.l2_loss` is applied to
  `outputs['masks_k'] - outputs['projs_k']`; both tensors are taken from
  `outputs`. The per-view losses are summed and divided by
  `num_views * batch_size`, where the batch size is read from the leading
  dimension of `inputs['images_1']`.

  Args:
    inputs: Input dictionary to the model; only `images_1' is read here.
    outputs: Output dictionary returned by the model; must contain
      `masks_k' and `projs_k' for k = 1..num_views.
    num_views: Total number of viewpoints for each object (int).
    weight_scale: A reweighting factor applied over the projection
      loss (float).

  Returns:
    A `Tensor' holding the averaged projection loss multiplied by
    `weight_scale`. The averaged loss before reweighting is passed to
    `slim.summaries.add_scalar_summary` as `proj_loss' (prefix `losses').
  """
  num_examples = tf.shape(inputs['images_1'])[0]

  # Views are keyed 1..num_views; `sum` starts at 0 and adds left to right.
  summed_loss = sum(
      tf.nn.l2_loss(outputs['masks_%d' % view] - outputs['projs_%d' % view])
      for view in range(1, num_views + 1))

  averaged_loss = summed_loss / tf.to_float(num_views * num_examples)
  # The summary records the loss before `weight_scale` is applied.
  slim.summaries.add_scalar_summary(
      averaged_loss, 'proj_loss', prefix='losses')
  return averaged_loss * weight_scale


def add_volume_loss(inputs, outputs, num_views, weight_scale):
  """Builds the volume loss of the voxel generation model.

  For every view k in [1, num_views], `tf.nn.l2_loss` is applied to
  `inputs['voxels'] - outputs['voxels_k']`, i.e. each per-view prediction
  is compared against the same `inputs['voxels']` tensor. The per-view
  losses are summed and divided by `num_views * batch_size`, where the batch
  size is read from the leading dimension of `inputs['images_1']`.

  Args:
    inputs: Input dictionary to the model; must contain `images_1' and
      `voxels'.
    outputs: Output dictionary returned by the model; must contain
      `voxels_k' for k = 1..num_views.
    num_views: Total number of viewpoints for each object (int).
    weight_scale: A reweighting factor applied over the volume loss.

  Returns:
    A `Tensor' holding the averaged volume loss multiplied by
    `weight_scale`. The averaged loss before reweighting is passed to
    `slim.summaries.add_scalar_summary` as `vol_loss' (prefix `losses').
  """
  num_examples = tf.shape(inputs['images_1'])[0]

  # Views are keyed 1..num_views; `sum` starts at 0 and adds left to right.
  summed_loss = sum(
      tf.nn.l2_loss(inputs['voxels'] - outputs['voxels_%d' % view])
      for view in range(1, num_views + 1))

  averaged_loss = summed_loss / tf.to_float(num_views * num_examples)
  # The summary records the loss before `weight_scale` is applied.
  slim.summaries.add_scalar_summary(
      averaged_loss, 'vol_loss', prefix='losses')
  return averaged_loss * weight_scale


def regularization_loss(scopes, params):
  """Computes the weight decay as regularization during training.

  When `params.weight_decay` is positive, the loss is the sum of
  `tf.nn.l2_loss` over every model variable in the given scopes that is both
  trainable and has `weights' in its name. Otherwise the loss is zero.

  Args:
    scopes: A list of different components of the model such as
      ``encoder'', ``decoder'' and ``projector''. Each entry is passed to
      `tf.contrib.framework.get_model_variables` as the scope.
    params: Parameters of the model; only `params.weight_decay` is read here.

  Returns:
    Regularization loss (tf.float32), multiplied by `params.weight_decay`.
    The loss before that multiplication is passed to
    `slim.summaries.add_scalar_summary` as `reg_loss' (prefix `losses').
  """

  reg_loss = tf.zeros(dtype=tf.float32, shape=[])
  if params.weight_decay > 0:
    # Loop-invariant: fetch the trainable collection once rather than once
    # per candidate variable.
    trainable_vars = tf.trainable_variables()
    for scope in scopes:
      # Build a real list. In Python 3 `filter` returns a lazy iterator that
      # is always truthy, so the emptiness check below would never fire and
      # `tf.add_n` would be handed an empty list for scopes without weights.
      scope_weights = [
          var for var in tf.contrib.framework.get_model_variables(scope)
          if var in trainable_vars and 'weights' in var.name]
      if scope_weights:
        reg_loss += tf.add_n([tf.nn.l2_loss(var) for var in scope_weights])

  # The summary records the loss before the weight-decay factor is applied.
  slim.summaries.add_scalar_summary(
      reg_loss, 'reg_loss', prefix='losses')
  reg_loss *= params.weight_decay
  return reg_loss