Merge pull request #484 from cpaxton/devel

Support for CoSTAR data
jhu-lcsr · Mar 30, 2018 · 487ede4 · 487ede4
2 parents 30c2682 + fc5960e
commit 487ede4
Show file tree

Hide file tree

Showing 9 changed files with 373 additions and 18 deletions.
diff --git a/costar_models/python/costar_models/conditional_image_costar.py b/costar_models/python/costar_models/conditional_image_costar.py
@@ -0,0 +1,180 @@
+from __future__ import print_function
+
+import keras.backend as K
+import keras.losses as losses
+import keras.optimizers as optimizers
+import numpy as np
+
+from keras.callbacks import ModelCheckpoint
+from keras.layers.advanced_activations import LeakyReLU
+from keras.layers import Input, RepeatVector, Reshape
+from keras.layers.embeddings import Embedding
+from keras.layers.merge import Concatenate, Multiply
+from keras.losses import binary_crossentropy
+from keras.models import Model, Sequential
+from keras.optimizers import Adam
+from matplotlib import pyplot as plt
+
+from .robot_multi_models import *
+from .mhp_loss import *
+from .loss import *
+from .sampler2 import *
+
+from .conditional_image import ConditionalImage
+from .costar import *
+
+class ConditionalImageCostar(ConditionalImage):
+    # ConditionalImage subclass for CoSTAR data. _makeModel builds a model
+    # that applies the same transform twice to an encoded (first image,
+    # current image) pair and decodes both results; _getData assembles the
+    # input and target arrays from one loaded sequence.
+
+    def __init__(self, *args, **kwargs):
+        # Forward everything to ConditionalImage, then store ImageWithFirstCb
+        # as the predictor callback class.
+        super(ConditionalImageCostar, self).__init__(*args, **kwargs)
+        self.PredictorCb = ImageWithFirstCb
+
+    def _makeModel(self, image, *args, **kwargs):
+        # Build and compile the training model and store it in self.model.
+        # `image` is only read for its shape; *args/**kwargs are not used in
+        # this method.
+
+        # Per-sample image shape: everything after the leading axis.
+        img_shape = image.shape[1:]
+        # NOTE(review): img_size is accumulated here but not read again in
+        # this method.
+        img_size = 1.
+        for dim in img_shape:
+            img_size *= dim
+        # These two sizes are only passed to loadValidationModels below.
+        gripper_size = 1
+        arm_size = 6
+
+        # =====================================================================
+        # Load the image decoders
+        img_in = Input(img_shape,name="predictor_img_in")
+        img0_in = Input(img_shape,name="predictor_img0_in")
+        #arm_in = Input((arm_size,))
+        #gripper_in = Input((gripper_size,))
+        #arm_gripper = Concatenate()([arm_in, gripper_in])
+        # label_in is appended to the model inputs further down but is not
+        # connected to any output in this method.
+        label_in = Input((1,))
+        ins = [img0_in, img_in]
+
+        encoder = MakeImageEncoder(self, img_shape)
+        decoder = MakeImageDecoder(self, self.hidden_shape)
+
+        LoadEncoderWeights(self, encoder, decoder)
+
+        # =====================================================================
+        # Load the arm and gripper representation
+        # Encode the (first image, current image) pair into hidden state h.
+        h = encoder([img0_in, img_in])
+
+        if self.validate:
+            # NOTE(review): h0 is not assigned anywhere in this method; unless
+            # it is provided by one of the star imports, this call raises
+            # NameError whenever self.validate is set -- confirm.
+            self.loadValidationModels(arm_size, gripper_size, h0, h)
+
+        # Two scalar option inputs, one per transform step below.
+        next_option_in = Input((1,), name="next_option_in")
+        next_option_in2 = Input((1,), name="next_option_in2")
+        ins += [next_option_in, next_option_in2]
+
+        # =====================================================================
+        # Apply transforms
+        # OneHot + Flatten turns each option input into the vector handed to
+        # the transform (presumably a one-hot of size num_options -- confirm
+        # against the OneHot helper).
+        y = Flatten()(OneHot(self.num_options)(next_option_in))
+        y2 = Flatten()(OneHot(self.num_options)(next_option_in2))
+
+        # Pick the dense or the default transform depending on
+        # self.dense_transform.
+        tform = self._makeTransform() if not self.dense_transform else self._makeDenseTransform()
+        tform.summary()
+        # The same transform model is applied twice: first to h with option y,
+        # then to its own output x with option y2.
+        x = tform([h,y])
+        x2 = tform([x,y2])
+
+        # Decode both transformed hidden states with the same decoder.
+        image_out, image_out2 = decoder([x]), decoder([x2])
+
+        # Compute classifier on the last transform
+        if not self.no_disc:
+            # Only the second decoded image is classified; the branch for the
+            # first decoded image (disc_out1) is commented out.
+            image_discriminator = LoadGoalClassifierWeights(self,
+                    make_classifier_fn=MakeCostarImageClassifier,
+                    img_shape=img_shape)
+            #disc_out1 = image_discriminator([img0_in, image_out])
+            disc_out2 = image_discriminator([img0_in, image_out2])
+
+        # Create custom encoder loss
+        if self.enc_loss:
+            # Expose both transformed hidden states as extra model outputs,
+            # each trained with an EncoderLoss weighted 1e-2.
+            loss = EncoderLoss(self.image_encoder, self.loss)
+            enc_losses = [loss, loss]
+            enc_outs = [x, x2]
+            enc_wts = [1e-2, 1e-2]
+            img_loss_wt = 1.
+        else:
+            # No extra outputs; img_loss_wt is 1. in both branches.
+            enc_losses = []
+            enc_outs = []
+            enc_wts = []
+            img_loss_wt = 1.
+
+        # Create models to train
+        # disc_wt is only consumed in the discriminator branch of the
+        # if/else below.
+        if self.no_disc:
+            disc_wt = 0.
+        else:
+            disc_wt = 1e-3
+        # In both branches the outputs, losses and loss weights are listed in
+        # the same order: image_out, image_out2, [disc_out2,] then any
+        # encoder outputs.
+        if self.no_disc:
+            train_predictor = Model(ins + [label_in],
+                    [image_out, image_out2] + enc_outs)
+            train_predictor.compile(
+                    loss=[self.loss, self.loss,] + enc_losses,
+                    loss_weights=[img_loss_wt, img_loss_wt] + enc_wts,
+                    optimizer=self.getOptimizer())
+        else:
+            train_predictor = Model(ins + [label_in],
+                    #[image_out, image_out2, disc_out1, disc_out2] + enc_outs)
+                    [image_out, image_out2, disc_out2] + enc_outs)
+            train_predictor.compile(
+                    loss=[self.loss, self.loss, "categorical_crossentropy"] + enc_losses,
+                    #loss_weights=[img_loss_wt, img_loss_wt, 0.9*disc_wt, disc_wt] + enc_wts,
+                    loss_weights=[img_loss_wt, img_loss_wt, disc_wt] + enc_wts,
+                    optimizer=self.getOptimizer())
+        train_predictor.summary()
+
+        # Set variables
+        # No separate predictor model is built here; only the training model
+        # is kept.
+        self.predictor = None
+        self.model = train_predictor
+
+
+    def _getData(self, image, label, goal_idx, q, gripper, labels_to_name, *args, **kwargs):
+        '''
+        Parameters:
+        -----------
+        image: jpeg encoding of image
+        label: integer code for which action is being performed
+        goal_idx: index of the start of the next action
+        q: joint states
+        gripper: floating point gripper openness
+        labels_to_name: list of high level actions (AKA options)
+        '''
+        # Returns an (inputs, targets) pair of lists. q, gripper and the
+        # extra *args/**kwargs are not used in this method.
+
+        # Null option to be set as the first option
+        # Verify this to make sure we aren't loading things with different
+        # numbers of available options/high-level actions
+        # NOTE(review): when the assert passes, the assignment that follows
+        # re-sets null_option to the value it already holds.
+        assert(len(labels_to_name) == self.null_option)
+        self.null_option = len(labels_to_name)
+        # Total number of options incl. null
+        self.num_options = len(labels_to_name) + 1
+
+        length = label.shape[0]
+        # prev_label[t] = label[t-1], with the null option at t = 0.
+        prev_label = np.zeros_like(label)
+        prev_label[1:] = label[:(length-1)]
+        prev_label[0] = self.null_option
+
+        # Clamp every goal index to the last valid position (length-1).
+        goal_idx = np.min((goal_idx, np.ones_like(goal_idx)*(length-1)),axis=0)
+
+        # Images and goal indices must have the same leading length; dump
+        # debugging info before failing otherwise.
+        if not (image.shape[0] == goal_idx.shape[0]):
+            print("Image shape:", image.shape)
+            print("Goal idxs:", goal_idx.shape)
+            print(label)
+            print(goal_idx)
+            raise RuntimeError('data type shapes did not match')
+        # Look up the label and the image at each entry's goal index.
+        goal_label = label[goal_idx]
+        goal_image = image[goal_idx]
+        # GetNextGoal is defined elsewhere; its two results are used below as
+        # the second target image and as the labels that get one-hot encoded.
+        goal_image2, goal_label2 = GetNextGoal(goal_image, label)
+
+        # Extend image_0 to full length of sequence
+        image0 = image[0]
+        image0 = np.tile(np.expand_dims(image0,axis=0),[length,1,1,1])
+
+        # NOTE(review): lbls_1h is computed but not used in either return
+        # below.
+        lbls_1h = np.squeeze(ToOneHot2D(label, self.num_options))
+        lbls2_1h = np.squeeze(ToOneHot2D(goal_label2, self.num_options))
+        # Inputs are ordered [image0, image, label, goal_label, prev_label];
+        # _makeModel declares its inputs as [img0_in, img_in, next_option_in,
+        # next_option_in2, label_in] -- presumably matched by position;
+        # confirm against the training loop. The one-hot target is only
+        # included when the discriminator output exists.
+        if self.no_disc:
+            return ([image0, image, label, goal_label, prev_label],
+                    [goal_image,
+                     goal_image2,])
+        else:
+            return ([image0, image, label, goal_label, prev_label],
+                    [goal_image,
+                     goal_image2,
+                     lbls2_1h,])
+
diff --git a/costar_models/python/costar_models/costar.py b/costar_models/python/costar_models/costar.py
@@ -24,4 +24,40 @@
 for real robot execution.
 '''
 
+def MakeCostarImageClassifier(model, img_shape, trainable=True):
+    # Build and compile a two-input image classifier with a softmax over
+    # model.num_options classes, store it on model.classifier and return it.
+    #
+    # model: object providing use_batchnorm, dropout_rate, num_options and
+    #        getOptimizer()
+    # img_shape: shape passed to both Input layers
+    # trainable: when False, the model is marked non-trainable before compile
+    img0 = Input(img_shape,name="img0_classifier_in")
+    img = Input(img_shape,name="img_classifier_in")
+    bn = model.use_batchnorm
+    # Passed as the lrelu flag of every AddConv2D call below.
+    disc = True
+    dr = model.dropout_rate
+    x = img
+    # NOTE(review): x0 is never used after this line, so img0 is a declared
+    # model input that does not feed the output built in this function.
+    x0 = img0
+
+    # Four AddConv2D layers with 32, 64, 128 and 128 filters and [5,5]
+    # kernels; the positional 2 is presumably the stride and the 0. a
+    # dropout rate -- confirm against AddConv2D. Dropout(dr) follows the
+    # first three.
+    #x = AddConv2D(x, 32, [7,7], 1, 0., "same", lrelu=disc, bn=bn)
+    x = AddConv2D(x, 32, [5,5], 2, 0., "same", lrelu=disc, bn=bn)
+    x = Dropout(dr)(x)
+    #x = AddConv2D(x, 32, [5,5], 1, 0., "same", lrelu=disc, bn=bn)
+    #x = AddConv2D(x, 32, [5,5], 1, 0., "same", lrelu=disc, bn=bn)
+    x = AddConv2D(x, 64, [5,5], 2, 0., "same", lrelu=disc, bn=bn)
+    x = Dropout(dr)(x)
+    #x = AddConv2D(x, 64, [5,5], 1, 0., "same", lrelu=disc, bn=bn)
+    x = AddConv2D(x, 128, [5,5], 2, 0., "same", lrelu=disc, bn=bn)
+    x = Dropout(dr)(x)
+    #x = AddConv2D(x, 128, [5,5], 1, 0., "same", lrelu=disc, bn=bn)
+    x = AddConv2D(x, 128, [5,5], 2, 0., "same", lrelu=disc, bn=bn)
+
+    # Flatten, apply a fixed 0.5 dropout, then the softmax output layer.
+    x = Flatten()(x)
+    #x = Dropout(0.5)(x)
+    #x = AddDense(x, 1024, "lrelu", 0., output=True, bn=False)
+    x = Dropout(0.5)(x)
+    x = AddDense(x, model.num_options, "softmax", 0., output=True, bn=False)
+    # Despite the variable name, this is the classifier model (name="classifier").
+    image_encoder = Model([img0, img], x, name="classifier")
+    # The trainable flag is set before compile().
+    if not trainable:
+        image_encoder.trainable = False
+    image_encoder.compile(loss="categorical_crossentropy",
+            optimizer=model.getOptimizer(),
+            metrics=["accuracy"])
+    # Side effect: the classifier is also stored on the passed-in model.
+    model.classifier = image_encoder
+    return image_encoder
+
 
diff --git a/...hon/costar_models/depth_image_encoding.py → ...r_models/datasets/depth_image_encoding.py b/...hon/costar_models/depth_image_encoding.py → ...r_models/datasets/depth_image_encoding.py
diff --git a/costar_models/python/costar_models/datasets/npy_generator.py b/costar_models/python/costar_models/datasets/npy_generator.py
@@ -48,18 +48,23 @@ def load(self, success_only=False):
             if success_only and f.split('.')[1] == 'failure':
                 continue
 
-            if i < 2:
+            if i < 1:
                 fsample = self._load(os.path.join(self.name, f))
                 for key, value in fsample.items():
 
                     if self.load_jpeg and key in ["image", "goal_image"]:
                         value = ConvertJpegListToNumpy(value)
 
-                    if key not in sample:
-                        sample[key] = value
                     if value.shape[0] == 0:
+                        sample = {}
                         continue
-                    sample[key] = np.concatenate([sample[key],value],axis=0)
+
+                    if key not in sample:
+                        sample[key] = value
+                    else:
+                        # Note: do not collect multiple samples anymore; this
+                        # should never be reached
+                        sample[key] = np.concatenate([sample[key],value],axis=0)
             i += 1
             acceptable_files.append(f)
 

diff --git a/costar_models/python/costar_models/discriminator.py b/costar_models/python/costar_models/discriminator.py
@@ -17,6 +17,7 @@
 from .multi import *
 from .husky import *
 from .dvrk import *
+from .costar import *
 
 class Discriminator(RobotMultiPredictionSampler):
 
@@ -66,8 +67,6 @@ def __init__(self, goal, taskdef, *args, **kwargs):
         super(HuskyDiscriminator, self).__init__(taskdef, *args, **kwargs)
         self.PredictorCb = None
         self.goal = goal
-        self.num_options = HuskyNumOptions()
-        self.null_options = HuskyNullOption()
 
     def _makeModel(self, image, *args, **kwargs):
         '''
@@ -101,7 +100,6 @@ def __init__(self, goal, taskdef, *args, **kwargs):
         '''
         super(JigsawsDiscriminator, self).__init__(taskdef, *args, **kwargs)
         self.PredictorCb = None
-        self.num_options = SuturingNumOptions()
         self.num_generator_files = 1
         self.goal = goal
         self.load_jpeg = True
@@ -131,3 +129,43 @@ def _getData(self, image, goal_idx, label, *args, **kwargs):
         else:
             return [I0, I], [o1_1h]
 
+class CostarDiscriminator(RobotMultiPredictionSampler):
+    # Trains the classifier built by MakeCostarImageClassifier to predict an
+    # option label from an image pair. The `goal` flag selects whether the
+    # second input image is the goal frame or the current frame.
+
+    def __init__(self, goal, taskdef, *args, **kwargs):
+        '''
+        As in the other models, we call super() to parse arguments from the
+        command line and set things like our optimizer and learning rate.
+        '''
+        super(CostarDiscriminator, self).__init__(taskdef, *args, **kwargs)
+        self.PredictorCb = None
+        self.num_generator_files = 1
+        # goal is read in _getData to choose the second input image.
+        self.goal = goal
+        self.load_jpeg = True
+
+    def _makeModel(self, image, *args, **kwargs):
+        '''
+        Create model to predict possible manipulation goals.
+        '''
+        # Per-sample image shape: everything after the leading axis.
+        img_shape = image.shape[1:]
+        disc = MakeCostarImageClassifier(self, img_shape)
+        disc.summary()
+
+        self.model = disc
+
+    def _getData(self, image, goal_idx, label, *args, **kwargs):
+        # Returns ([first image repeated, second image], [one-hot label]);
+        # the second image is I_target when self.goal is set, else I.
+        #I = np.array(image)
+        #I_target = np.array(goal_image)
+        I = image
+        length = label.shape[0]
+        # Clamp every goal index to the last valid position (length-1).
+        goal_idx = np.min((goal_idx, np.ones_like(goal_idx)*(length-1)),axis=0)
+        I_target = I[goal_idx]
+        o1 = np.array(label)
+        o1_1h = np.squeeze(ToOneHot2D(o1, self.num_options))
+        I0 = I[0]
+        # length is re-read from the image array here (it was taken from
+        # label above) and used to repeat the first frame.
+        length = I.shape[0]
+        I0 = np.tile(np.expand_dims(I0,axis=0),[length,1,1,1]) 
+        if self.goal:
+            return [I0, I_target], [o1_1h]
+        else:
+            return [I0, I], [o1_1h]
+
diff --git a/costar_models/python/costar_models/multi_sampler.py b/costar_models/python/costar_models/multi_sampler.py
@@ -293,7 +293,7 @@ def _makePredictor(self, features):
 
         return predictor, model, actor, ins, enc
 
-    def _makeTransform(self, h_dim=(8,8), perm_drop=False):
+    def _makeTransform(self, perm_drop=False):
         '''
         This is the version made for the newer code, it is set up to use both
         the initial and current observed world and creates a transform
@@ -307,6 +307,7 @@ def _makeTransform(self, h_dim=(8,8), perm_drop=False):
         --------
         transform model
         '''
+        h_dim = self.hidden_shape
         h = Input((h_dim[0], h_dim[1], self.encoder_channels),name="h_in")
         option = Input((self.num_options,),name="t_opt_in")
         # Never use the BN here?

diff --git a/costar_models/python/costar_models/util.py b/costar_models/python/costar_models/util.py
@@ -24,6 +24,8 @@
 
 # CoSTAR
 from .pretrain_image_costar import PretrainImageCostar
+from .conditional_image_costar import ConditionalImageCostar
+from .discriminator import CostarDiscriminator
 
 # Jigsaws stuff
 from .dvrk import *
@@ -188,10 +190,26 @@ def MakeModel(features, model, taskdef, **kwargs):
                     model=model,
                     features=features,
                     **kwargs)
+        elif model == "conditional_image":
+            model_instance = ConditionalImageCostar(taskdef,
+                    features=features,
+                    model=model,
+                    **kwargs)
+        elif model == "discriminator":
+            model_instance = CostarDiscriminator(False, taskdef,
+                    features=features,
+                    model=model, **kwargs)
+        elif model == "goal_discriminator":
+            model_instance = CostarDiscriminator(True, taskdef,
+                    features=features,
+                    model=model, **kwargs)
 
         # Global setup for CoSTAR
         # this one uses jpegs
         model_instance.load_jpeg = True
+        model_instance.null_option = 40
+        model_instance.num_options = 41
+        model_instance.validation_split = 0.2
 
     elif features == "husky":
         '''