diff --git a/model-gym-ros-env/car_node/car_node/main.py b/model-gym-ros-env/car_node/car_node/main.py index bea425f0..f860f095 100644 --- a/model-gym-ros-env/car_node/car_node/main.py +++ b/model-gym-ros-env/car_node/car_node/main.py @@ -27,10 +27,10 @@ SPEED_SCALE = 0.7 class PPOModelEvaluator(Node): - + def __init__(self): super().__init__('wall_follow_node') - + #Topics & Subs, Pubs lidarscan_topic = '/scan' drive_topic = '/drive' @@ -49,12 +49,12 @@ def __init__(self): def lidar_callback(self, data): d = np.array(data.ranges, dtype=np.float64) d = convert_range(d, [data.angle_min, data.angle_max], [-1, 1]) - + action, _states = self.model.predict(d, deterministic=True) action = self.eval_env.un_normalise_actions(action) - + self.get_logger().info(f'{action[0], action[1]}') - + drive_msg = AckermannDriveStamped() drive_msg.header.stamp = self.get_clock().now().to_msg() drive_msg.header.frame_id = "laser" diff --git a/model-gym-ros-env/car_node/car_node/wrapper.py b/model-gym-ros-env/car_node/car_node/wrapper.py index c5b1f197..8717a4a8 100644 --- a/model-gym-ros-env/car_node/car_node/wrapper.py +++ b/model-gym-ros-env/car_node/car_node/wrapper.py @@ -71,7 +71,7 @@ def __init__(self, env, random_map=False): def get_total_steps(self) -> int: return self.step_count - + def get_episode_rewards(self) -> List[float]: """ Returns the rewards of all the episodes @@ -81,7 +81,7 @@ def get_episode_rewards(self) -> List[float]: return self.episode_returns - + def set_raceliens(self): @@ -128,13 +128,13 @@ def step(self, action): ('c3B/stream', self.race_line_color)) reward = 0 - - - + + + if self.count < len(self.race_line_x) - 1: X, Y = observation['poses_x'][0], observation['poses_y'][0] - + dist = np.sqrt(np.power((X - next_x), 2) + np.power((Y - next_y), 2)) if dist < 2: self.count = self.count + 1 @@ -156,7 +156,7 @@ def step(self, action): + np.power((observation["poses_y"][0] - self.last_position['y']), 2) ) - + # if distance_from_last_position > 0.0005: # reward += 0.8 # else: @@ -194,7 +194,7 @@ def step(self, action): self.episode_returns.append(reward) - + return self.normalise_observations(observation['scans'][0]), reward, bool(done), info @@ -222,7 +222,7 @@ def reset(self): race_line_x = race_line_x[random_index:] + race_line_x[:random_index] race_line_y = race_line_y[random_index:] + race_line_y[:random_index] race_line_theta = race_line_theta[random_index:] + race_line_theta[:random_index] - + self.race_line_x = race_line_x self.race_line_y = race_line_y @@ -261,4 +261,4 @@ def un_normalise_actions(self, actions): def normalise_observations(self, observations): # convert observations from normal lidar distances range to range [-1, 1] return convert_range(observations, [self.lidar_min, self.lidar_max], [-1, 1]) - + diff --git a/src/train.py b/src/train.py index a1fd4417..968e5cd8 100644 --- a/src/train.py +++ b/src/train.py @@ -32,7 +32,7 @@ def train( eval_env.set_map_path(path) eval_env.seed(1773449316) eval_env.set_optimize_speed(optimize_speed) - + timesteps_list = np.logspace(np.log10(min_timesteps), np.log10(max_timesteps), num=num_of_steps, endpoint=True, base=10.0, dtype=int, axis=0) learning_rate_list = np.logspace(np.log10(max_learning_rate), np.log10(min_learning_rate), num=num_of_steps, endpoint=True, base=10.0, dtype=None, axis=0) @@ -43,7 +43,7 @@ def train( for timesteps, learning_rate in zip(timesteps_list, learning_rate_list): eval_env.seed(round(np.random.rand()*1000000000)) - + if os.path.exists("./train_test/best_global_model.zip"): print("Loading Existing Model") model = PPO.load("./train_test/best_global_model", eval_env, learning_rate=linear_schedule(learning_rate), device=device) diff --git a/src/wrapper/wrapper.py b/src/wrapper/wrapper.py index d8cae507..fc21c6c6 100644 --- a/src/wrapper/wrapper.py +++ b/src/wrapper/wrapper.py @@ -14,7 +14,7 @@ def logger(map, event, reword, lap_time): with open('log.csv', 'a', newline='') as file: writer = csv.writer(file) writer.writerow([map, event, reword, lap_time]) - + def convert_range(value, input_range, output_range): # converts value(s) from range to another range @@ -61,7 +61,7 @@ def __init__(self, env, random_map=False): # set threshold for maximum angle of car, to prevent spinning self.max_theta = 100 - + self.map_path = None self.random_map = random_map @@ -120,11 +120,11 @@ def start_position(self): def step(self, action): - + def episode_end(reason = None, rew = 0): if reason is not None: print("Episode End ->", reason, self.map_path) - + done = True self.count = 0 self.episode_returns = [] @@ -147,8 +147,8 @@ def episode_end(reason = None, rew = 0): ('c3B/stream', self.race_line_color)) reward = 0 - - + + aceleration_reward = action_convert[1] if self.optimize_speed: @@ -158,11 +158,11 @@ def episode_end(reason = None, rew = 0): reward += 2 reward = reward * 0.01 - - + + if self.count < len(self.race_line_x) - 1: X, Y = observation['poses_x'][0], observation['poses_y'][0] - + dist = np.sqrt(np.power((X - next_x), 2) + np.power((Y - next_y), 2)) if dist < 2: self.count = self.count + 1 @@ -171,15 +171,15 @@ def episode_end(reason = None, rew = 0): if dist < 2.5: self.number_of_base_reward_give += 1 - + if self.number_of_base_reward_give < 100: reward += 0.05 else: reward -= 1 - + if dist > 3: reward -= 1 - + else: steps_goal = self.count @@ -187,15 +187,15 @@ def episode_end(reason = None, rew = 0): steps_done = self.step_for_episode elif self.one_lap_done: steps_done = self.step_for_episode / 2 - + k = (steps_done - steps_goal)/steps_goal - + reward += (1-k) * 100 print("----------------- Lap Done ----------------->", self.map_path, self.step_for_episode * 0.01, reward) - + self.count = 0 - + if self.one_lap_done: logger(self.map_path, "lap_done", sum(self.episode_returns), self.step_for_episode * 0.01) self.episode_returns = [] @@ -203,22 +203,22 @@ def episode_end(reason = None, rew = 0): self.one_lap_done = False else: self.one_lap_done = True - - + + reward = round(reward, 6) - + if observation['collisions'][0]: logger(self.map_path, "collisions", sum(self.episode_returns), self.step_for_episode * 0.01) done, reward = episode_end(rew = -30) - - + + if self.step_for_episode > 50_000: logger(self.map_path, "too_slow", sum(self.episode_returns), self.step_for_episode * 0.01) done, reward = episode_end("Too long", -10) - + self.episode_returns.append(reward) self.step_for_episode += 1 @@ -249,7 +249,7 @@ def reset(self): race_line_x = race_line_x[random_index:] + race_line_x[:random_index] race_line_y = race_line_y[random_index:] + race_line_y[:random_index] race_line_theta = race_line_theta[random_index:] + race_line_theta[:random_index] - + self.race_line_x = race_line_x self.race_line_y = race_line_y @@ -289,4 +289,4 @@ def un_normalise_actions(self, actions): def normalise_observations(self, observations): # convert observations from normal lidar distances range to range [-1, 1] return convert_range(observations, [self.lidar_min, self.lidar_max], [-1, 1]) - +