initial commit

2018-08-03 14:45:12 -05:00
commit fabec6a2dd
7 changed files with 778 additions and 0 deletions
--- a/Quadcopter_Project.ipynb
+++ b/Quadcopter_Project.ipynb
--- a/ddpg_draw_test_weights_actor.h5f
+++ b/ddpg_draw_test_weights_actor.h5f
--- a/ddpg_draw_test_weights_critic.h5f
+++ b/ddpg_draw_test_weights_critic.h5f
--- a/ddpg_quad_sim_weights_actor.h5f
+++ b/ddpg_quad_sim_weights_actor.h5f
--- a/ddpg_quad_sim_weights_critic.h5f
+++ b/ddpg_quad_sim_weights_critic.h5f
--- a/physics_sim.py
+++ b/physics_sim.py
@@ -0,0 +1,150 @@
 import numpy as np
 import csv
 def C(x):
    return np.cos(x)
 def S(x):
    return np.sin(x)
 def earth_to_body_frame(ii, jj, kk):
    # C^b_n
    R = [[C(kk) * C(jj), C(kk) * S(jj) * S(ii) - S(kk) * C(ii), C(kk) * S(jj) * C(ii) + S(kk) * S(ii)],
         [S(kk) * C(jj), S(kk) * S(jj) * S(ii) + C(kk) * C(ii), S(kk) * S(jj) * C(ii) - C(kk) * S(ii)],
         [-S(jj), C(jj) * S(ii), C(jj) * C(ii)]]
    return np.array(R)
 def body_to_earth_frame(ii, jj, kk):
    # C^n_b
    return np.transpose(earth_to_body_frame(ii, jj, kk))
 class PhysicsSim():
    def __init__(self, init_pose=None, init_velocities=None, init_angle_velocities=None, runtime=5.):
        self.init_pose = init_pose
        self.init_velocities = init_velocities
        self.init_angle_velocities = init_angle_velocities
        self.runtime = runtime
        self.gravity = -9.81  # m/s
        self.rho = 1.2
        self.mass = 0.958  # 300 g
        self.dt = 1 / 50.0  # Timestep
        self.C_d = 0.3
        self.l_to_rotor = 0.4
        self.propeller_size = 0.1
        width, length, height = .51, .51, .235
        self.dims = np.array([width, length, height])  # x, y, z dimensions of quadcopter
        self.areas = np.array([length * height, width * height, width * length])
        I_x = 1 / 12. * self.mass * (height**2 + width**2)
        I_y = 1 / 12. * self.mass * (height**2 + length**2)  # 0.0112 was a measured value
        I_z = 1 / 12. * self.mass * (width**2 + length**2)
        self.moments_of_inertia = np.array([I_x, I_y, I_z])  # moments of inertia
        env_bounds = 300.0  # 300 m / 300 m / 300 m
        self.lower_bounds = np.array([-env_bounds / 2, -env_bounds / 2, 0])
        self.upper_bounds = np.array([env_bounds / 2, env_bounds / 2, env_bounds])
        self.reset()
    def reset(self):
        self.time = 0.0
        self.pose = np.array([0.0, 0.0, 10.0, 0.0, 0.0, 0.0]) if self.init_pose is None else np.copy(self.init_pose)
        self.v = np.array([0.0, 0.0, 0.0]) if self.init_velocities is None else np.copy(self.init_velocities)
        self.angular_v = np.array([0.0, 0.0, 0.0]) if self.init_angle_velocities is None else np.copy(self.init_angle_velocities)
        self.linear_accel = np.array([0.0, 0.0, 0.0])
        self.angular_accels = np.array([0.0, 0.0, 0.0])
        self.prop_wind_speed = np.array([0., 0., 0., 0.])
        self.done = False
    def find_body_velocity(self):
        body_velocity = np.matmul(earth_to_body_frame(*list(self.pose[3:])), self.v)
        return body_velocity
    def get_linear_drag(self):
        linear_drag = 0.5 * self.rho * self.find_body_velocity()**2 * self.areas * self.C_d
        return linear_drag
    def get_linear_forces(self, thrusts):
        # Gravity
        gravity_force = self.mass * self.gravity * np.array([0, 0, 1])
        # Thrust
        thrust_body_force = np.array([0, 0, sum(thrusts)])
        # Drag
        drag_body_force = -self.get_linear_drag()
        body_forces = thrust_body_force + drag_body_force
        linear_forces = np.matmul(body_to_earth_frame(*list(self.pose[3:])), body_forces)
        linear_forces += gravity_force
        return linear_forces
    def get_moments(self, thrusts):
        thrust_moment = np.array([(thrusts[3] - thrusts[2]) * self.l_to_rotor,
                            (thrusts[1] - thrusts[0]) * self.l_to_rotor,
                            0])# (thrusts[2] + thrusts[3] - thrusts[0] - thrusts[1]) * self.T_q])  # Moment from thrust
        drag_moment =  self.C_d * 0.5 * self.rho * self.angular_v * np.absolute(self.angular_v) * self.areas * self.dims * self.dims
        moments = thrust_moment - drag_moment # + motor_inertia_moment
        return moments
    def calc_prop_wind_speed(self):
        body_velocity = self.find_body_velocity()
        phi_dot, theta_dot = self.angular_v[0], self.angular_v[1]
        s_0 = np.array([0., 0., theta_dot * self.l_to_rotor])
        s_1 = -s_0
        s_2 = np.array([0., 0., phi_dot * self.l_to_rotor])
        s_3 = -s_2
        speeds = [s_0, s_1, s_2, s_3]
        for num in range(4):
            perpendicular_speed = speeds[num] + body_velocity
            self.prop_wind_speed[num] = perpendicular_speed[2]
    def get_propeler_thrust(self, rotor_speeds):
        '''calculates net thrust (thrust - drag) based on velocity
        of propeller and incoming power'''
        thrusts = []
        for prop_number in range(4):
            V = self.prop_wind_speed[prop_number]
            D = self.propeller_size
            n = rotor_speeds[prop_number]
            J = V / n * D
            # From http://m-selig.ae.illinois.edu/pubs/BrandtSelig-2011-AIAA-2011-1255-LRN-Propellers.pdf
            C_T = max(.12 - .07*max(0, J)-.1*max(0, J)**2, 0)
            thrusts.append(C_T * self.rho * n**2 * D**4)
        return thrusts
    def next_timestep(self, rotor_speeds):
        self.calc_prop_wind_speed()
        thrusts = self.get_propeler_thrust(rotor_speeds)
        self.linear_accel = self.get_linear_forces(thrusts) / self.mass
        position = self.pose[:3] + self.v * self.dt + 0.5 * self.linear_accel * self.dt**2
        self.v += self.linear_accel * self.dt
        moments = self.get_moments(thrusts)
        self.angular_accels = moments / self.moments_of_inertia
        angles = self.pose[3:] + self.angular_v * self.dt + 0.5 * self.angular_accels * self.angular_accels * self.dt ** 2
        angles = (angles + 2 * np.pi) % (2 * np.pi)
        self.angular_v = self.angular_v + self.angular_accels * self.dt
        new_positions = []
        for ii in range(3):
            if position[ii] <= self.lower_bounds[ii]:
                new_positions.append(self.lower_bounds[ii])
                self.done = True
            elif position[ii] > self.upper_bounds[ii]:
                new_positions.append(self.upper_bounds[ii])
                self.done = True
            else:
                new_positions.append(position[ii])
        self.pose = np.array(new_positions + list(angles))
        self.time += self.dt
        if self.time > self.runtime:
            self.done = True
        return self.done
--- a/task.py
+++ b/task.py
@@ -0,0 +1,100 @@
 import numpy as np
 from physics_sim import PhysicsSim
 import ipympl
 import matplotlib.pyplot as plt
 from mpl_toolkits.mplot3d import Axes3D
 class Task():
 	"""Task (environment) that defines the goal and provides feedback to the agent."""
 	def __init__(self, init_pose=None, init_velocities=None, 
 		init_angle_velocities=None, runtime=5., target_pos=None):
 		"""Initialize a Task object.
 		Params
 		======
 			init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
 			init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
 			init_angle_velocities: initial radians/second for each of the three Euler angles
 			runtime: time limit for each episode
 			target_pos: target/goal (x,y,z) position for the agent
 		"""
 		# Simulation
 		self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime) 
 		self.action_repeat = 3
 		self.state_size = self.action_repeat * 6
 		self.action_low = 0
 		self.action_high = 900 
 		self.action_size = 4
 		# Goal
 		self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])
 		self.point = {'x':[],'y':[],'z':[]}
 		self.show_graph=True
 		self.do_render=False
 	def get_reward(self):
 		r_min = (((np.array([-150.,-150.,0.]) - self.target_pos)**2).sum())**0.5
 		r_max = 0.
 		t_min = -1.
 		t_max = 1.
 		# if(np.any(self.sim.pose[:3] <= self.sim.lower_bounds) or np.any(self.sim.pose[:3] >= self.sim.upper_bounds)):
 			# reward = -3. 
 		# else:
 		"""Uses current pose of sim to return reward."""
 		reward_raw = (((self.sim.pose[:3] - self.target_pos)**2).sum())**0.5
 		reward = (reward_raw-r_min)/(r_max-r_min) * (t_max-t_min) + t_min
 		return reward
 	def step(self, rotor_speeds):
 		"""Uses action to obtain next state, reward, done."""
 		reward = 0
 		pose_all = []
 		done = self.sim.next_timestep(rotor_speeds) # update the sim pose and velocities
 		reward += self.get_reward() 
 		pose_all.append(self.sim.pose)
 		next_state = np.concatenate(pose_all)
 		info = dict()
 		if(self.do_render):
 			self.point['x'].append(self.sim.pose[0])
 			self.point['y'].append(self.sim.pose[1])
 			self.point['z'].append(self.sim.pose[2])
 			self.render(done=done)
 		return next_state, reward, done, info
 	def reset(self):
 		"""Reset the sim to start a new episode."""
 		self.sim.reset()
 		state = np.concatenate([self.sim.pose] )
 		if(self.do_render):
 			self.ax.scatter(self.sim.init_pose[0],self.sim.init_pose[1],self.sim.init_pose[2])
 		return state
 	def render(self, mode='init',done=False):
 		if(mode == 'human'):
 			self.do_render = True
 			if(self.show_graph):
 				self.init_graph()
 				self.show_graph=False
 		if(done):
 			self.line.plot(self.point['x'],self.point['y'], self.point['z'])
 			self.point['x'][:] = []
 			self.point['y'][:] = []
 			self.point['z'][:] = []
 	def init_graph(self):
 		self.fig = plt.figure(figsize=(8,8))
 		self.line = self.fig.add_subplot(111, projection='3d')
 		self.ax = plt.gca()
 		self.line.set_xlim(-150,150)
 		self.line.set_ylim(-150,150)
 		self.line.set_zlim(0,300)
 		self.ax.scatter(self.target_pos[0], self.target_pos[1], self.target_pos[2], color='green', label='Goal')