AI project 3

University of North Texas, Course 5210, Industrial Engineering, Dec 6, 2023
AI_project 3.ipynb - Colaboratory (google.com)

import numpy as np

class TaxiAgent:
    def __init__(self, rows, cols, taxi_start, pickup_point, restricted_points):
        self.rows = rows
        self.cols = cols
        self.taxi_start = taxi_start
        self.pickup_point = pickup_point
        self.restricted_points = restricted_points
        self.Q_values = np.zeros((rows, cols, 4))  # Q-values for Up, Down, Left, Right
        self.alpha = 0.1    # Learning rate
        self.gamma = 0.9    # Discount factor
        self.epsilon = 0.1  # Exploration-exploitation trade-off

    def get_next_state(self, state, action):
        row, col = state
        if action == 0:    # Up
            row = max(0, row - 1)
        elif action == 1:  # Down
            row = min(self.rows - 1, row + 1)
        elif action == 2:  # Left
            col = max(0, col - 1)
        elif action == 3:  # Right
            col = min(self.cols - 1, col + 1)
        return row, col

    def get_reward(self, state):
        if state == self.pickup_point:
            return 20      # Regular customer
        elif state == self.taxi_start:
            return 0       # No reward for staying at the starting point
        elif state in self.restricted_points:
            return -10     # Negative reward for restricted points
        else:
            return -0.5    # Living reward for other states

    def choose_action(self, state):
        if np.random.rand() < self.epsilon:
            return np.random.choice(4)  # Exploration
        else:
            return np.argmax(self.Q_values[state[0], state[1]])

    def update_q_values(self, state, action, next_state, reward):
        self.Q_values[state[0], state[1], action] = \
            (1 - self.alpha) * self.Q_values[state[0], state[1], action] + \
            self.alpha * (reward + self.gamma * np.max(self.Q_values[next_state[0], next_state[1]]))

    def train(self, episodes):
        for _ in range(episodes):
            state = self.taxi_start
            while state != self.pickup_point:
                action = self.choose_action(state)
                next_state = self.get_next_state(state, action)
                reward = self.get_reward(next_state)
                self.update_q_values(state, action, next_state, reward)
                state = next_state
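For reference, update_q_values above is the standard tabular Q-learning rule, Q(s, a) ← (1 − α)·Q(s, a) + α·(r + γ·max over a' of Q(s', a')), with learning rate α = 0.1 and discount factor γ = 0.9 as set in __init__; action selection in choose_action is ε-greedy with ε = 0.1.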
# Create an instance of the TaxiAgent
agent = TaxiAgent(rows=5, cols=5, taxi_start=(3, 1), pickup_point=(0, 1),
                  restricted_points=[(0, 2), (1, 2), (2, 2)])

# Train the agent for 1000 episodes
agent.train(episodes=1000)

# Display the learned Q-values
# print("Learned Q-values:")
# print(agent.Q_values)

def visualize_optimal_policy(agent):
    optimal_policy_arrows = [['' for _ in range(agent.cols)] for _ in range(agent.rows)]

    # Find the optimal action in each state and populate the arrows
    for i in range(agent.rows):
        for j in range(agent.cols):
            if (i, j) == agent.pickup_point:
                optimal_policy_arrows[i][j] = 'P'  # Pick-up point
            elif (i, j) == agent.taxi_start:
                optimal_policy_arrows[i][j] = 'T'  # Taxi starting point
            elif (i, j) in agent.restricted_points:
                optimal_policy_arrows[i][j] = 'R'  # Restricted point
            else:
                optimal_action = np.argmax(agent.Q_values[i, j])
                if optimal_action == 0:
                    optimal_policy_arrows[i][j] = '↑'
                elif optimal_action == 1:
                    optimal_policy_arrows[i][j] = '↓'
                elif optimal_action == 2:
                    optimal_policy_arrows[i][j] = '←'
                elif optimal_action == 3:
                    optimal_policy_arrows[i][j] = '→'

    # Print the optimal policy
    for row in optimal_policy_arrows:
        print(row)

    # Find the optimal path from the starting point to the pick-up point
    current_state = agent.taxi_start
    optimal_path = [current_state]
    while current_state != agent.pickup_point:
        action = np.argmax(agent.Q_values[current_state[0], current_state[1]])
        current_state = agent.get_next_state(current_state, action)
        optimal_path.append(current_state)

    # Print the optimal path with arrows
    for i in range(len(optimal_path) - 1):
        row, col = optimal_path[i]
        next_row, next_col = optimal_path[i + 1]
        if next_row < row:
            arrow = '↑'
        elif next_row > row:
            arrow = '↓'
        elif next_col < col:
            arrow = '←'
        elif next_col > col:
            arrow = '→'
        else:
            arrow = 'X'  # No movement (should not happen)
        print(f"Move {arrow} from {optimal_path[i]} to {optimal_path[i + 1]}")

# Create an instance of the TaxiAgent
agent = TaxiAgent(rows=5, cols=5, taxi_start=(3, 1), pickup_point=(0, 1),
                  restricted_points=[(0, 2), (1, 2), (2, 2)])

# Train the agent for 1000 episodes
agent.train(episodes=1000)

# Display the learned Q-values
# print("Learned Q-values:")
# print(agent.Q_values)

# Use the previously created agent
visualize_optimal_policy(agent)
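Not part of the original notebook, but a quick sanity check on the learned policy is to walk the greedy path from the start and sum the rewards collected. The helper below is a hypothetical sketch that reuses the agent's own get_next_state and get_reward methods and assumes the trained R1 agent from above.

def greedy_path_reward(agent, max_steps=50):
    """Hypothetical helper: follow the greedy policy from the start and sum rewards."""
    state = agent.taxi_start
    total = 0.0
    for _ in range(max_steps):  # cap the steps in case the greedy policy loops
        if state == agent.pickup_point:
            break
        action = np.argmax(agent.Q_values[state[0], state[1]])
        state = agent.get_next_state(state, action)
        total += agent.get_reward(state)
    return total

# Example: total reward along the greedy path for the trained R1 agent
print(greedy_path_reward(agent))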
R2

import numpy as np

class TaxiAgent:
    def __init__(self, rows, cols, taxi_start, pickup_point, restricted_points):
        self.rows = rows
        self.cols = cols
        self.taxi_start = taxi_start
        self.pickup_point = pickup_point
        self.restricted_points = restricted_points
        self.Q_values = np.zeros((rows, cols, 4))  # Q-values for Up, Down, Left, Right
        self.alpha = 0.1    # Learning rate
        self.gamma = 0.9    # Discount factor
        self.epsilon = 0.1  # Exploration-exploitation trade-off

    def get_next_state(self, state, action):
        row, col = state
        if action == 0:    # Up
            row = max(0, row - 1)
        elif action == 1:  # Down
            row = min(self.rows - 1, row + 1)
        elif action == 2:  # Left
            col = max(0, col - 1)
        elif action == 3:  # Right
            col = min(self.cols - 1, col + 1)
        return row, col

    def get_reward(self, state):
        if state == self.pickup_point:
            return 30      # Updated reward for premium customer pickup
        elif state == self.taxi_start:
            return 0       # No reward for staying at the starting point
        elif state in self.restricted_points:
            return -10     # Negative reward for restricted points
        else:
            return -0.5    # Living reward for other states

    def choose_action(self, state):
        if np.random.rand() < self.epsilon:
            return np.random.choice(4)  # Exploration
        else:
            return np.argmax(self.Q_values[state[0], state[1]])

    def update_q_values(self, state, action, next_state, reward):
        self.Q_values[state[0], state[1], action] = \
            (1 - self.alpha) * self.Q_values[state[0], state[1], action] + \
            self.alpha * (reward + self.gamma * np.max(self.Q_values[next_state[0], next_state[1]]))

    def train(self, episodes):
        for _ in range(episodes):
            state = self.taxi_start
            while state != self.pickup_point:
                action = self.choose_action(state)
                next_state = self.get_next_state(state, action)
                reward = self.get_reward(next_state)
                self.update_q_values(state, action, next_state, reward)
                state = next_state

# Create an instance of the TaxiAgent with the new scenario
agent = TaxiAgent(rows=5, cols=5, taxi_start=(2, 2), pickup_point=(0, 3),
                  restricted_points=[(0, 2), (1, 2), (2, 2)])

# Train the agent for 1000 episodes
agent.train(episodes=1000)

# Display the learned Q-values
# print("Learned Q-values:")
# print(agent.Q_values)
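Note that in this instantiation the starting cell (2, 2) also appears in restricted_points. Because get_reward checks taxi_start before the restricted list, re-entering (2, 2) yields 0 rather than -10, and visualize_optimal_policy would mark it 'T' rather than 'R'; the final instantiation below uses (3, 1) as the start instead.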
def visualize_optimal_policy(agent):
    optimal_policy_arrows = [['' for _ in range(agent.cols)] for _ in range(agent.rows)]

    # Find the optimal action in each state and populate the arrows
    for i in range(agent.rows):
        for j in range(agent.cols):
            if (i, j) == agent.pickup_point:
                optimal_policy_arrows[i][j] = 'P'  # Pick-up point
            elif (i, j) == agent.taxi_start:
                optimal_policy_arrows[i][j] = 'T'  # Taxi starting point
            elif (i, j) in agent.restricted_points:
                optimal_policy_arrows[i][j] = 'R'  # Restricted point
            else:
                optimal_action = np.argmax(agent.Q_values[i, j])
                if optimal_action == 0:
                    optimal_policy_arrows[i][j] = '↑'
                elif optimal_action == 1:
                    optimal_policy_arrows[i][j] = '↓'
                elif optimal_action == 2:
                    optimal_policy_arrows[i][j] = '←'
                elif optimal_action == 3:
                    optimal_policy_arrows[i][j] = '→'

    # Print the optimal policy
    for row in optimal_policy_arrows:
        print(row)

    # Find the optimal path from the starting point to the pick-up point
    current_state = agent.taxi_start
    optimal_path = [current_state]
    while current_state != agent.pickup_point:
        action = np.argmax(agent.Q_values[current_state[0], current_state[1]])
        current_state = agent.get_next_state(current_state, action)
        optimal_path.append(current_state)

    # Print the optimal path with arrows
    for i in range(len(optimal_path) - 1):
        row, col = optimal_path[i]
        next_row, next_col = optimal_path[i + 1]
        if next_row < row:
            arrow = '↑'
        elif next_row > row:
            arrow = '↓'
        elif next_col < col:
            arrow = '←'
        elif next_col > col:
            arrow = '→'
        else:
            arrow = 'X'  # No movement (should not happen)
        print(f"Move {arrow} from {optimal_path[i]} to {optimal_path[i + 1]}")
# Create an instance of the TaxiAgent
agent = TaxiAgent(rows=5, cols=5, taxi_start=(3, 1), pickup_point=(0, 3),
                  restricted_points=[(0, 2), (1, 2), (2, 2)])

# Train the agent for 1000 episodes
agent.train(episodes=1000)

# Display the learned Q-values
# print("Learned Q-values:")
# print(agent.Q_values)

# Use the previously created agent
visualize_optimal_policy(agent)
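The R2 listing repeats the entire class only to change the pickup reward from 20 to 30. Assuming nothing else differs between the two scenarios, a lighter-weight variant is to make the pickup reward a constructor argument; the subclass below is a hypothetical sketch, not part of the original notebook.

class ConfigurableTaxiAgent(TaxiAgent):
    """Hypothetical variant: same agent, but the pickup reward is a parameter."""
    def __init__(self, rows, cols, taxi_start, pickup_point, restricted_points, pickup_reward=20):
        super().__init__(rows, cols, taxi_start, pickup_point, restricted_points)
        self.pickup_reward = pickup_reward

    def get_reward(self, state):
        if state == self.pickup_point:
            return self.pickup_reward  # 20 for a regular customer, 30 for a premium customer
        elif state == self.taxi_start:
            return 0
        elif state in self.restricted_points:
            return -10
        else:
            return -0.5

# Premium-customer scenario without redefining the whole class
premium_agent = ConfigurableTaxiAgent(rows=5, cols=5, taxi_start=(3, 1), pickup_point=(0, 3),
                                      restricted_points=[(0, 2), (1, 2), (2, 2)],
                                      pickup_reward=30)
premium_agent.train(episodes=1000)
visualize_optimal_policy(premium_agent)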