AI project 3
School: University of North Texas
Course: 5210
Subject: Industrial Engineering
Date: Dec 6, 2023
Pages: 6
Notebook: AI_project 3.ipynb (Google Colaboratory, google.com)
import numpy as np

class TaxiAgent:
    def __init__(self, rows, cols, taxi_start, pickup_point, restricted_points):
        self.rows = rows
        self.cols = cols
        self.taxi_start = taxi_start
        self.pickup_point = pickup_point
        self.restricted_points = restricted_points
        self.Q_values = np.zeros((rows, cols, 4))  # Q-values for Up, Down, Left, Right
        self.alpha = 0.1    # Learning rate
        self.gamma = 0.9    # Discount factor
        self.epsilon = 0.1  # Exploration-exploitation trade-off

    def get_next_state(self, state, action):
        row, col = state
        if action == 0:      # Up
            row = max(0, row - 1)
        elif action == 1:    # Down
            row = min(self.rows - 1, row + 1)
        elif action == 2:    # Left
            col = max(0, col - 1)
        elif action == 3:    # Right
            col = min(self.cols - 1, col + 1)
        return row, col

    def get_reward(self, state):
        if state == self.pickup_point:
            return 20    # Regular customer
        elif state == self.taxi_start:
            return 0     # No reward for staying in the starting point
        elif state in self.restricted_points:
            return -10   # Negative reward for restricted points
        else:
            return -0.5  # Living reward (small step penalty) for other states

    def choose_action(self, state):
        if np.random.rand() < self.epsilon:
            return np.random.choice(4)  # Exploration
        else:
            return np.argmax(self.Q_values[state[0], state[1]])  # Exploitation

    def update_q_values(self, state, action, next_state, reward):
        self.Q_values[state[0], state[1], action] = (1 - self.alpha) * self.Q_values[state[0], state[1], action] + \
            self.alpha * (reward + self.gamma * np.max(self.Q_values[next_state[0], next_state[1]]))

    def train(self, episodes):
        for _ in range(episodes):
            state = self.taxi_start
            while state != self.pickup_point:
                action = self.choose_action(state)
                next_state = self.get_next_state(state, action)
                reward = self.get_reward(next_state)
                self.update_q_values(state, action, next_state, reward)
                state = next_state
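For reference (this note is an addition, not part of the original notebook): update_q_values implements the standard tabular Q-learning update,

    Q(s, a) ← (1 - alpha) * Q(s, a) + alpha * (reward + gamma * max_a' Q(s', a'))

As a quick hand check with the values used here (alpha = 0.1, gamma = 0.9, Q-values initialized to zero), the first time the agent steps into the pickup point and receives the reward of 20, that entry becomes (1 - 0.1) * 0 + 0.1 * (20 + 0.9 * 0) = 2.0; later episodes then propagate that value back toward the starting cell through the max term.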
# Create an instance of the TaxiAgent
agent = TaxiAgent(rows=5, cols=5, taxi_start=(3, 1), pickup_point=(0, 1),
                  restricted_points=[(0, 2), (1, 2), (2, 2)])

# Train the agent for 1000 episodes
agent.train(episodes=1000)

# Display the learned Q-values
# print("Learned Q-values:")
# print(agent.Q_values)

def visualize_optimal_policy(agent):
    optimal_policy_arrows = [['' for _ in range(agent.cols)] for _ in range(agent.rows)]

    # Find the optimal action in each state and populate the arrows
    for i in range(agent.rows):
        for j in range(agent.cols):
            if (i, j) == agent.pickup_point:
                optimal_policy_arrows[i][j] = 'P'   # Pick-up point
            elif (i, j) == agent.taxi_start:
                optimal_policy_arrows[i][j] = 'T'   # Taxi starting point
            elif (i, j) in agent.restricted_points:
                optimal_policy_arrows[i][j] = 'R'   # Restricted point
            else:
                optimal_action = np.argmax(agent.Q_values[i, j])
                if optimal_action == 0:
                    optimal_policy_arrows[i][j] = '↑'
                elif optimal_action == 1:
                    optimal_policy_arrows[i][j] = '↓'
                elif optimal_action == 2:
                    optimal_policy_arrows[i][j] = '←'
                elif optimal_action == 3:
                    optimal_policy_arrows[i][j] = '→'

    # Print the optimal policy
    for row in optimal_policy_arrows:
        print(row)

    # Find the optimal path from the starting point to the pick-up point
    current_state = agent.taxi_start
    optimal_path = [current_state]
    while current_state != agent.pickup_point:
        action = np.argmax(agent.Q_values[current_state[0], current_state[1]])
        current_state = agent.get_next_state(current_state, action)
        optimal_path.append(current_state)

    # Print the optimal path with arrows
    for i in range(len(optimal_path) - 1):
        row, col = optimal_path[i]
        next_row, next_col = optimal_path[i + 1]
        if next_row < row:
            arrow = '↑'
        elif next_row > row:
            arrow = '↓'
        elif next_col < col:
            arrow = '←'
        elif next_col > col:
            arrow = '→'
        else:
            arrow = 'X'  # No movement (should not happen)
        print(f"Move {arrow} from {optimal_path[i]} to {optimal_path[i + 1]}")

# Create an instance of the TaxiAgent
agent = TaxiAgent(rows=5, cols=5, taxi_start=(3, 1), pickup_point=(0, 1),
                  restricted_points=[(0, 2), (1, 2), (2, 2)])

# Train the agent for 1000 episodes
agent.train(episodes=1000)

# Display the learned Q-values
# print("Learned Q-values:")
# print(agent.Q_values)

# Use the previously created agent
visualize_optimal_policy(agent)
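Note (an addition for clarity, not part of the original notebook): choose_action is epsilon-greedy, so the learned Q-values, and with them the printed policy grid and path, can vary slightly from run to run. If repeatable output is wanted, one option, assuming the use of NumPy's global random state here is acceptable, is to seed it once near the top of the notebook before any training:

    np.random.seed(42)  # illustrative seed value; makes the epsilon-greedy exploration repeatable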
R2

import numpy as np

class TaxiAgent:
    def __init__(self, rows, cols, taxi_start, pickup_point, restricted_points):
        self.rows = rows
        self.cols = cols
        self.taxi_start = taxi_start
        self.pickup_point = pickup_point
        self.restricted_points = restricted_points
        self.Q_values = np.zeros((rows, cols, 4))  # Q-values for Up, Down, Left, Right
        self.alpha = 0.1    # Learning rate
        self.gamma = 0.9    # Discount factor
        self.epsilon = 0.1  # Exploration-exploitation trade-off

    def get_next_state(self, state, action):
        row, col = state
        if action == 0:      # Up
            row = max(0, row - 1)
        elif action == 1:    # Down
            row = min(self.rows - 1, row + 1)
        elif action == 2:    # Left
            col = max(0, col - 1)
        elif action == 3:    # Right
            col = min(self.cols - 1, col + 1)
        return row, col

    def get_reward(self, state):
        if state == self.pickup_point:
            return 30    # Updated reward for premium customer pickup
        elif state == self.taxi_start:
            return 0     # No reward for staying in the starting point
        elif state in self.restricted_points:
            return -10   # Negative reward for restricted points
        else:
            return -0.5  # Living reward (small step penalty) for other states

    def choose_action(self, state):
        if np.random.rand() < self.epsilon:
            return np.random.choice(4)  # Exploration
        else:
            return np.argmax(self.Q_values[state[0], state[1]])  # Exploitation

    def update_q_values(self, state, action, next_state, reward):
        self.Q_values[state[0], state[1], action] = (1 - self.alpha) * self.Q_values[state[0], state[1], action] + \
            self.alpha * (reward + self.gamma * np.max(self.Q_values[next_state[0], next_state[1]]))

    def train(self, episodes):
        for _ in range(episodes):
            state = self.taxi_start
            while state != self.pickup_point:
                action = self.choose_action(state)
                next_state = self.get_next_state(state, action)
                reward = self.get_reward(next_state)
                self.update_q_values(state, action, next_state, reward)
                state = next_state

# Create an instance of the TaxiAgent with the new scenario
agent = TaxiAgent(rows=5, cols=5, taxi_start=(2, 2), pickup_point=(0, 3),
                  restricted_points=[(0, 2), (1, 2), (2, 2)])

# Train the agent for 1000 episodes
agent.train(episodes=1000)

# Display the learned Q-values
# print("Learned Q-values:")
# print(agent.Q_values)

def visualize_optimal_policy(agent):
    optimal_policy_arrows = [['' for _ in range(agent.cols)] for _ in range(agent.rows)]

    # Find the optimal action in each state and populate the arrows
    for i in range(agent.rows):
        for j in range(agent.cols):
            if (i, j) == agent.pickup_point:
                optimal_policy_arrows[i][j] = 'P'   # Pick-up point
            elif (i, j) == agent.taxi_start:
                optimal_policy_arrows[i][j] = 'T'   # Taxi starting point
            elif (i, j) in agent.restricted_points:
                optimal_policy_arrows[i][j] = 'R'   # Restricted point
            else:
                optimal_action = np.argmax(agent.Q_values[i, j])
                if optimal_action == 0:
                    optimal_policy_arrows[i][j] = '↑'
                elif optimal_action == 1:
                    optimal_policy_arrows[i][j] = '↓'
                elif optimal_action == 2:
                    optimal_policy_arrows[i][j] = '←'
                elif optimal_action == 3:
                    optimal_policy_arrows[i][j] = '→'

    # Print the optimal policy
    for row in optimal_policy_arrows:
        print(row)

    # Find the optimal path from the starting point to the pick-up point
    current_state = agent.taxi_start
    optimal_path = [current_state]
    while current_state != agent.pickup_point:
        action = np.argmax(agent.Q_values[current_state[0], current_state[1]])
        current_state = agent.get_next_state(current_state, action)
        optimal_path.append(current_state)

    # Print the optimal path with arrows
    for i in range(len(optimal_path) - 1):
        row, col = optimal_path[i]
        next_row, next_col = optimal_path[i + 1]
        if next_row < row:
            arrow = '↑'
        elif next_row > row:
            arrow = '↓'
        elif next_col < col:
            arrow = '←'
        elif next_col > col:
            arrow = '→'
        else:
            arrow = 'X'  # No movement (should not happen)
        print(f"Move {arrow} from {optimal_path[i]} to {optimal_path[i + 1]}")

# Create an instance of the TaxiAgent
agent = TaxiAgent(rows=5, cols=5, taxi_start=(3, 1), pickup_point=(0, 3),
                  restricted_points=[(0, 2), (1, 2), (2, 2)])

# Train the agent for 1000 episodes
agent.train(episodes=1000)

# Display the learned Q-values
# print("Learned Q-values:")
# print(agent.Q_values)

# Use the previously created agent
visualize_optimal_policy(agent)
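A small follow-up sketch (not part of the original notebook; greedy_path_reward is a hypothetical helper added here for illustration): it reuses the trained agent above to sum the rewards collected along the greedy path, giving a single number for comparing the regular-customer scenario (pickup reward 20) with the premium-customer scenario (pickup reward 30).

    def greedy_path_reward(agent, max_steps=100):
        # Follow the greedy policy from the taxi start and accumulate rewards.
        state = agent.taxi_start
        total = 0.0
        for _ in range(max_steps):  # step cap guards against a cycling policy
            if state == agent.pickup_point:
                break
            action = np.argmax(agent.Q_values[state[0], state[1]])
            state = agent.get_next_state(state, action)
            total += agent.get_reward(state)
        return total

    print("Reward along the greedy path:", greedy_path_reward(agent))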