Skip to content
Snippets Groups Projects
Unverified Commit 48fa4c57 authored by Gabriel Santamaria's avatar Gabriel Santamaria
Browse files

Changed how the statistics are computed and increased the number of simulations

- Also fixed a bug in the time computation of the MCTS algorithm
parent a758df77
No related branches found
No related tags found
No related merge requests found
......@@ -13,11 +13,15 @@ from game.strategy import MCTSStrategy, StrategyAlphaBeta
from game.helpers.mcts import UCB
from game.utils import other
# Parameters to choose from
# The ultimate goal is to find the (exploration weight, UCB function) pair
# that makes MCTS win most of the time.
ucb = [UCB.ucb, UCB.ucb_tuned]
weights = np.arange(0, 10, 0.2)
weights = np.arange(0, 20, 0.2)
def run(ce, fucb, n=100, debug=False):
def run(ce, fucb, n=200, debug=False):
"""
Runs n games with the given parameters and returns the
number of wins for each player and the total time spent
......@@ -25,6 +29,7 @@ def run(ce, fucb, n=100, debug=False):
"""
wins = [0, 0]
ttimes = [0, 0]
niters = [0, 0]
for _ in range(n):
# (size, top player, bottom player)
......@@ -33,7 +38,7 @@ def run(ce, fucb, n=100, debug=False):
ab = StrategyAlphaBeta(grid, rules, 1)
mcts = MCTSStrategy(
grid, rules, 2, simulations=10, exploration_weight=ce, ucb=fucb
grid, rules, 2, simulations=100, exploration_weight=ce, ucb=fucb
)
ply = 1
......@@ -47,11 +52,13 @@ def run(ce, fucb, n=100, debug=False):
action = ab.get_action(grid, ply)
ttimes[ply - 1] += time.time() - s
grid.move(action, ply)
niters[ply - 1] += 1
else:
s = time.time()
action = mcts.get_action(grid, ply)
ttimes[ply - 1] += time.time() - s
grid.move(action, ply)
niters[ply - 1] += 1
if debug:
grid.debug_plot(save=True)
......@@ -63,7 +70,7 @@ def run(ce, fucb, n=100, debug=False):
else:
wins[1] += 1
return wins, ttimes, n
return wins, ttimes, n, niters
def iterate():
......@@ -78,9 +85,9 @@ def iterate():
with tqdm(total=niter, desc="Computing statistics...") as pbar:
for i, u in enumerate(ucb):
for j, w in enumerate(weights):
nwins, t, n = run(w, u)
nwins, t, n, niters = run(w, u)
wins[i, j] = nwins[1] / n * 100
times[i, j] = t[1] / n
times[i, j] = t[1] / niters[1]
pbar.update(1)
......
......@@ -57,6 +57,39 @@ def strategy(
pass
def run_one():
    """
    Plays a single game of alpha-beta (player 1) against MCTS (player 2)
    and measures how long MCTS takes to choose its moves.

    Returns a tuple ``(average, win)`` where ``average`` is the mean time in
    seconds spent in ``mcts.get_action`` per MCTS move (0.0 if MCTS never got
    to move) and ``win`` is the value of ``rules.has_won(grid, 2)``.
    """
    # NOTE(review): arguments are presumably (size, top player, bottom player)
    # -- confirm against HexGrid.split_state.
    grid: HexGrid = HexGrid.split_state(4, 2, 1)
    rules: Rules = Dodo()

    ab = StrategyAlphaBeta(grid, rules, 1)
    mcts = MCTSStrategy(
        grid, rules, 2, simulations=100, exploration_weight=10, ucb=UCB.ucb_tuned
    )

    ply = 1
    ttime = 0.0
    niter = 0

    while not rules.game_over(grid):
        if ply == 1:
            action = ab.get_action(grid, ply)
            grid.move(action, ply)
        else:
            # perf_counter is monotonic and high-resolution, so the measured
            # interval cannot be distorted by system clock adjustments.
            start = time.perf_counter()
            action = mcts.get_action(grid, ply)
            ttime += time.perf_counter() - start
            grid.move(action, ply)
            # Only MCTS moves are counted: the average is per MCTS decision.
            niter += 1

        ply = other(ply)

    # The game may end before MCTS plays a single move; avoid dividing by 0.
    average = ttime / niter if niter else 0.0
    return average, rules.has_won(grid, 2)
# Play one alpha-beta vs. MCTS game and print the average MCTS decision time
# returned by run_one() together with whether player 2 (MCTS) won.
average, win = run_one()
print(f"Time: {average:.4f} | Win: {win}")
def run(n=100, debug=False):
wins = [0, 0]
ttimes = [0, 0]
......@@ -101,7 +134,7 @@ def run(n=100, debug=False):
return wins, ttimes, n
w, t, n = run(100)
# w, t, n = run(100)
print(f"Alphabeta | Wins percentage: {w[0] / n * 100:.2f}% | Time: {t[0] / n:.2f}")
print(f"MCTS | Wins percentage: {w[1] / n * 100:.2f}% | Time: {t[1] / n:.2f}")
# print(f"Alphabeta | Wins percentage: {w[0] / n * 100:.2f}% | Time: {t[0] / n:.2f}")
# print(f"MCTS | Wins percentage: {w[1] / n * 100:.2f}% | Time: {t[1] / n:.2f}")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment