fix and utilize transposition tables; we skip many moves, but I think we've probably slowed down in some ways too
This commit is contained in:
parent
88131d9ab0
commit
63f18f3d9a
6 changed files with 222 additions and 36 deletions
160
src/ai.rs
160
src/ai.rs
|
|
@ -1,6 +1,7 @@
|
|||
use crate::{
|
||||
board::{Board, explode_board, squares::*},
|
||||
game::{Game, Team},
|
||||
table::{Bound, TTEntry, TTable},
|
||||
};
|
||||
|
||||
/// Contains all corner squares
|
||||
|
|
@ -68,18 +69,26 @@ impl MoveRank {
|
|||
/// for a game with a recursion depth of `depth`.
|
||||
///
|
||||
/// We use a very simple evaluation heuristic: (Black squares - White squares).
|
||||
pub fn alphabeta(mut game: Game, depth: u8, mut alpha: i8, mut beta: i8) -> (Board, i8) {
|
||||
pub fn alphabeta(
|
||||
mut game: Game,
|
||||
depth: u8,
|
||||
mut alpha: i8,
|
||||
mut beta: i8,
|
||||
tt: &mut TTable,
|
||||
) -> (Board, i8, u64) {
|
||||
let mut num_moves = 0;
|
||||
// if we reach our maximum recursion depth, return evaluation
|
||||
if depth == 0 {
|
||||
return (0, game.score().diff());
|
||||
return (0, game.score().diff(), num_moves);
|
||||
}
|
||||
|
||||
let moves = game.available();
|
||||
if moves == 0 {
|
||||
// if no move, skip and continue recursion
|
||||
// this seems to technically introduce a bias against move-chains
|
||||
// that include skips. I haven't found it to be a big deal in play.
|
||||
game.skip();
|
||||
return (0, alphabeta(game, depth - 1, alpha, beta).1);
|
||||
return (0, alphabeta(game, depth - 1, alpha, beta, tt).1, num_moves);
|
||||
}
|
||||
|
||||
// just initially assume that the best move is no move at all. This will
|
||||
|
|
@ -94,10 +103,51 @@ pub fn alphabeta(mut game: Game, depth: u8, mut alpha: i8, mut beta: i8) -> (Boa
|
|||
// We do this by mapping moves to ranked moves and then sorting.
|
||||
let mut moves = explode_board(moves).map(MoveRank::from).collect::<Vec<_>>();
|
||||
moves.sort_unstable();
|
||||
let moves = moves
|
||||
let mut moves = moves
|
||||
.into_iter()
|
||||
.map(MoveRank::into_inner)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// copy our existing alpha/beta for the sake of classifying bounds
|
||||
let original_alpha = alpha;
|
||||
let original_beta = beta;
|
||||
|
||||
// the brilliance here is that even if we don't have a perfect value
|
||||
// computed already, the imperfect values still help us get to better values
|
||||
// quicker.
|
||||
match tt.get(game.hash) {
|
||||
Some(entry) if entry.depth >= depth => {
|
||||
match entry.bound {
|
||||
// if we know this is exact, trust it without question
|
||||
Bound::Exact => return (entry.best_move, entry.evaluation, num_moves),
|
||||
// if we have lower or upper bounds that are more precise than
|
||||
// our existing alpha and beta values, accept the ones found in
|
||||
// the cache.
|
||||
Bound::Lower => alpha = alpha.max(entry.evaluation),
|
||||
Bound::Upper => beta = beta.min(entry.evaluation),
|
||||
}
|
||||
// if we have collapsed the window between alpha and beta, just
|
||||
// accept the cached entry.
|
||||
if alpha >= beta {
|
||||
return (entry.best_move, entry.evaluation, num_moves);
|
||||
}
|
||||
|
||||
// otherwise, if our best move is available, move it to the front
|
||||
if let Some(best_move_idx) = moves.iter().position(|m| *m == entry.best_move) {
|
||||
moves[..=best_move_idx].rotate_right(1);
|
||||
}
|
||||
}
|
||||
Some(entry) => {
|
||||
// otherwise, if our best move is available, move it to the front
|
||||
if let Some(best_move_idx) = moves.iter().position(|m| *m == entry.best_move) {
|
||||
moves[..=best_move_idx].rotate_right(1);
|
||||
}
|
||||
}
|
||||
None => {}
|
||||
}
|
||||
|
||||
num_moves = moves.len() as u64;
|
||||
|
||||
// I just establish a convention of maximizing for black and minimizing for white.
|
||||
// I'm not sure if that's conventional or not, but it's what I chose.
|
||||
match game.current_team {
|
||||
|
|
@ -106,7 +156,8 @@ pub fn alphabeta(mut game: Game, depth: u8, mut alpha: i8, mut beta: i8) -> (Boa
|
|||
let mut g = game.clone();
|
||||
g.play(mv);
|
||||
// maximize for the evaluation of subsequent moves
|
||||
let evaluation = alphabeta(g, depth - 1, alpha, beta).1;
|
||||
let (_, evaluation, num_moves_sub) = alphabeta(g, depth - 1, alpha, beta, tt);
|
||||
num_moves += num_moves_sub;
|
||||
// if our evaluated move is superior to the alpha, update
|
||||
// it.
|
||||
if evaluation > alpha {
|
||||
|
|
@ -118,14 +169,30 @@ pub fn alphabeta(mut game: Game, depth: u8, mut alpha: i8, mut beta: i8) -> (Boa
|
|||
break;
|
||||
}
|
||||
}
|
||||
(best_move, alpha)
|
||||
let bound = if alpha >= beta {
|
||||
Bound::Lower
|
||||
} else if alpha <= original_alpha {
|
||||
Bound::Upper
|
||||
} else {
|
||||
// i.e. original_alpha < alpha && alpha < beta
|
||||
Bound::Exact
|
||||
};
|
||||
tt.store(TTEntry {
|
||||
depth,
|
||||
evaluation: alpha,
|
||||
hash: game.hash,
|
||||
bound,
|
||||
best_move,
|
||||
});
|
||||
(best_move, alpha, num_moves)
|
||||
}
|
||||
Team::White => {
|
||||
for mv in moves {
|
||||
let mut g = game.clone();
|
||||
g.play(mv);
|
||||
// minimize for the evaluation of subsequent moves
|
||||
let evaluation = alphabeta(g, depth - 1, alpha, beta).1;
|
||||
let (_, evaluation, num_moves_sub) = alphabeta(g, depth - 1, alpha, beta, tt);
|
||||
num_moves += num_moves_sub;
|
||||
// if our evaluated move produces lower eval than the beta,
|
||||
// update beta.
|
||||
if evaluation < beta {
|
||||
|
|
@ -137,7 +204,21 @@ pub fn alphabeta(mut game: Game, depth: u8, mut alpha: i8, mut beta: i8) -> (Boa
|
|||
break;
|
||||
}
|
||||
}
|
||||
(best_move, beta)
|
||||
let bound = if beta <= alpha {
|
||||
Bound::Upper
|
||||
} else if beta >= original_beta {
|
||||
Bound::Lower
|
||||
} else {
|
||||
Bound::Exact
|
||||
};
|
||||
tt.store(TTEntry {
|
||||
depth,
|
||||
evaluation: beta,
|
||||
hash: game.hash,
|
||||
bound,
|
||||
best_move,
|
||||
});
|
||||
(best_move, beta, num_moves)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -168,7 +249,8 @@ mod tests {
|
|||
|
||||
fn assert_ai_move_is_legal(game: &Game, depth: u8) -> Board {
|
||||
let available = game.available();
|
||||
let best_move = alphabeta(game.clone(), depth, i8::MIN + 1, i8::MAX - 1).0;
|
||||
let mut tt = TTable::with_mb(2);
|
||||
let best_move = alphabeta(game.clone(), depth, i8::MIN + 1, i8::MAX - 1, &mut tt).0;
|
||||
assert_ne!(best_move, 0, "AI should return a move when one exists");
|
||||
assert_eq!(
|
||||
best_move & available,
|
||||
|
|
@ -182,8 +264,9 @@ mod tests {
|
|||
// just a sanity check to ensure that my AI performs up to snuff with another popular engine
|
||||
fn opening() {
|
||||
let mut game = Game::default();
|
||||
let mut tt = TTable::with_mb(24);
|
||||
game.play(D3);
|
||||
let (best_move, _) = alphabeta(game.clone(), 12, i8::MIN + 1, i8::MAX - 1);
|
||||
let (best_move, _, _) = alphabeta(game.clone(), 14, i8::MIN + 1, i8::MAX - 1, &mut tt);
|
||||
assert_eq!(best_move, C3);
|
||||
}
|
||||
|
||||
|
|
@ -217,13 +300,65 @@ mod tests {
|
|||
#[test]
|
||||
fn ai_passes_when_no_moves_exist() {
|
||||
let board = BitBoard::from_jon("wwwwwwww/wwwwwwww/////").expect("Valid board");
|
||||
let mut tt = TTable::with_mb(2);
|
||||
let game = Game::from_parts(Team::Black, board);
|
||||
assert_eq!(game.available(), 0);
|
||||
let (mv, eval) = alphabeta(game.clone(), 4, i8::MIN + 1, i8::MAX - 1);
|
||||
let (mv, eval, _) = alphabeta(game.clone(), 4, i8::MIN + 1, i8::MAX - 1, &mut tt);
|
||||
assert_eq!(mv, 0);
|
||||
assert_eq!(eval, game.score().diff());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tt_exact_root_hit_eliminates_repeat_search() {
|
||||
let game = Game::default();
|
||||
let mut tt = TTable::with_mb(2);
|
||||
|
||||
let (best_move, eval, first_considered) =
|
||||
alphabeta(game.clone(), 1, i8::MIN + 1, i8::MAX - 1, &mut tt);
|
||||
assert!(first_considered > 0);
|
||||
|
||||
let (cached_move, cached_eval, second_considered) =
|
||||
alphabeta(game.clone(), 1, i8::MIN + 1, i8::MAX - 1, &mut tt);
|
||||
|
||||
assert_eq!(cached_move, best_move);
|
||||
assert_eq!(cached_eval, eval);
|
||||
assert_eq!(second_considered, 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tt_lower_bound_hit_still_searches_with_wide_window() {
|
||||
let game = Game::default();
|
||||
let mut tt = TTable::with_mb(2);
|
||||
|
||||
tt.store(TTEntry {
|
||||
bound: Bound::Lower,
|
||||
evaluation: 0,
|
||||
depth: 1,
|
||||
best_move: D3,
|
||||
hash: game.hash,
|
||||
});
|
||||
|
||||
let (_, _, considered) = alphabeta(game.clone(), 1, i8::MIN + 1, i8::MAX - 1, &mut tt);
|
||||
assert!(considered > 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tt_upper_bound_hit_still_searches_with_wide_window() {
|
||||
let game = Game::default();
|
||||
let mut tt = TTable::with_mb(2);
|
||||
|
||||
tt.store(TTEntry {
|
||||
bound: Bound::Upper,
|
||||
evaluation: 0,
|
||||
depth: 1,
|
||||
best_move: D3,
|
||||
hash: game.hash,
|
||||
});
|
||||
|
||||
let (_, _, considered) = alphabeta(game.clone(), 1, i8::MIN + 1, i8::MAX - 1, &mut tt);
|
||||
assert!(considered > 0);
|
||||
}
|
||||
|
||||
// I found that, despite the AI clobbering me, the AI could not
|
||||
// compete with itself very well. I'm honestly not quite sure why that is.
|
||||
#[test]
|
||||
|
|
@ -237,6 +372,7 @@ mod tests {
|
|||
(Team::Black, 123),
|
||||
(Team::White, 87132895),
|
||||
];
|
||||
let mut tt = TTable::with_mb(2);
|
||||
|
||||
for (team, seed) in cases {
|
||||
let mut rng = StdRng::seed_from_u64(seed);
|
||||
|
|
@ -252,7 +388,7 @@ mod tests {
|
|||
continue;
|
||||
}
|
||||
let mv = if game.current_team == team {
|
||||
alphabeta(game.clone(), 8, i8::MIN + 1, i8::MAX - 1).0
|
||||
alphabeta(game.clone(), 8, i8::MIN + 1, i8::MAX - 1, &mut tt).0
|
||||
} else {
|
||||
random_move(&game, &mut rng)
|
||||
};
|
||||
|
|
|
|||
|
|
@ -401,8 +401,8 @@ impl BitBoard {
|
|||
|
||||
result
|
||||
}
|
||||
/// Apply play to a board and compute effected reversals
|
||||
pub fn play(&mut self, current_team: Team, play: Board) {
|
||||
/// Apply play to a board and compute effected reversals (returns the flipped discs)
|
||||
pub fn play(&mut self, current_team: Team, play: Board) -> Board {
|
||||
// bitwise OR gives spots with either white OR black discs
|
||||
// bitwise NEG gives the spots with neither white nor black discs
|
||||
let mut flips = 0;
|
||||
|
|
@ -435,6 +435,7 @@ impl BitBoard {
|
|||
|
||||
self.boards[current_team_idx] |= flips | play;
|
||||
self.boards[current_team.next() as usize] ^= flips;
|
||||
flips
|
||||
}
|
||||
|
||||
/// Compute the score (B, W) by counting the excited bits in each board.
|
||||
|
|
@ -451,7 +452,7 @@ impl BitBoard {
|
|||
pub fn compute_hash(&self, playing: Team) -> u64 {
|
||||
let mut hash = 0;
|
||||
for (player, board) in self.boards.iter().enumerate() {
|
||||
for offset in 0..64 as u64 {
|
||||
for offset in 0..64_u64 {
|
||||
if (1 << offset) & board > 0 {
|
||||
hash ^= ZOBRIST_TABLE[player][offset as usize];
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ use othello::{
|
|||
view::{Overlay, View},
|
||||
},
|
||||
game::Game,
|
||||
table::TTable,
|
||||
};
|
||||
|
||||
use anyhow::Context;
|
||||
|
|
@ -26,6 +27,7 @@ const PLAY_RE: &str = r"^(play - )?([abcdefghABCDEFGH])(\d)$";
|
|||
pub fn run() -> anyhow::Result<()> {
|
||||
let mut game = Game::default();
|
||||
let mut board_changed = true;
|
||||
let mut tt = TTable::with_mb(16);
|
||||
|
||||
let play_re = Regex::new(PLAY_RE).unwrap();
|
||||
|
||||
|
|
@ -103,8 +105,9 @@ pub fn run() -> anyhow::Result<()> {
|
|||
println!("beep. boop. no legal moves. skipping turn");
|
||||
game.skip();
|
||||
} else if !tracing {
|
||||
let (mv, eval) = alphabeta(game.clone(), 14, i8::MIN + 1, i8::MAX - 1);
|
||||
println!("beep. boop. eval = {eval}");
|
||||
let (mv, eval, num_moves) =
|
||||
alphabeta(game.clone(), 12, i8::MIN + 1, i8::MAX - 1, &mut tt);
|
||||
println!("beep. boop. eval = {eval}, num_moves = {num_moves}");
|
||||
game.play(mv);
|
||||
}
|
||||
board_changed = true;
|
||||
|
|
|
|||
17
src/game.rs
17
src/game.rs
|
|
@ -1,5 +1,5 @@
|
|||
use crate::{
|
||||
board::{BitBoard, Board, Score},
|
||||
board::{BitBoard, Board, Score, explode_board},
|
||||
zobrist::{ZOBRIST_TABLE, ZOBRIST_TURN},
|
||||
};
|
||||
|
||||
|
|
@ -25,7 +25,7 @@ impl Team {
|
|||
#[derive(Clone)]
|
||||
pub struct Game {
|
||||
pub current_team: Team,
|
||||
hash: u64,
|
||||
pub hash: u64,
|
||||
board: BitBoard,
|
||||
}
|
||||
|
||||
|
|
@ -44,10 +44,17 @@ impl Default for Game {
|
|||
impl Game {
|
||||
/// Play a move. Automatically transitions state to next player.
|
||||
pub fn play(&mut self, player_move: Board) {
|
||||
// add newly placed disc to hash for current player
|
||||
self.hash ^=
|
||||
ZOBRIST_TABLE[self.current_team as usize][player_move.trailing_zeros() as usize];
|
||||
for disc in explode_board(self.board.play(self.current_team, player_move)) {
|
||||
// remove flipped discs for opponent player
|
||||
self.hash ^=
|
||||
ZOBRIST_TABLE[self.current_team.next() as usize][disc.trailing_zeros() as usize];
|
||||
// add flipped discs for current player
|
||||
self.hash ^= ZOBRIST_TABLE[self.current_team as usize][disc.trailing_zeros() as usize];
|
||||
}
|
||||
self.hash ^= *ZOBRIST_TURN;
|
||||
self.board.play(self.current_team, player_move);
|
||||
self.current_team = self.current_team.next();
|
||||
}
|
||||
|
||||
|
|
@ -108,8 +115,10 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn game_inits_with_hash() {
|
||||
let game = Game::default();
|
||||
let mut game = Game::default();
|
||||
assert_ne!(game.hash, 0);
|
||||
game.play(E6);
|
||||
assert_eq!(game.board.compute_hash(Team::White), game.hash);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
pub mod ai;
|
||||
pub mod board;
|
||||
pub mod game;
|
||||
mod table;
|
||||
pub mod table;
|
||||
mod zobrist;
|
||||
|
|
|
|||
65
src/table.rs
65
src/table.rs
|
|
@ -1,32 +1,69 @@
|
|||
use std::collections::HashMap;
|
||||
|
||||
use crate::{board::BitBoard, game::Game, zobrist::ZOBRIST_TABLE};
|
||||
use crate::board::Board;
|
||||
|
||||
/// How the evaluation stored in a transposition-table entry relates to the
/// true value of the position.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Bound {
    /// The stored evaluation is the position's value; a lookup at sufficient
    /// depth can return it directly.
    Exact,
    /// The stored evaluation is a lower bound; a lookup may use it to raise
    /// alpha.
    Lower,
    /// The stored evaluation is an upper bound; a lookup may use it to lower
    /// beta.
    Upper,
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct TTEntry {
|
||||
bound: Bound,
|
||||
evaluation: i8,
|
||||
depth: u8,
|
||||
pub bound: Bound,
|
||||
pub evaluation: i8,
|
||||
pub depth: u8,
|
||||
pub best_move: Board,
|
||||
pub hash: u64,
|
||||
}
|
||||
|
||||
impl TTEntry {
|
||||
pub fn quality(&self) -> u16 {
|
||||
let bound = match self.bound {
|
||||
Bound::Exact => 1,
|
||||
_ => 0,
|
||||
};
|
||||
(self.depth as u16) * 2 + bound
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct TTable {
|
||||
// replace with `DashMap` if we utilize concurrency
|
||||
inner: HashMap<u64, TTEntry>,
|
||||
/// We use a vector of options since we can easily describe zero-values in
|
||||
/// this configuration, and we can utilize the fact that the zobrist hash
|
||||
/// is indeed a hash. If we used a hashmap, we'd have to write funky
|
||||
/// wrapper types that override the default behavior for hashes.
|
||||
inner: Vec<Option<TTEntry>>,
|
||||
/// A mask is used so that we never have to take the hash modulo the length
|
||||
/// of the inner vector to stay within index bounds. This just gives us a
|
||||
/// faster way to the same end (valid only for power-of-two lengths).
|
||||
mask: u64,
|
||||
}
|
||||
|
||||
impl TTable {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
pub fn with_mb(size_mb: usize) -> Self {
|
||||
let buckets = size_mb * 1024 * 1024;
|
||||
let inner = vec![None; buckets];
|
||||
let mask = (buckets - 1) as u64;
|
||||
Self { inner, mask }
|
||||
}
|
||||
|
||||
pub fn upsert(&mut self, game: &Game) {
|
||||
//self.inner.entry(key).or_insert(default)
|
||||
todo!()
|
||||
/// Cache a computation in the transposition table. This function shows
|
||||
/// preference to entries computed at a greater depth and entries with
|
||||
/// more precise bounds (using `TTEntry::quality`).
|
||||
pub fn store(&mut self, entry: TTEntry) {
|
||||
let idx = (entry.hash & self.mask) as usize;
|
||||
if self.inner[idx]
|
||||
.as_ref()
|
||||
.is_none_or(|existing| entry.quality() > existing.quality())
|
||||
{
|
||||
self.inner[idx] = Some(entry);
|
||||
}
|
||||
}
|
||||
|
||||
/// Retrieve a computed entry from the table. If it doesn't exist
|
||||
/// or an entry at the same index determined by the hash, then `None` will
|
||||
/// be returned. Otherwise, `Some(&TTEntry)` will be returned.
|
||||
pub fn get(&mut self, hash: u64) -> Option<&TTEntry> {
|
||||
let idx = (hash & self.mask) as usize;
|
||||
self.inner[idx].as_ref().filter(|entry| entry.hash == hash)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue