boardgame.io Gobang: AI transformation

Posted by karapantass on Fri, 28 Jan 2022 04:29:02 +0100

TL;DR: Click here to view the complete code.

Last time, we built a simple Gobang game with boardgame.io + WGo.js + React and used the engine's built-in bot as our AI. However, that AI is very weak. Let's try to transform it in the direction of AlphaGo and create our own AI.

Monte Carlo

We know that AlphaGo combines Monte Carlo tree search with neural networks. Through the Debug panel, we can see that the AI bundled with boardgame.io is also based on the Monte Carlo algorithm. However, if we want to pair its Monte Carlo search with a neural network the way AlphaGo does, we find that its tree nodes lack some necessary information: the algorithm has no support for adding noise, prior probabilities, or selecting nodes by weight. So we have to write our own Monte Carlo tree search.

Based on this analysis, our Monte Carlo tree search needs to provide the following features:

  1. A customizable evaluator, so that we can plug in either neural-network evaluation or random-playout evaluation
  2. Support for noise
  3. Node selection based on weight and temperature
  4. Virtual loss, so that the search can run in parallel; this matters especially with a neural-network evaluator, which can evaluate multiple positions in one batch

Create the src/MCTS.js file and add the following:

import { dirichletK, randomPick } from "./Random";

export function Node(a) {
    return {
        a: a,
        p: 1,
        q: 0,
        w: 0,
        n: 0,
        // null - unexpanded, [] - terminal node, [...] - intermediate node
        children: null
    };
}

function isLeaf(node) {
    return node.n === 0 || node.children === null || node.children.length === 0;
}

function ucbScore(node, c) {
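    // PUCT-style score: exploitation term q plus an exploration bonus that
    // grows with the prior p and the parent's visit count (via c, computed
    // by the caller) and shrinks as this node's own visit count n grows.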
    return node.q + (c * node.p) / (node.n + 1);
}

function ucbSelectChild(node) {
    const c = 5 * Math.sqrt(node.n);
    let best = node.children[0];
    let bestScore = ucbScore(best, c);

    for (let i = 1; i < node.children.length; i++) {
        const child = node.children[i];
        const score = ucbScore(child, c);

        if (score > bestScore) {
            best = child;
            bestScore = score;
        }
    }

    return best;
}

function applyPrioProb(root, probs, useNoise) {
    if (!useNoise) {
        for (let i = 0; i < root.children.length; ++i) {
            const child = root.children[i];
            child.p = probs[child.a];
        }

        return;
    }

    const dir = dirichletK(root.children.length, 0.03);

    for (let i = 0; i < root.children.length; ++i) {
        const child = root.children[i];
        child.p = dir[i] * 0.25 + 0.75 * probs[child.a];
    }
}

function backprop(path, r) {
    let i = path.length;

    while (i-- > 0) {
        let leaf = path[i];
        leaf.n += 1;
        leaf.w += r;
        leaf.q = leaf.w / leaf.n;
        r = -r;
    }
}

function backpropAndRevertVirtualLoss(path, r) {
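    // applyVirtualLoss already incremented n along the path, so here we only
    // add the evaluated value to w and recompute q, flipping the sign per ply.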
    let i = path.length;

    while (i-- > 0) {
        let leaf = path[i];
        leaf.w += r;
        leaf.q = leaf.w / leaf.n;
        r = -r;
    }
}

function applyVirtualLoss(path) {
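    // Virtual loss: increment n without touching w, temporarily lowering q so
    // concurrent simulations are discouraged from descending the same path.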
    let i = path.length;

    while (i-- > 0) {
        let leaf = path[i];
        leaf.n += 1;
        leaf.q = leaf.w / leaf.n;
    }
}

function revertVirtualLoss(path) {
    let i = path.length;

    while (i-- > 0) {
        let leaf = path[i];
        leaf.n -= 1;
        leaf.q = leaf.w / leaf.n;
    }
}

export class MCTS {
    constructor({ evaluator, maxIteration, maxTime, useNoise }) {
        if (!maxIteration && !maxTime)
            throw new Error("maxIteration and maxTime cannot be 0 at same time");
        this._eval = evaluator;
        this._maxIteration = maxIteration;
        this._maxTime = maxTime;
        this._batch = new Set();
        this._batchSize = 8;
        this._useNoise = useNoise;
        this._searching = false;
        this._stop = false;
        this._timer = null;
    }

    async exec(root, state, opts) {
        if (this._searching)
            throw new Error("another searching is in progress!");
        let { maxIteration, maxTime, tao } = {
            maxIteration: this._maxIteration,
            maxTime: this._maxTime,
            tao: 0.001,
            ...opts
        };
        if (maxIteration === 0 && maxTime === 0)
            throw new Error(
                "maxIteration and maxTime cannot be 0 at same time"
            );

        if (maxTime > 0) {
            this._timer = setTimeout(() => {
                this.stop();
            }, maxTime);
        }

        if (!maxIteration) maxIteration = Number.MAX_SAFE_INTEGER;
        this._searching = true;

        for (let it = 0; it < maxIteration && !this._stop; ++it)
            await this._step(root, state.clone());

        await this._flush();
        this._searching = false;
        this._stop = false;
        clearTimeout(this._timer);

        let probs = getActionProbs(root, tao);

        return {
            bestChild: randomPick(root.children, probs),
            actionProbs: probs.reduce((acc, p, i) => {
                acc[root.children[i].a] = p;
                return acc;
            }, {})
        };
    }

    stop() {
        if (!this._searching) return;
        this._stop = true;
        clearTimeout(this._timer);
    }

    async _step(root, st) {
        const path = [root];
        let leaf = root;

        while (!isLeaf(leaf)) {
            leaf = ucbSelectChild(leaf);
            path.push(leaf);

            st.makeMove(leaf.a);
        }

        const gameover = st.gameover();
        if (gameover) {
            leaf.children = [];
            let score = 0;
            if (gameover.winner === st.currentPlayer) score = 1;
            else if (gameover.draw) score = 0;
            else score = -1;

            return backprop(path, -score);
        } else if (leaf.children === null) { // expand before evaluation (note: concurrent steps may expand the same node; we don't handle that conflict)
            let actions = st.legalMoves();
            leaf.children = actions.map((a) => Node(a));
        }
        
        applyVirtualLoss(path);

        let job = {
            state: st,
            node: leaf,
            path: path
        };
        if (this._batch.add(job).size === this._batchSize) {
            await this._flush();
        }
    }

    async _flush() {
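        // Evaluate all queued leaves in one batch, then apply the prior
        // probabilities and back-propagate while reverting the virtual losses.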
        if (this._batch.size === 0) return;
        const list = Array.from(this._batch.values());
        const vals = await this._eval(list.map((b) => b.state));

        for (let i = 0; i < list.length; i++) {
            const info = list[i];
            const leaf = info.node;

            applyPrioProb(leaf, vals[i].probs, this._useNoise);
            backpropAndRevertVirtualLoss(info.path, -vals[i].value);
        }

        this._batch.clear();
    }
}

function getActionProbs(root, tao) {
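    // Temperature-scaled visit counts: p_i is proportional to n_i^(1/tao).
    // As tao approaches 0 this approaches argmax; dividing by the max visit
    // count first keeps Math.pow from overflowing.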
    tao = 1 / tao;
    let maxv = root.children.reduce((x, c) => Math.max(x, c.n), 0);
    let sum = 0;
    let probs = root.children.map((child) => {
        const p = Math.pow(child.n / maxv, tao);
        sum += p;
        return p;
    });

    for (let i = 0; i < probs.length; i++) probs[i] /= sum;

    return probs;
}
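
By the way, the file above imports dirichletK and randomPick from src/Random.js, which this post doesn't list; the real implementation is in the complete code linked at the top. A minimal sketch, assuming a standard Gamma-based Dirichlet sampler and simple weighted sampling, could look like this:

// src/Random.js - a minimal sketch; the linked complete code may differ.

// Standard normal variate via the Box-Muller transform.
function gaussian() {
    let u = 0;
    while (u === 0) u = Math.random();
    return Math.sqrt(-2 * Math.log(u)) * Math.cos(2 * Math.PI * Math.random());
}

// Gamma(alpha, 1) variate using the Marsaglia-Tsang method, with the boost
// Gamma(alpha) = Gamma(alpha + 1) * U^(1/alpha) for alpha < 1.
function gamma(alpha) {
    if (alpha < 1) return gamma(alpha + 1) * Math.pow(Math.random(), 1 / alpha);
    const d = alpha - 1 / 3;
    const c = 1 / Math.sqrt(9 * d);
    for (;;) {
        let x, v;
        do {
            x = gaussian();
            v = 1 + c * x;
        } while (v <= 0);
        v = v * v * v;
        const u = Math.random();
        if (u < 1 - 0.0331 * x * x * x * x) return d * v;
        if (Math.log(u) < 0.5 * x * x + d * (1 - v + Math.log(v))) return d * v;
    }
}

// Sample a k-dimensional symmetric Dirichlet(alpha): draw k Gamma(alpha)
// variates and normalize them so they sum to 1.
export function dirichletK(k, alpha) {
    const g = new Array(k);
    let sum = 0;
    for (let i = 0; i < k; i++) {
        g[i] = gamma(alpha);
        sum += g[i];
    }
    return g.map((x) => x / sum);
}

// Pick one item with probability proportional to probs[i] (probs sums to 1).
export function randomPick(items, probs) {
    let r = Math.random();
    for (let i = 0; i < items.length; i++) {
        r -= probs[i];
        if (r <= 0) return items[i];
    }
    return items[items.length - 1];
}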

The exec function takes both the root node and the state so that old nodes can be reused. If we recreated the tree from the current state on every search, the information from the previous search (the expanded nodes, their visit counts, and so on) could not be reused at all.
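
For instance, a hypothetical snippet (someEvaluator stands in for any evaluator matching the interface described below) could reuse the tree across turns like this:

const mcts = new MCTS({
    evaluator: someEvaluator,
    maxIteration: 1000,
    useNoise: false
});
let root = Node(null);

const { bestChild } = await mcts.exec(root, state);
state.makeMove(bestChild.a);
root = bestChild; // keep the searched subtree instead of rebuilding it next turn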

To keep future training decoupled from boardgame.io, we also abstract our own State and define a set of interfaces it must implement:

  1. legalMoves() lists all legal moves, each represented by a number
  2. makeMove() plays the move and swaps the current player
  3. gameover() returns the game result
  4. currentPlayer is the player whose turn it is
  5. clone() clones the current state

New State

Next, we create a new state.

Create src/State.js and add the following:

export const PLAYER_BLACK = 1;
export const PLAYER_WHITE = -1;

function checkWinnerByLine(stones, clr, start, end, stride) {
    let cnt = 0;

    for (; cnt < 5 && start !== end; start += stride) {
        if (stones[start] === clr) cnt++;
        else cnt = 0;
    }

    return cnt >= 5;
}

export function checkWinnerByMove(boardSize, stones, p) {
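    // Scan the horizontal, vertical, and both diagonal lines through p for
    // five consecutive stones, clamping each scan to at most 4 cells on
    // either side of p so it never wraps across board edges.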
    const _min = 4;
    const c = stones[p];
    if (c === 0) return 0;
    let x0 = p % boardSize;
    let y0 = Math.floor(p / boardSize);
    let x1 = boardSize - 1 - x0;
    let y1 = boardSize - 1 - y0;
    let start = 0,
        end = 0,
        stride = 1;
    x0 = Math.min(x0, _min);
    x1 = Math.min(x1, _min);
    start = p - x0;
    end = p + x1 + 1;
    if (checkWinnerByLine(stones, c, start, end, 1)) return c;

    stride = boardSize;
    y0 = Math.min(y0, _min);
    y1 = Math.min(y1, _min);
    start = p - y0 * stride;
    end = p + (y1 + 1) * stride;
    if (checkWinnerByLine(stones, c, start, end, stride)) return c;

    stride = boardSize + 1;
    let ma = Math.min(x0, y0),
        mb = Math.min(x1, y1);
    start = p - ma * stride;
    end = p + (mb + 1) * stride;
    if (checkWinnerByLine(stones, c, start, end, stride)) return c;

    stride = boardSize - 1;
    ma = Math.min(x1, y0);
    mb = Math.min(x0, y1);
    start = p - ma * stride;
    end = p + (mb + 1) * stride;
    if (checkWinnerByLine(stones, c, start, end, stride)) return c;

    return 0;
}

export class State {
    constructor({ boardSize }) {
        this.boardSize = boardSize;
        this.stones = new Array(boardSize * boardSize).fill(0);
        this.currentPlayer = PLAYER_BLACK;
        this.moveHistory = [];
        this._gameover = null;
    }

    clone() {
        let newObj = new State({
            boardSize: this.boardSize
        });
        newObj.copy(this);
        return newObj;
    }

    copy(src) {
        if (src.boardSize !== this.boardSize)
            throw new Error("incompatible board");

        for (let i = 0; i < src.stones.length; i++) {
            this.stones[i] = src.stones[i];
        }

        this.currentPlayer = src.currentPlayer;

        for (let i = 0; i < src.moveHistory.length; i++) {
            this.moveHistory[i] = src.moveHistory[i];
        }

        this.moveHistory.length = src.moveHistory.length;
        this._gameover = src._gameover;
    }

    makeMove(mov) {
        if (this._gameover) return this;
        this.stones[mov] = this.currentPlayer;
        this.moveHistory.push(mov);
        this.currentPlayer = -this.currentPlayer;
        return this;
    }

    legalMoves() {
        let moves = [];
        for (let i = 0; i < this.stones.length; i++) {
            if (this.stones[i] === 0) moves.push(i);
        }
        return moves;
    }

    gameover() {
        if (this._gameover || this.moveHistory.length === 0)
            return this._gameover;
        const mov = this.moveHistory[this.moveHistory.length - 1];
        const winner = checkWinnerByMove(this.boardSize, this.stones, mov);
        if (winner !== 0) {
            this._gameover = { winner };
        } else if (this.moveHistory.length === this.stones.length) {
            this._gameover = { draw: true };
        }
        return this._gameover;
    }
}
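
As a quick illustrative check (assuming a 15x15 board), black can win by playing five in a row on the top row while white answers on the second row:

const s = new State({ boardSize: 15 });
// black plays 0..4 on the top row, white answers with 15..18 on the second row
[0, 15, 1, 16, 2, 17, 3, 18, 4].forEach((m) => s.makeMove(m));
console.log(s.gameover()); // { winner: 1 }, i.e. PLAYER_BLACK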

Evaluator

Next, we implement the evaluator. The evaluator estimates the win rate of the current state and a probability for each legal move, and it should support evaluating multiple states in one batch. Since we do not have a neural network yet, we first implement an evaluator based on the Monte Carlo method, i.e., random playouts from the current position. This Monte Carlo evaluator combined with the search algorithm above constitutes traditional Monte Carlo tree search.

Create src/Evals.js and paste the following:

import { BOARD_SIZE } from "./Consts";

export function MCEvaluator() {
    return async function evaluator(ss) {
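        // Yield to the event loop so the UI stays responsive; note that
        // setImmediate is non-standard in browsers and may need a polyfill.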
        await new Promise((resolve) => setImmediate(resolve));
        return ss.map((s) => {
            const boardSize = BOARD_SIZE;
            const acts = s.legalMoves();
            const v = randomPlay(s.clone(), acts);
            const p = new Array(boardSize * boardSize).fill(0);

            for (let i = 0; i < acts.length; i++) p[acts[i]] = 1 / acts.length;

            return {
                value: v,
                probs: p
            };
        });
    };
}

function randomPlay(st, acts) {
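    // Play uniformly random moves until the game ends, then score the result
    // from the perspective of the player to move in the starting position.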
    const p = st.currentPlayer;
    let gameover = st.gameover();

    for (let i = 0; i < acts.length && !gameover; i++) {
        const j = i + Math.floor(Math.random() * (acts.length - i));
        const x = acts[j];
        acts[j] = acts[i];
        acts[i] = x;

        gameover = st.makeMove(x).gameover();
    }

    if (p === gameover.winner) return 1;
    else if (gameover.draw) return 0;
    return -1;
}

Integration

With the Monte Carlo search and the new State in place, we now integrate them into a new Game with AI. We define a Game class that ties together the Client, the MCTS, and so on; each Game instance represents one game.

Rename src/Game.js to src/GameDef.js, then create a new src/Game.js and add the following:

import { Client } from "boardgame.io/client";
import { MCTS, Node } from "./MCTS";
import { MCEvaluator } from "./Evals";
import { State } from "./State";
import { BOARD_SIZE } from "./Consts";
import { Gomoku } from "./GameDef";

export class Game {
    constructor({ playAs }) {
        this._client = Client({ game: Gomoku });
        this._mcts = new MCTS({
            evaluator: MCEvaluator(),
            useNoise: false,
            maxIteration: 3200 * 2
        });
        this._root = Node(null);
        this._state = new State({ boardSize: BOARD_SIZE });
        this._playAs = playAs;
        this._started = false;
        this._stopped = false;
        this._aiPlayer = { 0: "1", 1: "0" }[this._playAs];
        this._currentPlayer = this.getState().ctx.currentPlayer;
        this._stateId = this.getState()._stateID - 1;
    }

    get currentPlayer() {
        return this._currentPlayer;
    }

    get playAs() {
        return this._playAs;
    }

    getState() {
        return this._client.getState();
    }

    putStone(id) {
        if (!this._started || this._stopped) return;
        if (this._playAs && this._playAs !== this.currentPlayer) return;
        this._client.moves.putStone(id);
    }

    subscribe(f) {
        return this._client.subscribe(f);
    }

    start() {
        if (this._started || this._stopped) return;

        this._client.subscribe((s) => {
            this._currentPlayer = s.ctx.currentPlayer;

            let moves = (s.deltalog || [])
                .filter((log) => log.action.type === "MAKE_MOVE")
                .map((log) => log.action.payload.args[0]);
            for (let mov of moves) {
                this._advance(mov);
            }

            if (s.ctx.gameover) return;
            if (s._stateID === this._stateId) return;

            this._stateId = s._stateID;

            if (this._aiPlayer === s.ctx.currentPlayer) {
                this._mcts.exec(this._root, this._state).then((result) => {
                    if (this._stopped) return;

                    this._client.moves.putStone(result.bestChild.a);
                });
            }
        });

        this._client.start();

        this._started = true;
    }

    stop() {
        this._client.stop();
        this._mcts.stop();
        this._stopped = true;
    }

    _advance(mov) {
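        // Keep our State and the search tree in sync with boardgame.io:
        // apply the move, then descend into the matching child so previous
        // search statistics are reused.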
        this._state.makeMove(mov);
        let root = this._root;
        if (!root.children) root = Node(mov);
        else if (root.children.length === 0)
            throw new Error("try to make move on terminal node");
        else root = root.children.find((c) => c.a === mov);
        this._root = root;
    }
}

Next, let's rework src/App.js:

//import { Client } from "boardgame.io/react";

import React, {
    useState,
    useMemo,
    useEffect,
    useReducer
} from "react";

import { GomokuBoard } from "./Board";
import { Game } from "./Game";

//const App = Client({ game: Gomoku });

function App() {
    const [gameId, newGame] = useReducer((id) => id + 1, 1);
    const game = useMemo(() => {
        let game = new Game({
            playAs: Math.random() > 0.5 ? "0" : "1"
        });
        return game;
    }, [gameId]);
    const [state, setState] = useState(game.getState());

    const moves = useMemo(
        () => ({
            putStone: (id) => game.putStone(id)
        }),
        [game]
    );

    useEffect(() => {
        game.start();
        let unsub = game.subscribe(setState);
        return () => {
            unsub();
            game.stop();
        };
    }, [game]);

    const currentPlayer = game.currentPlayer;
    let status = "Your move";
    const gameover = state.ctx.gameover;

    if (gameover) {
        if (gameover.winner === "0") status = "Black wins";
        else if (gameover.winner === "1") status = "White wins";
        else status = "Draw";
    } else if (game.playAs && game.playAs !== currentPlayer) {
        status = "Thinking...";
    }

    return (
        <>
            <div>
                <button onClick={newGame}>Play again</button>
            </div>
            <GomokuBoard {...state} moves={moves} />
            <div>
                <p>{status}</p>
            </div>
        </>
    );
}

export default App;

All right, it's all set. Let's go and see the effect.

Note: our new Game class synchronizes its own state and triggers the AI automatically, which conflicts with the log-backtracking feature in the Debug panel. The AI feature in the Debug panel should likewise be used with discretion, as it may also conflict with the Game class.

Summary

In this article, we implemented our own Monte Carlo tree search and our own State, kept that State synchronized with boardgame.io's state, and used the search to build a Gobang game that supports playing against the AI. With this infrastructure in place, we can gradually move closer to an AlphaGo-style Gobang AI. Next, we will add a little fun to our game, which is also an important step on the road to an AlphaGo-style Gobang.

Topics: React Machine Learning Deep Learning Game Development