matchbox_tictactoe/main.c
2025-02-19 00:07:02 +01:00

545 lines
14 KiB
C

#include <assert.h>
#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
///
/// The board looks like this:
/// #############
/// # 0 | 1 | 2 #
/// #---+---+---#
/// # 3 | 4 | 5 #
/// #---+---+---#
/// # 6 | 7 | 8 #
/// #############
///
/// A board is represented by an integer.
/// A board consists of Chs. Each tile is 2 bits.
///
/// Example:
///
/// ```
/// 0b00000000000000000000000000000000
/// ^^ ^^ - tile 0
/// ++-- tile 2
/// ```
///
/// The tile position is also called it's index or `idx`.
///
typedef uint32_t Board;
///
/// Ch - character/tile/player
///
/// Originally this was string-characters, ie. `' '`, `'X'` and `'O'`.
/// For performance reasons, it's now a 2 bit value.
///
typedef enum {
Ch_Empty,
Ch_Cross,
Ch_Circle,
} Ch;
const size_t ch_size = 2;
const size_t ch_mask = 3;
///
/// Get all possible places, meaning empty tiles.
/// The result is an integer where each bit represents a tile in it's place.
///
uint32_t board_possible_places(Board board)
{
uint32_t set = 0;
for (uint32_t i = 0; i < 9; ++i) {
Ch ch = board >> i * ch_size & ch_mask;
if (ch == Ch_Empty) {
set |= 1 << i;
}
}
return set;
}
Board board_set_place(Board board, Ch ch, uint32_t idx)
{
return board | (ch << idx * ch_size);
}
static inline Board board_transform(Board board, const uint32_t idcs[static 9])
{
Board result = 0;
for (uint32_t i = 0; i < 9; ++i) {
Ch ch = board >> idcs[i] * ch_size & ch_mask;
result |= ch << i * ch_size;
}
return result;
}
/// Rotate the board 3 hours clockwise.
Board board_rotate(Board board)
{
const uint32_t idcs[9] = { 6, 3, 0, 7, 4, 1, 8, 5, 2 };
return board_transform(board, idcs);
}
/// Flip board vertically.
Board board_flip(Board board)
{
const uint32_t idcs[9] = { 6, 7, 8, 3, 4, 5, 0, 1, 2 };
return board_transform(board, idcs);
}
Ch board_place(Board board, uint32_t idx)
{
return board >> idx * ch_size & ch_mask;
}
bool board_player_has_won(Board board, Ch ch)
{
const uint32_t combos[][3] = {
// horizontal
{ 0, 1, 2 },
{ 3, 4, 5 },
{ 6, 7, 8 },
// vertical
{ 0, 3, 6 },
{ 1, 4, 7 },
{ 2, 5, 8 },
// diagonal
{ 0, 4, 8 },
{ 2, 4, 6 },
};
const size_t combos_size = sizeof(combos) / sizeof(combos[0]);
for (size_t i = 0; i < combos_size; ++i) {
bool all = true;
for (int j = 0; j < 3; ++j) {
if (board_place(board, combos[i][j]) != ch) {
all = false;
}
}
if (all) {
return true;
}
}
return false;
}
bool board_is_draw(Board board)
{
//
return board_possible_places(board) == 0;
}
void print_board(Board board)
{
fputs("#############\n", stdout);
for (int y = 0; y < 3; ++y) {
if (y != 0) {
fputs("#---+---+---#\n", stdout);
}
for (int x = 0; x < 3; ++x) {
if (x == 0) {
fputs("# ", stdout);
} else {
fputs(" | ", stdout);
}
uint32_t i = y * 3 + x;
Ch ch = board >> i * ch_size & ch_mask;
switch (ch) {
case Ch_Empty:
printf("\x1b[0;37m%d\x1b[0m", i);
break;
case Ch_Cross:
fputs("\x1b[1;91mX\x1b[0m", stdout);
break;
case Ch_Circle:
fputs("\x1b[1;94mO\x1b[0m", stdout);
break;
}
if (x == 2) {
fputs(" #\n", stdout);
}
}
}
fputs("#############\n", stdout);
}
/// Internal structure used in `Choices`.
typedef struct {
Board board;
int weight;
} ChoiceWeight;
///
/// An arbitrarily indexable array of elements.
/// The index type is a `Board` the element type is a weight as `int`.
/// This collection is optimized for lookups. It does this, by sorting at
/// insertion and therefore being able to use binary search.
///
/// This collection functions like the following Python example:
/// ```py
/// data: dict[int, int] = {}
///
/// data[4] = 2
/// data[3] = 1
///
/// data == { 3: 1, 4: 2 }
/// ```
///
typedef struct {
ChoiceWeight* data;
size_t size;
size_t capacity;
} Choices;
int choices_contruct(Choices* choices)
{
const size_t initial_capacity = 256;
ChoiceWeight* data = malloc(initial_capacity * sizeof(ChoiceWeight));
if (!data) {
return -1;
}
*choices = (Choices) {
.data = data,
.size = 0,
.capacity = initial_capacity,
};
return 0;
}
void choices_destroy(Choices* choices)
{
//
free(choices->data);
}
/// `end` is exclusive.
static inline size_t choices_search_idx(
const Choices* choices, Board board, size_t begin, size_t end)
{
if (end == begin) {
return begin;
}
size_t middle = begin + (end - begin) / 2;
Board b = choices->data[middle].board;
if (b > board) {
return choices_search_idx(choices, board, begin, middle);
} else if (b < board) {
return choices_search_idx(choices, board, middle + 1, end);
} else {
return middle;
}
}
int choices_insert(Choices* choices, Board board, int weight)
{
if (choices->size + 1 >= choices->capacity) {
size_t new_capacity = choices->capacity * 2;
ChoiceWeight* data
= realloc(choices->data, new_capacity * sizeof(ChoiceWeight));
if (!data) {
return -1;
}
choices->data = data;
choices->capacity = new_capacity;
}
size_t idx = choices_search_idx(choices, board, 0, choices->size);
ChoiceWeight elem = { board, weight };
for (size_t i = choices->size; i > idx; --i) {
choices->data[i] = choices->data[i - 1];
}
choices->data[idx] = elem;
choices->size += 1;
return 0;
}
/// `end` is exclusive.
static inline int* choices_search_weight(
const Choices* choices, Board board, size_t begin, size_t end)
{
if (end == begin) {
return NULL;
}
size_t middle = begin + (end - begin) / 2;
Board b = choices->data[middle].board;
if (b > board) {
return choices_search_weight(choices, board, begin, middle);
} else if (b < board) {
return choices_search_weight(choices, board, middle + 1, end);
} else {
return &choices->data[middle].weight;
}
}
/// Returns a pointer to the found weight `int`, or `NULL` if none is found.
int* choices_get(const Choices* choices, Board board)
{
return choices_search_weight(choices, board, 0, choices->size);
}
bool choices_has(const Choices* choices, Board board)
{
return choices_search_weight(choices, board, 0, choices->size) != NULL;
}
void print_choices(const Choices* choices)
{
printf("i\tboard\tweight\t\n");
for (size_t i = 0; i < choices->size; ++i) {
printf("%lu\t%d\t%u\n", i, choices->data[i].board,
choices->data[i].weight);
}
}
const int win_reward = 1;
const int loss_punishment = 1;
const int cand_tolerance = 3;
///
/// A `Player` represents a simulated player, ie. who's gonna play the game
/// and train.
///
typedef struct {
/// The decision/choice weight of every possible combination.
/// When a novel board choice is discovered, the choice is added with
/// default weight.
///
/// This is essentially the trained model.
///
/// Choices are normalized or interned in this collection, meaning that
/// every board that can be mirrored or rotated is only stored once.
/// A visual example of a 2x2 grid:
/// ```
/// a: 0 - b: - 0
/// - - - -
/// ```
/// `a` and `b` are identical apart from rotation. Choices (meaning board)
/// get stored as one of such variations. Which one depends on
/// earliest occurence.
Choices choices;
/// What brick does the player use.
Ch ch;
/// In a given game, all the decisions are stored in this array.
/// This is used to reward and punish, meaning add or remove weight for all
/// choices in that game.
Board current_choices[9];
size_t current_choices_size;
} Player;
int player_construct(Player* player, Ch ch)
{
*player = (Player) {
.choices = NULL,
.ch = ch,
.current_choices = { (Board) { 0 } },
.current_choices_size = 0,
};
int res = choices_contruct(&player->choices);
if (res != 0)
return res;
return 0;
}
void player_destroy(Player* player)
{
//
choices_destroy(&player->choices);
}
void player_clear_choices(Player* player)
{
//
player->current_choices_size = 0;
}
void player_reward_win(Player* player)
{
for (size_t i = 0; i < player->current_choices_size; ++i) {
int* weigth = choices_get(&player->choices, player->current_choices[i]);
*weigth += win_reward;
}
}
void player_punish_loss(Player* player)
{
for (size_t i = 0; i < player->current_choices_size; ++i) {
int* weigth = choices_get(&player->choices, player->current_choices[i]);
*weigth -= loss_punishment;
}
}
void player_reward_or_punish(Player* player, int reward)
{
for (size_t i = 0; i < player->current_choices_size; ++i) {
int* weigth = choices_get(&player->choices, player->current_choices[i]);
*weigth += reward;
}
}
///
/// Find existing board or variation of board in `Player`'s `choices`.
/// All variations of the `board` will be tested, to see if an entry exists.
/// If no variation is found, it is added with a default starting weight.
///
Board player_intern_choice(Player* player, Board board)
{
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 4; ++j) {
if (choices_has(&player->choices, board)) {
return board;
}
board = board_rotate(board);
}
board = board_flip(board);
}
choices_insert(&player->choices, board, 0);
return board;
}
///
/// Make the `Player` make a move.
/// All possible moves are weighed against each other.
/// If multiple candidates are found, one is selected at random.
///
void player_take_turn(Player* player, Board* board)
{
typedef struct {
uint32_t idx;
Board board;
} ChoiceCand;
uint32_t possible_places = board_possible_places(*board);
int cand_weight = INT_MIN;
ChoiceCand cands[9] = { 0 };
size_t cands_size = 0;
for (uint32_t idx = 0; idx < 9; ++idx) {
if ((possible_places >> idx & 1) == 0)
continue;
Board pb = board_set_place(*board, player->ch, idx);
Board choice = player_intern_choice(player, pb);
int weight = *choices_get(&player->choices, choice);
if (weight > cand_weight + cand_tolerance) {
cand_weight = weight;
cands[0] = (ChoiceCand) { idx, choice };
cands_size = 1;
} else if (weight == cand_weight) {
cands[cands_size] = (ChoiceCand) { idx, choice };
cands_size += 1;
}
}
if (cands_size == 0) {
fputs("no candidates\n", stderr);
exit(EXIT_FAILURE);
}
ChoiceCand cand;
if (cands_size == 1) {
cand = cands[0];
} else {
cand = cands[rand() % (cands_size - 1)];
}
player->current_choices[player->current_choices_size] = cand.board;
player->current_choices_size += 1;
*board = board_set_place(*board, player->ch, cand.idx);
}
int main(void)
{
srand(time(NULL));
Player p1;
player_construct(&p1, Ch_Cross);
Player p2;
player_construct(&p2, Ch_Circle);
int games = 0;
int p1_wins = 0;
int p2_wins = 0;
int draws = 0;
printf("Training P1 against P2...\n");
const int traning_iterations = 1000000;
for (int i = 0; i < traning_iterations; ++i) {
Board board = 0;
player_clear_choices(&p1);
player_clear_choices(&p2);
while (true) {
player_take_turn(&p1, &board);
if (board_player_has_won(board, Ch_Cross)) {
p1_wins += 1;
player_reward_win(&p1);
player_punish_loss(&p2);
break;
}
if (board_is_draw(board)) {
draws += 1;
player_reward_or_punish(&p1, rand() % 5 - 2);
player_reward_or_punish(&p2, rand() % 5 - 2);
break;
}
player_take_turn(&p2, &board);
if (board_player_has_won(board, Ch_Circle)) {
p2_wins += 1;
player_reward_win(&p2);
player_punish_loss(&p1);
break;
}
if (board_is_draw(board)) {
draws += 1;
player_reward_or_punish(&p1, rand() % 5 - 2);
player_reward_or_punish(&p2, rand() % 5 - 2);
break;
}
}
games += 1;
}
printf("Games %d, Wins: P1: %d, P2: %d, Draws: %d\n", games, p1_wins,
p2_wins, draws);
while (true) {
printf("\n\nNew game\n");
Board board = 0;
player_clear_choices(&p1);
while (true) {
printf("P1's turn\n");
player_take_turn(&p1, &board);
print_board(board);
if (board_player_has_won(board, Ch_Cross)) {
fputs("\x1b[1;91mYou lost!\x1b[0m", stdout);
break;
}
if (board_is_draw(board)) {
fputs("\x1b[1;93mDraw!\x1b[0m", stdout);
break;
}
printf("Your turn (0..8)\n> ");
uint32_t possible_places = board_possible_places(board);
int choice = 0;
while (true) {
int r = scanf("%d", &choice);
if (r != 1 || (possible_places >> choice & 1) == 0) {
printf("invalid choice\n");
} else {
break;
}
}
board = board_set_place(board, Ch_Circle, choice);
print_board(board);
if (board_player_has_won(board, Ch_Circle)) {
fputs("\x1b[1;94mYou won!\x1b[0m", stdout);
break;
}
if (board_is_draw(board)) {
fputs("\x1b[1;93mDraw!\x1b[0m", stdout);
break;
}
}
}
}