545 lines
14 KiB
C
545 lines
14 KiB
C
#include <assert.h>
|
|
#include <limits.h>
|
|
#include <stdbool.h>
|
|
#include <stdint.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <time.h>
|
|
|
|
///
|
|
/// The board looks like this:
|
|
/// #############
|
|
/// # 0 | 1 | 2 #
|
|
/// #---+---+---#
|
|
/// # 3 | 4 | 5 #
|
|
/// #---+---+---#
|
|
/// # 6 | 7 | 8 #
|
|
/// #############
|
|
///
|
|
/// A board is represented by an integer.
|
|
/// A board consists of Chs. Each tile is 2 bits.
|
|
///
|
|
/// Example:
|
|
///
|
|
/// ```
|
|
/// 0b00000000000000000000000000000000
|
|
/// ^^ ^^ - tile 0
|
|
/// ++-- tile 2
|
|
/// ```
|
|
///
|
|
/// The tile position is also called it's index or `idx`.
|
|
///
|
|
typedef uint32_t Board;
|
|
|
|
///
|
|
/// Ch - character/tile/player
|
|
///
|
|
/// Originally this was string-characters, ie. `' '`, `'X'` and `'O'`.
|
|
/// For performance reasons, it's now a 2 bit value.
|
|
///
|
|
typedef enum {
|
|
Ch_Empty,
|
|
Ch_Cross,
|
|
Ch_Circle,
|
|
} Ch;
|
|
|
|
const size_t ch_size = 2;
|
|
const size_t ch_mask = 3;
|
|
|
|
///
|
|
/// Get all possible places, meaning empty tiles.
|
|
/// The result is an integer where each bit represents a tile in it's place.
|
|
///
|
|
uint32_t board_possible_places(Board board)
|
|
{
|
|
uint32_t set = 0;
|
|
for (uint32_t i = 0; i < 9; ++i) {
|
|
Ch ch = board >> i * ch_size & ch_mask;
|
|
if (ch == Ch_Empty) {
|
|
set |= 1 << i;
|
|
}
|
|
}
|
|
return set;
|
|
}
|
|
|
|
Board board_set_place(Board board, Ch ch, uint32_t idx)
|
|
{
|
|
return board | (ch << idx * ch_size);
|
|
}
|
|
|
|
static inline Board board_transform(Board board, const uint32_t idcs[static 9])
|
|
{
|
|
Board result = 0;
|
|
for (uint32_t i = 0; i < 9; ++i) {
|
|
Ch ch = board >> idcs[i] * ch_size & ch_mask;
|
|
result |= ch << i * ch_size;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/// Rotate the board 3 hours clockwise.
|
|
Board board_rotate(Board board)
|
|
{
|
|
const uint32_t idcs[9] = { 6, 3, 0, 7, 4, 1, 8, 5, 2 };
|
|
return board_transform(board, idcs);
|
|
}
|
|
|
|
/// Flip board vertically.
|
|
Board board_flip(Board board)
|
|
{
|
|
const uint32_t idcs[9] = { 6, 7, 8, 3, 4, 5, 0, 1, 2 };
|
|
return board_transform(board, idcs);
|
|
}
|
|
|
|
Ch board_place(Board board, uint32_t idx)
|
|
{
|
|
return board >> idx * ch_size & ch_mask;
|
|
}
|
|
|
|
bool board_player_has_won(Board board, Ch ch)
|
|
{
|
|
const uint32_t combos[][3] = {
|
|
// horizontal
|
|
{ 0, 1, 2 },
|
|
{ 3, 4, 5 },
|
|
{ 6, 7, 8 },
|
|
// vertical
|
|
{ 0, 3, 6 },
|
|
{ 1, 4, 7 },
|
|
{ 2, 5, 8 },
|
|
// diagonal
|
|
{ 0, 4, 8 },
|
|
{ 2, 4, 6 },
|
|
};
|
|
const size_t combos_size = sizeof(combos) / sizeof(combos[0]);
|
|
for (size_t i = 0; i < combos_size; ++i) {
|
|
bool all = true;
|
|
for (int j = 0; j < 3; ++j) {
|
|
if (board_place(board, combos[i][j]) != ch) {
|
|
all = false;
|
|
}
|
|
}
|
|
if (all) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool board_is_draw(Board board)
|
|
{
|
|
//
|
|
return board_possible_places(board) == 0;
|
|
}
|
|
|
|
void print_board(Board board)
|
|
{
|
|
fputs("#############\n", stdout);
|
|
for (int y = 0; y < 3; ++y) {
|
|
if (y != 0) {
|
|
fputs("#---+---+---#\n", stdout);
|
|
}
|
|
for (int x = 0; x < 3; ++x) {
|
|
if (x == 0) {
|
|
fputs("# ", stdout);
|
|
} else {
|
|
fputs(" | ", stdout);
|
|
}
|
|
uint32_t i = y * 3 + x;
|
|
Ch ch = board >> i * ch_size & ch_mask;
|
|
switch (ch) {
|
|
case Ch_Empty:
|
|
printf("\x1b[0;37m%d\x1b[0m", i);
|
|
break;
|
|
case Ch_Cross:
|
|
fputs("\x1b[1;91mX\x1b[0m", stdout);
|
|
break;
|
|
case Ch_Circle:
|
|
fputs("\x1b[1;94mO\x1b[0m", stdout);
|
|
break;
|
|
}
|
|
if (x == 2) {
|
|
fputs(" #\n", stdout);
|
|
}
|
|
}
|
|
}
|
|
fputs("#############\n", stdout);
|
|
}
|
|
|
|
/// Internal structure used in `Choices`.
|
|
typedef struct {
|
|
Board board;
|
|
int weight;
|
|
} ChoiceWeight;
|
|
|
|
///
|
|
/// An arbitrarily indexable array of elements.
|
|
/// The index type is a `Board` the element type is a weight as `int`.
|
|
/// This collection is optimized for lookups. It does this, by sorting at
|
|
/// insertion and therefore being able to use binary search.
|
|
///
|
|
/// This collection functions like the following Python example:
|
|
/// ```py
|
|
/// data: dict[int, int] = {}
|
|
///
|
|
/// data[4] = 2
|
|
/// data[3] = 1
|
|
///
|
|
/// data == { 3: 1, 4: 2 }
|
|
/// ```
|
|
///
|
|
typedef struct {
|
|
ChoiceWeight* data;
|
|
size_t size;
|
|
size_t capacity;
|
|
} Choices;
|
|
|
|
int choices_contruct(Choices* choices)
|
|
{
|
|
const size_t initial_capacity = 256;
|
|
ChoiceWeight* data = malloc(initial_capacity * sizeof(ChoiceWeight));
|
|
if (!data) {
|
|
return -1;
|
|
}
|
|
*choices = (Choices) {
|
|
.data = data,
|
|
.size = 0,
|
|
.capacity = initial_capacity,
|
|
};
|
|
return 0;
|
|
}
|
|
|
|
void choices_destroy(Choices* choices)
|
|
{
|
|
//
|
|
free(choices->data);
|
|
}
|
|
|
|
/// `end` is exclusive.
|
|
static inline size_t choices_search_idx(
|
|
const Choices* choices, Board board, size_t begin, size_t end)
|
|
{
|
|
if (end == begin) {
|
|
return begin;
|
|
}
|
|
size_t middle = begin + (end - begin) / 2;
|
|
Board b = choices->data[middle].board;
|
|
if (b > board) {
|
|
return choices_search_idx(choices, board, begin, middle);
|
|
} else if (b < board) {
|
|
return choices_search_idx(choices, board, middle + 1, end);
|
|
} else {
|
|
return middle;
|
|
}
|
|
}
|
|
|
|
int choices_insert(Choices* choices, Board board, int weight)
|
|
{
|
|
if (choices->size + 1 >= choices->capacity) {
|
|
size_t new_capacity = choices->capacity * 2;
|
|
ChoiceWeight* data
|
|
= realloc(choices->data, new_capacity * sizeof(ChoiceWeight));
|
|
if (!data) {
|
|
return -1;
|
|
}
|
|
choices->data = data;
|
|
choices->capacity = new_capacity;
|
|
}
|
|
size_t idx = choices_search_idx(choices, board, 0, choices->size);
|
|
ChoiceWeight elem = { board, weight };
|
|
for (size_t i = choices->size; i > idx; --i) {
|
|
choices->data[i] = choices->data[i - 1];
|
|
}
|
|
choices->data[idx] = elem;
|
|
choices->size += 1;
|
|
return 0;
|
|
}
|
|
|
|
/// `end` is exclusive.
|
|
static inline int* choices_search_weight(
|
|
const Choices* choices, Board board, size_t begin, size_t end)
|
|
{
|
|
if (end == begin) {
|
|
return NULL;
|
|
}
|
|
size_t middle = begin + (end - begin) / 2;
|
|
Board b = choices->data[middle].board;
|
|
if (b > board) {
|
|
return choices_search_weight(choices, board, begin, middle);
|
|
} else if (b < board) {
|
|
return choices_search_weight(choices, board, middle + 1, end);
|
|
} else {
|
|
return &choices->data[middle].weight;
|
|
}
|
|
}
|
|
|
|
/// Returns a pointer to the found weight `int`, or `NULL` if none is found.
|
|
int* choices_get(const Choices* choices, Board board)
|
|
{
|
|
return choices_search_weight(choices, board, 0, choices->size);
|
|
}
|
|
|
|
bool choices_has(const Choices* choices, Board board)
|
|
{
|
|
return choices_search_weight(choices, board, 0, choices->size) != NULL;
|
|
}
|
|
|
|
void print_choices(const Choices* choices)
|
|
{
|
|
printf("i\tboard\tweight\t\n");
|
|
for (size_t i = 0; i < choices->size; ++i) {
|
|
printf("%lu\t%d\t%u\n", i, choices->data[i].board,
|
|
choices->data[i].weight);
|
|
}
|
|
}
|
|
|
|
const int win_reward = 1;
|
|
const int loss_punishment = 1;
|
|
const int cand_tolerance = 3;
|
|
|
|
///
|
|
/// A `Player` represents a simulated player, ie. who's gonna play the game
|
|
/// and train.
|
|
///
|
|
typedef struct {
|
|
/// The decision/choice weight of every possible combination.
|
|
/// When a novel board choice is discovered, the choice is added with
|
|
/// default weight.
|
|
///
|
|
/// This is essentially the trained model.
|
|
///
|
|
/// Choices are normalized or interned in this collection, meaning that
|
|
/// every board that can be mirrored or rotated is only stored once.
|
|
/// A visual example of a 2x2 grid:
|
|
/// ```
|
|
/// a: 0 - b: - 0
|
|
/// - - - -
|
|
/// ```
|
|
/// `a` and `b` are identical apart from rotation. Choices (meaning board)
|
|
/// get stored as one of such variations. Which one depends on
|
|
/// earliest occurence.
|
|
Choices choices;
|
|
/// What brick does the player use.
|
|
Ch ch;
|
|
/// In a given game, all the decisions are stored in this array.
|
|
/// This is used to reward and punish, meaning add or remove weight for all
|
|
/// choices in that game.
|
|
Board current_choices[9];
|
|
size_t current_choices_size;
|
|
} Player;
|
|
|
|
int player_construct(Player* player, Ch ch)
|
|
{
|
|
*player = (Player) {
|
|
.choices = NULL,
|
|
.ch = ch,
|
|
.current_choices = { (Board) { 0 } },
|
|
.current_choices_size = 0,
|
|
};
|
|
int res = choices_contruct(&player->choices);
|
|
if (res != 0)
|
|
return res;
|
|
return 0;
|
|
}
|
|
|
|
void player_destroy(Player* player)
|
|
{
|
|
//
|
|
choices_destroy(&player->choices);
|
|
}
|
|
|
|
void player_clear_choices(Player* player)
|
|
{
|
|
//
|
|
player->current_choices_size = 0;
|
|
}
|
|
|
|
void player_reward_win(Player* player)
|
|
{
|
|
for (size_t i = 0; i < player->current_choices_size; ++i) {
|
|
int* weigth = choices_get(&player->choices, player->current_choices[i]);
|
|
*weigth += win_reward;
|
|
}
|
|
}
|
|
|
|
void player_punish_loss(Player* player)
|
|
{
|
|
for (size_t i = 0; i < player->current_choices_size; ++i) {
|
|
int* weigth = choices_get(&player->choices, player->current_choices[i]);
|
|
*weigth -= loss_punishment;
|
|
}
|
|
}
|
|
|
|
void player_reward_or_punish(Player* player, int reward)
|
|
{
|
|
for (size_t i = 0; i < player->current_choices_size; ++i) {
|
|
int* weigth = choices_get(&player->choices, player->current_choices[i]);
|
|
*weigth += reward;
|
|
}
|
|
}
|
|
|
|
///
|
|
/// Find existing board or variation of board in `Player`'s `choices`.
|
|
/// All variations of the `board` will be tested, to see if an entry exists.
|
|
/// If no variation is found, it is added with a default starting weight.
|
|
///
|
|
Board player_intern_choice(Player* player, Board board)
|
|
{
|
|
for (int i = 0; i < 2; ++i) {
|
|
for (int j = 0; j < 4; ++j) {
|
|
if (choices_has(&player->choices, board)) {
|
|
return board;
|
|
}
|
|
board = board_rotate(board);
|
|
}
|
|
board = board_flip(board);
|
|
}
|
|
choices_insert(&player->choices, board, 0);
|
|
return board;
|
|
}
|
|
|
|
///
|
|
/// Make the `Player` make a move.
|
|
/// All possible moves are weighed against each other.
|
|
/// If multiple candidates are found, one is selected at random.
|
|
///
|
|
void player_take_turn(Player* player, Board* board)
|
|
{
|
|
typedef struct {
|
|
uint32_t idx;
|
|
Board board;
|
|
} ChoiceCand;
|
|
|
|
uint32_t possible_places = board_possible_places(*board);
|
|
int cand_weight = INT_MIN;
|
|
ChoiceCand cands[9] = { 0 };
|
|
size_t cands_size = 0;
|
|
for (uint32_t idx = 0; idx < 9; ++idx) {
|
|
if ((possible_places >> idx & 1) == 0)
|
|
continue;
|
|
Board pb = board_set_place(*board, player->ch, idx);
|
|
Board choice = player_intern_choice(player, pb);
|
|
int weight = *choices_get(&player->choices, choice);
|
|
if (weight > cand_weight + cand_tolerance) {
|
|
cand_weight = weight;
|
|
cands[0] = (ChoiceCand) { idx, choice };
|
|
cands_size = 1;
|
|
} else if (weight == cand_weight) {
|
|
cands[cands_size] = (ChoiceCand) { idx, choice };
|
|
cands_size += 1;
|
|
}
|
|
}
|
|
if (cands_size == 0) {
|
|
fputs("no candidates\n", stderr);
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
ChoiceCand cand;
|
|
if (cands_size == 1) {
|
|
cand = cands[0];
|
|
} else {
|
|
cand = cands[rand() % (cands_size - 1)];
|
|
}
|
|
player->current_choices[player->current_choices_size] = cand.board;
|
|
player->current_choices_size += 1;
|
|
*board = board_set_place(*board, player->ch, cand.idx);
|
|
}
|
|
|
|
int main(void)
|
|
{
|
|
srand(time(NULL));
|
|
|
|
Player p1;
|
|
player_construct(&p1, Ch_Cross);
|
|
|
|
Player p2;
|
|
player_construct(&p2, Ch_Circle);
|
|
|
|
int games = 0;
|
|
int p1_wins = 0;
|
|
int p2_wins = 0;
|
|
int draws = 0;
|
|
|
|
printf("Training P1 against P2...\n");
|
|
|
|
const int traning_iterations = 1000000;
|
|
|
|
for (int i = 0; i < traning_iterations; ++i) {
|
|
Board board = 0;
|
|
player_clear_choices(&p1);
|
|
player_clear_choices(&p2);
|
|
while (true) {
|
|
player_take_turn(&p1, &board);
|
|
if (board_player_has_won(board, Ch_Cross)) {
|
|
p1_wins += 1;
|
|
player_reward_win(&p1);
|
|
player_punish_loss(&p2);
|
|
break;
|
|
}
|
|
if (board_is_draw(board)) {
|
|
draws += 1;
|
|
player_reward_or_punish(&p1, rand() % 5 - 2);
|
|
player_reward_or_punish(&p2, rand() % 5 - 2);
|
|
break;
|
|
}
|
|
player_take_turn(&p2, &board);
|
|
if (board_player_has_won(board, Ch_Circle)) {
|
|
p2_wins += 1;
|
|
player_reward_win(&p2);
|
|
player_punish_loss(&p1);
|
|
break;
|
|
}
|
|
if (board_is_draw(board)) {
|
|
draws += 1;
|
|
player_reward_or_punish(&p1, rand() % 5 - 2);
|
|
player_reward_or_punish(&p2, rand() % 5 - 2);
|
|
break;
|
|
}
|
|
}
|
|
games += 1;
|
|
}
|
|
|
|
printf("Games %d, Wins: P1: %d, P2: %d, Draws: %d\n", games, p1_wins,
|
|
p2_wins, draws);
|
|
|
|
while (true) {
|
|
printf("\n\nNew game\n");
|
|
Board board = 0;
|
|
player_clear_choices(&p1);
|
|
while (true) {
|
|
printf("P1's turn\n");
|
|
player_take_turn(&p1, &board);
|
|
print_board(board);
|
|
if (board_player_has_won(board, Ch_Cross)) {
|
|
fputs("\x1b[1;91mYou lost!\x1b[0m", stdout);
|
|
break;
|
|
}
|
|
if (board_is_draw(board)) {
|
|
fputs("\x1b[1;93mDraw!\x1b[0m", stdout);
|
|
break;
|
|
}
|
|
printf("Your turn (0..8)\n> ");
|
|
uint32_t possible_places = board_possible_places(board);
|
|
int choice = 0;
|
|
while (true) {
|
|
int r = scanf("%d", &choice);
|
|
if (r != 1 || (possible_places >> choice & 1) == 0) {
|
|
printf("invalid choice\n");
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
board = board_set_place(board, Ch_Circle, choice);
|
|
print_board(board);
|
|
if (board_player_has_won(board, Ch_Circle)) {
|
|
fputs("\x1b[1;94mYou won!\x1b[0m", stdout);
|
|
break;
|
|
}
|
|
if (board_is_draw(board)) {
|
|
fputs("\x1b[1;93mDraw!\x1b[0m", stdout);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|