rewrite in c
This commit is contained in:
parent
d137e0149b
commit
372cc18795
6
.clang-format
Normal file
6
.clang-format
Normal file
@ -0,0 +1,6 @@
|
||||
Language: Cpp
|
||||
BasedOnStyle: WebKit
|
||||
IndentWidth: 4
|
||||
ColumnLimit: 80
|
||||
IndentCaseLabels: true
|
||||
|
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
main.o
|
||||
program
|
23
Makefile
Normal file
23
Makefile
Normal file
@ -0,0 +1,23 @@
|
||||
|
||||
CFLAGS = -O3 -std=c17 -Wall -Wextra -Wpedantic -pedantic -pedantic-errors
|
||||
|
||||
ALL_FLAGS = \
|
||||
|
||||
# -fsanitize=address,undefined
|
||||
|
||||
HEADERS = $(wildcard *.h)
|
||||
|
||||
CFILES = \
|
||||
main.c
|
||||
|
||||
OFILES = $(patsubst %.c,%.o,$(CFILES))
|
||||
|
||||
program: $(OFILES)
|
||||
gcc -o $@ $^ $(ALL_FLAGS)
|
||||
|
||||
%.o: %.c $(HEADERS)
|
||||
gcc $< -c -o $@ $(CFLAGS) $(ALL_FLAGS)
|
||||
|
||||
clean:
|
||||
rm -rf *.o program
|
||||
|
8
compile_flags.txt
Normal file
8
compile_flags.txt
Normal file
@ -0,0 +1,8 @@
|
||||
-xc
|
||||
-std=c17
|
||||
-Wall
|
||||
-Wextra
|
||||
-Wpedantic
|
||||
-pedantic
|
||||
-pedantic-errors
|
||||
|
544
main.c
Normal file
544
main.c
Normal file
@ -0,0 +1,544 @@
|
||||
#include <assert.h>
|
||||
#include <limits.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
///
|
||||
/// The board looks like this:
|
||||
/// #############
|
||||
/// # 0 | 1 | 2 #
|
||||
/// #---+---+---#
|
||||
/// # 3 | 4 | 5 #
|
||||
/// #---+---+---#
|
||||
/// # 6 | 7 | 8 #
|
||||
/// #############
|
||||
///
|
||||
/// A board is represented by an integer.
|
||||
/// A board consists of Chs. Each tile is 2 bits.
|
||||
///
|
||||
/// Example:
|
||||
///
|
||||
/// ```
|
||||
/// 0b00000000000000000000000000000000
|
||||
/// ^^ ^^ - tile 0
|
||||
/// ++-- tile 2
|
||||
/// ```
|
||||
///
|
||||
/// The tile position is also called it's index or `idx`.
|
||||
///
|
||||
typedef uint32_t Board;
|
||||
|
||||
///
|
||||
/// Ch - character/tile/player
|
||||
///
|
||||
/// Originally this was string-characters, ie. `' '`, `'X'` and `'O'`.
|
||||
/// For performance reasons, it's now a 2 bit value.
|
||||
///
|
||||
typedef enum {
|
||||
Ch_Empty,
|
||||
Ch_Cross,
|
||||
Ch_Circle,
|
||||
} Ch;
|
||||
|
||||
const size_t ch_size = 2;
|
||||
const size_t ch_mask = 3;
|
||||
|
||||
///
|
||||
/// Get all possible places, meaning empty tiles.
|
||||
/// The result is an integer where each bit represents a tile in it's place.
|
||||
///
|
||||
uint32_t board_possible_places(Board board)
|
||||
{
|
||||
uint32_t set = 0;
|
||||
for (uint32_t i = 0; i < 9; ++i) {
|
||||
Ch ch = board >> i * ch_size & ch_mask;
|
||||
if (ch == Ch_Empty) {
|
||||
set |= 1 << i;
|
||||
}
|
||||
}
|
||||
return set;
|
||||
}
|
||||
|
||||
Board board_set_place(Board board, Ch ch, uint32_t idx)
|
||||
{
|
||||
return board | (ch << idx * ch_size);
|
||||
}
|
||||
|
||||
static inline Board board_transform(Board board, const uint32_t idcs[static 9])
|
||||
{
|
||||
Board result = 0;
|
||||
for (uint32_t i = 0; i < 9; ++i) {
|
||||
Ch ch = board >> idcs[i] * ch_size & ch_mask;
|
||||
result |= ch << i * ch_size;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Rotate the board 3 hours clockwise.
|
||||
Board board_rotate(Board board)
|
||||
{
|
||||
const uint32_t idcs[9] = { 6, 3, 0, 7, 4, 1, 8, 5, 2 };
|
||||
return board_transform(board, idcs);
|
||||
}
|
||||
|
||||
/// Flip board vertically.
|
||||
Board board_flip(Board board)
|
||||
{
|
||||
const uint32_t idcs[9] = { 6, 7, 8, 3, 4, 5, 0, 1, 2 };
|
||||
return board_transform(board, idcs);
|
||||
}
|
||||
|
||||
Ch board_place(Board board, uint32_t idx)
|
||||
{
|
||||
return board >> idx * ch_size & ch_mask;
|
||||
}
|
||||
|
||||
bool board_player_has_won(Board board, Ch ch)
|
||||
{
|
||||
const uint32_t combos[][3] = {
|
||||
// horizontal
|
||||
{ 0, 1, 2 },
|
||||
{ 3, 4, 5 },
|
||||
{ 6, 7, 8 },
|
||||
// vertical
|
||||
{ 0, 3, 6 },
|
||||
{ 1, 4, 7 },
|
||||
{ 2, 5, 8 },
|
||||
// diagonal
|
||||
{ 0, 4, 8 },
|
||||
{ 2, 4, 6 },
|
||||
};
|
||||
const size_t combos_size = sizeof(combos) / sizeof(combos[0]);
|
||||
for (size_t i = 0; i < combos_size; ++i) {
|
||||
bool all = true;
|
||||
for (int j = 0; j < 3; ++j) {
|
||||
if (board_place(board, combos[i][j]) != ch) {
|
||||
all = false;
|
||||
}
|
||||
}
|
||||
if (all) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool board_is_draw(Board board)
|
||||
{
|
||||
//
|
||||
return board_possible_places(board) == 0;
|
||||
}
|
||||
|
||||
void print_board(Board board)
|
||||
{
|
||||
fputs("#############\n", stdout);
|
||||
for (int y = 0; y < 3; ++y) {
|
||||
if (y != 0) {
|
||||
fputs("#---+---+---#\n", stdout);
|
||||
}
|
||||
for (int x = 0; x < 3; ++x) {
|
||||
if (x == 0) {
|
||||
fputs("# ", stdout);
|
||||
} else {
|
||||
fputs(" | ", stdout);
|
||||
}
|
||||
uint32_t i = y * 3 + x;
|
||||
Ch ch = board >> i * ch_size & ch_mask;
|
||||
switch (ch) {
|
||||
case Ch_Empty:
|
||||
printf("\x1b[0;37m%d\x1b[0m", i);
|
||||
break;
|
||||
case Ch_Cross:
|
||||
fputs("\x1b[1;91mX\x1b[0m", stdout);
|
||||
break;
|
||||
case Ch_Circle:
|
||||
fputs("\x1b[1;94mO\x1b[0m", stdout);
|
||||
break;
|
||||
}
|
||||
if (x == 2) {
|
||||
fputs(" #\n", stdout);
|
||||
}
|
||||
}
|
||||
}
|
||||
fputs("#############\n", stdout);
|
||||
}
|
||||
|
||||
/// Internal structure used in `Choices`.
|
||||
typedef struct {
|
||||
Board board;
|
||||
int weight;
|
||||
} ChoiceWeight;
|
||||
|
||||
///
|
||||
/// An arbitrarily indexable array of elements.
|
||||
/// The index type is a `Board` the element type is a weight as `int`.
|
||||
/// This collection is optimized for lookups. It does this, by sorting at
|
||||
/// insertion and therefore being able to use binary search.
|
||||
///
|
||||
/// This collection functions like the following Python example:
|
||||
/// ```py
|
||||
/// data: dict[int, int] = {}
|
||||
///
|
||||
/// data[4] = 2
|
||||
/// data[3] = 1
|
||||
///
|
||||
/// data == { 3: 1, 4: 2 }
|
||||
/// ```
|
||||
///
|
||||
typedef struct {
|
||||
ChoiceWeight* data;
|
||||
size_t size;
|
||||
size_t capacity;
|
||||
} Choices;
|
||||
|
||||
int choices_contruct(Choices* choices)
|
||||
{
|
||||
const size_t initial_capacity = 256;
|
||||
ChoiceWeight* data = malloc(initial_capacity * sizeof(ChoiceWeight));
|
||||
if (!data) {
|
||||
return -1;
|
||||
}
|
||||
*choices = (Choices) {
|
||||
.data = data,
|
||||
.size = 0,
|
||||
.capacity = initial_capacity,
|
||||
};
|
||||
return 0;
|
||||
}
|
||||
|
||||
void choices_destroy(Choices* choices)
|
||||
{
|
||||
//
|
||||
free(choices->data);
|
||||
}
|
||||
|
||||
/// `end` is exclusive.
|
||||
static inline size_t choices_search_idx(
|
||||
const Choices* choices, Board board, size_t begin, size_t end)
|
||||
{
|
||||
if (end == begin) {
|
||||
return begin;
|
||||
}
|
||||
size_t middle = begin + (end - begin) / 2;
|
||||
Board b = choices->data[middle].board;
|
||||
if (b > board) {
|
||||
return choices_search_idx(choices, board, begin, middle);
|
||||
} else if (b < board) {
|
||||
return choices_search_idx(choices, board, middle + 1, end);
|
||||
} else {
|
||||
return middle;
|
||||
}
|
||||
}
|
||||
|
||||
int choices_insert(Choices* choices, Board board, int weight)
|
||||
{
|
||||
if (choices->size + 1 >= choices->capacity) {
|
||||
size_t new_capacity = choices->capacity * 2;
|
||||
ChoiceWeight* data
|
||||
= realloc(choices->data, new_capacity * sizeof(ChoiceWeight));
|
||||
if (!data) {
|
||||
return -1;
|
||||
}
|
||||
choices->data = data;
|
||||
choices->capacity = new_capacity;
|
||||
}
|
||||
size_t idx = choices_search_idx(choices, board, 0, choices->size);
|
||||
ChoiceWeight elem = { board, weight };
|
||||
for (size_t i = choices->size; i > idx; --i) {
|
||||
choices->data[i] = choices->data[i - 1];
|
||||
}
|
||||
choices->data[idx] = elem;
|
||||
choices->size += 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// `end` is exclusive.
|
||||
static inline int* choices_search_weight(
|
||||
const Choices* choices, Board board, size_t begin, size_t end)
|
||||
{
|
||||
if (end == begin) {
|
||||
return NULL;
|
||||
}
|
||||
size_t middle = begin + (end - begin) / 2;
|
||||
Board b = choices->data[middle].board;
|
||||
if (b > board) {
|
||||
return choices_search_weight(choices, board, begin, middle);
|
||||
} else if (b < board) {
|
||||
return choices_search_weight(choices, board, middle + 1, end);
|
||||
} else {
|
||||
return &choices->data[middle].weight;
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a pointer to the found weight `int`, or `NULL` if none is found.
|
||||
int* choices_get(const Choices* choices, Board board)
|
||||
{
|
||||
return choices_search_weight(choices, board, 0, choices->size);
|
||||
}
|
||||
|
||||
bool choices_has(const Choices* choices, Board board)
|
||||
{
|
||||
return choices_search_weight(choices, board, 0, choices->size) != NULL;
|
||||
}
|
||||
|
||||
void print_choices(const Choices* choices)
|
||||
{
|
||||
printf("i\tboard\tweight\t\n");
|
||||
for (size_t i = 0; i < choices->size; ++i) {
|
||||
printf("%lu\t%d\t%u\n", i, choices->data[i].board,
|
||||
choices->data[i].weight);
|
||||
}
|
||||
}
|
||||
|
||||
const int win_reward = 1;
|
||||
const int loss_punishment = 1;
|
||||
const int cand_tolerance = 3;
|
||||
|
||||
///
|
||||
/// A `Player` represents a simulated player, ie. who's gonna play the game
|
||||
/// and train.
|
||||
///
|
||||
typedef struct {
|
||||
/// The decision/choice weight of every possible combination.
|
||||
/// When a novel board choice is discovered, the choice is added with
|
||||
/// default weight.
|
||||
///
|
||||
/// This is essentially the trained model.
|
||||
///
|
||||
/// Choices are normalized or interned in this collection, meaning that
|
||||
/// every board that can be mirrored or rotated is only stored once.
|
||||
/// A visual example of a 2x2 grid:
|
||||
/// ```
|
||||
/// a: 0 - b: - 0
|
||||
/// - - - -
|
||||
/// ```
|
||||
/// `a` and `b` are identical apart from rotation. Choices (meaning board)
|
||||
/// get stored as one of such variations. Which one depends on
|
||||
/// earliest occurence.
|
||||
Choices choices;
|
||||
/// What brick does the player use.
|
||||
Ch ch;
|
||||
/// In a given game, all the decisions are stored in this array.
|
||||
/// This is used to reward and punish, meaning add or remove weight for all
|
||||
/// choices in that game.
|
||||
Board current_choices[9];
|
||||
size_t current_choices_size;
|
||||
} Player;
|
||||
|
||||
int player_construct(Player* player, Ch ch)
|
||||
{
|
||||
*player = (Player) {
|
||||
.choices = NULL,
|
||||
.ch = ch,
|
||||
.current_choices = { (Board) { 0 } },
|
||||
.current_choices_size = 0,
|
||||
};
|
||||
int res = choices_contruct(&player->choices);
|
||||
if (res != 0)
|
||||
return res;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void player_destroy(Player* player)
|
||||
{
|
||||
//
|
||||
choices_destroy(&player->choices);
|
||||
}
|
||||
|
||||
void player_clear_choices(Player* player)
|
||||
{
|
||||
//
|
||||
player->current_choices_size = 0;
|
||||
}
|
||||
|
||||
void player_reward_win(Player* player)
|
||||
{
|
||||
for (size_t i = 0; i < player->current_choices_size; ++i) {
|
||||
int* weigth = choices_get(&player->choices, player->current_choices[i]);
|
||||
*weigth += win_reward;
|
||||
}
|
||||
}
|
||||
|
||||
void player_punish_loss(Player* player)
|
||||
{
|
||||
for (size_t i = 0; i < player->current_choices_size; ++i) {
|
||||
int* weigth = choices_get(&player->choices, player->current_choices[i]);
|
||||
*weigth -= loss_punishment;
|
||||
}
|
||||
}
|
||||
|
||||
void player_reward_or_punish(Player* player, int reward)
|
||||
{
|
||||
for (size_t i = 0; i < player->current_choices_size; ++i) {
|
||||
int* weigth = choices_get(&player->choices, player->current_choices[i]);
|
||||
*weigth += reward;
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// Find existing board or variation of board in `Player`'s `choices`.
|
||||
/// All variations of the `board` will be tested, to see if an entry exists.
|
||||
/// If no variation is found, it is added with a default starting weight.
|
||||
///
|
||||
Board player_intern_choice(Player* player, Board board)
|
||||
{
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
for (int j = 0; j < 4; ++j) {
|
||||
if (choices_has(&player->choices, board)) {
|
||||
return board;
|
||||
}
|
||||
board = board_rotate(board);
|
||||
}
|
||||
board = board_flip(board);
|
||||
}
|
||||
choices_insert(&player->choices, board, 0);
|
||||
return board;
|
||||
}
|
||||
|
||||
///
|
||||
/// Make the `Player` make a move.
|
||||
/// All possible moves are weighed against each other.
|
||||
/// If multiple candidates are found, one is selected at random.
|
||||
///
|
||||
void player_take_turn(Player* player, Board* board)
|
||||
{
|
||||
typedef struct {
|
||||
uint32_t idx;
|
||||
Board board;
|
||||
} ChoiceCand;
|
||||
|
||||
uint32_t possible_places = board_possible_places(*board);
|
||||
int cand_weight = INT_MIN;
|
||||
ChoiceCand cands[9] = { 0 };
|
||||
size_t cands_size = 0;
|
||||
for (uint32_t idx = 0; idx < 9; ++idx) {
|
||||
if ((possible_places >> idx & 1) == 0)
|
||||
continue;
|
||||
Board pb = board_set_place(*board, player->ch, idx);
|
||||
Board choice = player_intern_choice(player, pb);
|
||||
int weight = *choices_get(&player->choices, choice);
|
||||
if (weight > cand_weight + cand_tolerance) {
|
||||
cand_weight = weight;
|
||||
cands[0] = (ChoiceCand) { idx, choice };
|
||||
cands_size = 1;
|
||||
} else if (weight == cand_weight) {
|
||||
cands[cands_size] = (ChoiceCand) { idx, choice };
|
||||
cands_size += 1;
|
||||
}
|
||||
}
|
||||
if (cands_size == 0) {
|
||||
fputs("no candidates\n", stderr);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
ChoiceCand cand;
|
||||
if (cands_size == 1) {
|
||||
cand = cands[0];
|
||||
} else {
|
||||
cand = cands[rand() % (cands_size - 1)];
|
||||
}
|
||||
player->current_choices[player->current_choices_size] = cand.board;
|
||||
player->current_choices_size += 1;
|
||||
*board = board_set_place(*board, player->ch, cand.idx);
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
srand(time(NULL));
|
||||
|
||||
Player p1;
|
||||
player_construct(&p1, Ch_Cross);
|
||||
|
||||
Player p2;
|
||||
player_construct(&p2, Ch_Circle);
|
||||
|
||||
int games = 0;
|
||||
int p1_wins = 0;
|
||||
int p2_wins = 0;
|
||||
int draws = 0;
|
||||
|
||||
printf("Training P1 against P2...\n");
|
||||
|
||||
const int traning_iterations = 1000000;
|
||||
|
||||
for (int i = 0; i < traning_iterations; ++i) {
|
||||
Board board = 0;
|
||||
player_clear_choices(&p1);
|
||||
player_clear_choices(&p2);
|
||||
while (true) {
|
||||
player_take_turn(&p1, &board);
|
||||
if (board_player_has_won(board, Ch_Cross)) {
|
||||
p1_wins += 1;
|
||||
player_reward_win(&p1);
|
||||
player_punish_loss(&p2);
|
||||
break;
|
||||
}
|
||||
if (board_is_draw(board)) {
|
||||
draws += 1;
|
||||
player_reward_or_punish(&p1, rand() % 5 - 2);
|
||||
player_reward_or_punish(&p2, rand() % 5 - 2);
|
||||
break;
|
||||
}
|
||||
player_take_turn(&p2, &board);
|
||||
if (board_player_has_won(board, Ch_Circle)) {
|
||||
p2_wins += 1;
|
||||
player_reward_win(&p2);
|
||||
player_punish_loss(&p1);
|
||||
break;
|
||||
}
|
||||
if (board_is_draw(board)) {
|
||||
draws += 1;
|
||||
player_reward_or_punish(&p1, rand() % 5 - 2);
|
||||
player_reward_or_punish(&p2, rand() % 5 - 2);
|
||||
break;
|
||||
}
|
||||
}
|
||||
games += 1;
|
||||
}
|
||||
|
||||
printf("Games %d, Wins: P1: %d, P2: %d, Draws: %d\n", games, p1_wins,
|
||||
p2_wins, draws);
|
||||
|
||||
while (true) {
|
||||
printf("\n\nNew game\n");
|
||||
Board board = 0;
|
||||
player_clear_choices(&p1);
|
||||
while (true) {
|
||||
printf("P1's turn\n");
|
||||
player_take_turn(&p1, &board);
|
||||
print_board(board);
|
||||
if (board_player_has_won(board, Ch_Cross)) {
|
||||
fputs("\x1b[1;91mYou lost!\x1b[0m", stdout);
|
||||
break;
|
||||
}
|
||||
if (board_is_draw(board)) {
|
||||
fputs("\x1b[1;93mDraw!\x1b[0m", stdout);
|
||||
break;
|
||||
}
|
||||
printf("Your turn (0..8)\n> ");
|
||||
uint32_t possible_places = board_possible_places(board);
|
||||
int choice = 0;
|
||||
while (true) {
|
||||
int r = scanf("%d", &choice);
|
||||
if (r != 1 || (possible_places >> choice & 1) == 0) {
|
||||
printf("invalid choice\n");
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
board = board_set_place(board, Ch_Circle, choice);
|
||||
print_board(board);
|
||||
if (board_player_has_won(board, Ch_Circle)) {
|
||||
fputs("\x1b[1;94mYou won!\x1b[0m", stdout);
|
||||
break;
|
||||
}
|
||||
if (board_is_draw(board)) {
|
||||
fputs("\x1b[1;93mDraw!\x1b[0m", stdout);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user