From dfb025fa5197dd298974c27c1cb2b06d798d3ca4 Mon Sep 17 00:00:00 2001 From: sfja Date: Wed, 19 Feb 2025 00:02:11 +0100 Subject: [PATCH] rewrite in c --- .clang-format | 6 + .gitignore | 2 + Makefile | 23 ++ compile_flags.txt | 8 + main.c | 538 ++++++++++++++++++++++++++++++++++++++++++++++ main.py | 2 +- 6 files changed, 578 insertions(+), 1 deletion(-) create mode 100644 .clang-format create mode 100644 .gitignore create mode 100644 Makefile create mode 100644 compile_flags.txt create mode 100644 main.c diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..8929d9b --- /dev/null +++ b/.clang-format @@ -0,0 +1,6 @@ +Language: Cpp +BasedOnStyle: WebKit +IndentWidth: 4 +ColumnLimit: 80 +IndentCaseLabels: true + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..106e23d --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +main.o +program diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..c71b84d --- /dev/null +++ b/Makefile @@ -0,0 +1,23 @@ + +CFLAGS = -O3 -std=c17 -Wall -Wextra -Wpedantic -pedantic -pedantic-errors + +ALL_FLAGS = \ + +# -fsanitize=address,undefined + +HEADERS = $(wildcard *.h) + +CFILES = \ + main.c + +OFILES = $(patsubst %.c,%.o,$(CFILES)) + +program: $(OFILES) + gcc -o $@ $^ $(ALL_FLAGS) + +%.o: %.c $(HEADERS) + gcc $< -c -o $@ $(CFLAGS) $(ALL_FLAGS) + +clean: + rm -rf *.o program + diff --git a/compile_flags.txt b/compile_flags.txt new file mode 100644 index 0000000..b8fdb5c --- /dev/null +++ b/compile_flags.txt @@ -0,0 +1,8 @@ +-xc +-std=c17 +-Wall +-Wextra +-Wpedantic +-pedantic +-pedantic-errors + diff --git a/main.c b/main.c new file mode 100644 index 0000000..f8fb9e3 --- /dev/null +++ b/main.c @@ -0,0 +1,538 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +/// +/// The board looks like this: +/// ############# +/// # 0 | 1 | 2 # +/// #---+---+---# +/// # 3 | 4 | 5 # +/// #---+---+---# +/// # 6 | 7 | 8 # +/// ############# +/// +/// A board is represented by an integer. +/// A board consists of Chs. Each tile is 2 bits. +/// +/// Example: +/// +/// ``` +/// 0b00000000000000000000000000000000 +/// ^^ ^^ - tile 0 +/// ++-- tile 2 +/// ``` +/// +/// The tile position is also called it's index or `idx`. +/// +typedef uint32_t Board; + +/// +/// Ch - character/tile/player +/// +/// Originally this was string-characters, ie. `' '`, `'X'` and `'O'`. +/// For performance reasons, it's now a 2 bit value. +/// +typedef enum { + Ch_Empty, + Ch_Cross, + Ch_Circle, +} Ch; + +const size_t ch_size = 2; +const size_t ch_mask = 3; + +/// +/// Get all possible places, meaning empty tiles. +/// The result is an integer where each bit represents a tile in it's place. +/// +uint32_t board_possible_places(Board board) +{ + uint32_t set = 0; + for (uint32_t i = 0; i < 9; ++i) { + Ch ch = board >> i * ch_size & ch_mask; + if (ch == Ch_Empty) { + set |= 1 << i; + } + } + return set; +} + +Board board_set_place(Board board, Ch ch, uint32_t idx) +{ + return board | (ch << idx * ch_size); +} + +static inline Board board_transform(Board board, const uint32_t idcs[static 9]) +{ + Board result = 0; + for (uint32_t i = 0; i < 9; ++i) { + Ch ch = board >> idcs[i] * ch_size & ch_mask; + result |= ch << i * ch_size; + } + return result; +} + +/// Rotate the board 3 hours clockwise. +Board board_rotate(Board board) +{ + const uint32_t idcs[9] = { 6, 3, 0, 7, 4, 1, 8, 5, 2 }; + return board_transform(board, idcs); +} + +/// Flip board vertically. +Board board_flip(Board board) +{ + const uint32_t idcs[9] = { 6, 7, 8, 3, 4, 5, 0, 1, 2 }; + return board_transform(board, idcs); +} + +Ch board_place(Board board, uint32_t idx) +{ + return board >> idx * ch_size & ch_mask; +} + +bool board_player_has_won(Board board, Ch ch) +{ + const uint32_t combos[][3] = { + // horizontal + { 0, 1, 2 }, + { 3, 4, 5 }, + { 6, 7, 8 }, + // vertical + { 0, 3, 6 }, + { 1, 4, 7 }, + { 2, 5, 8 }, + // diagonal + { 0, 4, 8 }, + { 2, 4, 6 }, + }; + const size_t combos_size = sizeof(combos) / sizeof(combos[0]); + for (size_t i = 0; i < combos_size; ++i) { + bool all = true; + for (int j = 0; j < 3; ++j) { + if (board_place(board, combos[i][j]) != ch) { + all = false; + } + } + if (all) { + return true; + } + } + return false; +} + +bool board_is_draw(Board board) +{ + // + return board_possible_places(board) == 0; +} + +void print_board(Board board) +{ + fputs("#############\n", stdout); + for (int y = 0; y < 3; ++y) { + if (y != 0) { + fputs("#---+---+---#\n", stdout); + } + for (int x = 0; x < 3; ++x) { + if (x == 0) { + fputs("# ", stdout); + } else { + fputs(" | ", stdout); + } + uint32_t i = y * 3 + x; + Ch ch = board >> i * ch_size & ch_mask; + switch (ch) { + case Ch_Empty: + printf("\x1b[0;37m%d\x1b[0m", i); + break; + case Ch_Cross: + fputs("\x1b[1;91mX\x1b[0m", stdout); + break; + case Ch_Circle: + fputs("\x1b[1;94mO\x1b[0m", stdout); + break; + } + if (x == 2) { + fputs(" #\n", stdout); + } + } + } + fputs("#############\n", stdout); +} + +/// Internal structure used in `Choices`. +typedef struct { + Board board; + int weight; +} ChoiceWeight; + +/// +/// An arbitrarily indexable array of elements. +/// The index type is a `Board` the element type is a weight as `int`. +/// This collection is optimized for lookups. It does this, by sorting at +/// insertion and therefore being able to use binary search. +/// +/// This collection functions like the following Python example: +/// ```py +/// data: dict[int, int] = {} +/// +/// data[4] = 2 +/// data[3] = 1 +/// +/// data == { 3: 1, 4: 2 } +/// ``` +/// +typedef struct { + ChoiceWeight* data; + size_t size; + size_t capacity; +} Choices; + +int choices_contruct(Choices* choices) +{ + const size_t initial_capacity = 256; + ChoiceWeight* data = malloc(initial_capacity * sizeof(ChoiceWeight)); + if (!data) { + return -1; + } + *choices = (Choices) { + .data = data, + .size = 0, + .capacity = initial_capacity, + }; + return 0; +} + +void choices_destroy(Choices* choices) +{ + // + free(choices->data); +} + +/// `end` is exclusive. +static inline size_t choices_search_idx( + const Choices* choices, Board board, size_t begin, size_t end) +{ + if (end == begin) { + return begin; + } + size_t middle = begin + (end - begin) / 2; + Board b = choices->data[middle].board; + if (b > board) { + return choices_search_idx(choices, board, begin, middle); + } else if (b < board) { + return choices_search_idx(choices, board, middle + 1, end); + } else { + return middle; + } +} + +int choices_insert(Choices* choices, Board board, int weight) +{ + if (choices->size + 1 >= choices->capacity) { + size_t new_capacity = choices->capacity * 2; + ChoiceWeight* data + = realloc(choices->data, new_capacity * sizeof(ChoiceWeight)); + if (!data) { + return -1; + } + choices->data = data; + choices->capacity = new_capacity; + } + size_t idx = choices_search_idx(choices, board, 0, choices->size); + ChoiceWeight elem = { board, weight }; + for (size_t i = choices->size; i > idx; --i) { + choices->data[i] = choices->data[i - 1]; + } + choices->data[idx] = elem; + choices->size += 1; + return 0; +} + +/// `end` is exclusive. +static inline int* choices_search_weight( + const Choices* choices, Board board, size_t begin, size_t end) +{ + if (end == begin) { + return NULL; + } + size_t middle = begin + (end - begin) / 2; + Board b = choices->data[middle].board; + if (b > board) { + return choices_search_weight(choices, board, begin, middle); + } else if (b < board) { + return choices_search_weight(choices, board, middle + 1, end); + } else { + return &choices->data[middle].weight; + } +} + +/// Returns a pointer to the found weight `int`, or `NULL` if none is found. +int* choices_get(const Choices* choices, Board board) +{ + return choices_search_weight(choices, board, 0, choices->size); +} + +bool choices_has(const Choices* choices, Board board) +{ + return choices_search_weight(choices, board, 0, choices->size) != NULL; +} + +void print_choices(const Choices* choices) +{ + printf("i\tboard\tweight\t\n"); + for (size_t i = 0; i < choices->size; ++i) { + printf("%lu\t%d\t%u\n", i, choices->data[i].board, + choices->data[i].weight); + } +} + +const int win_reward = 1; +const int loss_punishment = 1; +const int cand_tolerance = 3; + +/// +/// A `Player` represents a simulated player, ie. who's gonna play the game +/// and train. +/// +typedef struct { + /// The decision/choice weight of every possible combination. + /// When a novel board choice is discovered, the choice is added with + /// default weight. + /// + /// This is essentially the trained model. + /// + /// Choices are normalized or interned in this collection, meaning that + /// every board that can be mirrored or rotated is only stored once. + /// A visual example of a 2x2 grid: + /// ``` + /// a: 0 - b: - 0 + /// - - - - + /// ``` + /// `a` and `b` are identical apart from rotation. Choices (meaning board) + /// get stored as one of such variations. Which one depends on + /// earliest occurence. + Choices choices; + /// What brick does the player use. + Ch ch; + /// In a given game, all the decisions are stored in this array. + /// This is used to reward and punish, meaning add or remove weight for all + /// choices in that game. + Board current_choices[9]; + size_t current_choices_size; +} Player; + +int player_construct(Player* player, Ch ch) +{ + *player = (Player) { + .choices = NULL, + .ch = ch, + .current_choices = { (Board) { 0 } }, + .current_choices_size = 0, + }; + int res = choices_contruct(&player->choices); + if (res != 0) + return res; + return 0; +} + +void player_destroy(Player* player) +{ + // + choices_destroy(&player->choices); +} + +void player_clear_choices(Player* player) +{ + // + player->current_choices_size = 0; +} + +void player_reward_win(Player* player) +{ + for (size_t i = 0; i < player->current_choices_size; ++i) { + int* weigth = choices_get(&player->choices, player->current_choices[i]); + *weigth += win_reward; + } +} + +void player_punish_loss(Player* player) +{ + for (size_t i = 0; i < player->current_choices_size; ++i) { + int* weigth = choices_get(&player->choices, player->current_choices[i]); + *weigth -= loss_punishment; + } +} + +void player_reward_or_punish(Player* player, int reward) +{ + for (size_t i = 0; i < player->current_choices_size; ++i) { + int* weigth = choices_get(&player->choices, player->current_choices[i]); + *weigth += reward; + } +} + +/// +/// Find existing board or variation of board in `Player`'s `choices`. +/// All variations of the `board` will be tested, to see if an entry exists. +/// If no variation is found, it is added with a default starting weight. +Board player_intern_choice(Player* player, Board board) +{ + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 4; ++j) { + if (choices_has(&player->choices, board)) { + return board; + } + board = board_rotate(board); + } + board = board_flip(board); + } + choices_insert(&player->choices, board, 0); + return board; +} + +void player_take_turn(Player* player, Board* board) +{ + typedef struct { + uint32_t idx; + Board board; + } ChoiceCand; + + uint32_t possible_places = board_possible_places(*board); + int cand_weight = INT_MIN; + ChoiceCand cands[9] = { 0 }; + size_t cands_size = 0; + for (uint32_t idx = 0; idx < 9; ++idx) { + if ((possible_places >> idx & 1) == 0) + continue; + Board pb = board_set_place(*board, player->ch, idx); + Board choice = player_intern_choice(player, pb); + int weight = *choices_get(&player->choices, choice); + if (weight > cand_weight + cand_tolerance) { + cand_weight = weight; + cands[0] = (ChoiceCand) { idx, choice }; + cands_size = 1; + } else if (weight == cand_weight) { + cands[cands_size] = (ChoiceCand) { idx, choice }; + cands_size += 1; + } + } + if (cands_size == 0) { + fputs("no candidates\n", stderr); + exit(EXIT_FAILURE); + } + ChoiceCand cand; + if (cands_size == 1) { + cand = cands[0]; + } else { + cand = cands[rand() % (cands_size - 1)]; + } + player->current_choices[player->current_choices_size] = cand.board; + player->current_choices_size += 1; + *board = board_set_place(*board, player->ch, cand.idx); +} + +int main(void) +{ + srand(time(NULL)); + + Player p1; + player_construct(&p1, Ch_Cross); + + Player p2; + player_construct(&p2, Ch_Circle); + + int games = 0; + int p1_wins = 0; + int p2_wins = 0; + int draws = 0; + + printf("Training P1 against P2...\n"); + + const int traning_iterations = 1000000; + + for (int i = 0; i < traning_iterations; ++i) { + Board board = 0; + player_clear_choices(&p1); + player_clear_choices(&p2); + while (true) { + player_take_turn(&p1, &board); + if (board_player_has_won(board, Ch_Cross)) { + p1_wins += 1; + player_reward_win(&p1); + player_punish_loss(&p2); + break; + } + if (board_is_draw(board)) { + draws += 1; + player_reward_or_punish(&p1, rand() % 5 - 2); + player_reward_or_punish(&p2, rand() % 5 - 2); + break; + } + player_take_turn(&p2, &board); + if (board_player_has_won(board, Ch_Circle)) { + p2_wins += 1; + player_reward_win(&p2); + player_punish_loss(&p1); + break; + } + if (board_is_draw(board)) { + draws += 1; + player_reward_or_punish(&p1, rand() % 5 - 2); + player_reward_or_punish(&p2, rand() % 5 - 2); + break; + } + } + games += 1; + } + + printf("Games %d, Wins: P1: %d, P2: %d, Draws: %d\n", games, p1_wins, + p2_wins, draws); + + while (true) { + printf("\n\nNew game\n"); + Board board = 0; + player_clear_choices(&p1); + while (true) { + printf("P1's turn\n"); + player_take_turn(&p1, &board); + print_board(board); + if (board_player_has_won(board, Ch_Cross)) { + fputs("\x1b[1;91mYou lost!\x1b[0m", stdout); + break; + } + if (board_is_draw(board)) { + fputs("\x1b[1;93mDraw!\x1b[0m", stdout); + break; + } + printf("Your turn (0..8)\n> "); + uint32_t possible_places = board_possible_places(board); + int choice = 0; + while (true) { + int r = scanf("%d", &choice); + if (r != 1 || (possible_places >> choice & 1) == 0) { + printf("invalid choice\n"); + } else { + break; + } + } + board = board_set_place(board, Ch_Circle, choice); + print_board(board); + if (board_player_has_won(board, Ch_Circle)) { + fputs("\x1b[1;94mYou won!\x1b[0m", stdout); + break; + } + if (board_is_draw(board)) { + fputs("\x1b[1;93mDraw!\x1b[0m", stdout); + break; + } + } + } +} diff --git a/main.py b/main.py index 006b342..5ec90e2 100644 --- a/main.py +++ b/main.py @@ -169,7 +169,7 @@ def main(): print("Training P1 against P2...") - for _ in range(1000): + for _ in range(100000): board = Board() p1.clear_choices() p2.clear_choices()