#include #include #include #include #include #include #include #include /// /// The board looks like this: /// ############# /// # 0 | 1 | 2 # /// #---+---+---# /// # 3 | 4 | 5 # /// #---+---+---# /// # 6 | 7 | 8 # /// ############# /// /// A board is represented by an integer. /// A board consists of Chs. Each tile is 2 bits. /// /// Example: /// /// ``` /// 0b00000000000000000000000000000000 /// ^^ ^^ - tile 0 /// ++-- tile 2 /// ``` /// /// The tile position is also called it's index or `idx`. /// typedef uint32_t Board; /// /// Ch - character/tile/player /// /// Originally this was string-characters, ie. `' '`, `'X'` and `'O'`. /// For performance reasons, it's now a 2 bit value. /// typedef enum { Ch_Empty, Ch_Cross, Ch_Circle, } Ch; const size_t ch_size = 2; const size_t ch_mask = 3; /// /// Get all possible places, meaning empty tiles. /// The result is an integer where each bit represents a tile in it's place. /// uint32_t board_possible_places(Board board) { uint32_t set = 0; for (uint32_t i = 0; i < 9; ++i) { Ch ch = board >> i * ch_size & ch_mask; if (ch == Ch_Empty) { set |= 1 << i; } } return set; } Board board_set_place(Board board, Ch ch, uint32_t idx) { return board | (ch << idx * ch_size); } static inline Board board_transform(Board board, const uint32_t idcs[static 9]) { Board result = 0; for (uint32_t i = 0; i < 9; ++i) { Ch ch = board >> idcs[i] * ch_size & ch_mask; result |= ch << i * ch_size; } return result; } /// Rotate the board 3 hours clockwise. Board board_rotate(Board board) { const uint32_t idcs[9] = { 6, 3, 0, 7, 4, 1, 8, 5, 2 }; return board_transform(board, idcs); } /// Flip board vertically. Board board_flip(Board board) { const uint32_t idcs[9] = { 6, 7, 8, 3, 4, 5, 0, 1, 2 }; return board_transform(board, idcs); } Ch board_place(Board board, uint32_t idx) { return board >> idx * ch_size & ch_mask; } bool board_player_has_won(Board board, Ch ch) { const uint32_t combos[][3] = { // horizontal { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 }, // vertical { 0, 3, 6 }, { 1, 4, 7 }, { 2, 5, 8 }, // diagonal { 0, 4, 8 }, { 2, 4, 6 }, }; const size_t combos_size = sizeof(combos) / sizeof(combos[0]); for (size_t i = 0; i < combos_size; ++i) { bool all = true; for (int j = 0; j < 3; ++j) { if (board_place(board, combos[i][j]) != ch) { all = false; } } if (all) { return true; } } return false; } bool board_is_draw(Board board) { // return board_possible_places(board) == 0; } void print_board(Board board) { fputs("#############\n", stdout); for (int y = 0; y < 3; ++y) { if (y != 0) { fputs("#---+---+---#\n", stdout); } for (int x = 0; x < 3; ++x) { if (x == 0) { fputs("# ", stdout); } else { fputs(" | ", stdout); } uint32_t i = y * 3 + x; Ch ch = board >> i * ch_size & ch_mask; switch (ch) { case Ch_Empty: printf("\x1b[0;37m%d\x1b[0m", i); break; case Ch_Cross: fputs("\x1b[1;91mX\x1b[0m", stdout); break; case Ch_Circle: fputs("\x1b[1;94mO\x1b[0m", stdout); break; } if (x == 2) { fputs(" #\n", stdout); } } } fputs("#############\n", stdout); } /// Internal structure used in `Choices`. typedef struct { Board board; int weight; } ChoiceWeight; /// /// An arbitrarily indexable array of elements. /// The index type is a `Board` the element type is a weight as `int`. /// This collection is optimized for lookups. It does this, by sorting at /// insertion and therefore being able to use binary search. /// /// This collection functions like the following Python example: /// ```py /// data: dict[int, int] = {} /// /// data[4] = 2 /// data[3] = 1 /// /// data == { 3: 1, 4: 2 } /// ``` /// typedef struct { ChoiceWeight* data; size_t size; size_t capacity; } Choices; int choices_contruct(Choices* choices) { const size_t initial_capacity = 256; ChoiceWeight* data = malloc(initial_capacity * sizeof(ChoiceWeight)); if (!data) { return -1; } *choices = (Choices) { .data = data, .size = 0, .capacity = initial_capacity, }; return 0; } void choices_destroy(Choices* choices) { // free(choices->data); } /// `end` is exclusive. static inline size_t choices_search_idx( const Choices* choices, Board board, size_t begin, size_t end) { if (end == begin) { return begin; } size_t middle = begin + (end - begin) / 2; Board b = choices->data[middle].board; if (b > board) { return choices_search_idx(choices, board, begin, middle); } else if (b < board) { return choices_search_idx(choices, board, middle + 1, end); } else { return middle; } } int choices_insert(Choices* choices, Board board, int weight) { if (choices->size + 1 >= choices->capacity) { size_t new_capacity = choices->capacity * 2; ChoiceWeight* data = realloc(choices->data, new_capacity * sizeof(ChoiceWeight)); if (!data) { return -1; } choices->data = data; choices->capacity = new_capacity; } size_t idx = choices_search_idx(choices, board, 0, choices->size); ChoiceWeight elem = { board, weight }; for (size_t i = choices->size; i > idx; --i) { choices->data[i] = choices->data[i - 1]; } choices->data[idx] = elem; choices->size += 1; return 0; } /// `end` is exclusive. static inline int* choices_search_weight( const Choices* choices, Board board, size_t begin, size_t end) { if (end == begin) { return NULL; } size_t middle = begin + (end - begin) / 2; Board b = choices->data[middle].board; if (b > board) { return choices_search_weight(choices, board, begin, middle); } else if (b < board) { return choices_search_weight(choices, board, middle + 1, end); } else { return &choices->data[middle].weight; } } /// Returns a pointer to the found weight `int`, or `NULL` if none is found. int* choices_get(const Choices* choices, Board board) { return choices_search_weight(choices, board, 0, choices->size); } bool choices_has(const Choices* choices, Board board) { return choices_search_weight(choices, board, 0, choices->size) != NULL; } void print_choices(const Choices* choices) { printf("i\tboard\tweight\t\n"); for (size_t i = 0; i < choices->size; ++i) { printf("%lu\t%d\t%u\n", i, choices->data[i].board, choices->data[i].weight); } } const int win_reward = 1; const int loss_punishment = 1; const int cand_tolerance = 3; /// /// A `Player` represents a simulated player, ie. who's gonna play the game /// and train. /// typedef struct { /// The decision/choice weight of every possible combination. /// When a novel board choice is discovered, the choice is added with /// default weight. /// /// This is essentially the trained model. /// /// Choices are normalized or interned in this collection, meaning that /// every board that can be mirrored or rotated is only stored once. /// A visual example of a 2x2 grid: /// ``` /// a: 0 - b: - 0 /// - - - - /// ``` /// `a` and `b` are identical apart from rotation. Choices (meaning board) /// get stored as one of such variations. Which one depends on /// earliest occurence. Choices choices; /// What brick does the player use. Ch ch; /// In a given game, all the decisions are stored in this array. /// This is used to reward and punish, meaning add or remove weight for all /// choices in that game. Board current_choices[9]; size_t current_choices_size; } Player; int player_construct(Player* player, Ch ch) { *player = (Player) { .choices = NULL, .ch = ch, .current_choices = { (Board) { 0 } }, .current_choices_size = 0, }; int res = choices_contruct(&player->choices); if (res != 0) return res; return 0; } void player_destroy(Player* player) { // choices_destroy(&player->choices); } void player_clear_choices(Player* player) { // player->current_choices_size = 0; } void player_reward_win(Player* player) { for (size_t i = 0; i < player->current_choices_size; ++i) { int* weigth = choices_get(&player->choices, player->current_choices[i]); *weigth += win_reward; } } void player_punish_loss(Player* player) { for (size_t i = 0; i < player->current_choices_size; ++i) { int* weigth = choices_get(&player->choices, player->current_choices[i]); *weigth -= loss_punishment; } } void player_reward_or_punish(Player* player, int reward) { for (size_t i = 0; i < player->current_choices_size; ++i) { int* weigth = choices_get(&player->choices, player->current_choices[i]); *weigth += reward; } } /// /// Find existing board or variation of board in `Player`'s `choices`. /// All variations of the `board` will be tested, to see if an entry exists. /// If no variation is found, it is added with a default starting weight. Board player_intern_choice(Player* player, Board board) { for (int i = 0; i < 2; ++i) { for (int j = 0; j < 4; ++j) { if (choices_has(&player->choices, board)) { return board; } board = board_rotate(board); } board = board_flip(board); } choices_insert(&player->choices, board, 0); return board; } void player_take_turn(Player* player, Board* board) { typedef struct { uint32_t idx; Board board; } ChoiceCand; uint32_t possible_places = board_possible_places(*board); int cand_weight = INT_MIN; ChoiceCand cands[9] = { 0 }; size_t cands_size = 0; for (uint32_t idx = 0; idx < 9; ++idx) { if ((possible_places >> idx & 1) == 0) continue; Board pb = board_set_place(*board, player->ch, idx); Board choice = player_intern_choice(player, pb); int weight = *choices_get(&player->choices, choice); if (weight > cand_weight + cand_tolerance) { cand_weight = weight; cands[0] = (ChoiceCand) { idx, choice }; cands_size = 1; } else if (weight == cand_weight) { cands[cands_size] = (ChoiceCand) { idx, choice }; cands_size += 1; } } if (cands_size == 0) { fputs("no candidates\n", stderr); exit(EXIT_FAILURE); } ChoiceCand cand; if (cands_size == 1) { cand = cands[0]; } else { cand = cands[rand() % (cands_size - 1)]; } player->current_choices[player->current_choices_size] = cand.board; player->current_choices_size += 1; *board = board_set_place(*board, player->ch, cand.idx); } int main(void) { srand(time(NULL)); Player p1; player_construct(&p1, Ch_Cross); Player p2; player_construct(&p2, Ch_Circle); int games = 0; int p1_wins = 0; int p2_wins = 0; int draws = 0; printf("Training P1 against P2...\n"); const int traning_iterations = 1000000; for (int i = 0; i < traning_iterations; ++i) { Board board = 0; player_clear_choices(&p1); player_clear_choices(&p2); while (true) { player_take_turn(&p1, &board); if (board_player_has_won(board, Ch_Cross)) { p1_wins += 1; player_reward_win(&p1); player_punish_loss(&p2); break; } if (board_is_draw(board)) { draws += 1; player_reward_or_punish(&p1, rand() % 5 - 2); player_reward_or_punish(&p2, rand() % 5 - 2); break; } player_take_turn(&p2, &board); if (board_player_has_won(board, Ch_Circle)) { p2_wins += 1; player_reward_win(&p2); player_punish_loss(&p1); break; } if (board_is_draw(board)) { draws += 1; player_reward_or_punish(&p1, rand() % 5 - 2); player_reward_or_punish(&p2, rand() % 5 - 2); break; } } games += 1; } printf("Games %d, Wins: P1: %d, P2: %d, Draws: %d\n", games, p1_wins, p2_wins, draws); while (true) { printf("\n\nNew game\n"); Board board = 0; player_clear_choices(&p1); while (true) { printf("P1's turn\n"); player_take_turn(&p1, &board); print_board(board); if (board_player_has_won(board, Ch_Cross)) { fputs("\x1b[1;91mYou lost!\x1b[0m", stdout); break; } if (board_is_draw(board)) { fputs("\x1b[1;93mDraw!\x1b[0m", stdout); break; } printf("Your turn (0..8)\n> "); uint32_t possible_places = board_possible_places(board); int choice = 0; while (true) { int r = scanf("%d", &choice); if (r != 1 || (possible_places >> choice & 1) == 0) { printf("invalid choice\n"); } else { break; } } board = board_set_place(board, Ch_Circle, choice); print_board(board); if (board_player_has_won(board, Ch_Circle)) { fputs("\x1b[1;94mYou won!\x1b[0m", stdout); break; } if (board_is_draw(board)) { fputs("\x1b[1;93mDraw!\x1b[0m", stdout); break; } } } }