2024-12-20 16:23:20 +04:00

3440 lines
71 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Крестики-нолики: https://github.com/nczempin/gym-tic-tac-toe"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import gymnasium as gym\n",
"from gymnasium import spaces\n",
"\n",
"class TicTacToeEnv(gym.Env):\n",
" metadata = {'render.modes': ['human']}\n",
" \n",
" symbols = ['O', ' ', 'X']\n",
"\n",
" def __init__(self):\n",
" super().__init__()\n",
" self.action_space = spaces.Discrete(9) \n",
" self.observation_space = spaces.Discrete(9 * 3 * 2) \n",
" self.reset()\n",
"\n",
" def step(self, action):\n",
" done = False\n",
" reward = 0\n",
"\n",
" p, square = action\n",
"\n",
" board = self.state['board']\n",
" proposed = board[square] \n",
" om = self.state['on_move'] \n",
" if proposed != 0:\n",
" print(f\"Незаконный ход: Квадрат {square} уже занят.\")\n",
" done = True\n",
" reward = -1 * om \n",
" if p != om:\n",
" print(f\"Незаконный ход: игрок {p} не находится в движении\")\n",
" done = True\n",
" reward = -1 * om\n",
" else:\n",
" board[square] = p\n",
" self.state['on_move'] = -p\n",
"\n",
" for i in range(3):\n",
" if (board[i * 3] == p and board[i * 3 + 1] == p and board[i * 3 + 2] == p) or \\\n",
" (board[i] == p and board[i + 3] == p and board[i + 6] == p):\n",
" reward = p\n",
" done = True\n",
" break\n",
"\n",
" if (board[0] == p and board[4] == p and board[8] == p) or \\\n",
" (board[2] == p and board[4] == p and board[6] == p):\n",
" reward = p\n",
" done = True\n",
" \n",
" return self.state, reward, done, {}\n",
"\n",
" def reset(self):\n",
" self.state = {}\n",
" self.state['board'] = [0, 0, 0, 0, 0, 0, 0, 0, 0] \n",
" self.state['on_move'] = 1 \n",
" return self.state, {}\n",
"\n",
" def render(self, close=False):\n",
" if close:\n",
" return\n",
" print(\"on move: \" , self.symbols[self.state['on_move']+1])\n",
" for i in range (9):\n",
" print (self.symbols[self.state['board'][i]+1], end=\" \")\n",
" if ((i % 3) == 2):\n",
" print()\n",
"\n",
" def move_generator(self):\n",
" moves = []\n",
" for i in range(9):\n",
" if self.state['board'][i] == 0:\n",
" p = self.state['on_move']\n",
" m = [p, i]\n",
" moves.append(m)\n",
" return moves"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import random\n",
"\n",
"class Agent:\n",
" def __init__(self, symbol):\n",
" self.symbol = symbol\n",
" \n",
" def get_action(self, moves):\n",
" return random.choice(moves) "
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
"O \n",
" X \n",
" \n",
"on move: O\n",
"O \n",
" X \n",
" X \n",
"on move: X\n",
"O O \n",
" X \n",
" X \n",
"on move: O\n",
"O O \n",
" X \n",
"X X \n",
"on move: X\n",
"O O \n",
" X \n",
"X X O \n",
"on move: O\n",
"O O \n",
"X X \n",
"X X O \n",
"on move: X\n",
"O O O \n",
"X X \n",
"X X O \n",
"Episode 1, Total Reward: -1\n",
"Average Reward: -1.0\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
" O \n",
"on move: O\n",
" X \n",
" X \n",
" O \n",
"on move: X\n",
" O X \n",
" X \n",
" O \n",
"on move: O\n",
" O X \n",
" X \n",
" X O \n",
"on move: X\n",
"O O X \n",
" X \n",
" X O \n",
"on move: O\n",
"O O X \n",
"X X \n",
" X O \n",
"on move: X\n",
"O O X \n",
"X X O \n",
" X O \n",
"on move: O\n",
"O O X \n",
"X X O \n",
"X X O \n",
"Episode 2, Total Reward: 1\n",
"Average Reward: 0.0\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" O X \n",
" \n",
" \n",
"on move: O\n",
" O X \n",
" \n",
" X \n",
"on move: X\n",
" O X \n",
" O \n",
" X \n",
"on move: O\n",
" O X \n",
" O \n",
"X X \n",
"on move: X\n",
" O X \n",
"O O \n",
"X X \n",
"on move: O\n",
" O X \n",
"O O \n",
"X X X \n",
"Episode 3, Total Reward: 1\n",
"Average Reward: 0.3333333333333333\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
"O \n",
" \n",
" X \n",
"on move: O\n",
"O \n",
" \n",
" X X \n",
"on move: X\n",
"O O \n",
" \n",
" X X \n",
"on move: O\n",
"O O X \n",
" \n",
" X X \n",
"on move: X\n",
"O O X \n",
" O \n",
" X X \n",
"on move: O\n",
"O O X \n",
"X O \n",
" X X \n",
"on move: X\n",
"O O X \n",
"X O O \n",
" X X \n",
"on move: O\n",
"O O X \n",
"X O O \n",
"X X X \n",
"Episode 4, Total Reward: 1\n",
"Average Reward: 0.5\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
" O \n",
"on move: O\n",
" X \n",
" \n",
"X O \n",
"on move: X\n",
" X \n",
" O \n",
"X O \n",
"on move: O\n",
"X X \n",
" O \n",
"X O \n",
"on move: X\n",
"X X \n",
" O \n",
"X O O \n",
"on move: O\n",
"X X \n",
"X O \n",
"X O O \n",
"Episode 5, Total Reward: 1\n",
"Average Reward: 0.6\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" \n",
"X O \n",
"on move: O\n",
" X \n",
" \n",
"X O \n",
"on move: X\n",
"O X \n",
" \n",
"X O \n",
"on move: O\n",
"O X \n",
"X \n",
"X O \n",
"on move: X\n",
"O X O \n",
"X \n",
"X O \n",
"on move: O\n",
"O X O \n",
"X X \n",
"X O \n",
"on move: X\n",
"O X O \n",
"X O X \n",
"X O \n",
"on move: O\n",
"O X O \n",
"X O X \n",
"X O X \n",
"Episode 6, Total Reward: 0\n",
"Average Reward: 0.5\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" \n",
"X O \n",
"on move: O\n",
" \n",
" \n",
"X O X \n",
"on move: X\n",
" O \n",
" \n",
"X O X \n",
"on move: O\n",
" O \n",
" X \n",
"X O X \n",
"on move: X\n",
" O \n",
"O X \n",
"X O X \n",
"on move: O\n",
"X O \n",
"O X \n",
"X O X \n",
"on move: X\n",
"X O O \n",
"O X \n",
"X O X \n",
"on move: O\n",
"X O O \n",
"O X X \n",
"X O X \n",
"Episode 7, Total Reward: 1\n",
"Average Reward: 0.5714285714285714\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
"X \n",
" O \n",
" X \n",
"on move: X\n",
"X \n",
" O \n",
"O X \n",
"on move: O\n",
"X X \n",
" O \n",
"O X \n",
"on move: X\n",
"X X O \n",
" O \n",
"O X \n",
"on move: O\n",
"X X O \n",
" O \n",
"O X X \n",
"on move: X\n",
"X X O \n",
" O O \n",
"O X X \n",
"Episode 8, Total Reward: -1\n",
"Average Reward: 0.375\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" \n",
" O \n",
"on move: O\n",
"X \n",
" X \n",
" O \n",
"on move: X\n",
"X \n",
" X \n",
" O O \n",
"on move: O\n",
"X \n",
" X X \n",
" O O \n",
"on move: X\n",
"X O \n",
" X X \n",
" O O \n",
"on move: O\n",
"X X O \n",
" X X \n",
" O O \n",
"on move: X\n",
"X X O \n",
" X X \n",
"O O O \n",
"Episode 9, Total Reward: -1\n",
"Average Reward: 0.2222222222222222\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" \n",
"O \n",
"on move: O\n",
"X \n",
" X \n",
"O \n",
"on move: X\n",
"X O \n",
" X \n",
"O \n",
"on move: O\n",
"X O \n",
" X \n",
"O X \n",
"on move: X\n",
"X O \n",
"O X \n",
"O X \n",
"on move: O\n",
"X O \n",
"O X \n",
"O X X \n",
"on move: X\n",
"X O O \n",
"O X \n",
"O X X \n",
"on move: O\n",
"X O O \n",
"O X X \n",
"O X X \n",
"Episode 10, Total Reward: 1\n",
"Average Reward: 0.3\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" \n",
"X O \n",
"on move: O\n",
" X \n",
" \n",
"X O \n",
"on move: X\n",
" X \n",
" \n",
"X O O \n",
"on move: O\n",
" X \n",
" X \n",
"X O O \n",
"on move: X\n",
"O X \n",
" X \n",
"X O O \n",
"on move: O\n",
"O X \n",
" X X \n",
"X O O \n",
"on move: X\n",
"O X O \n",
" X X \n",
"X O O \n",
"on move: O\n",
"O X O \n",
"X X X \n",
"X O O \n",
"Episode 11, Total Reward: 1\n",
"Average Reward: 0.36363636363636365\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
" O \n",
"on move: O\n",
"X \n",
" X \n",
" O \n",
"on move: X\n",
"X \n",
" O X \n",
" O \n",
"on move: O\n",
"X X \n",
" O X \n",
" O \n",
"on move: X\n",
"X X \n",
"O O X \n",
" O \n",
"on move: O\n",
"X X X \n",
"O O X \n",
" O \n",
"Episode 12, Total Reward: 1\n",
"Average Reward: 0.4166666666666667\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
" O \n",
"X \n",
" X \n",
"on move: X\n",
" O \n",
"X O \n",
" X \n",
"on move: O\n",
" O \n",
"X O \n",
" X X \n",
"on move: X\n",
" O \n",
"X O \n",
"O X X \n",
"Episode 13, Total Reward: -1\n",
"Average Reward: 0.3076923076923077\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" O \n",
"X \n",
"on move: O\n",
" \n",
" O \n",
"X X \n",
"on move: X\n",
" O \n",
" O \n",
"X X \n",
"on move: O\n",
"X O \n",
" O \n",
"X X \n",
"on move: X\n",
"X O O \n",
" O \n",
"X X \n",
"on move: O\n",
"X O O \n",
" O X \n",
"X X \n",
"on move: X\n",
"X O O \n",
" O X \n",
"X X O \n",
"on move: O\n",
"X O O \n",
"X O X \n",
"X X O \n",
"Episode 14, Total Reward: 1\n",
"Average Reward: 0.35714285714285715\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" O \n",
" \n",
"on move: O\n",
"X \n",
" X O \n",
" \n",
"on move: X\n",
"X \n",
"O X O \n",
" \n",
"on move: O\n",
"X \n",
"O X O \n",
" X \n",
"Episode 15, Total Reward: 1\n",
"Average Reward: 0.4\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" \n",
"O \n",
"on move: O\n",
"X X \n",
" \n",
"O \n",
"on move: X\n",
"X X \n",
"O \n",
"O \n",
"on move: O\n",
"X X \n",
"O X \n",
"O \n",
"on move: X\n",
"X X \n",
"O X \n",
"O O \n",
"on move: O\n",
"X X X \n",
"O X \n",
"O O \n",
"Episode 16, Total Reward: 1\n",
"Average Reward: 0.4375\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" \n",
"X O \n",
"on move: O\n",
" \n",
"X \n",
"X O \n",
"on move: X\n",
" \n",
"X O \n",
"X O \n",
"on move: O\n",
" X \n",
"X O \n",
"X O \n",
"on move: X\n",
" X \n",
"X O \n",
"X O O \n",
"on move: O\n",
" X \n",
"X O X \n",
"X O O \n",
"on move: X\n",
"O X \n",
"X O X \n",
"X O O \n",
"Episode 17, Total Reward: -1\n",
"Average Reward: 0.35294117647058826\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" \n",
"X O \n",
"on move: O\n",
" X \n",
" \n",
"X O \n",
"on move: X\n",
" X \n",
" O \n",
"X O \n",
"on move: O\n",
"X X \n",
" O \n",
"X O \n",
"on move: X\n",
"X X \n",
" O \n",
"X O O \n",
"on move: O\n",
"X X \n",
"X O \n",
"X O O \n",
"Episode 18, Total Reward: 1\n",
"Average Reward: 0.3888888888888889\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
" O \n",
"on move: O\n",
" X \n",
" X \n",
" O \n",
"on move: X\n",
" X \n",
" X \n",
"O O \n",
"on move: O\n",
"X X \n",
" X \n",
"O O \n",
"on move: X\n",
"X X \n",
" X \n",
"O O O \n",
"Episode 19, Total Reward: -1\n",
"Average Reward: 0.3157894736842105\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
" X \n",
" X \n",
"on move: X\n",
" O \n",
" X \n",
"O X \n",
"on move: O\n",
"X O \n",
" X \n",
"O X \n",
"on move: X\n",
"X O \n",
"O X \n",
"O X \n",
"on move: O\n",
"X O \n",
"O X \n",
"O X X \n",
"on move: X\n",
"X O O \n",
"O X \n",
"O X X \n",
"on move: O\n",
"X O O \n",
"O X X \n",
"O X X \n",
"Episode 20, Total Reward: 1\n",
"Average Reward: 0.35\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
"O X \n",
" \n",
"on move: O\n",
" \n",
"O X X \n",
" \n",
"on move: X\n",
" O \n",
"O X X \n",
" \n",
"on move: O\n",
" X O \n",
"O X X \n",
" \n",
"on move: X\n",
"O X O \n",
"O X X \n",
" \n",
"on move: O\n",
"O X O \n",
"O X X \n",
" X \n",
"on move: X\n",
"O X O \n",
"O X X \n",
" O X \n",
"on move: O\n",
"O X O \n",
"O X X \n",
"X O X \n",
"Episode 21, Total Reward: 0\n",
"Average Reward: 0.3333333333333333\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" O \n",
"X \n",
" \n",
"on move: O\n",
" O X \n",
"X \n",
" \n",
"on move: X\n",
" O X \n",
"X \n",
"O \n",
"on move: O\n",
" O X \n",
"X X \n",
"O \n",
"on move: X\n",
" O X \n",
"X X \n",
"O O \n",
"on move: O\n",
" O X \n",
"X X X \n",
"O O \n",
"Episode 22, Total Reward: 1\n",
"Average Reward: 0.36363636363636365\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" O \n",
" \n",
"on move: O\n",
" X \n",
" O \n",
"X \n",
"on move: X\n",
"O X \n",
" O \n",
"X \n",
"on move: O\n",
"O X \n",
" O \n",
"X X \n",
"on move: X\n",
"O X \n",
" O O \n",
"X X \n",
"on move: O\n",
"O X X \n",
" O O \n",
"X X \n",
"on move: X\n",
"O X X \n",
"O O O \n",
"X X \n",
"Episode 23, Total Reward: -1\n",
"Average Reward: 0.30434782608695654\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
" O \n",
"on move: O\n",
" \n",
" X \n",
"X O \n",
"on move: X\n",
" \n",
" O X \n",
"X O \n",
"on move: O\n",
"X \n",
" O X \n",
"X O \n",
"on move: X\n",
"X O \n",
" O X \n",
"X O \n",
"on move: O\n",
"X O \n",
" O X \n",
"X X O \n",
"on move: X\n",
"X O O \n",
" O X \n",
"X X O \n",
"on move: O\n",
"X O O \n",
"X O X \n",
"X X O \n",
"Episode 24, Total Reward: 1\n",
"Average Reward: 0.3333333333333333\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
"X X \n",
" \n",
"on move: X\n",
" O \n",
"X X \n",
"O \n",
"on move: O\n",
" O \n",
"X X \n",
"O X \n",
"on move: X\n",
"O O \n",
"X X \n",
"O X \n",
"on move: O\n",
"O O X \n",
"X X \n",
"O X \n",
"on move: X\n",
"O O X \n",
"X X O \n",
"O X \n",
"on move: O\n",
"O O X \n",
"X X O \n",
"O X X \n",
"Episode 25, Total Reward: 0\n",
"Average Reward: 0.32\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" \n",
"X O \n",
"on move: O\n",
" \n",
" X \n",
"X O \n",
"on move: X\n",
" \n",
" X O \n",
"X O \n",
"on move: O\n",
" \n",
"X X O \n",
"X O \n",
"on move: X\n",
"O \n",
"X X O \n",
"X O \n",
"on move: O\n",
"O X \n",
"X X O \n",
"X O \n",
"Episode 26, Total Reward: 1\n",
"Average Reward: 0.34615384615384615\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
"O \n",
" X \n",
"on move: O\n",
"X \n",
"O \n",
" X \n",
"on move: X\n",
"X \n",
"O \n",
"O X \n",
"on move: O\n",
"X \n",
"O \n",
"O X X \n",
"on move: X\n",
"X \n",
"O O \n",
"O X X \n",
"on move: O\n",
"X X \n",
"O O \n",
"O X X \n",
"on move: X\n",
"X X O \n",
"O O \n",
"O X X \n",
"Episode 27, Total Reward: -1\n",
"Average Reward: 0.2962962962962963\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
"O \n",
"on move: O\n",
" X \n",
" X \n",
"O \n",
"on move: X\n",
" X \n",
" X \n",
"O O \n",
"on move: O\n",
" X \n",
" X \n",
"O X O \n",
"on move: X\n",
" X \n",
"O X \n",
"O X O \n",
"on move: O\n",
"X X \n",
"O X \n",
"O X O \n",
"on move: X\n",
"X O X \n",
"O X \n",
"O X O \n",
"on move: O\n",
"X O X \n",
"O X X \n",
"O X O \n",
"Episode 28, Total Reward: 0\n",
"Average Reward: 0.2857142857142857\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" O \n",
" \n",
"on move: O\n",
"X \n",
"X O \n",
" \n",
"on move: X\n",
"X O \n",
"X O \n",
" \n",
"on move: O\n",
"X O \n",
"X O \n",
" X \n",
"on move: X\n",
"X O \n",
"X O O \n",
" X \n",
"on move: O\n",
"X O \n",
"X O O \n",
"X X \n",
"Episode 29, Total Reward: 1\n",
"Average Reward: 0.3103448275862069\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
"O \n",
"X \n",
" \n",
"on move: O\n",
"O \n",
"X X \n",
" \n",
"on move: X\n",
"O \n",
"X O X \n",
" \n",
"on move: O\n",
"O \n",
"X O X \n",
" X \n",
"on move: X\n",
"O O \n",
"X O X \n",
" X \n",
"on move: O\n",
"O O \n",
"X O X \n",
"X X \n",
"on move: X\n",
"O O O \n",
"X O X \n",
"X X \n",
"Episode 30, Total Reward: -1\n",
"Average Reward: 0.26666666666666666\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" O \n",
" \n",
"on move: O\n",
"X \n",
"X O \n",
" \n",
"on move: X\n",
"X \n",
"X O \n",
" O \n",
"on move: O\n",
"X \n",
"X O \n",
"X O \n",
"Episode 31, Total Reward: 1\n",
"Average Reward: 0.2903225806451613\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
" X \n",
" O \n",
" X \n",
"on move: X\n",
" O X \n",
" O \n",
" X \n",
"on move: O\n",
"X O X \n",
" O \n",
" X \n",
"on move: X\n",
"X O X \n",
" O \n",
" X O \n",
"on move: O\n",
"X O X \n",
"X O \n",
" X O \n",
"on move: X\n",
"X O X \n",
"X O \n",
"O X O \n",
"on move: O\n",
"X O X \n",
"X X O \n",
"O X O \n",
"Episode 32, Total Reward: 0\n",
"Average Reward: 0.28125\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
"O \n",
"on move: O\n",
"X X \n",
" \n",
"O \n",
"on move: X\n",
"X X \n",
" \n",
"O O \n",
"on move: O\n",
"X X \n",
" X \n",
"O O \n",
"on move: X\n",
"X X \n",
"O X \n",
"O O \n",
"on move: O\n",
"X X \n",
"O X X \n",
"O O \n",
"on move: X\n",
"X X \n",
"O X X \n",
"O O O \n",
"Episode 33, Total Reward: -1\n",
"Average Reward: 0.24242424242424243\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
" X \n",
" O \n",
" X \n",
"on move: X\n",
" X \n",
" O O \n",
" X \n",
"on move: O\n",
" X \n",
" O O \n",
"X X \n",
"on move: X\n",
" O X \n",
" O O \n",
"X X \n",
"on move: O\n",
" O X \n",
" O O \n",
"X X X \n",
"Episode 34, Total Reward: 1\n",
"Average Reward: 0.2647058823529412\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
" \n",
" X O \n",
" X \n",
"on move: X\n",
" \n",
" X O \n",
"O X \n",
"on move: O\n",
" \n",
" X O \n",
"O X X \n",
"on move: X\n",
" O \n",
" X O \n",
"O X X \n",
"on move: O\n",
" X O \n",
" X O \n",
"O X X \n",
"Episode 35, Total Reward: 1\n",
"Average Reward: 0.2857142857142857\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" O \n",
" \n",
"X \n",
"on move: O\n",
" O \n",
" \n",
"X X \n",
"on move: X\n",
" O O \n",
" \n",
"X X \n",
"on move: O\n",
" O O \n",
" \n",
"X X X \n",
"Episode 36, Total Reward: 1\n",
"Average Reward: 0.3055555555555556\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X O \n",
" \n",
"on move: O\n",
"X \n",
" X O \n",
" \n",
"on move: X\n",
"X \n",
" X O \n",
" O \n",
"on move: O\n",
"X \n",
"X X O \n",
" O \n",
"on move: X\n",
"X \n",
"X X O \n",
"O O \n",
"on move: O\n",
"X X \n",
"X X O \n",
"O O \n",
"on move: X\n",
"X X \n",
"X X O \n",
"O O O \n",
"Episode 37, Total Reward: -1\n",
"Average Reward: 0.2702702702702703\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
"O \n",
"X \n",
"on move: O\n",
" \n",
"O X \n",
"X \n",
"on move: X\n",
" \n",
"O X \n",
"X O \n",
"on move: O\n",
" \n",
"O X X \n",
"X O \n",
"on move: X\n",
"O \n",
"O X X \n",
"X O \n",
"on move: O\n",
"O X \n",
"O X X \n",
"X O \n",
"Episode 38, Total Reward: 1\n",
"Average Reward: 0.2894736842105263\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" \n",
" O \n",
"on move: O\n",
"X \n",
" \n",
"X O \n",
"on move: X\n",
"X \n",
" O \n",
"X O \n",
"on move: O\n",
"X X \n",
" O \n",
"X O \n",
"on move: X\n",
"X X O \n",
" O \n",
"X O \n",
"on move: O\n",
"X X O \n",
" O \n",
"X O X \n",
"on move: X\n",
"X X O \n",
" O O \n",
"X O X \n",
"on move: O\n",
"X X O \n",
"X O O \n",
"X O X \n",
"Episode 39, Total Reward: 1\n",
"Average Reward: 0.3076923076923077\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
" O \n",
"on move: O\n",
" \n",
"X \n",
" O X \n",
"on move: X\n",
"O \n",
"X \n",
" O X \n",
"on move: O\n",
"O \n",
"X \n",
"X O X \n",
"on move: X\n",
"O O \n",
"X \n",
"X O X \n",
"on move: O\n",
"O X O \n",
"X \n",
"X O X \n",
"on move: X\n",
"O X O \n",
"X O \n",
"X O X \n",
"on move: O\n",
"O X O \n",
"X X O \n",
"X O X \n",
"Episode 40, Total Reward: 0\n",
"Average Reward: 0.3\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
" X \n",
" O \n",
" X \n",
"on move: X\n",
" X \n",
" O \n",
" O X \n",
"on move: O\n",
" X \n",
" X O \n",
" O X \n",
"on move: X\n",
" X \n",
" X O \n",
"O O X \n",
"on move: O\n",
" X X \n",
" X O \n",
"O O X \n",
"on move: X\n",
"O X X \n",
" X O \n",
"O O X \n",
"on move: O\n",
"O X X \n",
"X X O \n",
"O O X \n",
"Episode 41, Total Reward: 0\n",
"Average Reward: 0.2926829268292683\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" O \n",
" \n",
"on move: O\n",
"X X \n",
" O \n",
" \n",
"on move: X\n",
"X X \n",
" O \n",
"O \n",
"on move: O\n",
"X X X \n",
" O \n",
"O \n",
"Episode 42, Total Reward: 1\n",
"Average Reward: 0.30952380952380953\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" \n",
" X O \n",
"on move: O\n",
" X \n",
" \n",
" X O \n",
"on move: X\n",
" X \n",
" O \n",
" X O \n",
"on move: O\n",
"X X \n",
" O \n",
" X O \n",
"on move: X\n",
"X X O \n",
" O \n",
" X O \n",
"on move: O\n",
"X X O \n",
" O \n",
"X X O \n",
"on move: X\n",
"X X O \n",
" O O \n",
"X X O \n",
"Episode 43, Total Reward: -1\n",
"Average Reward: 0.27906976744186046\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" O X \n",
" \n",
" \n",
"on move: O\n",
" O X \n",
"X \n",
" \n",
"on move: X\n",
" O X \n",
"X \n",
" O \n",
"on move: O\n",
"X O X \n",
"X \n",
" O \n",
"on move: X\n",
"X O X \n",
"X \n",
"O O \n",
"on move: O\n",
"X O X \n",
"X X \n",
"O O \n",
"on move: X\n",
"X O X \n",
"X O X \n",
"O O \n",
"Episode 44, Total Reward: -1\n",
"Average Reward: 0.25\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" \n",
"O \n",
"on move: O\n",
"X \n",
" X \n",
"O \n",
"on move: X\n",
"X \n",
" X O \n",
"O \n",
"on move: O\n",
"X \n",
" X O \n",
"O X \n",
"Episode 45, Total Reward: 1\n",
"Average Reward: 0.26666666666666666\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" O \n",
" \n",
"on move: O\n",
"X \n",
" X O \n",
" \n",
"on move: X\n",
"X \n",
"O X O \n",
" \n",
"on move: O\n",
"X \n",
"O X O \n",
" X \n",
"on move: X\n",
"X \n",
"O X O \n",
"O X \n",
"on move: O\n",
"X X \n",
"O X O \n",
"O X \n",
"Episode 46, Total Reward: 1\n",
"Average Reward: 0.2826086956521739\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" O \n",
" \n",
"on move: O\n",
" X \n",
" O \n",
" X \n",
"on move: X\n",
" X \n",
"O O \n",
" X \n",
"on move: O\n",
"X X \n",
"O O \n",
" X \n",
"on move: X\n",
"X X \n",
"O O \n",
" O X \n",
"on move: O\n",
"X X \n",
"O O \n",
"X O X \n",
"on move: X\n",
"X X \n",
"O O O \n",
"X O X \n",
"Episode 47, Total Reward: -1\n",
"Average Reward: 0.2553191489361702\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" \n",
" O X \n",
"on move: O\n",
" \n",
" X \n",
" O X \n",
"on move: X\n",
" O \n",
" X \n",
" O X \n",
"on move: O\n",
" X O \n",
" X \n",
" O X \n",
"on move: X\n",
" X O \n",
" O X \n",
" O X \n",
"on move: O\n",
" X O \n",
" O X \n",
"X O X \n",
"on move: X\n",
"O X O \n",
" O X \n",
"X O X \n",
"on move: O\n",
"O X O \n",
"X O X \n",
"X O X \n",
"Episode 48, Total Reward: 0\n",
"Average Reward: 0.25\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" O \n",
"X \n",
" \n",
"on move: O\n",
" O \n",
"X X \n",
" \n",
"on move: X\n",
" O \n",
"X X \n",
"O \n",
"on move: O\n",
" O \n",
"X X \n",
"O X \n",
"on move: X\n",
"O O \n",
"X X \n",
"O X \n",
"on move: O\n",
"O O \n",
"X X \n",
"O X X \n",
"on move: X\n",
"O O \n",
"X X O \n",
"O X X \n",
"on move: O\n",
"O O X \n",
"X X O \n",
"O X X \n",
"Episode 49, Total Reward: 0\n",
"Average Reward: 0.24489795918367346\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
"O \n",
"X \n",
" \n",
"on move: O\n",
"O \n",
"X \n",
"X \n",
"on move: X\n",
"O \n",
"X \n",
"X O \n",
"on move: O\n",
"O \n",
"X X \n",
"X O \n",
"on move: X\n",
"O O \n",
"X X \n",
"X O \n",
"on move: O\n",
"O O \n",
"X X X \n",
"X O \n",
"Episode 50, Total Reward: 1\n",
"Average Reward: 0.26\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X O \n",
" \n",
"on move: O\n",
" \n",
" X O \n",
"X \n",
"on move: X\n",
" \n",
" X O \n",
"X O \n",
"on move: O\n",
" X \n",
" X O \n",
"X O \n",
"Episode 51, Total Reward: 1\n",
"Average Reward: 0.27450980392156865\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
" O \n",
"on move: O\n",
" \n",
"X X \n",
" O \n",
"on move: X\n",
" \n",
"X X \n",
"O O \n",
"on move: O\n",
"X \n",
"X X \n",
"O O \n",
"on move: X\n",
"X \n",
"X X \n",
"O O O \n",
"Episode 52, Total Reward: -1\n",
"Average Reward: 0.25\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
"O \n",
"X \n",
"on move: O\n",
" \n",
"O \n",
"X X \n",
"on move: X\n",
" \n",
"O O \n",
"X X \n",
"on move: O\n",
" X \n",
"O O \n",
"X X \n",
"on move: X\n",
" O X \n",
"O O \n",
"X X \n",
"on move: O\n",
" O X \n",
"O O \n",
"X X X \n",
"Episode 53, Total Reward: 1\n",
"Average Reward: 0.2641509433962264\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
"X X \n",
" \n",
"on move: X\n",
"O O \n",
"X X \n",
" \n",
"on move: O\n",
"O O \n",
"X X \n",
" X \n",
"on move: X\n",
"O O \n",
"X O X \n",
" X \n",
"on move: O\n",
"O O X \n",
"X O X \n",
" X \n",
"Episode 54, Total Reward: 1\n",
"Average Reward: 0.2777777777777778\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
"O \n",
" \n",
"on move: O\n",
" X \n",
"O \n",
"X \n",
"on move: X\n",
" X \n",
"O O \n",
"X \n",
"on move: O\n",
" X X \n",
"O O \n",
"X \n",
"on move: X\n",
" X X \n",
"O O \n",
"X O \n",
"on move: O\n",
"X X X \n",
"O O \n",
"X O \n",
"Episode 55, Total Reward: 1\n",
"Average Reward: 0.2909090909090909\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" O \n",
"X \n",
"on move: O\n",
" \n",
"X O \n",
"X \n",
"on move: X\n",
" \n",
"X O \n",
"X O \n",
"on move: O\n",
" X \n",
"X O \n",
"X O \n",
"on move: X\n",
" X O \n",
"X O \n",
"X O \n",
"on move: O\n",
"X X O \n",
"X O \n",
"X O \n",
"Episode 56, Total Reward: 1\n",
"Average Reward: 0.30357142857142855\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
" X X \n",
" \n",
"on move: X\n",
"O O \n",
" X X \n",
" \n",
"on move: O\n",
"O O X \n",
" X X \n",
" \n",
"on move: X\n",
"O O X \n",
" X X \n",
" O \n",
"on move: O\n",
"O O X \n",
"X X X \n",
" O \n",
"Episode 57, Total Reward: 1\n",
"Average Reward: 0.3157894736842105\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
"X \n",
" O \n",
" X \n",
"on move: X\n",
"X \n",
" O \n",
"O X \n",
"on move: O\n",
"X \n",
" X O \n",
"O X \n",
"Episode 58, Total Reward: 1\n",
"Average Reward: 0.3275862068965517\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
" O \n",
" X \n",
" X \n",
"on move: X\n",
" O O \n",
" X \n",
" X \n",
"on move: O\n",
"X O O \n",
" X \n",
" X \n",
"on move: X\n",
"X O O \n",
"O X \n",
" X \n",
"on move: O\n",
"X O O \n",
"O X \n",
" X X \n",
"Episode 59, Total Reward: 1\n",
"Average Reward: 0.3389830508474576\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
" \n",
" O \n",
"X X \n",
"on move: X\n",
" \n",
"O O \n",
"X X \n",
"on move: O\n",
" X \n",
"O O \n",
"X X \n",
"on move: X\n",
" X \n",
"O O \n",
"X O X \n",
"on move: O\n",
"X X \n",
"O O \n",
"X O X \n",
"on move: X\n",
"X X O \n",
"O O \n",
"X O X \n",
"on move: O\n",
"X X O \n",
"O X O \n",
"X O X \n",
"Episode 60, Total Reward: 1\n",
"Average Reward: 0.35\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
"O X \n",
" \n",
"on move: O\n",
" X \n",
"O X \n",
" \n",
"on move: X\n",
"O X \n",
"O X \n",
" \n",
"on move: O\n",
"O X \n",
"O X \n",
"X \n",
"on move: X\n",
"O O X \n",
"O X \n",
"X \n",
"on move: O\n",
"O O X \n",
"O X \n",
"X X \n",
"Episode 61, Total Reward: 1\n",
"Average Reward: 0.36065573770491804\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X O \n",
" \n",
"on move: O\n",
" \n",
"X X O \n",
" \n",
"on move: X\n",
"O \n",
"X X O \n",
" \n",
"on move: O\n",
"O \n",
"X X O \n",
" X \n",
"on move: X\n",
"O O \n",
"X X O \n",
" X \n",
"on move: O\n",
"O O \n",
"X X O \n",
" X X \n",
"on move: X\n",
"O O O \n",
"X X O \n",
" X X \n",
"Episode 62, Total Reward: -1\n",
"Average Reward: 0.3387096774193548\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
"O \n",
" \n",
"X \n",
"on move: O\n",
"O X \n",
" \n",
"X \n",
"on move: X\n",
"O X \n",
" O \n",
"X \n",
"on move: O\n",
"O X \n",
" O \n",
"X X \n",
"on move: X\n",
"O X \n",
" O O \n",
"X X \n",
"on move: O\n",
"O X \n",
"X O O \n",
"X X \n",
"on move: X\n",
"O O X \n",
"X O O \n",
"X X \n",
"on move: O\n",
"O O X \n",
"X O O \n",
"X X X \n",
"Episode 63, Total Reward: 1\n",
"Average Reward: 0.3492063492063492\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" O \n",
"X \n",
" \n",
"on move: O\n",
" X O \n",
"X \n",
" \n",
"on move: X\n",
" X O \n",
"X \n",
" O \n",
"on move: O\n",
" X O \n",
"X \n",
" O X \n",
"on move: X\n",
" X O \n",
"X O \n",
" O X \n",
"on move: O\n",
" X O \n",
"X O \n",
"X O X \n",
"on move: X\n",
"O X O \n",
"X O \n",
"X O X \n",
"on move: O\n",
"O X O \n",
"X O X \n",
"X O X \n",
"Episode 64, Total Reward: 0\n",
"Average Reward: 0.34375\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" O \n",
" \n",
"on move: O\n",
" X \n",
" O X \n",
" \n",
"on move: X\n",
" X \n",
" O X \n",
" O \n",
"on move: O\n",
" X \n",
" O X \n",
" X O \n",
"on move: X\n",
" O X \n",
" O X \n",
" X O \n",
"on move: O\n",
" O X \n",
" O X \n",
"X X O \n",
"on move: X\n",
"O O X \n",
" O X \n",
"X X O \n",
"Episode 65, Total Reward: -1\n",
"Average Reward: 0.3230769230769231\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" O \n",
"X \n",
"on move: O\n",
" X \n",
" O \n",
"X \n",
"on move: X\n",
" X \n",
" O \n",
"X O \n",
"on move: O\n",
" X \n",
" O X \n",
"X O \n",
"on move: X\n",
" X \n",
" O X \n",
"X O O \n",
"on move: O\n",
"X X \n",
" O X \n",
"X O O \n",
"on move: X\n",
"X O X \n",
" O X \n",
"X O O \n",
"Episode 66, Total Reward: -1\n",
"Average Reward: 0.30303030303030304\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
" O \n",
" X \n",
" X \n",
"on move: X\n",
" O \n",
"O X \n",
" X \n",
"on move: O\n",
" O \n",
"O X \n",
" X X \n",
"on move: X\n",
" O O \n",
"O X \n",
" X X \n",
"on move: O\n",
" O O \n",
"O X \n",
"X X X \n",
"Episode 67, Total Reward: 1\n",
"Average Reward: 0.31343283582089554\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
" X \n",
" X \n",
"on move: X\n",
" O \n",
" X O \n",
" X \n",
"on move: O\n",
" O X \n",
" X O \n",
" X \n",
"on move: X\n",
" O X \n",
"O X O \n",
" X \n",
"on move: O\n",
" O X \n",
"O X O \n",
"X X \n",
"Episode 68, Total Reward: 1\n",
"Average Reward: 0.3235294117647059\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
"O \n",
" \n",
"X \n",
"on move: O\n",
"O \n",
" X \n",
"X \n",
"on move: X\n",
"O \n",
" O X \n",
"X \n",
"on move: O\n",
"O \n",
"X O X \n",
"X \n",
"on move: X\n",
"O \n",
"X O X \n",
"X O \n",
"on move: O\n",
"O \n",
"X O X \n",
"X O X \n",
"on move: X\n",
"O O \n",
"X O X \n",
"X O X \n",
"Episode 69, Total Reward: -1\n",
"Average Reward: 0.30434782608695654\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
"O \n",
"on move: O\n",
" X \n",
" X \n",
"O \n",
"on move: X\n",
" X \n",
" X \n",
"O O \n",
"on move: O\n",
" X X \n",
" X \n",
"O O \n",
"on move: X\n",
" X X \n",
" O X \n",
"O O \n",
"on move: O\n",
"X X X \n",
" O X \n",
"O O \n",
"Episode 70, Total Reward: 1\n",
"Average Reward: 0.3142857142857143\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" O \n",
"X \n",
" \n",
"on move: O\n",
" O \n",
"X \n",
" X \n",
"on move: X\n",
"O O \n",
"X \n",
" X \n",
"on move: O\n",
"O O \n",
"X X \n",
" X \n",
"on move: X\n",
"O O \n",
"X X \n",
"O X \n",
"on move: O\n",
"O O \n",
"X X \n",
"O X X \n",
"on move: X\n",
"O O O \n",
"X X \n",
"O X X \n",
"Episode 71, Total Reward: -1\n",
"Average Reward: 0.29577464788732394\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" O \n",
" \n",
"on move: O\n",
" X \n",
" O \n",
" X \n",
"on move: X\n",
" X \n",
" O O \n",
" X \n",
"on move: O\n",
" X \n",
" O O \n",
"X X \n",
"on move: X\n",
" X \n",
" O O \n",
"X X O \n",
"on move: O\n",
"X X \n",
" O O \n",
"X X O \n",
"on move: X\n",
"X O X \n",
" O O \n",
"X X O \n",
"on move: O\n",
"X O X \n",
"X O O \n",
"X X O \n",
"Episode 72, Total Reward: 1\n",
"Average Reward: 0.3055555555555556\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
"O \n",
" X \n",
" \n",
"on move: O\n",
"O \n",
" X \n",
"X \n",
"on move: X\n",
"O \n",
" X \n",
"X O \n",
"on move: O\n",
"O \n",
" X \n",
"X X O \n",
"on move: X\n",
"O \n",
"O X \n",
"X X O \n",
"on move: O\n",
"O X \n",
"O X \n",
"X X O \n",
"on move: X\n",
"O O X \n",
"O X \n",
"X X O \n",
"on move: O\n",
"O O X \n",
"O X X \n",
"X X O \n",
"Episode 73, Total Reward: 1\n",
"Average Reward: 0.3150684931506849\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
" O \n",
"on move: O\n",
" \n",
"X \n",
"X O \n",
"on move: X\n",
"O \n",
"X \n",
"X O \n",
"on move: O\n",
"O X \n",
"X \n",
"X O \n",
"on move: X\n",
"O X \n",
"X \n",
"X O O \n",
"on move: O\n",
"O X X \n",
"X \n",
"X O O \n",
"on move: X\n",
"O X X \n",
"X O \n",
"X O O \n",
"Episode 74, Total Reward: -1\n",
"Average Reward: 0.2972972972972973\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" O X \n",
" \n",
" \n",
"on move: O\n",
" O X \n",
" X \n",
" \n",
"on move: X\n",
" O X \n",
" X \n",
" O \n",
"on move: O\n",
" O X \n",
"X X \n",
" O \n",
"on move: X\n",
" O X \n",
"X X \n",
" O O \n",
"on move: O\n",
"X O X \n",
"X X \n",
" O O \n",
"on move: X\n",
"X O X \n",
"X O X \n",
" O O \n",
"Episode 75, Total Reward: -1\n",
"Average Reward: 0.28\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
" O \n",
"on move: O\n",
" X \n",
" \n",
" X O \n",
"on move: X\n",
" X \n",
" O \n",
" X O \n",
"on move: O\n",
" X X \n",
" O \n",
" X O \n",
"on move: X\n",
" X X \n",
" O O \n",
" X O \n",
"on move: O\n",
"X X X \n",
" O O \n",
" X O \n",
"Episode 76, Total Reward: 1\n",
"Average Reward: 0.2894736842105263\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
"X O \n",
" X \n",
" \n",
"on move: X\n",
"X O \n",
" O X \n",
" \n",
"on move: O\n",
"X O \n",
" O X \n",
"X \n",
"on move: X\n",
"X O \n",
"O O X \n",
"X \n",
"on move: O\n",
"X O X \n",
"O O X \n",
"X \n",
"on move: X\n",
"X O X \n",
"O O X \n",
"X O \n",
"Episode 77, Total Reward: -1\n",
"Average Reward: 0.2727272727272727\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
" O \n",
" \n",
" X X \n",
"on move: X\n",
"O O \n",
" \n",
" X X \n",
"on move: O\n",
"O O \n",
" X \n",
" X X \n",
"on move: X\n",
"O O \n",
" X \n",
"O X X \n",
"on move: O\n",
"O O X \n",
" X \n",
"O X X \n",
"Episode 78, Total Reward: 1\n",
"Average Reward: 0.28205128205128205\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" O \n",
" \n",
"X \n",
"on move: O\n",
" O \n",
" \n",
"X X \n",
"on move: X\n",
"O O \n",
" \n",
"X X \n",
"on move: O\n",
"O O \n",
"X \n",
"X X \n",
"on move: X\n",
"O O \n",
"X \n",
"X X O \n",
"on move: O\n",
"O O X \n",
"X \n",
"X X O \n",
"on move: X\n",
"O O X \n",
"X O \n",
"X X O \n",
"on move: O\n",
"O O X \n",
"X X O \n",
"X X O \n",
"Episode 79, Total Reward: 1\n",
"Average Reward: 0.2911392405063291\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
" O \n",
"on move: O\n",
" \n",
"X X \n",
" O \n",
"on move: X\n",
" O \n",
"X X \n",
" O \n",
"on move: O\n",
"X O \n",
"X X \n",
" O \n",
"on move: X\n",
"X O O \n",
"X X \n",
" O \n",
"on move: O\n",
"X O O \n",
"X X \n",
"X O \n",
"Episode 80, Total Reward: 1\n",
"Average Reward: 0.3\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
" O \n",
"on move: O\n",
" \n",
"X X \n",
" O \n",
"on move: X\n",
" O \n",
"X X \n",
" O \n",
"on move: O\n",
" O \n",
"X X \n",
"X O \n",
"on move: X\n",
"O O \n",
"X X \n",
"X O \n",
"on move: O\n",
"O O X \n",
"X X \n",
"X O \n",
"Episode 81, Total Reward: 1\n",
"Average Reward: 0.30864197530864196\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" \n",
" O \n",
"on move: O\n",
"X \n",
"X \n",
" O \n",
"on move: X\n",
"X \n",
"X O \n",
" O \n",
"on move: O\n",
"X X \n",
"X O \n",
" O \n",
"on move: X\n",
"X X \n",
"X O \n",
"O O \n",
"on move: O\n",
"X X \n",
"X O \n",
"O X O \n",
"on move: X\n",
"X X \n",
"X O O \n",
"O X O \n",
"on move: O\n",
"X X X \n",
"X O O \n",
"O X O \n",
"Episode 82, Total Reward: 1\n",
"Average Reward: 0.3170731707317073\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
" O \n",
"on move: O\n",
" X \n",
"X \n",
" O \n",
"on move: X\n",
"O X \n",
"X \n",
" O \n",
"on move: O\n",
"O X \n",
"X \n",
" O X \n",
"on move: X\n",
"O X \n",
"X O \n",
" O X \n",
"on move: O\n",
"O X \n",
"X O \n",
"X O X \n",
"on move: X\n",
"O O X \n",
"X O \n",
"X O X \n",
"on move: O\n",
"O O X \n",
"X X O \n",
"X O X \n",
"Episode 83, Total Reward: 1\n",
"Average Reward: 0.3253012048192771\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
"O \n",
" X \n",
" \n",
"on move: O\n",
"O \n",
" X X \n",
" \n",
"on move: X\n",
"O \n",
"O X X \n",
" \n",
"on move: O\n",
"O \n",
"O X X \n",
" X \n",
"on move: X\n",
"O \n",
"O X X \n",
" X O \n",
"on move: O\n",
"O X \n",
"O X X \n",
" X O \n",
"on move: X\n",
"O X \n",
"O X X \n",
"O X O \n",
"Episode 84, Total Reward: -1\n",
"Average Reward: 0.30952380952380953\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
" O \n",
" X \n",
" X \n",
"on move: X\n",
" O \n",
" X \n",
" X O \n",
"on move: O\n",
" O X \n",
" X \n",
" X O \n",
"on move: X\n",
"O O X \n",
" X \n",
" X O \n",
"on move: O\n",
"O O X \n",
" X \n",
"X X O \n",
"on move: X\n",
"O O X \n",
" O X \n",
"X X O \n",
"Episode 85, Total Reward: -1\n",
"Average Reward: 0.29411764705882354\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
" O \n",
"on move: O\n",
" \n",
"X \n",
"X O \n",
"on move: X\n",
" O \n",
"X \n",
"X O \n",
"on move: O\n",
" O X \n",
"X \n",
"X O \n",
"on move: X\n",
" O X \n",
"X O \n",
"X O \n",
"on move: O\n",
"X O X \n",
"X O \n",
"X O \n",
"Episode 86, Total Reward: 1\n",
"Average Reward: 0.3023255813953488\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" O \n",
" \n",
"on move: O\n",
"X X \n",
" O \n",
" \n",
"on move: X\n",
"X X O \n",
" O \n",
" \n",
"on move: O\n",
"X X O \n",
" O \n",
" X \n",
"on move: X\n",
"X X O \n",
"O O \n",
" X \n",
"on move: O\n",
"X X O \n",
"O O X \n",
" X \n",
"on move: X\n",
"X X O \n",
"O O X \n",
" X O \n",
"on move: O\n",
"X X O \n",
"O O X \n",
"X X O \n",
"Episode 87, Total Reward: 0\n",
"Average Reward: 0.2988505747126437\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
"O \n",
" X \n",
" \n",
"on move: O\n",
"O \n",
" X X \n",
" \n",
"on move: X\n",
"O \n",
" X X \n",
"O \n",
"on move: O\n",
"O \n",
" X X \n",
"O X \n",
"on move: X\n",
"O \n",
"O X X \n",
"O X \n",
"Episode 88, Total Reward: -1\n",
"Average Reward: 0.2840909090909091\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
"O \n",
"on move: O\n",
" X \n",
" X \n",
"O \n",
"on move: X\n",
" X \n",
" X \n",
"O O \n",
"on move: O\n",
" X \n",
" X X \n",
"O O \n",
"on move: X\n",
"O X \n",
" X X \n",
"O O \n",
"on move: O\n",
"O X \n",
"X X X \n",
"O O \n",
"Episode 89, Total Reward: 1\n",
"Average Reward: 0.29213483146067415\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" O \n",
"X \n",
" \n",
"on move: O\n",
" O \n",
"X X \n",
" \n",
"on move: X\n",
" O O \n",
"X X \n",
" \n",
"on move: O\n",
" O O \n",
"X X \n",
" X \n",
"on move: X\n",
" O O \n",
"X X \n",
" O X \n",
"on move: O\n",
" O O \n",
"X X X \n",
" O X \n",
"Episode 90, Total Reward: 1\n",
"Average Reward: 0.3\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
"X X \n",
" \n",
"on move: X\n",
" O \n",
"X X \n",
"O \n",
"on move: O\n",
" O \n",
"X X \n",
"O X \n",
"on move: X\n",
" O \n",
"X X \n",
"O X O \n",
"on move: O\n",
" X O \n",
"X X \n",
"O X O \n",
"on move: X\n",
" X O \n",
"X O X \n",
"O X O \n",
"Episode 91, Total Reward: -1\n",
"Average Reward: 0.2857142857142857\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" O X \n",
" \n",
"on move: O\n",
" X \n",
" O X \n",
" \n",
"on move: X\n",
" X \n",
" O X \n",
" O \n",
"on move: O\n",
" X \n",
"X O X \n",
" O \n",
"on move: X\n",
" X \n",
"X O X \n",
"O O \n",
"on move: O\n",
"X X \n",
"X O X \n",
"O O \n",
"on move: X\n",
"X X \n",
"X O X \n",
"O O O \n",
"Episode 92, Total Reward: -1\n",
"Average Reward: 0.2717391304347826\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" \n",
"O X \n",
"on move: O\n",
" X \n",
" \n",
"O X \n",
"on move: X\n",
" X \n",
" O \n",
"O X \n",
"on move: O\n",
"X X \n",
" O \n",
"O X \n",
"on move: X\n",
"X X \n",
" O \n",
"O X O \n",
"on move: O\n",
"X X \n",
"X O \n",
"O X O \n",
"on move: X\n",
"X X \n",
"X O O \n",
"O X O \n",
"on move: O\n",
"X X X \n",
"X O O \n",
"O X O \n",
"Episode 93, Total Reward: 1\n",
"Average Reward: 0.27956989247311825\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
"O X \n",
" \n",
"on move: O\n",
" \n",
"O X X \n",
" \n",
"on move: X\n",
" \n",
"O X X \n",
"O \n",
"on move: O\n",
" X \n",
"O X X \n",
"O \n",
"on move: X\n",
" O X \n",
"O X X \n",
"O \n",
"on move: O\n",
"X O X \n",
"O X X \n",
"O \n",
"on move: X\n",
"X O X \n",
"O X X \n",
"O O \n",
"on move: O\n",
"X O X \n",
"O X X \n",
"O X O \n",
"Episode 94, Total Reward: 0\n",
"Average Reward: 0.2765957446808511\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
"O \n",
"on move: O\n",
" X \n",
" \n",
"O X \n",
"on move: X\n",
" O X \n",
" \n",
"O X \n",
"on move: O\n",
" O X \n",
" X \n",
"O X \n",
"Episode 95, Total Reward: 1\n",
"Average Reward: 0.28421052631578947\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X O \n",
" \n",
"on move: O\n",
"X \n",
" X O \n",
" \n",
"on move: X\n",
"X \n",
" X O \n",
" O \n",
"on move: O\n",
"X X \n",
" X O \n",
" O \n",
"on move: X\n",
"X X O \n",
" X O \n",
" O \n",
"on move: O\n",
"X X O \n",
" X O \n",
" O X \n",
"Episode 96, Total Reward: 1\n",
"Average Reward: 0.2916666666666667\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
" O \n",
"on move: O\n",
"X \n",
"X \n",
" O \n",
"on move: X\n",
"X \n",
"X \n",
"O O \n",
"on move: O\n",
"X X \n",
"X \n",
"O O \n",
"on move: X\n",
"X X \n",
"X O \n",
"O O \n",
"on move: O\n",
"X X \n",
"X O X \n",
"O O \n",
"on move: X\n",
"X X \n",
"X O X \n",
"O O O \n",
"Episode 97, Total Reward: -1\n",
"Average Reward: 0.27835051546391754\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
" O \n",
"on move: O\n",
" X \n",
"X \n",
" O \n",
"on move: X\n",
"O X \n",
"X \n",
" O \n",
"on move: O\n",
"O X \n",
"X X \n",
" O \n",
"on move: X\n",
"O X \n",
"X X \n",
"O O \n",
"on move: O\n",
"O X \n",
"X X X \n",
"O O \n",
"Episode 98, Total Reward: 1\n",
"Average Reward: 0.2857142857142857\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
" X \n",
"X \n",
"on move: X\n",
"O O \n",
" X \n",
"X \n",
"on move: O\n",
"O O \n",
" X X \n",
"X \n",
"on move: X\n",
"O O O \n",
" X X \n",
"X \n",
"Episode 99, Total Reward: -1\n",
"Average Reward: 0.2727272727272727\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" O \n",
" \n",
"on move: O\n",
"X \n",
" O \n",
" X \n",
"on move: X\n",
"X \n",
" O \n",
"O X \n",
"on move: O\n",
"X X \n",
" O \n",
"O X \n",
"on move: X\n",
"X O X \n",
" O \n",
"O X \n",
"on move: O\n",
"X O X \n",
" O X \n",
"O X \n",
"on move: X\n",
"X O X \n",
" O X \n",
"O X O \n",
"on move: O\n",
"X O X \n",
"X O X \n",
"O X O \n",
"Episode 100, Total Reward: 0\n",
"Average Reward: 0.27\n"
]
}
],
"source": [
"# Evaluation run: play num_episodes games in which `agent` picks every move,\n",
"# then report each game's total reward and the running average so far.\n",
"environment = TicTacToeEnv()\n",
"agent = Agent(symbol=1)\n",
"\n",
"num_episodes = 100\n",
"max_moves_per_game = 9  # a 3x3 board cannot hold more than nine marks\n",
"collected_rewards = []\n",
"\n",
"for episode in range(1, num_episodes + 1):\n",
"    # Start every game from an empty board; the returned observation is not needed here.\n",
"    environment.reset()\n",
"    total_reward = 0\n",
"\n",
"    for _move_number in range(max_moves_per_game):\n",
"        # NOTE(review): move_generator() presumably lists the legal moves for the\n",
"        # side on move - confirm against its definition in the environment cell.\n",
"        moves = environment.move_generator()\n",
"\n",
"        # No candidate moves left: the game cannot continue.\n",
"        if not moves:\n",
"            break\n",
"\n",
"        # A forced move needs no decision, so the agent is only consulted when there is a choice.\n",
"        if len(moves) == 1:\n",
"            move = moves[0]\n",
"        else:\n",
"            move = agent.get_action(moves)\n",
"\n",
"        # step() is unpacked as a 4-tuple (state, reward, done, info); only reward and done are used.\n",
"        _next_state, reward, done, _info = environment.step(move)\n",
"        total_reward += reward\n",
"\n",
"        # Print the board after every move.\n",
"        environment.render()\n",
"\n",
"        if done:\n",
"            break\n",
"\n",
"    collected_rewards.append(total_reward)\n",
"\n",
"    print(f\"Episode {episode}, Total Reward: {total_reward}\")\n",
"    average_reward = sum(collected_rewards) / len(collected_rewards)\n",
"    print(f\"Average Reward: {average_reward}\")\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "aimenv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}