{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Крестики-нолики: https://github.com/nczempin/gym-tic-tac-toe" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Перевод среды на Gymnasium \n", "\n", "*Ключевые функции Gymnasium:* \n", "env.reset() — инициализация среды. \n", "env.step(action) — выполнение действия и переход в новое состояние. \n", "env.render() — визуализация текущего состояния среды. " ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "import gymnasium as gym\n", "from gymnasium import spaces\n", "import numpy as np\n", "\n", "class TicTacToeEnv(gym.Env):\n", " metadata = {'render.modes': ['human']}\n", " \n", " symbols = ['O', ' ', 'X']\n", "\n", " def __init__(self):\n", " super().__init__()\n", " self.action_space = spaces.Discrete(9)\n", " self.observation_space = spaces.Discrete(9 * 3 * 2)\n", " self.reset()\n", "\n", " def step(self, action):\n", " done = False\n", " reward = 0\n", "\n", " p, square = action # p - игрок (1 или -1), square - номер клетки\n", "\n", " board = self.state['board']\n", " proposed = board[square] \n", " om = self.state['on_move'] \n", " if proposed != 0: # Клетка уже занята\n", " print(f\"Незаконный ход: Квадрат {square} уже занят.\")\n", " done = True\n", " reward = -1 * om \n", " if p != om: # Не тот игрок на ходу\n", " print(f\"Незаконный ход: игрок {p} не находится в движении\")\n", " done = True\n", " reward = -1 * om\n", " else:\n", " board[square] = p\n", " self.state['on_move'] = -p\n", "\n", " for i in range(3):\n", " # Горизонтали и вертикали\n", " if (board[i * 3] == p and board[i * 3 + 1] == p and board[i * 3 + 2] == p) or \\\n", " (board[i] == p and board[i + 3] == p and board[i + 6] == p):\n", " reward = p\n", " done = True\n", " break\n", "\n", " # Диагонали\n", " if (board[0] == p and board[4] == p and board[8] == p) or \\\n", " (board[2] == p and board[4] == p and board[6] == p):\n", " reward = p\n", " done = True\n", " \n", " return self.state, reward, done, {}\n", "\n", " def reset(self):\n", " self.state = {}\n", " self.state['board'] = [0, 0, 0, 0, 0, 0, 0, 0, 0] \n", " self.state['on_move'] = 1 \n", " return self.state, {}\n", "\n", " def render(self, close=False):\n", " if close:\n", " return\n", " print(\"on move: \" , self.symbols[self.state['on_move']+1])\n", " for i in range (9):\n", " print (self.symbols[self.state['board'][i]+1], end=\" \");\n", " if ((i % 3) == 2):\n", " print();\n", "\n", " def move_generator(self):\n", " moves = []\n", " for i in range(9):\n", " if self.state['board'][i] == 0:\n", " p = self.state['on_move']\n", " m = [p, i]\n", " moves.append(m)\n", " return moves" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Реализация агента \n", "\n", "Роль агента: \n", "Агент принимает решение (выбирает действие), основываясь на текущем состоянии среды, и затем получает обратную связь (награду и новое состояние) от среды. \n", "\n", "Функционал агента: \n", "Выбор действия: Использует алгоритмы или стратегии, чтобы определить, что делать дальше. \n", "Обучение: Обновляет свои знания или стратегию на основе опыта, чтобы лучше справляться с задачей. \n", "Интерактивность: Адаптируется к изменениям в среде. " ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "import random\n", "\n", "# Реализация Агента, который в рамках обучения с подкреплением взаимодействует со средой и вырабатывает наилучшую стратегию \n", "\n", "class Agent:\n", " def __init__(self, symbol):\n", " self.symbol = symbol # Символ игрока (1 - X, -1 - O)\n", " \n", " def get_action(self, moves):\n", " return random.choice(moves) # Выбираем случайный ход из доступных" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Основной цикл обучения \n", "\n", "Основной цикл включает в себя создание среды и работу с ней. По истечению выполнения игры, начисляются очки, которые будут свидетельствовать о том, какие результаты достиг наш агент, обучилась ли наша система. " ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " \n", "O X \n", "on move: O\n", " \n", " \n", "O X X \n", "on move: X\n", " \n", " O \n", "O X X \n", "on move: O\n", " \n", " X O \n", "O X X \n", "on move: X\n", " O \n", " X O \n", "O X X \n", "on move: O\n", " O \n", "X X O \n", "O X X \n", "on move: X\n", "O O \n", "X X O \n", "O X X \n", "on move: O\n", "O X O \n", "X X O \n", "O X X \n", "Episode 1, Total Reward: 1\n", "Average Reward: 1.0\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " \n", " O X \n", "on move: O\n", " \n", "X \n", " O X \n", "on move: X\n", " O \n", "X \n", " O X \n", "on move: O\n", " O \n", "X X \n", " O X \n", "on move: X\n", " O O \n", "X X \n", " O X \n", "on move: O\n", " O O \n", "X X X \n", " O X \n", "Episode 2, Total Reward: 1\n", "Average Reward: 1.0\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " \n", " O \n", "on move: O\n", "X \n", "X \n", " O \n", "on move: X\n", "X O \n", "X \n", " O \n", "on move: O\n", "X O \n", "X X \n", " O \n", "on move: X\n", "X O \n", "X X \n", " O O \n", "on move: O\n", "X O \n", "X X \n", "X O O \n", "Episode 3, Total Reward: 1\n", "Average Reward: 1.0\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " O \n", "X \n", " \n", "on move: O\n", " O \n", "X \n", " X \n", "on move: X\n", " O \n", "X O \n", " X \n", "on move: O\n", " O \n", "X X O \n", " X \n", "on move: X\n", "O O \n", "X X O \n", " X \n", "on move: O\n", "O O \n", "X X O \n", "X X \n", "on move: X\n", "O O O \n", "X X O \n", "X X \n", "Episode 4, Total Reward: -1\n", "Average Reward: 0.5\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", " O \n", " \n", " X X \n", "on move: X\n", " O \n", " O \n", " X X \n", "on move: O\n", " O X \n", " O \n", " X X \n", "on move: X\n", "O O X \n", " O \n", " X X \n", "on move: O\n", "O O X \n", "X O \n", " X X \n", "on move: X\n", "O O X \n", "X O O \n", " X X \n", "on move: O\n", "O O X \n", "X O O \n", "X X X \n", "Episode 5, Total Reward: 1\n", "Average Reward: 0.6\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X O \n", " \n", "on move: O\n", "X \n", "X O \n", " \n", "on move: X\n", "X \n", "X O \n", " O \n", "on move: O\n", "X \n", "X O \n", "X O \n", "Episode 6, Total Reward: 1\n", "Average Reward: 0.6666666666666666\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " O X \n", " X \n", " \n", "on move: X\n", " O X \n", "O X \n", " \n", "on move: O\n", "X O X \n", "O X \n", " \n", "on move: X\n", "X O X \n", "O X \n", " O \n", "on move: O\n", "X O X \n", "O X \n", "X O \n", "Episode 7, Total Reward: 1\n", "Average Reward: 0.7142857142857143\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", " O \n", "on move: O\n", " X \n", " X \n", " O \n", "on move: X\n", " X \n", "O X \n", " O \n", "on move: O\n", " X \n", "O X \n", "X O \n", "Episode 8, Total Reward: 1\n", "Average Reward: 0.75\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " \n", " O \n", "on move: O\n", "X \n", " \n", " O X \n", "on move: X\n", "X O \n", " \n", " O X \n", "on move: O\n", "X O \n", "X \n", " O X \n", "on move: X\n", "X O O \n", "X \n", " O X \n", "on move: O\n", "X O O \n", "X \n", "X O X \n", "Episode 9, Total Reward: 1\n", "Average Reward: 0.7777777777777778\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", "O \n", "on move: O\n", " \n", " X X \n", "O \n", "on move: X\n", " O \n", " X X \n", "O \n", "on move: O\n", "X O \n", " X X \n", "O \n", "on move: X\n", "X O \n", "O X X \n", "O \n", "on move: O\n", "X O \n", "O X X \n", "O X \n", "Episode 10, Total Reward: 1\n", "Average Reward: 0.8\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", " O \n", "on move: O\n", " X X \n", " \n", " O \n", "on move: X\n", " X X \n", " O \n", " O \n", "on move: O\n", " X X \n", " X O \n", " O \n", "on move: X\n", " X X \n", " X O \n", " O O \n", "on move: O\n", " X X \n", "X X O \n", " O O \n", "on move: X\n", " X X \n", "X X O \n", "O O O \n", "Episode 11, Total Reward: -1\n", "Average Reward: 0.6363636363636364\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " O X \n", " \n", " \n", "on move: O\n", " O X \n", " X \n", " \n", "on move: X\n", " O X \n", "O X \n", " \n", "on move: O\n", "X O X \n", "O X \n", " \n", "on move: X\n", "X O X \n", "O X \n", " O \n", "on move: O\n", "X O X \n", "O X X \n", " O \n", "on move: X\n", "X O X \n", "O X X \n", "O O \n", "on move: O\n", "X O X \n", "O X X \n", "O O X \n", "Episode 12, Total Reward: 1\n", "Average Reward: 0.6666666666666666\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", " X O \n", " \n", " X \n", "on move: X\n", " X O \n", " \n", " O X \n", "on move: O\n", " X O \n", " X \n", " O X \n", "on move: X\n", "O X O \n", " X \n", " O X \n", "on move: O\n", "O X O \n", " X X \n", " O X \n", "on move: X\n", "O X O \n", " X X \n", "O O X \n", "on move: O\n", "O X O \n", "X X X \n", "O O X \n", "Episode 13, Total Reward: 1\n", "Average Reward: 0.6923076923076923\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", " O \n", "on move: O\n", " \n", "X \n", " X O \n", "on move: X\n", " O \n", "X \n", " X O \n", "on move: O\n", " O \n", "X X \n", " X O \n", "on move: X\n", " O \n", "X X O \n", " X O \n", "on move: O\n", " O X \n", "X X O \n", " X O \n", "on move: X\n", " O X \n", "X X O \n", "O X O \n", "on move: O\n", "X O X \n", "X X O \n", "O X O \n", "Episode 14, Total Reward: 0\n", "Average Reward: 0.6428571428571429\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", "O \n", " \n", "on move: O\n", "X X \n", "O \n", " \n", "on move: X\n", "X X \n", "O O \n", " \n", "on move: O\n", "X X \n", "O O \n", " X \n", "on move: X\n", "X O X \n", "O O \n", " X \n", "on move: O\n", "X O X \n", "O O \n", "X X \n", "on move: X\n", "X O X \n", "O O \n", "X X O \n", "on move: O\n", "X O X \n", "O O X \n", "X X O \n", "Episode 15, Total Reward: 0\n", "Average Reward: 0.6\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " \n", "X O \n", "on move: O\n", " X \n", " \n", "X O \n", "on move: X\n", " X \n", " O \n", "X O \n", "on move: O\n", " X \n", " O X \n", "X O \n", "on move: X\n", " X \n", "O O X \n", "X O \n", "on move: O\n", " X \n", "O O X \n", "X X O \n", "on move: X\n", "O X \n", "O O X \n", "X X O \n", "Episode 16, Total Reward: -1\n", "Average Reward: 0.5\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", " X \n", " X O \n", " \n", "on move: X\n", "O X \n", " X O \n", " \n", "on move: O\n", "O X \n", " X O \n", " X \n", "Episode 17, Total Reward: 1\n", "Average Reward: 0.5294117647058824\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " O \n", " X \n", "on move: O\n", " X \n", " O \n", " X \n", "on move: X\n", " X \n", "O O \n", " X \n", "on move: O\n", " X \n", "O O \n", " X X \n", "on move: X\n", " X \n", "O O \n", "O X X \n", "on move: O\n", "X X \n", "O O \n", "O X X \n", "on move: X\n", "X X \n", "O O O \n", "O X X \n", "Episode 18, Total Reward: -1\n", "Average Reward: 0.4444444444444444\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", " O \n", "on move: O\n", " X \n", " X \n", " O \n", "on move: X\n", " X \n", " O X \n", " O \n", "on move: O\n", "X X \n", " O X \n", " O \n", "on move: X\n", "X O X \n", " O X \n", " O \n", "Episode 19, Total Reward: -1\n", "Average Reward: 0.3684210526315789\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", "O \n", "on move: O\n", " \n", "X X \n", "O \n", "on move: X\n", " \n", "X X \n", "O O \n", "on move: O\n", " X \n", "X X \n", "O O \n", "on move: X\n", " O X \n", "X X \n", "O O \n", "on move: O\n", "X O X \n", "X X \n", "O O \n", "on move: X\n", "X O X \n", "X X \n", "O O O \n", "Episode 20, Total Reward: -1\n", "Average Reward: 0.3\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", "O \n", "on move: O\n", " X \n", " \n", "O X \n", "on move: X\n", " X \n", " \n", "O O X \n", "on move: O\n", "X X \n", " \n", "O O X \n", "on move: X\n", "X X \n", "O \n", "O O X \n", "on move: O\n", "X X \n", "O X \n", "O O X \n", "Episode 21, Total Reward: 1\n", "Average Reward: 0.3333333333333333\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", "O \n", " X \n", " \n", "on move: O\n", "O \n", " X \n", "X \n", "on move: X\n", "O \n", " X O \n", "X \n", "on move: O\n", "O \n", " X O \n", "X X \n", "on move: X\n", "O \n", " X O \n", "X X O \n", "on move: O\n", "O X \n", " X O \n", "X X O \n", "Episode 22, Total Reward: 1\n", "Average Reward: 0.36363636363636365\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X O \n", " \n", "on move: O\n", " \n", "X O \n", "X \n", "on move: X\n", " \n", "X O \n", "X O \n", "on move: O\n", " \n", "X O \n", "X O X \n", "on move: X\n", " O \n", "X O \n", "X O X \n", "on move: O\n", "X O \n", "X O \n", "X O X \n", "Episode 23, Total Reward: 1\n", "Average Reward: 0.391304347826087\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", "O \n", " \n", "on move: O\n", "X \n", "O X \n", " \n", "on move: X\n", "X \n", "O X \n", "O \n", "on move: O\n", "X \n", "O X \n", "O X \n", "on move: X\n", "X \n", "O X \n", "O X O \n", "on move: O\n", "X \n", "O X X \n", "O X O \n", "on move: X\n", "X O \n", "O X X \n", "O X O \n", "on move: O\n", "X O X \n", "O X X \n", "O X O \n", "Episode 24, Total Reward: 0\n", "Average Reward: 0.375\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", " O \n", "on move: O\n", " \n", " X \n", " X O \n", "on move: X\n", " \n", "O X \n", " X O \n", "on move: O\n", " \n", "O X \n", "X X O \n", "on move: X\n", " \n", "O X O \n", "X X O \n", "on move: O\n", " X \n", "O X O \n", "X X O \n", "Episode 25, Total Reward: 1\n", "Average Reward: 0.4\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", "O \n", " \n", "on move: O\n", "X X \n", "O \n", " \n", "on move: X\n", "X X O \n", "O \n", " \n", "on move: O\n", "X X O \n", "O X \n", " \n", "on move: X\n", "X X O \n", "O O X \n", " \n", "on move: O\n", "X X O \n", "O O X \n", "X \n", "on move: X\n", "X X O \n", "O O X \n", "X O \n", "on move: O\n", "X X O \n", "O O X \n", "X O X \n", "Episode 26, Total Reward: 0\n", "Average Reward: 0.38461538461538464\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " O \n", "X \n", " \n", "on move: O\n", " O \n", "X X \n", " \n", "on move: X\n", " O \n", "X X \n", " O \n", "on move: O\n", " O \n", "X X \n", " O X \n", "on move: X\n", " O \n", "X X \n", "O O X \n", "on move: O\n", " O \n", "X X X \n", "O O X \n", "Episode 27, Total Reward: 1\n", "Average Reward: 0.4074074074074074\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", "O \n", "X \n", " \n", "on move: O\n", "O \n", "X \n", " X \n", "on move: X\n", "O O \n", "X \n", " X \n", "on move: O\n", "O O \n", "X \n", "X X \n", "on move: X\n", "O O O \n", "X \n", "X X \n", "Episode 28, Total Reward: -1\n", "Average Reward: 0.35714285714285715\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", " O \n", "on move: O\n", " \n", "X X \n", " O \n", "on move: X\n", " O \n", "X X \n", " O \n", "on move: O\n", " X O \n", "X X \n", " O \n", "on move: X\n", " X O \n", "X X \n", "O O \n", "on move: O\n", "X X O \n", "X X \n", "O O \n", "on move: X\n", "X X O \n", "X X O \n", "O O \n", "on move: O\n", "X X O \n", "X X O \n", "O O X \n", "Episode 29, Total Reward: 1\n", "Average Reward: 0.3793103448275862\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X O \n", " \n", "on move: O\n", " \n", "X O \n", " X \n", "on move: X\n", " \n", "X O \n", " X O \n", "on move: O\n", " \n", "X O \n", "X X O \n", "on move: X\n", " \n", "X O O \n", "X X O \n", "on move: O\n", "X \n", "X O O \n", "X X O \n", "Episode 30, Total Reward: 1\n", "Average Reward: 0.4\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " O X \n", " \n", "on move: O\n", " X \n", " O X \n", " \n", "on move: X\n", "O X \n", " O X \n", " \n", "on move: O\n", "O X \n", " O X \n", " X \n", "on move: X\n", "O X \n", " O X \n", "O X \n", "on move: O\n", "O X X \n", " O X \n", "O X \n", "on move: X\n", "O X X \n", " O X \n", "O X O \n", "Episode 31, Total Reward: -1\n", "Average Reward: 0.3548387096774194\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", "O \n", " \n", " X \n", "on move: O\n", "O \n", " X \n", " X \n", "on move: X\n", "O O \n", " X \n", " X \n", "on move: O\n", "O O \n", " X \n", "X X \n", "on move: X\n", "O O \n", " X O \n", "X X \n", "on move: O\n", "O O \n", "X X O \n", "X X \n", "on move: X\n", "O O O \n", "X X O \n", "X X \n", "Episode 32, Total Reward: -1\n", "Average Reward: 0.3125\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", " O \n", "on move: O\n", " X \n", " \n", " X O \n", "on move: X\n", "O X \n", " \n", " X O \n", "on move: O\n", "O X \n", " X \n", " X O \n", "on move: X\n", "O X \n", "O X \n", " X O \n", "on move: O\n", "O X \n", "O X \n", "X X O \n", "on move: X\n", "O X O \n", "O X \n", "X X O \n", "on move: O\n", "O X O \n", "O X X \n", "X X O \n", "Episode 33, Total Reward: 1\n", "Average Reward: 0.3333333333333333\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", "O \n", "X \n", " \n", "on move: O\n", "O X \n", "X \n", " \n", "on move: X\n", "O X \n", "X \n", "O \n", "on move: O\n", "O X \n", "X \n", "O X \n", "on move: X\n", "O X O \n", "X \n", "O X \n", "on move: O\n", "O X O \n", "X X \n", "O X \n", "on move: X\n", "O X O \n", "X X O \n", "O X \n", "on move: O\n", "O X O \n", "X X O \n", "O X X \n", "Episode 34, Total Reward: 1\n", "Average Reward: 0.35294117647058826\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " O X \n", " \n", " \n", "on move: O\n", " O X \n", " \n", "X \n", "on move: X\n", "O O X \n", " \n", "X \n", "on move: O\n", "O O X \n", " \n", "X X \n", "on move: X\n", "O O X \n", "O \n", "X X \n", "on move: O\n", "O O X \n", "O X \n", "X X \n", "Episode 35, Total Reward: 1\n", "Average Reward: 0.37142857142857144\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " O \n", "X \n", "on move: O\n", " X \n", " O \n", "X \n", "on move: X\n", " X \n", " O \n", "X O \n", "on move: O\n", " X X \n", " O \n", "X O \n", "on move: X\n", " X X \n", " O O \n", "X O \n", "on move: O\n", "X X X \n", " O O \n", "X O \n", "Episode 36, Total Reward: 1\n", "Average Reward: 0.3888888888888889\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", "O \n", "X \n", "on move: O\n", " \n", "O \n", "X X \n", "on move: X\n", "O \n", "O \n", "X X \n", "on move: O\n", "O \n", "O X \n", "X X \n", "on move: X\n", "O O \n", "O X \n", "X X \n", "on move: O\n", "O O \n", "O X \n", "X X X \n", "Episode 37, Total Reward: 1\n", "Average Reward: 0.40540540540540543\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " O \n", " X \n", " X \n", "on move: X\n", " O \n", "O X \n", " X \n", "on move: O\n", "X O \n", "O X \n", " X \n", "Episode 38, Total Reward: 1\n", "Average Reward: 0.42105263157894735\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", "O \n", "X \n", "on move: O\n", " X \n", "O \n", "X \n", "on move: X\n", " O X \n", "O \n", "X \n", "on move: O\n", " O X \n", "O \n", "X X \n", "on move: X\n", " O X \n", "O \n", "X O X \n", "on move: O\n", " O X \n", "O X \n", "X O X \n", "Episode 39, Total Reward: 1\n", "Average Reward: 0.4358974358974359\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", "O \n", " X \n", "on move: O\n", " \n", "O \n", " X X \n", "on move: X\n", " \n", "O O \n", " X X \n", "on move: O\n", " \n", "O O X \n", " X X \n", "on move: X\n", "O \n", "O O X \n", " X X \n", "on move: O\n", "O X \n", "O O X \n", " X X \n", "Episode 40, Total Reward: 1\n", "Average Reward: 0.45\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", "O \n", "on move: O\n", " X \n", "X \n", "O \n", "on move: X\n", " X \n", "X \n", "O O \n", "on move: O\n", " X \n", "X X \n", "O O \n", "on move: X\n", " X \n", "X X \n", "O O O \n", "Episode 41, Total Reward: -1\n", "Average Reward: 0.4146341463414634\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " \n", "X O \n", "on move: O\n", " \n", "X \n", "X O \n", "on move: X\n", " O \n", "X \n", "X O \n", "on move: O\n", " O \n", "X \n", "X X O \n", "on move: X\n", " O \n", "X O \n", "X X O \n", "on move: O\n", "X O \n", "X O \n", "X X O \n", "Episode 42, Total Reward: 1\n", "Average Reward: 0.42857142857142855\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", " X \n", "X O \n", " \n", "on move: X\n", " X \n", "X O O \n", " \n", "on move: O\n", " X X \n", "X O O \n", " \n", "on move: X\n", "O X X \n", "X O O \n", " \n", "on move: O\n", "O X X \n", "X O O \n", " X \n", "on move: X\n", "O X X \n", "X O O \n", " X O \n", "Episode 43, Total Reward: -1\n", "Average Reward: 0.3953488372093023\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", "O \n", "on move: O\n", " \n", " X \n", "O X \n", "on move: X\n", " O \n", " X \n", "O X \n", "on move: O\n", " O X \n", " X \n", "O X \n", "on move: X\n", " O X \n", " X \n", "O X O \n", "on move: O\n", " O X \n", "X X \n", "O X O \n", "on move: X\n", " O X \n", "X X O \n", "O X O \n", "on move: O\n", "X O X \n", "X X O \n", "O X O \n", "Episode 44, Total Reward: 0\n", "Average Reward: 0.38636363636363635\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", " X \n", " O \n", " X \n", "on move: X\n", " X \n", "O O \n", " X \n", "on move: O\n", " X \n", "O O \n", "X X \n", "on move: X\n", " X \n", "O O O \n", "X X \n", "Episode 45, Total Reward: -1\n", "Average Reward: 0.35555555555555557\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", " O \n", " \n", " X X \n", "on move: X\n", "O O \n", " \n", " X X \n", "on move: O\n", "O O \n", " X \n", " X X \n", "on move: X\n", "O O \n", " X \n", "O X X \n", "on move: O\n", "O O \n", "X X \n", "O X X \n", "on move: X\n", "O O O \n", "X X \n", "O X X \n", "Episode 46, Total Reward: -1\n", "Average Reward: 0.32608695652173914\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", "O \n", " \n", "X \n", "on move: O\n", "O \n", " X \n", "X \n", "on move: X\n", "O O \n", " X \n", "X \n", "on move: O\n", "O O \n", " X X \n", "X \n", "on move: X\n", "O O \n", "O X X \n", "X \n", "on move: O\n", "O O X \n", "O X X \n", "X \n", "Episode 47, Total Reward: 1\n", "Average Reward: 0.3404255319148936\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", "O \n", "on move: O\n", " X X \n", " \n", "O \n", "on move: X\n", " X X \n", " \n", "O O \n", "on move: O\n", "X X X \n", " \n", "O O \n", "Episode 48, Total Reward: 1\n", "Average Reward: 0.3541666666666667\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", " O \n", "on move: O\n", " \n", " X X \n", " O \n", "on move: X\n", " O \n", " X X \n", " O \n", "on move: O\n", "X O \n", " X X \n", " O \n", "on move: X\n", "X O \n", " X X \n", " O O \n", "on move: O\n", "X O \n", " X X \n", "X O O \n", "on move: X\n", "X O \n", "O X X \n", "X O O \n", "on move: O\n", "X O X \n", "O X X \n", "X O O \n", "Episode 49, Total Reward: 1\n", "Average Reward: 0.3673469387755102\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", "O \n", " \n", " X \n", "on move: O\n", "O \n", " \n", " X X \n", "on move: X\n", "O \n", " \n", "O X X \n", "on move: O\n", "O X \n", " \n", "O X X \n", "on move: X\n", "O X \n", "O \n", "O X X \n", "Episode 50, Total Reward: -1\n", "Average Reward: 0.34\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", " X \n", " X O \n", " \n", "on move: X\n", " X \n", "O X O \n", " \n", "on move: O\n", " X \n", "O X O \n", "X \n", "on move: X\n", " X \n", "O X O \n", "X O \n", "on move: O\n", "X X \n", "O X O \n", "X O \n", "on move: X\n", "X X \n", "O X O \n", "X O O \n", "on move: O\n", "X X X \n", "O X O \n", "X O O \n", "Episode 51, Total Reward: 1\n", "Average Reward: 0.35294117647058826\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", " O \n", " X \n", " X \n", "on move: X\n", " O O \n", " X \n", " X \n", "on move: O\n", " O O \n", " X \n", " X X \n", "on move: X\n", " O O \n", "O X \n", " X X \n", "on move: O\n", " O O \n", "O X \n", "X X X \n", "Episode 52, Total Reward: 1\n", "Average Reward: 0.36538461538461536\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " O \n", "X \n", " \n", "on move: O\n", "X O \n", "X \n", " \n", "on move: X\n", "X O \n", "X O \n", " \n", "on move: O\n", "X O \n", "X O \n", "X \n", "Episode 53, Total Reward: 1\n", "Average Reward: 0.37735849056603776\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", "O \n", " \n", " X \n", "on move: O\n", "O \n", "X \n", " X \n", "on move: X\n", "O \n", "X O \n", " X \n", "on move: O\n", "O \n", "X O X \n", " X \n", "on move: X\n", "O \n", "X O X \n", " O X \n", "on move: O\n", "O X \n", "X O X \n", " O X \n", "on move: X\n", "O X O \n", "X O X \n", " O X \n", "on move: O\n", "O X O \n", "X O X \n", "X O X \n", "Episode 54, Total Reward: 0\n", "Average Reward: 0.37037037037037035\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", "O X \n", " \n", "on move: O\n", " X \n", "O X \n", " \n", "on move: X\n", " X \n", "O X \n", " O \n", "on move: O\n", "X X \n", "O X \n", " O \n", "on move: X\n", "X X \n", "O O X \n", " O \n", "on move: O\n", "X X \n", "O O X \n", " X O \n", "on move: X\n", "X X \n", "O O X \n", "O X O \n", "on move: O\n", "X X X \n", "O O X \n", "O X O \n", "Episode 55, Total Reward: 1\n", "Average Reward: 0.38181818181818183\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " \n", " X O \n", "on move: O\n", " \n", "X \n", " X O \n", "on move: X\n", " \n", "X O \n", " X O \n", "on move: O\n", " X \n", "X O \n", " X O \n", "on move: X\n", " X \n", "X O O \n", " X O \n", "on move: O\n", " X \n", "X O O \n", "X X O \n", "on move: X\n", "O X \n", "X O O \n", "X X O \n", "Episode 56, Total Reward: -1\n", "Average Reward: 0.35714285714285715\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " O \n", "X X \n", " \n", "on move: X\n", " O \n", "X X \n", "O \n", "on move: O\n", " O \n", "X X \n", "O X \n", "on move: X\n", " O \n", "X X \n", "O O X \n", "on move: O\n", " O \n", "X X X \n", "O O X \n", "Episode 57, Total Reward: 1\n", "Average Reward: 0.3684210526315789\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " \n", "X O \n", "on move: O\n", " X \n", " \n", "X O \n", "on move: X\n", " X \n", " O \n", "X O \n", "on move: O\n", " X \n", " O \n", "X O X \n", "on move: X\n", " O X \n", " O \n", "X O X \n", "Episode 58, Total Reward: -1\n", "Average Reward: 0.3448275862068966\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", "O \n", "X \n", " \n", "on move: O\n", "O \n", "X \n", "X \n", "on move: X\n", "O O \n", "X \n", "X \n", "on move: O\n", "O O \n", "X X \n", "X \n", "on move: X\n", "O O O \n", "X X \n", "X \n", "Episode 59, Total Reward: -1\n", "Average Reward: 0.3220338983050847\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " O \n", " \n", "X \n", "on move: O\n", "X O \n", " \n", "X \n", "on move: X\n", "X O O \n", " \n", "X \n", "on move: O\n", "X O O \n", " X \n", "X \n", "on move: X\n", "X O O \n", " O X \n", "X \n", "on move: O\n", "X O O \n", "X O X \n", "X \n", "Episode 60, Total Reward: 1\n", "Average Reward: 0.3333333333333333\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", " O \n", "on move: O\n", " X \n", " X \n", " O \n", "on move: X\n", " X \n", " X O \n", " O \n", "on move: O\n", "X X \n", " X O \n", " O \n", "on move: X\n", "X X \n", " X O \n", " O O \n", "on move: O\n", "X X X \n", " X O \n", " O O \n", "Episode 61, Total Reward: 1\n", "Average Reward: 0.3442622950819672\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", "O \n", " \n", "on move: O\n", " X \n", "O X \n", " \n", "on move: X\n", " X O \n", "O X \n", " \n", "on move: O\n", " X O \n", "O X \n", " X \n", "Episode 62, Total Reward: 1\n", "Average Reward: 0.3548387096774194\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", " O \n", "on move: O\n", " \n", "X X \n", " O \n", "on move: X\n", " \n", "X X \n", "O O \n", "on move: O\n", "X \n", "X X \n", "O O \n", "on move: X\n", "X O \n", "X X \n", "O O \n", "on move: O\n", "X O X \n", "X X \n", "O O \n", "on move: X\n", "X O X \n", "X O X \n", "O O \n", "Episode 63, Total Reward: -1\n", "Average Reward: 0.3333333333333333\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X O \n", " \n", "on move: O\n", " \n", "X X O \n", " \n", "on move: X\n", " \n", "X X O \n", "O \n", "on move: O\n", "X \n", "X X O \n", "O \n", "on move: X\n", "X O \n", "X X O \n", "O \n", "on move: O\n", "X O \n", "X X O \n", "O X \n", "on move: X\n", "X O \n", "X X O \n", "O X O \n", "on move: O\n", "X O X \n", "X X O \n", "O X O \n", "Episode 64, Total Reward: 0\n", "Average Reward: 0.328125\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", "O \n", " \n", "X \n", "on move: O\n", "O \n", " X \n", "X \n", "on move: X\n", "O O \n", " X \n", "X \n", "on move: O\n", "O O X \n", " X \n", "X \n", "on move: X\n", "O O X \n", " X \n", "X O \n", "on move: O\n", "O O X \n", " X \n", "X O X \n", "Episode 65, Total Reward: 1\n", "Average Reward: 0.3384615384615385\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", "O \n", " \n", " X \n", "on move: O\n", "O \n", " X \n", " X \n", "on move: X\n", "O O \n", " X \n", " X \n", "on move: O\n", "O O \n", "X X \n", " X \n", "on move: X\n", "O O \n", "X X O \n", " X \n", "on move: O\n", "O O \n", "X X O \n", "X X \n", "on move: X\n", "O O \n", "X X O \n", "X O X \n", "on move: O\n", "O O X \n", "X X O \n", "X O X \n", "Episode 66, Total Reward: 1\n", "Average Reward: 0.3484848484848485\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " O \n", " \n", "on move: O\n", "X \n", " O \n", " X \n", "on move: X\n", "X O \n", " O \n", " X \n", "on move: O\n", "X O \n", "X O \n", " X \n", "on move: X\n", "X O \n", "X O \n", "O X \n", "on move: O\n", "X O \n", "X O \n", "O X X \n", "on move: X\n", "X O O \n", "X O \n", "O X X \n", "on move: O\n", "X O O \n", "X X O \n", "O X X \n", "Episode 67, Total Reward: 1\n", "Average Reward: 0.3582089552238806\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", "O X \n", " \n", "on move: O\n", " X \n", "O X \n", " \n", "on move: X\n", " X O \n", "O X \n", " \n", "on move: O\n", " X O \n", "O X X \n", " \n", "on move: X\n", "O X O \n", "O X X \n", " \n", "on move: O\n", "O X O \n", "O X X \n", " X \n", "on move: X\n", "O X O \n", "O X X \n", " O X \n", "on move: O\n", "O X O \n", "O X X \n", "X O X \n", "Episode 68, Total Reward: 0\n", "Average Reward: 0.35294117647058826\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", " O X \n", " \n", " X \n", "on move: X\n", " O X \n", " \n", "O X \n", "on move: O\n", "X O X \n", " \n", "O X \n", "on move: X\n", "X O X \n", " O \n", "O X \n", "on move: O\n", "X O X \n", "X O \n", "O X \n", "on move: X\n", "X O X \n", "X O O \n", "O X \n", "on move: O\n", "X O X \n", "X O O \n", "O X X \n", "Episode 69, Total Reward: 0\n", "Average Reward: 0.34782608695652173\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", " O \n", "on move: O\n", " X \n", " X \n", " O \n", "on move: X\n", "O X \n", " X \n", " O \n", "on move: O\n", "O X X \n", " X \n", " O \n", "on move: X\n", "O X X \n", " X \n", " O O \n", "on move: O\n", "O X X \n", " X X \n", " O O \n", "on move: X\n", "O X X \n", " X X \n", "O O O \n", "Episode 70, Total Reward: -1\n", "Average Reward: 0.32857142857142857\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " \n", " O X \n", "on move: O\n", " \n", " X \n", " O X \n", "on move: X\n", " \n", " X O \n", " O X \n", "on move: O\n", " X \n", " X O \n", " O X \n", "on move: X\n", " X \n", "O X O \n", " O X \n", "on move: O\n", "X X \n", "O X O \n", " O X \n", "Episode 71, Total Reward: 1\n", "Average Reward: 0.3380281690140845\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " O \n", "X \n", " \n", "on move: O\n", " O \n", "X X \n", " \n", "on move: X\n", " O O \n", "X X \n", " \n", "on move: O\n", "X O O \n", "X X \n", " \n", "on move: X\n", "X O O \n", "X X \n", " O \n", "on move: O\n", "X O O \n", "X X \n", " X O \n", "on move: X\n", "X O O \n", "X X \n", "O X O \n", "on move: O\n", "X O O \n", "X X X \n", "O X O \n", "Episode 72, Total Reward: 1\n", "Average Reward: 0.3472222222222222\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " \n", "O X \n", "on move: O\n", " \n", " X \n", "O X \n", "on move: X\n", "O \n", " X \n", "O X \n", "on move: O\n", "O X \n", " X \n", "O X \n", "on move: X\n", "O X \n", "O X \n", "O X \n", "Episode 73, Total Reward: -1\n", "Average Reward: 0.3287671232876712\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " O X \n", " \n", "on move: O\n", " \n", "X O X \n", " \n", "on move: X\n", " \n", "X O X \n", "O \n", "on move: O\n", "X \n", "X O X \n", "O \n", "on move: X\n", "X O \n", "X O X \n", "O \n", "Episode 74, Total Reward: -1\n", "Average Reward: 0.3108108108108108\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " O \n", " X \n", "on move: O\n", " \n", "X O \n", " X \n", "on move: X\n", " O \n", "X O \n", " X \n", "on move: O\n", " O \n", "X X O \n", " X \n", "on move: X\n", " O \n", "X X O \n", "O X \n", "on move: O\n", " O \n", "X X O \n", "O X X \n", "on move: X\n", "O O \n", "X X O \n", "O X X \n", "on move: O\n", "O X O \n", "X X O \n", "O X X \n", "Episode 75, Total Reward: 1\n", "Average Reward: 0.32\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X O \n", " \n", "on move: O\n", " X \n", "X O \n", " \n", "on move: X\n", "O X \n", "X O \n", " \n", "on move: O\n", "O X \n", "X O \n", " X \n", "on move: X\n", "O X \n", "X O O \n", " X \n", "on move: O\n", "O X X \n", "X O O \n", " X \n", "on move: X\n", "O X X \n", "X O O \n", " O X \n", "on move: O\n", "O X X \n", "X O O \n", "X O X \n", "Episode 76, Total Reward: 0\n", "Average Reward: 0.3157894736842105\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " \n", "X O \n", "on move: O\n", " \n", "X \n", "X O \n", "on move: X\n", " O \n", "X \n", "X O \n", "on move: O\n", "X O \n", "X \n", "X O \n", "Episode 77, Total Reward: 1\n", "Average Reward: 0.3246753246753247\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " O \n", " X \n", " X \n", "on move: X\n", " O \n", " X \n", " O X \n", "on move: O\n", " O X \n", " X \n", " O X \n", "Episode 78, Total Reward: 1\n", "Average Reward: 0.3333333333333333\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", " O \n", " \n", " X X \n", "on move: X\n", " O O \n", " \n", " X X \n", "on move: O\n", "X O O \n", " \n", " X X \n", "on move: X\n", "X O O \n", " O \n", " X X \n", "on move: O\n", "X O O \n", " O X \n", " X X \n", "on move: X\n", "X O O \n", " O X \n", "O X X \n", "Episode 79, Total Reward: -1\n", "Average Reward: 0.31645569620253167\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", " O \n", "on move: O\n", " X \n", " X \n", " O \n", "on move: X\n", " X \n", "O X \n", " O \n", "on move: O\n", " X \n", "O X \n", "X O \n", "on move: X\n", "O X \n", "O X \n", "X O \n", "on move: O\n", "O X \n", "O X X \n", "X O \n", "on move: X\n", "O X \n", "O X X \n", "X O O \n", "on move: O\n", "O X X \n", "O X X \n", "X O O \n", "Episode 80, Total Reward: 1\n", "Average Reward: 0.325\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", "O \n", "on move: O\n", " X \n", " X \n", "O \n", "on move: X\n", " X \n", "O X \n", "O \n", "on move: O\n", "X X \n", "O X \n", "O \n", "on move: X\n", "X X \n", "O X O \n", "O \n", "on move: O\n", "X X \n", "O X O \n", "O X \n", "Episode 81, Total Reward: 1\n", "Average Reward: 0.3333333333333333\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", "O \n", " X \n", " \n", "on move: O\n", "O \n", " X X \n", " \n", "on move: X\n", "O \n", "O X X \n", " \n", "on move: O\n", "O \n", "O X X \n", " X \n", "on move: X\n", "O O \n", "O X X \n", " X \n", "on move: O\n", "O O \n", "O X X \n", "X X \n", "on move: X\n", "O O \n", "O X X \n", "X O X \n", "on move: O\n", "O X O \n", "O X X \n", "X O X \n", "Episode 82, Total Reward: 0\n", "Average Reward: 0.32926829268292684\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", "O X \n", " \n", "on move: O\n", " \n", "O X \n", " X \n", "on move: X\n", " O \n", "O X \n", " X \n", "on move: O\n", " X O \n", "O X \n", " X \n", "on move: X\n", "O X O \n", "O X \n", " X \n", "on move: O\n", "O X O \n", "O X \n", " X X \n", "on move: X\n", "O X O \n", "O O X \n", " X X \n", "on move: O\n", "O X O \n", "O O X \n", "X X X \n", "Episode 83, Total Reward: 1\n", "Average Reward: 0.3373493975903614\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", "O X \n", " \n", "on move: O\n", " \n", "O X \n", " X \n", "on move: X\n", "O \n", "O X \n", " X \n", "on move: O\n", "O X \n", "O X \n", " X \n", "on move: X\n", "O X \n", "O X \n", "O X \n", "Episode 84, Total Reward: -1\n", "Average Reward: 0.32142857142857145\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", "O X \n", " \n", "on move: O\n", " \n", "O X \n", " X \n", "on move: X\n", " \n", "O X \n", " X O \n", "on move: O\n", " X \n", "O X \n", " X O \n", "on move: X\n", " O X \n", "O X \n", " X O \n", "on move: O\n", " O X \n", "O X X \n", " X O \n", "on move: X\n", " O X \n", "O X X \n", "O X O \n", "on move: O\n", "X O X \n", "O X X \n", "O X O \n", "Episode 85, Total Reward: 0\n", "Average Reward: 0.3176470588235294\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", " O \n", " \n", " X X \n", "on move: X\n", "O O \n", " \n", " X X \n", "on move: O\n", "O O \n", " X \n", " X X \n", "on move: X\n", "O O \n", "O X \n", " X X \n", "on move: O\n", "O X O \n", "O X \n", " X X \n", "Episode 86, Total Reward: 1\n", "Average Reward: 0.32558139534883723\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", " O \n", "on move: O\n", "X X \n", " \n", " O \n", "on move: X\n", "X X \n", " O \n", " O \n", "on move: O\n", "X X \n", "X O \n", " O \n", "on move: X\n", "X X O \n", "X O \n", " O \n", "on move: O\n", "X X O \n", "X O \n", " O X \n", "on move: X\n", "X X O \n", "X O O \n", " O X \n", "on move: O\n", "X X O \n", "X O O \n", "X O X \n", "Episode 87, Total Reward: 1\n", "Average Reward: 0.3333333333333333\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " O \n", " X \n", "on move: O\n", " \n", " O \n", " X X \n", "on move: X\n", " \n", "O O \n", " X X \n", "on move: O\n", "X \n", "O O \n", " X X \n", "on move: X\n", "X \n", "O O \n", "O X X \n", "on move: O\n", "X X \n", "O O \n", "O X X \n", "on move: X\n", "X O X \n", "O O \n", "O X X \n", "on move: O\n", "X O X \n", "O O X \n", "O X X \n", "Episode 88, Total Reward: 1\n", "Average Reward: 0.3409090909090909\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", " O \n", "on move: O\n", " \n", " X \n", " X O \n", "on move: X\n", " \n", " X \n", "O X O \n", "on move: O\n", "X \n", " X \n", "O X O \n", "on move: X\n", "X \n", " O X \n", "O X O \n", "on move: O\n", "X X \n", " O X \n", "O X O \n", "on move: X\n", "X O X \n", " O X \n", "O X O \n", "on move: O\n", "X O X \n", "X O X \n", "O X O \n", "Episode 89, Total Reward: 0\n", "Average Reward: 0.33707865168539325\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " O \n", " X \n", "on move: O\n", "X \n", " O \n", " X \n", "on move: X\n", "X \n", "O O \n", " X \n", "on move: O\n", "X \n", "O O X \n", " X \n", "on move: X\n", "X O \n", "O O X \n", " X \n", "on move: O\n", "X O \n", "O O X \n", " X X \n", "on move: X\n", "X O \n", "O O X \n", "O X X \n", "on move: O\n", "X O X \n", "O O X \n", "O X X \n", "Episode 90, Total Reward: 1\n", "Average Reward: 0.34444444444444444\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " O \n", "X \n", "on move: O\n", " \n", " O \n", "X X \n", "on move: X\n", " O \n", " O \n", "X X \n", "on move: O\n", " X O \n", " O \n", "X X \n", "on move: X\n", "O X O \n", " O \n", "X X \n", "on move: O\n", "O X O \n", " X O \n", "X X \n", "on move: X\n", "O X O \n", " X O \n", "X O X \n", "on move: O\n", "O X O \n", "X X O \n", "X O X \n", "Episode 91, Total Reward: 0\n", "Average Reward: 0.34065934065934067\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", " X \n", " O \n", " X \n", "on move: X\n", " X \n", " O O \n", " X \n", "on move: O\n", "X X \n", " O O \n", " X \n", "on move: X\n", "X X \n", " O O \n", "O X \n", "on move: O\n", "X X \n", " O O \n", "O X X \n", "on move: X\n", "X X \n", "O O O \n", "O X X \n", "Episode 92, Total Reward: -1\n", "Average Reward: 0.32608695652173914\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", "O X \n", " \n", " \n", "on move: O\n", "O X \n", " \n", " X \n", "on move: X\n", "O X \n", "O \n", " X \n", "on move: O\n", "O X \n", "O X \n", " X \n", "Episode 93, Total Reward: 1\n", "Average Reward: 0.3333333333333333\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " \n", " O \n", "on move: O\n", "X X \n", " \n", " O \n", "on move: X\n", "X X \n", " \n", " O O \n", "on move: O\n", "X X X \n", " \n", " O O \n", "Episode 94, Total Reward: 1\n", "Average Reward: 0.3404255319148936\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " \n", " O \n", "on move: O\n", "X \n", "X \n", " O \n", "on move: X\n", "X \n", "X \n", "O O \n", "on move: O\n", "X X \n", "X \n", "O O \n", "on move: X\n", "X X \n", "X O \n", "O O \n", "on move: O\n", "X X \n", "X O \n", "O O X \n", "on move: X\n", "X X O \n", "X O \n", "O O X \n", "Episode 95, Total Reward: -1\n", "Average Reward: 0.3263157894736842\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " O \n", "X \n", "on move: O\n", " \n", " O \n", "X X \n", "on move: X\n", " \n", " O \n", "X X O \n", "on move: O\n", " \n", "X O \n", "X X O \n", "on move: X\n", " O \n", "X O \n", "X X O \n", "Episode 96, Total Reward: -1\n", "Average Reward: 0.3125\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", " O \n", "on move: O\n", " X \n", "X \n", " O \n", "on move: X\n", " O X \n", "X \n", " O \n", "on move: O\n", " O X \n", "X \n", " O X \n", "on move: X\n", " O X \n", "X \n", "O O X \n", "on move: O\n", "X O X \n", "X \n", "O O X \n", "on move: X\n", "X O X \n", "X O \n", "O O X \n", "on move: O\n", "X O X \n", "X X O \n", "O O X \n", "Episode 97, Total Reward: 1\n", "Average Reward: 0.31958762886597936\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " O \n", "X \n", " \n", "on move: O\n", " O \n", "X \n", " X \n", "on move: X\n", " O \n", "X O \n", " X \n", "on move: O\n", "X O \n", "X O \n", " X \n", "on move: X\n", "X O \n", "X O \n", " O X \n", "Episode 98, Total Reward: -1\n", "Average Reward: 0.30612244897959184\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", "O \n", "on move: O\n", " \n", "X \n", "O X \n", "on move: X\n", "O \n", "X \n", "O X \n", "on move: O\n", "O \n", "X X \n", "O X \n", "on move: X\n", "O O \n", "X X \n", "O X \n", "on move: O\n", "O O \n", "X X \n", "O X X \n", "on move: X\n", "O O O \n", "X X \n", "O X X \n", "Episode 99, Total Reward: -1\n", "Average Reward: 0.29292929292929293\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " X O \n", " X \n", " \n", "on move: X\n", " X O \n", " X \n", " O \n", "on move: O\n", " X O \n", " X \n", " X O \n", "Episode 100, Total Reward: 1\n", "Average Reward: 0.3\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", " O \n", "on move: O\n", " X \n", " X \n", " O \n", "on move: X\n", " X \n", " X \n", " O O \n", "on move: O\n", " X X \n", " X \n", " O O \n", "on move: X\n", " X X \n", " X O \n", " O O \n", "on move: O\n", "X X X \n", " X O \n", " O O \n", "Episode 101, Total Reward: 1\n", "Average Reward: 0.3069306930693069\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", "X X \n", " O \n", " \n", "on move: X\n", "X X \n", " O O \n", " \n", "on move: O\n", "X X \n", " O O \n", " X \n", "on move: X\n", "X X O \n", " O O \n", " X \n", "on move: O\n", "X X O \n", " O O \n", "X X \n", "on move: X\n", "X X O \n", "O O O \n", "X X \n", "Episode 102, Total Reward: -1\n", "Average Reward: 0.29411764705882354\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " O X \n", " \n", "on move: O\n", " \n", " O X \n", " X \n", "on move: X\n", "O \n", " O X \n", " X \n", "on move: O\n", "O X \n", " O X \n", " X \n", "on move: X\n", "O X \n", " O X \n", " O X \n", "on move: O\n", "O X \n", "X O X \n", " O X \n", "on move: X\n", "O X \n", "X O X \n", "O O X \n", "on move: O\n", "O X X \n", "X O X \n", "O O X \n", "Episode 103, Total Reward: 1\n", "Average Reward: 0.30097087378640774\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", " O \n", "on move: O\n", " \n", " X X \n", " O \n", "on move: X\n", " \n", " X X \n", " O O \n", "on move: O\n", "X \n", " X X \n", " O O \n", "on move: X\n", "X O \n", " X X \n", " O O \n", "on move: O\n", "X O \n", "X X X \n", " O O \n", "Episode 104, Total Reward: 1\n", "Average Reward: 0.3076923076923077\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", " X X \n", " O \n", " \n", "on move: X\n", " X X \n", " O \n", " O \n", "on move: O\n", "X X X \n", " O \n", " O \n", "Episode 105, Total Reward: 1\n", "Average Reward: 0.3142857142857143\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " O \n", "X \n", " \n", "on move: O\n", " O X \n", "X \n", " \n", "on move: X\n", " O X \n", "X \n", " O \n", "on move: O\n", " O X \n", "X X \n", " O \n", "on move: X\n", " O X \n", "X X O \n", " O \n", "on move: O\n", " O X \n", "X X O \n", " O X \n", "on move: X\n", " O X \n", "X X O \n", "O O X \n", "on move: O\n", "X O X \n", "X X O \n", "O O X \n", "Episode 106, Total Reward: 1\n", "Average Reward: 0.32075471698113206\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", "O \n", " \n", " X \n", "on move: O\n", "O \n", " X \n", " X \n", "on move: X\n", "O O \n", " X \n", " X \n", "on move: O\n", "O O \n", " X \n", " X X \n", "on move: X\n", "O O \n", " O X \n", " X X \n", "on move: O\n", "O X O \n", " O X \n", " X X \n", "on move: X\n", "O X O \n", " O X \n", "O X X \n", "Episode 107, Total Reward: -1\n", "Average Reward: 0.308411214953271\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " O \n", " \n", "X \n", "on move: O\n", " O \n", "X \n", "X \n", "on move: X\n", " O \n", "X O \n", "X \n", "on move: O\n", "X O \n", "X O \n", "X \n", "Episode 108, Total Reward: 1\n", "Average Reward: 0.3148148148148148\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", " O \n", "on move: O\n", " X \n", " X \n", " O \n", "on move: X\n", " X O \n", " X \n", " O \n", "on move: O\n", " X O \n", " X \n", " X O \n", "on move: X\n", " X O \n", "O X \n", " X O \n", "on move: O\n", " X O \n", "O X X \n", " X O \n", "Episode 109, Total Reward: 1\n", "Average Reward: 0.3211009174311927\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " \n", " O \n", "on move: O\n", "X \n", " \n", "X O \n", "on move: X\n", "X \n", " \n", "X O O \n", "on move: O\n", "X \n", " X \n", "X O O \n", "on move: X\n", "X O \n", " X \n", "X O O \n", "on move: O\n", "X O \n", "X X \n", "X O O \n", "Episode 110, Total Reward: 1\n", "Average Reward: 0.32727272727272727\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " \n", " X O \n", "on move: O\n", "X \n", " \n", " X O \n", "on move: X\n", "X O \n", " \n", " X O \n", "on move: O\n", "X X O \n", " \n", " X O \n", "on move: X\n", "X X O \n", "O \n", " X O \n", "on move: O\n", "X X O \n", "O X \n", " X O \n", "Episode 111, Total Reward: 1\n", "Average Reward: 0.3333333333333333\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", " O \n", "on move: O\n", " \n", " X \n", " O X \n", "on move: X\n", " \n", " X O \n", " O X \n", "on move: O\n", "X \n", " X O \n", " O X \n", "Episode 112, Total Reward: 1\n", "Average Reward: 0.3392857142857143\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " \n", " O X \n", "on move: O\n", " X \n", " \n", " O X \n", "on move: X\n", " X \n", " \n", "O O X \n", "on move: O\n", " X \n", "X \n", "O O X \n", "on move: X\n", " X \n", "X O \n", "O O X \n", "on move: O\n", " X \n", "X X O \n", "O O X \n", "on move: X\n", "O X \n", "X X O \n", "O O X \n", "on move: O\n", "O X X \n", "X X O \n", "O O X \n", "Episode 113, Total Reward: 0\n", "Average Reward: 0.336283185840708\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", "O \n", " X \n", " \n", "on move: O\n", "O X \n", " X \n", " \n", "on move: X\n", "O X \n", " X \n", " O \n", "on move: O\n", "O X \n", " X \n", " X O \n", "on move: X\n", "O O X \n", " X \n", " X O \n", "on move: O\n", "O O X \n", " X X \n", " X O \n", "on move: X\n", "O O X \n", " X X \n", "O X O \n", "on move: O\n", "O O X \n", "X X X \n", "O X O \n", "Episode 114, Total Reward: 1\n", "Average Reward: 0.34210526315789475\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", "O \n", " X \n", "on move: O\n", "X \n", "O \n", " X \n", "on move: X\n", "X \n", "O O \n", " X \n", "on move: O\n", "X X \n", "O O \n", " X \n", "on move: X\n", "X X O \n", "O O \n", " X \n", "on move: O\n", "X X O \n", "O O \n", "X X \n", "on move: X\n", "X X O \n", "O O \n", "X O X \n", "on move: O\n", "X X O \n", "O O X \n", "X O X \n", "Episode 115, Total Reward: 0\n", "Average Reward: 0.3391304347826087\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", " O \n", " \n", " X X \n", "on move: X\n", " O \n", "O \n", " X X \n", "on move: O\n", "X O \n", "O \n", " X X \n", "on move: X\n", "X O \n", "O O \n", " X X \n", "on move: O\n", "X O \n", "O O \n", "X X X \n", "Episode 116, Total Reward: 1\n", "Average Reward: 0.3448275862068966\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", " O \n", "on move: O\n", " X \n", "X \n", " O \n", "on move: X\n", " X \n", "X O \n", " O \n", "on move: O\n", " X \n", "X O \n", "X O \n", "on move: X\n", " X O \n", "X O \n", "X O \n", "Episode 117, Total Reward: -1\n", "Average Reward: 0.3333333333333333\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", "O \n", " X \n", "on move: O\n", " X \n", "O \n", " X \n", "on move: X\n", " X \n", "O O \n", " X \n", "on move: O\n", " X X \n", "O O \n", " X \n", "on move: X\n", " X X \n", "O O \n", " O X \n", "on move: O\n", " X X \n", "O O \n", "X O X \n", "on move: X\n", "O X X \n", "O O \n", "X O X \n", "on move: O\n", "O X X \n", "O X O \n", "X O X \n", "Episode 118, Total Reward: 1\n", "Average Reward: 0.3389830508474576\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " O \n", "X \n", "on move: O\n", " \n", " O \n", "X X \n", "on move: X\n", " \n", " O \n", "X O X \n", "on move: O\n", " X \n", " O \n", "X O X \n", "on move: X\n", "O X \n", " O \n", "X O X \n", "on move: O\n", "O X \n", "X O \n", "X O X \n", "on move: X\n", "O X \n", "X O O \n", "X O X \n", "on move: O\n", "O X X \n", "X O O \n", "X O X \n", "Episode 119, Total Reward: 0\n", "Average Reward: 0.33613445378151263\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", "O \n", "X \n", " \n", "on move: O\n", "O \n", "X \n", " X \n", "on move: X\n", "O \n", "X O \n", " X \n", "on move: O\n", "O X \n", "X O \n", " X \n", "on move: X\n", "O X \n", "X O \n", " O X \n", "on move: O\n", "O X \n", "X O X \n", " O X \n", "on move: X\n", "O X O \n", "X O X \n", " O X \n", "on move: O\n", "O X O \n", "X O X \n", "X O X \n", "Episode 120, Total Reward: 0\n", "Average Reward: 0.3333333333333333\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", "O \n", " X \n", " \n", "on move: O\n", "O \n", " X \n", " X \n", "on move: X\n", "O \n", "O X \n", " X \n", "on move: O\n", "O \n", "O X \n", "X X \n", "on move: X\n", "O \n", "O X \n", "X X O \n", "on move: O\n", "O X \n", "O X \n", "X X O \n", "Episode 121, Total Reward: 1\n", "Average Reward: 0.33884297520661155\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", "O \n", "on move: O\n", " X \n", " X \n", "O \n", "on move: X\n", " X \n", "O X \n", "O \n", "on move: O\n", "X X \n", "O X \n", "O \n", "on move: X\n", "X X \n", "O O X \n", "O \n", "on move: O\n", "X X \n", "O O X \n", "O X \n", "on move: X\n", "X X \n", "O O X \n", "O X O \n", "on move: O\n", "X X X \n", "O O X \n", "O X O \n", "Episode 122, Total Reward: 1\n", "Average Reward: 0.3442622950819672\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", " O \n", "on move: O\n", " \n", " X \n", " O X \n", "on move: X\n", " \n", " X \n", "O O X \n", "on move: O\n", "X \n", " X \n", "O O X \n", "Episode 123, Total Reward: 1\n", "Average Reward: 0.34959349593495936\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", "O \n", " \n", "on move: O\n", "X \n", "O X \n", " \n", "on move: X\n", "X \n", "O X O \n", " \n", "on move: O\n", "X \n", "O X O \n", " X \n", "on move: X\n", "X \n", "O X O \n", "O X \n", "on move: O\n", "X \n", "O X O \n", "O X X \n", "Episode 124, Total Reward: 1\n", "Average Reward: 0.3548387096774194\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", " X \n", " O \n", " X \n", "on move: X\n", " X \n", "O O \n", " X \n", "on move: O\n", " X \n", "O O \n", "X X \n", "on move: X\n", " X \n", "O O \n", "X O X \n", "on move: O\n", " X \n", "O X O \n", "X O X \n", "on move: X\n", "O X \n", "O X O \n", "X O X \n", "on move: O\n", "O X X \n", "O X O \n", "X O X \n", "Episode 125, Total Reward: 1\n", "Average Reward: 0.36\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " \n", "O \n", "on move: O\n", "X \n", " \n", "O X \n", "on move: X\n", "X \n", " \n", "O O X \n", "on move: O\n", "X \n", " X \n", "O O X \n", "Episode 126, Total Reward: 1\n", "Average Reward: 0.36507936507936506\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " \n", "X O \n", "on move: O\n", " \n", " X \n", "X O \n", "on move: X\n", "O \n", " X \n", "X O \n", "on move: O\n", "O \n", "X X \n", "X O \n", "on move: X\n", "O O \n", "X X \n", "X O \n", "on move: O\n", "O O \n", "X X X \n", "X O \n", "Episode 127, Total Reward: 1\n", "Average Reward: 0.3700787401574803\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", " O \n", "on move: O\n", " \n", "X \n", "X O \n", "on move: X\n", "O \n", "X \n", "X O \n", "on move: O\n", "O \n", "X X \n", "X O \n", "on move: X\n", "O O \n", "X X \n", "X O \n", "on move: O\n", "O O \n", "X X \n", "X O X \n", "on move: X\n", "O O O \n", "X X \n", "X O X \n", "Episode 128, Total Reward: -1\n", "Average Reward: 0.359375\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", "X X \n", " O \n", " \n", "on move: X\n", "X X \n", " O \n", " O \n", "on move: O\n", "X X \n", "X O \n", " O \n", "on move: X\n", "X X \n", "X O \n", "O O \n", "on move: O\n", "X X \n", "X O \n", "O X O \n", "on move: X\n", "X X \n", "X O O \n", "O X O \n", "on move: O\n", "X X X \n", "X O O \n", "O X O \n", "Episode 129, Total Reward: 1\n", "Average Reward: 0.3643410852713178\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", " O \n", "on move: O\n", " \n", "X \n", "X O \n", "on move: X\n", " O \n", "X \n", "X O \n", "on move: O\n", " X O \n", "X \n", "X O \n", "on move: X\n", " X O \n", "X \n", "X O O \n", "on move: O\n", " X O \n", "X X \n", "X O O \n", "on move: X\n", " X O \n", "X X O \n", "X O O \n", "Episode 130, Total Reward: -1\n", "Average Reward: 0.35384615384615387\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", "O \n", " \n", " X \n", "on move: O\n", "O \n", " X \n", " X \n", "on move: X\n", "O \n", "O X \n", " X \n", "on move: O\n", "O X \n", "O X \n", " X \n", "Episode 131, Total Reward: 1\n", "Average Reward: 0.35877862595419846\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", " O \n", "on move: O\n", " \n", " X \n", "X O \n", "on move: X\n", " O \n", " X \n", "X O \n", "on move: O\n", " O \n", "X X \n", "X O \n", "on move: X\n", " O \n", "X X O \n", "X O \n", "on move: O\n", "X O \n", "X X O \n", "X O \n", "Episode 132, Total Reward: 1\n", "Average Reward: 0.36363636363636365\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X O \n", " \n", " \n", "on move: O\n", " X O \n", " \n", "X \n", "on move: X\n", " X O \n", " O \n", "X \n", "on move: O\n", "X X O \n", " O \n", "X \n", "on move: X\n", "X X O \n", " O \n", "X O \n", "on move: O\n", "X X O \n", "X O \n", "X O \n", "Episode 133, Total Reward: 1\n", "Average Reward: 0.3684210526315789\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", "O \n", "on move: O\n", " X \n", "X \n", "O \n", "on move: X\n", " X \n", "X \n", "O O \n", "on move: O\n", " X \n", "X \n", "O O X \n", "on move: X\n", "O X \n", "X \n", "O O X \n", "on move: O\n", "O X \n", "X X \n", "O O X \n", "on move: X\n", "O X O \n", "X X \n", "O O X \n", "on move: O\n", "O X O \n", "X X X \n", "O O X \n", "Episode 134, Total Reward: 1\n", "Average Reward: 0.373134328358209\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", "O X \n", " \n", " \n", "on move: O\n", "O X \n", "X \n", " \n", "on move: X\n", "O X \n", "X \n", " O \n", "on move: O\n", "O X \n", "X X \n", " O \n", "on move: X\n", "O X \n", "X X O \n", " O \n", "on move: O\n", "O X \n", "X X O \n", "X O \n", "on move: X\n", "O X O \n", "X X O \n", "X O \n", "on move: O\n", "O X O \n", "X X O \n", "X O X \n", "Episode 135, Total Reward: 0\n", "Average Reward: 0.37037037037037035\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " \n", " O \n", "on move: O\n", "X X \n", " \n", " O \n", "on move: X\n", "X X \n", " \n", " O O \n", "on move: O\n", "X X X \n", " \n", " O O \n", "Episode 136, Total Reward: 1\n", "Average Reward: 0.375\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " \n", "X O \n", "on move: O\n", "X \n", " \n", "X O \n", "on move: X\n", "X \n", "O \n", "X O \n", "on move: O\n", "X \n", "O X \n", "X O \n", "on move: X\n", "X O \n", "O X \n", "X O \n", "on move: O\n", "X X O \n", "O X \n", "X O \n", "on move: X\n", "X X O \n", "O X O \n", "X O \n", "on move: O\n", "X X O \n", "O X O \n", "X O X \n", "Episode 137, Total Reward: 1\n", "Average Reward: 0.3795620437956204\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X O \n", " \n", " \n", "on move: O\n", "X O \n", " \n", " X \n", "on move: X\n", "X O O \n", " \n", " X \n", "on move: O\n", "X O O \n", " X \n", " X \n", "on move: X\n", "X O O \n", " X \n", "O X \n", "on move: O\n", "X O O \n", "X X \n", "O X \n", "on move: X\n", "X O O \n", "X X \n", "O X O \n", "on move: O\n", "X O O \n", "X X X \n", "O X O \n", "Episode 138, Total Reward: 1\n", "Average Reward: 0.38405797101449274\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", " O \n", "on move: O\n", " X \n", " X \n", " O \n", "on move: X\n", "O X \n", " X \n", " O \n", "on move: O\n", "O X \n", " X X \n", " O \n", "on move: X\n", "O X \n", " X X \n", "O O \n", "on move: O\n", "O X X \n", " X X \n", "O O \n", "on move: X\n", "O X X \n", "O X X \n", "O O \n", "Episode 139, Total Reward: -1\n", "Average Reward: 0.37410071942446044\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " \n", " X O \n", "on move: O\n", " X \n", " \n", " X O \n", "on move: X\n", " X O \n", " \n", " X O \n", "on move: O\n", "X X O \n", " \n", " X O \n", "on move: X\n", "X X O \n", "O \n", " X O \n", "on move: O\n", "X X O \n", "O \n", "X X O \n", "on move: X\n", "X X O \n", "O O \n", "X X O \n", "on move: O\n", "X X O \n", "O O X \n", "X X O \n", "Episode 140, Total Reward: 0\n", "Average Reward: 0.37142857142857144\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " O \n", " \n", "on move: O\n", "X X \n", " O \n", " \n", "on move: X\n", "X X \n", " O \n", "O \n", "on move: O\n", "X X \n", " O \n", "O X \n", "on move: X\n", "X X \n", " O O \n", "O X \n", "on move: O\n", "X X X \n", " O O \n", "O X \n", "Episode 141, Total Reward: 1\n", "Average Reward: 0.375886524822695\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", " O \n", "on move: O\n", " X \n", " X \n", " O \n", "on move: X\n", " X \n", "O X \n", " O \n", "on move: O\n", " X X \n", "O X \n", " O \n", "on move: X\n", "O X X \n", "O X \n", " O \n", "on move: O\n", "O X X \n", "O X X \n", " O \n", "on move: X\n", "O X X \n", "O X X \n", "O O \n", "Episode 142, Total Reward: -1\n", "Average Reward: 0.36619718309859156\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", " O \n", " \n", " X X \n", "on move: X\n", " O \n", " \n", "O X X \n", "on move: O\n", " O \n", " X \n", "O X X \n", "on move: X\n", " O O \n", " X \n", "O X X \n", "on move: O\n", "X O O \n", " X \n", "O X X \n", "on move: X\n", "X O O \n", " O X \n", "O X X \n", "Episode 143, Total Reward: -1\n", "Average Reward: 0.35664335664335667\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", "O \n", " \n", "on move: O\n", " X \n", "O \n", "X \n", "on move: X\n", " X \n", "O O \n", "X \n", "on move: O\n", " X \n", "O O \n", "X X \n", "on move: X\n", " X O \n", "O O \n", "X X \n", "on move: O\n", "X X O \n", "O O \n", "X X \n", "on move: X\n", "X X O \n", "O O \n", "X X O \n", "on move: O\n", "X X O \n", "O O X \n", "X X O \n", "Episode 144, Total Reward: 0\n", "Average Reward: 0.3541666666666667\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", "X X \n", " O \n", " \n", "on move: X\n", "X X \n", " O \n", "O \n", "on move: O\n", "X X \n", "X O \n", "O \n", "on move: X\n", "X X \n", "X O O \n", "O \n", "on move: O\n", "X X X \n", "X O O \n", "O \n", "Episode 145, Total Reward: 1\n", "Average Reward: 0.3586206896551724\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", "O \n", " \n", "X \n", "on move: O\n", "O X \n", " \n", "X \n", "on move: X\n", "O X \n", "O \n", "X \n", "on move: O\n", "O X \n", "O \n", "X X \n", "on move: X\n", "O O X \n", "O \n", "X X \n", "on move: O\n", "O O X \n", "O X \n", "X X \n", "Episode 146, Total Reward: 1\n", "Average Reward: 0.363013698630137\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", " X X \n", " O \n", " \n", "on move: X\n", "O X X \n", " O \n", " \n", "on move: O\n", "O X X \n", " O \n", " X \n", "on move: X\n", "O X X \n", " O \n", " O X \n", "on move: O\n", "O X X \n", " X O \n", " O X \n", "on move: X\n", "O X X \n", "O X O \n", " O X \n", "on move: O\n", "O X X \n", "O X O \n", "X O X \n", "Episode 147, Total Reward: 1\n", "Average Reward: 0.3673469387755102\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", "O \n", " \n", "on move: O\n", " X \n", "O \n", "X \n", "on move: X\n", " X \n", "O O \n", "X \n", "on move: O\n", " X \n", "O O \n", "X X \n", "on move: X\n", "O X \n", "O O \n", "X X \n", "on move: O\n", "O X \n", "O O \n", "X X X \n", "Episode 148, Total Reward: 1\n", "Average Reward: 0.3716216216216216\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", " O \n", "on move: O\n", " \n", " X X \n", " O \n", "on move: X\n", " \n", " X X \n", "O O \n", "on move: O\n", " \n", " X X \n", "O X O \n", "on move: X\n", "O \n", " X X \n", "O X O \n", "on move: O\n", "O \n", "X X X \n", "O X O \n", "Episode 149, Total Reward: 1\n", "Average Reward: 0.37583892617449666\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", " X \n", " O X \n", " \n", "on move: X\n", " X \n", " O X \n", " O \n", "on move: O\n", " X \n", "X O X \n", " O \n", "on move: X\n", " X \n", "X O X \n", "O O \n", "on move: O\n", " X \n", "X O X \n", "O O X \n", "Episode 150, Total Reward: 1\n", "Average Reward: 0.38\n" ] } ], "source": [ "# Основной цикл обучения (работа с отдельным классом агента)\n", "\n", "# Создание среды для игры в крестики-нолики\n", "environment = TicTacToeEnv()\n", "\n", "# Создание агента (играющего крестиками)\n", "agent = Agent(symbol=1)\n", "\n", "num_episodes = 150 # Количество эпизодов (игр) для обучения\n", "collected_rewards = [] # Список для хранения наград/побед в каждом эпизоде \n", "\n", "# Переменная для отслеживания символа и текущего игрока\n", "oom = 1\n", "\n", "for i in range(num_episodes):\n", " # Сброс среды и начало нового эпизода\n", " state, _ = environment.reset() \n", "\n", " # Общая награда за эпизод\n", " total_reward = 0\n", "\n", " # Флаг завершения игры\n", " done = False\n", " om = oom \n", "\n", " # Максимум 9 ходов, поскольку поле 3x3 \n", " for j in range(9): \n", " moves = environment.move_generator() \n", "\n", " # Ходов нет, заканчиваем игру\n", " if not moves:\n", " break\n", "\n", " \n", " if len(moves) == 1:\n", " move = moves[0] # Если остался один ход на основе стратегии\n", " else:\n", " move = agent.get_action(moves) # Агент выбирает ход на основе стратегии\n", "\n", " # Выполнение хода и обновление состояния игры\n", " next_state, reward, done, info = environment.step(move)\n", " total_reward += reward\n", " state = next_state\n", "\n", " # Отображаем текущее состояние игры\n", " environment.render()\n", "\n", " if done:\n", " break\n", "\n", " om = -om # Смена игрока\n", "\n", " collected_rewards.append(total_reward)\n", "\n", " print(f\"Episode {i+1}, Total Reward: {total_reward}\")\n", " average_reward = sum(collected_rewards) / len(collected_rewards)\n", " print(f\"Average Reward: {average_reward}\")" ] } ], "metadata": { "kernelspec": { "display_name": "mai", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.5" } }, "nbformat": 4, "nbformat_minor": 2 }