diff --git a/lab_6/lab6.ipynb b/lab_6/lab6.ipynb new file mode 100644 index 0000000..f5ce264 --- /dev/null +++ b/lab_6/lab6.ipynb @@ -0,0 +1,5144 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Начинаем работу... \n", + "\n", + "Необходимо развернуть и запустить проект по реализации обучения с подкреплением для игры \"Крестики-нолики\" \n", + "\n", + "Перевести проект на библиотеку gymnasium и современную версию Python. Реализовать агента для игры \"Крестики-нолики\" в виде отдельного класса (как в лекции). Переписать основной цикл обучения для работы с отдельным классом агента (как в лекции). Протестировать новую версию программы. \n", + "\n", + "**Обучение с подкреплением** (Reinforcement Learning, RL) — это метод машинного обучения, при котором агент обучается путем взаимодействия с окружающей средой. Цель агента — максимизировать получаемую награду через последовательность действий.\n", + "\n", + "\n", + "\n", + "Крестики-нолики: https://github.com/nczempin/gym-tic-tac-toe" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Перевод среды на Gymnasium \n", + "\n", + "**Gymnasium (ранее OpenAI Gym)** — это библиотека для создания и тестирования алгоритмов обучения с подкреплением. Она предоставляет набор стандартных сред для RL, таких как игры, задачи управления и симуляции. \n", + "\n", + "Gymnasium предлагает обновлённый API по сравнению с Gym: \n", + "Упростились методы reset и step. Добавлены новые возможности, такие как поддержка тайм-аутов и контроль шагов. Улучшена поддержка пользовательских сред. \n", + "\n", + "*Основные возможности Gymnasium:* \n", + "Стандартизированные интерфейсы для взаимодействия со средой. Простая интеграция с популярными библиотеками RL. Поддержка пользовательских сред. \n", + "\n", + "*Ключевые функции Gymnasium:* \n", + "env.reset() — инициализация среды. \n", + "env.step(action) — выполнение действия и переход в новое состояние. 
import gymnasium as gym
from gymnasium import spaces


class TicTacToeEnv(gym.Env):
    """Two-player tic-tac-toe environment built on ``gymnasium.Env``.

    The state is a dict with two keys:

    * ``'board'``   -- list of 9 ints in row-major order
      (1 = X, -1 = O, 0 = empty square);
    * ``'on_move'`` -- the player expected to move next (1 or -1).

    An action is a ``(player, square)`` pair, exactly as produced by
    ``move_generator``. The reward equals the winning player's number
    (``1`` or ``-1``) on the move that completes a line, ``0`` otherwise.
    An illegal move ends the episode with reward ``-1 * on_move``.
    """

    # Gymnasium reads the supported render modes from the "render_modes" key.
    metadata = {'render_modes': ['human']}

    # Indexed by ``value + 1`` so that -1 -> 'O', 0 -> ' ', 1 -> 'X'.
    symbols = ['O', ' ', 'X']

    # Board indices of the 8 winning lines: rows, columns, diagonals.
    _WIN_LINES = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 4, 8), (2, 4, 6),
    )

    def __init__(self):
        super().__init__()
        # NOTE(review): these spaces do not describe the (player, square)
        # actions or the dict observations this class actually uses; they
        # are kept unchanged so existing code that inspects them still works.
        self.action_space = spaces.Discrete(9)
        self.observation_space = spaces.Discrete(9 * 3 * 2)
        self.reset()

    def step(self, action):
        """Apply one move and report the outcome.

        Args:
            action: ``(player, square)`` pair; ``player`` is 1 or -1 and
                ``square`` is a board index in ``range(9)``.

        Returns:
            ``(state, reward, done, info)``. This is the 4-tuple the rest of
            the notebook unpacks; Gymnasium's own ``step`` contract is a
            5-tuple with separate ``terminated``/``truncated`` flags.
        """
        p, square = action  # p - player (1 or -1), square - board index
        board = self.state['board']
        om = self.state['on_move']

        # Validate before touching the board. Previously an occupied square
        # played by the correct player fell through to the ``else`` branch
        # and was overwritten, which could even turn the penalty into a win.
        illegal = False
        if board[square] != 0:  # square is already taken
            print(f"Незаконный ход: Квадрат {square} уже занят.")
            illegal = True
        if p != om:  # the wrong player tried to move
            print(f"Незаконный ход: игрок {p} не находится в движении")
            illegal = True
        if illegal:
            return self.state, -1 * om, True, {}

        board[square] = p
        self.state['on_move'] = -p

        # Only the player who just moved can have completed a line.
        won = any(
            all(board[i] == p for i in line) for line in self._WIN_LINES
        )
        reward = p if won else 0
        # A full board without a winner is NOT reported as done here; callers
        # detect a draw through an empty ``move_generator()`` result.
        return self.state, reward, won, {}

    def reset(self, *, seed=None, options=None):
        """Start a new game with an empty board and X (player 1) to move.

        Args:
            seed: optional seed forwarded to ``gymnasium.Env.reset``.
            options: accepted for Gymnasium API compatibility; unused.

        Returns:
            ``(state, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        self.state = {'board': [0] * 9, 'on_move': 1}
        return self.state, {}

    def render(self, close=False):
        """Print the player on move and the 3x3 board to stdout."""
        if close:
            return
        print("on move: ", self.symbols[self.state['on_move'] + 1])
        board = self.state['board']
        for row_start in range(0, 9, 3):
            for cell in board[row_start:row_start + 3]:
                print(self.symbols[cell + 1], end=" ")
            print()

    def move_generator(self):
        """Return every legal move as a ``[player, square]`` list."""
        p = self.state['on_move']
        return [
            [p, i]
            for i, cell in enumerate(self.state['board'])
            if cell == 0
        ]
import random


class Agent:
    """Tic-tac-toe player that picks one of the offered moves at random.

    The agent keeps no memory of past games; it only remembers which side
    it plays.
    """

    def __init__(self, symbol):
        """Store the side this agent plays (1 - X, -1 - O)."""
        self.symbol = symbol

    def get_action(self, moves):
        """Return one randomly chosen element of ``moves``.

        Args:
            moves: non-empty sequence of candidate moves.

        Raises:
            IndexError: if ``moves`` is empty (propagated from
                ``random.choice``).
        """
        chosen = random.choice(moves)
        return chosen
+ "X \n", + "on move: X\n", + " \n", + "O \n", + "X \n", + "on move: O\n", + " \n", + "O \n", + "X X \n", + "on move: X\n", + " O \n", + "O \n", + "X X \n", + "on move: O\n", + " O \n", + "O X \n", + "X X \n", + "on move: X\n", + " O \n", + "O X \n", + "X X O \n", + "on move: O\n", + " O X \n", + "O X \n", + "X X O \n", + "Episode 3, Total Reward: 1\n", + "Average Reward: 0.6666666666666666\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + "O \n", + " X \n", + " \n", + "on move: O\n", + "O \n", + "X X \n", + " \n", + "on move: X\n", + "O \n", + "X X \n", + " O \n", + "on move: O\n", + "O X \n", + "X X \n", + " O \n", + "on move: X\n", + "O X \n", + "X X \n", + " O O \n", + "on move: O\n", + "O X X \n", + "X X \n", + " O O \n", + "on move: X\n", + "O X X \n", + "X X \n", + "O O O \n", + "Episode 4, Total Reward: -1\n", + "Average Reward: 0.25\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + " O \n", + "on move: O\n", + " X \n", + " X \n", + " O \n", + "on move: X\n", + " X \n", + " O X \n", + " O \n", + "on move: O\n", + " X \n", + " O X \n", + " O X \n", + "on move: X\n", + " X \n", + "O O X \n", + " O X \n", + "on move: O\n", + " X \n", + "O O X \n", + "X O X \n", + "on move: X\n", + "O X \n", + "O O X \n", + "X O X \n", + "on move: O\n", + "O X X \n", + "O O X \n", + "X O X \n", + "Episode 5, Total Reward: 1\n", + "Average Reward: 0.4\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + " X \n", + "X \n", + "on move: X\n", + " O \n", + "O X \n", + "X \n", + "on move: O\n", + " O X \n", + "O X \n", + "X \n", + "Episode 6, Total Reward: 1\n", + "Average Reward: 0.5\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + " \n", + " O \n", + " X X \n", + "on move: X\n", + " \n", + " O O \n", + " X X \n", + "on move: O\n", + " X \n", + " O O \n", + " X X \n", + "on 
move: X\n", + " X \n", + "O O O \n", + " X X \n", + "Episode 7, Total Reward: -1\n", + "Average Reward: 0.2857142857142857\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X \n", + " O \n", + "on move: O\n", + " \n", + "X X \n", + " O \n", + "on move: X\n", + " \n", + "X X \n", + "O O \n", + "on move: O\n", + " \n", + "X X \n", + "O O X \n", + "on move: X\n", + " \n", + "X O X \n", + "O O X \n", + "on move: O\n", + " X \n", + "X O X \n", + "O O X \n", + "Episode 8, Total Reward: 1\n", + "Average Reward: 0.375\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + " X \n", + " O \n", + " X \n", + "on move: X\n", + "O X \n", + " O \n", + " X \n", + "on move: O\n", + "O X \n", + " O X \n", + " X \n", + "Episode 9, Total Reward: 1\n", + "Average Reward: 0.4444444444444444\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " X O \n", + " X \n", + " \n", + "on move: X\n", + " X O \n", + " X \n", + " O \n", + "on move: O\n", + " X O \n", + " X \n", + " X O \n", + "on move: X\n", + " X O \n", + "O X \n", + " X O \n", + "on move: O\n", + " X O \n", + "O X \n", + "X X O \n", + "on move: X\n", + " X O \n", + "O O X \n", + "X X O \n", + "on move: O\n", + "X X O \n", + "O O X \n", + "X X O \n", + "Episode 10, Total Reward: 0\n", + "Average Reward: 0.4\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " O \n", + " \n", + "X \n", + "on move: O\n", + " O \n", + " \n", + "X X \n", + "on move: X\n", + " O \n", + " O \n", + "X X \n", + "on move: O\n", + " O \n", + " O \n", + "X X X \n", + "Episode 11, Total Reward: 1\n", + "Average Reward: 0.45454545454545453\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " O \n", + " \n", + "X \n", + "on move: O\n", + "X O \n", + " \n", + "X \n", + "on move: X\n", + "X O \n", + "O \n", + "X \n", + "on move: O\n", + "X O \n", + "O X \n", + "X 
\n", + "on move: X\n", + "X O O \n", + "O X \n", + "X \n", + "on move: O\n", + "X O O \n", + "O X X \n", + "X \n", + "on move: X\n", + "X O O \n", + "O X X \n", + "X O \n", + "on move: O\n", + "X O O \n", + "O X X \n", + "X X O \n", + "Episode 12, Total Reward: 0\n", + "Average Reward: 0.4166666666666667\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " O \n", + " \n", + "X \n", + "on move: O\n", + " O \n", + " X \n", + "X \n", + "on move: X\n", + " O O \n", + " X \n", + "X \n", + "on move: O\n", + "X O O \n", + " X \n", + "X \n", + "on move: X\n", + "X O O \n", + " X O \n", + "X \n", + "on move: O\n", + "X O O \n", + " X O \n", + "X X \n", + "Episode 13, Total Reward: 1\n", + "Average Reward: 0.46153846153846156\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " \n", + " X O \n", + "on move: O\n", + " X \n", + " \n", + " X O \n", + "on move: X\n", + " O X \n", + " \n", + " X O \n", + "on move: O\n", + " O X \n", + "X \n", + " X O \n", + "on move: X\n", + " O X \n", + "X \n", + "O X O \n", + "on move: O\n", + " O X \n", + "X X \n", + "O X O \n", + "on move: X\n", + "O O X \n", + "X X \n", + "O X O \n", + "on move: O\n", + "O O X \n", + "X X X \n", + "O X O \n", + "Episode 14, Total Reward: 1\n", + "Average Reward: 0.5\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + " O \n", + "on move: O\n", + " X \n", + " X \n", + " O \n", + "on move: X\n", + " X \n", + " X \n", + " O O \n", + "on move: O\n", + " X \n", + " X \n", + "X O O \n", + "on move: X\n", + "O X \n", + " X \n", + "X O O \n", + "on move: O\n", + "O X \n", + "X X \n", + "X O O \n", + "on move: X\n", + "O X \n", + "X X O \n", + "X O O \n", + "on move: O\n", + "O X X \n", + "X X O \n", + "X O O \n", + "Episode 15, Total Reward: 1\n", + "Average Reward: 0.5333333333333333\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + " O \n", + "X \n", + " X 
\n", + "on move: X\n", + " O \n", + "X O \n", + " X \n", + "on move: O\n", + "X O \n", + "X O \n", + " X \n", + "on move: X\n", + "X O \n", + "X O O \n", + " X \n", + "on move: O\n", + "X O \n", + "X O O \n", + "X X \n", + "Episode 16, Total Reward: 1\n", + "Average Reward: 0.5625\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " \n", + "X O \n", + "on move: O\n", + " X \n", + " \n", + "X O \n", + "on move: X\n", + " X \n", + " O \n", + "X O \n", + "on move: O\n", + " X X \n", + " O \n", + "X O \n", + "on move: X\n", + " X X \n", + "O O \n", + "X O \n", + "on move: O\n", + "X X X \n", + "O O \n", + "X O \n", + "Episode 17, Total Reward: 1\n", + "Average Reward: 0.5882352941176471\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + "O \n", + " X \n", + " \n", + "on move: O\n", + "O X \n", + " X \n", + " \n", + "on move: X\n", + "O X \n", + " X \n", + " O \n", + "on move: O\n", + "O X \n", + " X \n", + "X O \n", + "on move: X\n", + "O O X \n", + " X \n", + "X O \n", + "on move: O\n", + "O O X \n", + " X \n", + "X O X \n", + "Episode 18, Total Reward: 1\n", + "Average Reward: 0.6111111111111112\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + "O \n", + "X \n", + "on move: O\n", + " X \n", + "O \n", + "X \n", + "on move: X\n", + " X \n", + "O O \n", + "X \n", + "on move: O\n", + " X X \n", + "O O \n", + "X \n", + "on move: X\n", + " X X \n", + "O O \n", + "X O \n", + "on move: O\n", + "X X X \n", + "O O \n", + "X O \n", + "Episode 19, Total Reward: 1\n", + "Average Reward: 0.631578947368421\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + "O X \n", + " \n", + "on move: O\n", + " X \n", + "O X \n", + " \n", + "on move: X\n", + " X \n", + "O X O \n", + " \n", + "on move: O\n", + " X \n", + "O X O \n", + " X \n", + "on move: X\n", + "O X \n", + "O X O \n", + " X \n", + "on move: O\n", + "O X \n", + "O X O \n", + "X X \n", + "on move: X\n", + "O X \n", + "O X O 
\n", + "X O X \n", + "on move: O\n", + "O X X \n", + "O X O \n", + "X O X \n", + "Episode 20, Total Reward: 1\n", + "Average Reward: 0.65\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + " \n", + " O \n", + " X X \n", + "on move: X\n", + "O \n", + " O \n", + " X X \n", + "on move: O\n", + "O X \n", + " O \n", + " X X \n", + "on move: X\n", + "O X \n", + " O \n", + "O X X \n", + "on move: O\n", + "O X X \n", + " O \n", + "O X X \n", + "on move: X\n", + "O X X \n", + " O O \n", + "O X X \n", + "on move: O\n", + "O X X \n", + "X O O \n", + "O X X \n", + "Episode 21, Total Reward: 0\n", + "Average Reward: 0.6190476190476191\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " O X \n", + " \n", + "on move: O\n", + " \n", + " O X \n", + "X \n", + "on move: X\n", + " \n", + " O X \n", + "X O \n", + "on move: O\n", + " X \n", + " O X \n", + "X O \n", + "on move: X\n", + " X \n", + " O X \n", + "X O O \n", + "on move: O\n", + " X \n", + "X O X \n", + "X O O \n", + "on move: X\n", + " O X \n", + "X O X \n", + "X O O \n", + "Episode 22, Total Reward: -1\n", + "Average Reward: 0.5454545454545454\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + "O \n", + "on move: O\n", + " X \n", + "X \n", + "O \n", + "on move: X\n", + " X \n", + "X O \n", + "O \n", + "on move: O\n", + " X X \n", + "X O \n", + "O \n", + "on move: X\n", + " X X \n", + "X O O \n", + "O \n", + "on move: O\n", + "X X X \n", + "X O O \n", + "O \n", + "Episode 23, Total Reward: 1\n", + "Average Reward: 0.5652173913043478\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + "O X \n", + " \n", + " \n", + "on move: O\n", + "O X \n", + " \n", + " X \n", + "on move: X\n", + "O X \n", + "O \n", + " X \n", + "on move: O\n", + "O X \n", + "O \n", + " X X \n", + "on move: X\n", + "O X \n", + "O O \n", + " X X \n", + "on move: O\n", + "O X X \n", + "O O \n", + " X X 
\n", + "on move: X\n", + "O X X \n", + "O O O \n", + " X X \n", + "Episode 24, Total Reward: -1\n", + "Average Reward: 0.5\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X O \n", + " \n", + " \n", + "on move: O\n", + "X O \n", + " \n", + "X \n", + "on move: X\n", + "X O \n", + " O \n", + "X \n", + "on move: O\n", + "X O \n", + "X O \n", + "X \n", + "Episode 25, Total Reward: 1\n", + "Average Reward: 0.52\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + "O \n", + " X \n", + "on move: O\n", + "X \n", + "O \n", + " X \n", + "on move: X\n", + "X \n", + "O \n", + " X O \n", + "on move: O\n", + "X X \n", + "O \n", + " X O \n", + "on move: X\n", + "X O X \n", + "O \n", + " X O \n", + "on move: O\n", + "X O X \n", + "O \n", + "X X O \n", + "on move: X\n", + "X O X \n", + "O O \n", + "X X O \n", + "on move: O\n", + "X O X \n", + "O X O \n", + "X X O \n", + "Episode 26, Total Reward: 1\n", + "Average Reward: 0.5384615384615384\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X O \n", + " \n", + "on move: O\n", + " \n", + "X O \n", + " X \n", + "on move: X\n", + "O \n", + "X O \n", + " X \n", + "on move: O\n", + "O \n", + "X O \n", + " X X \n", + "on move: X\n", + "O \n", + "X O O \n", + " X X \n", + "on move: O\n", + "O X \n", + "X O O \n", + " X X \n", + "on move: X\n", + "O X \n", + "X O O \n", + "O X X \n", + "on move: O\n", + "O X X \n", + "X O O \n", + "O X X \n", + "Episode 27, Total Reward: 0\n", + "Average Reward: 0.5185185185185185\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " \n", + " O X \n", + "on move: O\n", + " \n", + " X \n", + " O X \n", + "on move: X\n", + "O \n", + " X \n", + " O X \n", + "on move: O\n", + "O \n", + " X \n", + "X O X \n", + "on move: X\n", + "O \n", + " O X \n", + "X O X \n", + "on move: O\n", + "O X \n", + " O X \n", + "X O X \n", + "Episode 28, Total Reward: 1\n", + "Average Reward: 0.5357142857142857\n", + "on move: 
O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " \n", + " O \n", + "on move: O\n", + "X \n", + " X \n", + " O \n", + "on move: X\n", + "X \n", + " X O \n", + " O \n", + "on move: O\n", + "X \n", + "X X O \n", + " O \n", + "on move: X\n", + "X O \n", + "X X O \n", + " O \n", + "on move: O\n", + "X X O \n", + "X X O \n", + " O \n", + "on move: X\n", + "X X O \n", + "X X O \n", + " O O \n", + "Episode 29, Total Reward: -1\n", + "Average Reward: 0.4827586206896552\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + "O X \n", + " \n", + " \n", + "on move: O\n", + "O X \n", + " \n", + "X \n", + "on move: X\n", + "O X \n", + " \n", + "X O \n", + "on move: O\n", + "O X \n", + " \n", + "X X O \n", + "on move: X\n", + "O O X \n", + " \n", + "X X O \n", + "on move: O\n", + "O O X \n", + " X \n", + "X X O \n", + "Episode 30, Total Reward: 1\n", + "Average Reward: 0.5\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X O \n", + " \n", + "on move: O\n", + " \n", + " X O \n", + " X \n", + "on move: X\n", + "O \n", + " X O \n", + " X \n", + "on move: O\n", + "O \n", + " X O \n", + " X X \n", + "on move: X\n", + "O \n", + " X O \n", + "O X X \n", + "on move: O\n", + "O X \n", + " X O \n", + "O X X \n", + "on move: X\n", + "O X \n", + "O X O \n", + "O X X \n", + "Episode 31, Total Reward: -1\n", + "Average Reward: 0.45161290322580644\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + "O \n", + " X \n", + "on move: O\n", + " \n", + "O X \n", + " X \n", + "on move: X\n", + " \n", + "O X \n", + " O X \n", + "on move: O\n", + " X \n", + "O X \n", + " O X \n", + "Episode 32, Total Reward: 1\n", + "Average Reward: 0.46875\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + "O \n", + "on move: O\n", + " \n", + " X X \n", + "O \n", + "on move: X\n", + "O \n", + " X X \n", + "O \n", + "on move: O\n", + "O X \n", + " X X \n", + "O \n", + "on move: X\n", + "O 
X \n", + "O X X \n", + "O \n", + "Episode 33, Total Reward: -1\n", + "Average Reward: 0.42424242424242425\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + " X \n", + " O \n", + " X \n", + "on move: X\n", + " X \n", + " O \n", + " X O \n", + "on move: O\n", + " X \n", + "X O \n", + " X O \n", + "on move: X\n", + "O X \n", + "X O \n", + " X O \n", + "Episode 34, Total Reward: -1\n", + "Average Reward: 0.38235294117647056\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + "O \n", + "on move: O\n", + "X \n", + " X \n", + "O \n", + "on move: X\n", + "X O \n", + " X \n", + "O \n", + "on move: O\n", + "X O \n", + " X \n", + "O X \n", + "on move: X\n", + "X O \n", + "O X \n", + "O X \n", + "on move: O\n", + "X O \n", + "O X X \n", + "O X \n", + "on move: X\n", + "X O \n", + "O X X \n", + "O X O \n", + "on move: O\n", + "X X O \n", + "O X X \n", + "O X O \n", + "Episode 35, Total Reward: 1\n", + "Average Reward: 0.4\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X O \n", + " \n", + " \n", + "on move: O\n", + "X O \n", + " \n", + "X \n", + "on move: X\n", + "X O \n", + " O \n", + "X \n", + "on move: O\n", + "X O X \n", + " O \n", + "X \n", + "on move: X\n", + "X O X \n", + " O O \n", + "X \n", + "on move: O\n", + "X O X \n", + " O O \n", + "X X \n", + "on move: X\n", + "X O X \n", + " O O \n", + "X O X \n", + "Episode 36, Total Reward: -1\n", + "Average Reward: 0.3611111111111111\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + "X O \n", + " X \n", + " \n", + "on move: X\n", + "X O \n", + " X O \n", + " \n", + "on move: O\n", + "X O \n", + " X O \n", + " X \n", + "on move: X\n", + "X O \n", + "O X O \n", + " X \n", + "on move: O\n", + "X O X \n", + "O X O \n", + " X \n", + "on move: X\n", + "X O X \n", + "O X O \n", + " X O \n", + "on move: O\n", + "X O X \n", + "O X O 
\n", + "X X O \n", + "Episode 37, Total Reward: 1\n", + "Average Reward: 0.3783783783783784\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " X O \n", + " X \n", + " \n", + "on move: X\n", + " X O \n", + " X O \n", + " \n", + "on move: O\n", + " X O \n", + "X X O \n", + " \n", + "on move: X\n", + "O X O \n", + "X X O \n", + " \n", + "on move: O\n", + "O X O \n", + "X X O \n", + " X \n", + "Episode 38, Total Reward: 1\n", + "Average Reward: 0.39473684210526316\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X O \n", + " \n", + " \n", + "on move: O\n", + "X O \n", + " X \n", + " \n", + "on move: X\n", + "X O \n", + " X \n", + "O \n", + "on move: O\n", + "X O \n", + " X \n", + "O X \n", + "Episode 39, Total Reward: 1\n", + "Average Reward: 0.41025641025641024\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " \n", + "X O \n", + "on move: O\n", + " X \n", + " \n", + "X O \n", + "on move: X\n", + " X \n", + " \n", + "X O O \n", + "on move: O\n", + " X \n", + "X \n", + "X O O \n", + "on move: X\n", + " X \n", + "X O \n", + "X O O \n", + "on move: O\n", + " X \n", + "X O X \n", + "X O O \n", + "on move: X\n", + " O X \n", + "X O X \n", + "X O O \n", + "Episode 40, Total Reward: -1\n", + "Average Reward: 0.375\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + " X \n", + " X \n", + "on move: X\n", + " O O \n", + " X \n", + " X \n", + "on move: O\n", + " O O \n", + " X \n", + " X X \n", + "on move: X\n", + " O O \n", + " X O \n", + " X X \n", + "on move: O\n", + "X O O \n", + " X O \n", + " X X \n", + "Episode 41, Total Reward: 1\n", + "Average Reward: 0.3902439024390244\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + " \n", + " O X \n", + " X \n", + "on move: X\n", + " \n", + " O X \n", + "O X \n", + 
"on move: O\n", + " X \n", + " O X \n", + "O X \n", + "Episode 42, Total Reward: 1\n", + "Average Reward: 0.40476190476190477\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + " X \n", + " O \n", + " X \n", + "on move: X\n", + " X \n", + " O \n", + " X O \n", + "on move: O\n", + " X X \n", + " O \n", + " X O \n", + "on move: X\n", + "O X X \n", + " O \n", + " X O \n", + "Episode 43, Total Reward: -1\n", + "Average Reward: 0.37209302325581395\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + " X \n", + " X \n", + "on move: X\n", + " O \n", + "O X \n", + " X \n", + "on move: O\n", + " O \n", + "O X \n", + " X X \n", + "on move: X\n", + " O \n", + "O X O \n", + " X X \n", + "on move: O\n", + " O X \n", + "O X O \n", + " X X \n", + "on move: X\n", + " O X \n", + "O X O \n", + "O X X \n", + "on move: O\n", + "X O X \n", + "O X O \n", + "O X X \n", + "Episode 44, Total Reward: 1\n", + "Average Reward: 0.38636363636363635\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + "O \n", + "on move: O\n", + " \n", + " X X \n", + "O \n", + "on move: X\n", + " \n", + "O X X \n", + "O \n", + "on move: O\n", + " X \n", + "O X X \n", + "O \n", + "on move: X\n", + "O X \n", + "O X X \n", + "O \n", + "Episode 45, Total Reward: -1\n", + "Average Reward: 0.35555555555555557\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X O \n", + " \n", + "on move: O\n", + "X \n", + "X O \n", + " \n", + "on move: X\n", + "X \n", + "X O \n", + " O \n", + "on move: O\n", + "X X \n", + "X O \n", + " O \n", + "on move: X\n", + "X X \n", + "X O O \n", + " O \n", + "on move: O\n", + "X X \n", + "X O O \n", + "X O \n", + "Episode 46, Total Reward: 1\n", + "Average Reward: 0.3695652173913043\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X O \n", + " \n", + 
"on move: O\n", + " \n", + " X O \n", + "X \n", + "on move: X\n", + " \n", + " X O \n", + "X O \n", + "on move: O\n", + "X \n", + " X O \n", + "X O \n", + "on move: X\n", + "X O \n", + " X O \n", + "X O \n", + "on move: O\n", + "X O X \n", + " X O \n", + "X O \n", + "Episode 47, Total Reward: 1\n", + "Average Reward: 0.3829787234042553\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + " O \n", + " \n", + "X X \n", + "on move: X\n", + " O O \n", + " \n", + "X X \n", + "on move: O\n", + " O O \n", + "X \n", + "X X \n", + "on move: X\n", + " O O \n", + "X O \n", + "X X \n", + "on move: O\n", + "X O O \n", + "X O \n", + "X X \n", + "Episode 48, Total Reward: 1\n", + "Average Reward: 0.3958333333333333\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " O X \n", + " \n", + "on move: O\n", + " \n", + " O X \n", + " X \n", + "on move: X\n", + " O \n", + " O X \n", + " X \n", + "on move: O\n", + " O X \n", + " O X \n", + " X \n", + "Episode 49, Total Reward: 1\n", + "Average Reward: 0.40816326530612246\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " \n", + "X O \n", + "on move: O\n", + "X \n", + " \n", + "X O \n", + "on move: X\n", + "X \n", + " \n", + "X O O \n", + "on move: O\n", + "X X \n", + " \n", + "X O O \n", + "on move: X\n", + "X X \n", + " O \n", + "X O O \n", + "on move: O\n", + "X X \n", + "X O \n", + "X O O \n", + "Episode 50, Total Reward: 1\n", + "Average Reward: 0.42\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + " O \n", + "on move: O\n", + " X \n", + " \n", + "X O \n", + "on move: X\n", + " X \n", + " \n", + "X O O \n", + "on move: O\n", + " X X \n", + " \n", + "X O O \n", + "on move: X\n", + "O X X \n", + " \n", + "X O O \n", + "on move: O\n", + "O X X \n", + "X \n", + "X O O \n", + "on move: X\n", + "O X X \n", + "X O \n", + "X O O \n", + "Episode 51, Total Reward: -1\n", + 
"Average Reward: 0.39215686274509803\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + "O X \n", + " \n", + "on move: O\n", + " \n", + "O X \n", + " X \n", + "on move: X\n", + " \n", + "O X \n", + " O X \n", + "on move: O\n", + "X \n", + "O X \n", + " O X \n", + "Episode 52, Total Reward: 1\n", + "Average Reward: 0.40384615384615385\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + "O X \n", + " \n", + " \n", + "on move: O\n", + "O X X \n", + " \n", + " \n", + "on move: X\n", + "O X X \n", + "O \n", + " \n", + "on move: O\n", + "O X X \n", + "O \n", + " X \n", + "on move: X\n", + "O X X \n", + "O O \n", + " X \n", + "on move: O\n", + "O X X \n", + "O O X \n", + " X \n", + "on move: X\n", + "O X X \n", + "O O X \n", + "O X \n", + "Episode 53, Total Reward: -1\n", + "Average Reward: 0.37735849056603776\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + "O \n", + " \n", + "on move: O\n", + " X \n", + "O \n", + " X \n", + "on move: X\n", + " X \n", + "O O \n", + " X \n", + "on move: O\n", + " X X \n", + "O O \n", + " X \n", + "on move: X\n", + "O X X \n", + "O O \n", + " X \n", + "on move: O\n", + "O X X \n", + "O O \n", + " X X \n", + "on move: X\n", + "O X X \n", + "O O \n", + "O X X \n", + "Episode 54, Total Reward: -1\n", + "Average Reward: 0.35185185185185186\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " \n", + " O X \n", + "on move: O\n", + " \n", + " \n", + "X O X \n", + "on move: X\n", + "O \n", + " \n", + "X O X \n", + "on move: O\n", + "O X \n", + " \n", + "X O X \n", + "on move: X\n", + "O X \n", + " O \n", + "X O X \n", + "on move: O\n", + "O X \n", + "X O \n", + "X O X \n", + "on move: X\n", + "O O X \n", + "X O \n", + "X O X \n", + "on move: O\n", + "O O X \n", + "X X O \n", + "X O X \n", + "Episode 55, Total Reward: 1\n", + "Average Reward: 0.36363636363636365\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", 
+ " O \n", + " \n", + "on move: O\n", + " X X \n", + " O \n", + " \n", + "on move: X\n", + " X X \n", + " O \n", + " O \n", + "on move: O\n", + " X X \n", + " O X \n", + " O \n", + "on move: X\n", + "O X X \n", + " O X \n", + " O \n", + "Episode 56, Total Reward: -1\n", + "Average Reward: 0.3392857142857143\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + " O \n", + "on move: O\n", + " \n", + " X X \n", + " O \n", + "on move: X\n", + " \n", + " X X \n", + " O O \n", + "on move: O\n", + "X \n", + " X X \n", + " O O \n", + "on move: X\n", + "X O \n", + " X X \n", + " O O \n", + "on move: O\n", + "X O X \n", + " X X \n", + " O O \n", + "on move: X\n", + "X O X \n", + " X X \n", + "O O O \n", + "Episode 57, Total Reward: -1\n", + "Average Reward: 0.3157894736842105\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + " O \n", + "on move: O\n", + " X \n", + " \n", + "X O \n", + "on move: X\n", + " X \n", + "O \n", + "X O \n", + "on move: O\n", + " X \n", + "O \n", + "X O X \n", + "on move: X\n", + "O X \n", + "O \n", + "X O X \n", + "on move: O\n", + "O X X \n", + "O \n", + "X O X \n", + "on move: X\n", + "O X X \n", + "O O \n", + "X O X \n", + "on move: O\n", + "O X X \n", + "O X O \n", + "X O X \n", + "Episode 58, Total Reward: 1\n", + "Average Reward: 0.3275862068965517\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + "O \n", + " X \n", + "on move: O\n", + " \n", + "O X \n", + " X \n", + "on move: X\n", + " \n", + "O O X \n", + " X \n", + "on move: O\n", + " \n", + "O O X \n", + " X X \n", + "on move: X\n", + " O \n", + "O O X \n", + " X X \n", + "on move: O\n", + " X O \n", + "O O X \n", + " X X \n", + "on move: X\n", + "O X O \n", + "O O X \n", + " X X \n", + "on move: O\n", + "O X O \n", + "O O X \n", + "X X X \n", + "Episode 59, Total Reward: 1\n", + "Average Reward: 0.3389830508474576\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + 
" O \n", + "X \n", + " \n", + "on move: O\n", + " O X \n", + "X \n", + " \n", + "on move: X\n", + " O X \n", + "X O \n", + " \n", + "on move: O\n", + "X O X \n", + "X O \n", + " \n", + "on move: X\n", + "X O X \n", + "X O \n", + "O \n", + "on move: O\n", + "X O X \n", + "X O \n", + "O X \n", + "on move: X\n", + "X O X \n", + "X O O \n", + "O X \n", + "on move: O\n", + "X O X \n", + "X O O \n", + "O X X \n", + "Episode 60, Total Reward: 0\n", + "Average Reward: 0.3333333333333333\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + " X \n", + " O X \n", + " \n", + "on move: X\n", + " X \n", + " O X \n", + " O \n", + "on move: O\n", + " X \n", + " O X \n", + "X O \n", + "on move: X\n", + " O X \n", + " O X \n", + "X O \n", + "on move: O\n", + "X O X \n", + " O X \n", + "X O \n", + "on move: X\n", + "X O X \n", + " O X \n", + "X O O \n", + "Episode 61, Total Reward: -1\n", + "Average Reward: 0.3114754098360656\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + "O \n", + " X \n", + "on move: O\n", + " \n", + "O \n", + " X X \n", + "on move: X\n", + " \n", + "O \n", + "O X X \n", + "on move: O\n", + " X \n", + "O \n", + "O X X \n", + "on move: X\n", + "O X \n", + "O \n", + "O X X \n", + "Episode 62, Total Reward: -1\n", + "Average Reward: 0.2903225806451613\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X O \n", + " \n", + "on move: O\n", + "X \n", + "X O \n", + " \n", + "on move: X\n", + "X \n", + "X O O \n", + " \n", + "on move: O\n", + "X \n", + "X O O \n", + "X \n", + "Episode 63, Total Reward: 1\n", + "Average Reward: 0.30158730158730157\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + " \n", + " O \n", + " X X \n", + "on move: X\n", + " \n", + "O O \n", + " X X \n", + "on move: O\n", + "X \n", + "O O \n", + " X X \n", + "on move: X\n", + "X \n", + "O O O \n", + " X X 
\n", + "Episode 64, Total Reward: -1\n", + "Average Reward: 0.28125\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + "X \n", + " O \n", + " X \n", + "on move: X\n", + "X \n", + "O O \n", + " X \n", + "on move: O\n", + "X \n", + "O O \n", + " X X \n", + "on move: X\n", + "X O \n", + "O O \n", + " X X \n", + "on move: O\n", + "X O \n", + "O X O \n", + " X X \n", + "Episode 65, Total Reward: 1\n", + "Average Reward: 0.2923076923076923\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + " X \n", + "X \n", + "on move: X\n", + " O \n", + " X \n", + "X O \n", + "on move: O\n", + " O \n", + "X X \n", + "X O \n", + "on move: X\n", + " O \n", + "X X O \n", + "X O \n", + "on move: O\n", + " O X \n", + "X X O \n", + "X O \n", + "Episode 66, Total Reward: 1\n", + "Average Reward: 0.30303030303030304\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + "O \n", + "X \n", + " \n", + "on move: O\n", + "O \n", + "X X \n", + " \n", + "on move: X\n", + "O \n", + "X X \n", + " O \n", + "on move: O\n", + "O X \n", + "X X \n", + " O \n", + "on move: X\n", + "O X \n", + "X X \n", + "O O \n", + "on move: O\n", + "O X \n", + "X X X \n", + "O O \n", + "Episode 67, Total Reward: 1\n", + "Average Reward: 0.31343283582089554\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + "O \n", + " X \n", + " \n", + "on move: O\n", + "O \n", + " X \n", + " X \n", + "on move: X\n", + "O \n", + "O X \n", + " X \n", + "on move: O\n", + "O \n", + "O X X \n", + " X \n", + "on move: X\n", + "O \n", + "O X X \n", + " O X \n", + "on move: O\n", + "O X \n", + "O X X \n", + " O X \n", + "on move: X\n", + "O X O \n", + "O X X \n", + " O X \n", + "on move: O\n", + "O X O \n", + "O X X \n", + "X O X \n", + "Episode 68, Total Reward: 0\n", + "Average Reward: 0.3088235294117647\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on 
move: X\n", + " \n", + " O X \n", + " \n", + "on move: O\n", + " \n", + " O X \n", + " X \n", + "on move: X\n", + " \n", + " O X \n", + " X O \n", + "on move: O\n", + " X \n", + " O X \n", + " X O \n", + "on move: X\n", + " X O \n", + " O X \n", + " X O \n", + "on move: O\n", + " X O \n", + " O X \n", + "X X O \n", + "on move: X\n", + " X O \n", + "O O X \n", + "X X O \n", + "on move: O\n", + "X X O \n", + "O O X \n", + "X X O \n", + "Episode 69, Total Reward: 0\n", + "Average Reward: 0.30434782608695654\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " O \n", + " \n", + "on move: O\n", + "X \n", + "X O \n", + " \n", + "on move: X\n", + "X \n", + "X O \n", + "O \n", + "on move: O\n", + "X X \n", + "X O \n", + "O \n", + "on move: X\n", + "X X \n", + "X O \n", + "O O \n", + "on move: O\n", + "X X X \n", + "X O \n", + "O O \n", + "Episode 70, Total Reward: 1\n", + "Average Reward: 0.3142857142857143\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + "O \n", + "X \n", + " \n", + "on move: O\n", + "O \n", + "X \n", + "X \n", + "on move: X\n", + "O O \n", + "X \n", + "X \n", + "on move: O\n", + "O O \n", + "X \n", + "X X \n", + "on move: X\n", + "O O \n", + "X O \n", + "X X \n", + "on move: O\n", + "O O \n", + "X O \n", + "X X X \n", + "Episode 71, Total Reward: 1\n", + "Average Reward: 0.323943661971831\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + " O \n", + " X \n", + " X \n", + "on move: X\n", + " O \n", + " O X \n", + " X \n", + "on move: O\n", + " O \n", + " O X \n", + "X X \n", + "on move: X\n", + " O \n", + " O X \n", + "X O X \n", + "on move: O\n", + " O \n", + "X O X \n", + "X O X \n", + "on move: X\n", + " O O \n", + "X O X \n", + "X O X \n", + "Episode 72, Total Reward: -1\n", + "Average Reward: 0.3055555555555556\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + "O \n", + "on move: O\n", + " X 
\n", + " X \n", + "O \n", + "on move: X\n", + "O X \n", + " X \n", + "O \n", + "on move: O\n", + "O X \n", + "X X \n", + "O \n", + "on move: X\n", + "O X \n", + "X O X \n", + "O \n", + "on move: O\n", + "O X \n", + "X O X \n", + "O X \n", + "Episode 73, Total Reward: 1\n", + "Average Reward: 0.3150684931506849\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + " X \n", + " O \n", + " X \n", + "on move: X\n", + " X \n", + " O \n", + "O X \n", + "on move: O\n", + " X \n", + "X O \n", + "O X \n", + "on move: X\n", + " X \n", + "X O O \n", + "O X \n", + "on move: O\n", + " X X \n", + "X O O \n", + "O X \n", + "on move: X\n", + "O X X \n", + "X O O \n", + "O X \n", + "on move: O\n", + "O X X \n", + "X O O \n", + "O X X \n", + "Episode 74, Total Reward: 0\n", + "Average Reward: 0.3108108108108108\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + "O X \n", + " \n", + " \n", + "on move: O\n", + "O X \n", + " \n", + " X \n", + "on move: X\n", + "O X O \n", + " \n", + " X \n", + "on move: O\n", + "O X O \n", + " \n", + "X X \n", + "on move: X\n", + "O X O \n", + " \n", + "X O X \n", + "on move: O\n", + "O X O \n", + " X \n", + "X O X \n", + "on move: X\n", + "O X O \n", + "O X \n", + "X O X \n", + "on move: O\n", + "O X O \n", + "O X X \n", + "X O X \n", + "Episode 75, Total Reward: 0\n", + "Average Reward: 0.30666666666666664\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + " O \n", + "on move: O\n", + " X \n", + " X \n", + " O \n", + "on move: X\n", + " X \n", + "O X \n", + " O \n", + "on move: O\n", + " X \n", + "O X X \n", + " O \n", + "on move: X\n", + " X \n", + "O X X \n", + "O O \n", + "on move: O\n", + " X X \n", + "O X X \n", + "O O \n", + "on move: X\n", + " X X \n", + "O X X \n", + "O O O \n", + "Episode 76, Total Reward: -1\n", + "Average Reward: 0.2894736842105263\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", 
+ "X \n", + " O \n", + " \n", + "on move: O\n", + "X \n", + " O \n", + "X \n", + "on move: X\n", + "X \n", + " O O \n", + "X \n", + "on move: O\n", + "X X \n", + " O O \n", + "X \n", + "on move: X\n", + "X X O \n", + " O O \n", + "X \n", + "on move: O\n", + "X X O \n", + " O O \n", + "X X \n", + "on move: X\n", + "X X O \n", + " O O \n", + "X X O \n", + "Episode 77, Total Reward: -1\n", + "Average Reward: 0.2727272727272727\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " \n", + " X O \n", + "on move: O\n", + " \n", + " X \n", + " X O \n", + "on move: X\n", + " \n", + "O X \n", + " X O \n", + "on move: O\n", + " X \n", + "O X \n", + " X O \n", + "on move: X\n", + "O X \n", + "O X \n", + " X O \n", + "on move: O\n", + "O X X \n", + "O X \n", + " X O \n", + "on move: X\n", + "O X X \n", + "O X \n", + "O X O \n", + "Episode 78, Total Reward: -1\n", + "Average Reward: 0.2564102564102564\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + "O \n", + " \n", + "X \n", + "on move: O\n", + "O \n", + " \n", + "X X \n", + "on move: X\n", + "O \n", + " \n", + "X X O \n", + "on move: O\n", + "O \n", + "X \n", + "X X O \n", + "on move: X\n", + "O \n", + "X O \n", + "X X O \n", + "Episode 79, Total Reward: -1\n", + "Average Reward: 0.24050632911392406\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + "O \n", + " \n", + "on move: O\n", + " X X \n", + "O \n", + " \n", + "on move: X\n", + " X X \n", + "O \n", + "O \n", + "on move: O\n", + " X X \n", + "O X \n", + "O \n", + "on move: X\n", + " X X \n", + "O X O \n", + "O \n", + "on move: O\n", + " X X \n", + "O X O \n", + "O X \n", + "Episode 80, Total Reward: 1\n", + "Average Reward: 0.25\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X O \n", + " \n", + " \n", + "on move: O\n", + "X O \n", + "X \n", + " \n", + "on move: X\n", + "X O \n", + "X \n", + " O \n", + "on move: O\n", + "X O \n", + "X \n", + "X O \n", + "Episode 81, 
Total Reward: 1\n", + "Average Reward: 0.25925925925925924\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " \n", + "O X \n", + "on move: O\n", + " X \n", + " \n", + "O X \n", + "on move: X\n", + "O X \n", + " \n", + "O X \n", + "on move: O\n", + "O X \n", + " \n", + "O X X \n", + "on move: X\n", + "O X \n", + " O \n", + "O X X \n", + "on move: O\n", + "O X X \n", + " O \n", + "O X X \n", + "on move: X\n", + "O X X \n", + " O O \n", + "O X X \n", + "on move: O\n", + "O X X \n", + "X O O \n", + "O X X \n", + "Episode 82, Total Reward: 0\n", + "Average Reward: 0.25609756097560976\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + " O \n", + " \n", + " X X \n", + "on move: X\n", + " O O \n", + " \n", + " X X \n", + "on move: O\n", + " O O \n", + "X \n", + " X X \n", + "on move: X\n", + " O O \n", + "X O \n", + " X X \n", + "on move: O\n", + "X O O \n", + "X O \n", + " X X \n", + "on move: X\n", + "X O O \n", + "X O O \n", + " X X \n", + "on move: O\n", + "X O O \n", + "X O O \n", + "X X X \n", + "Episode 83, Total Reward: 1\n", + "Average Reward: 0.26506024096385544\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + "O \n", + "on move: O\n", + "X \n", + " X \n", + "O \n", + "on move: X\n", + "X O \n", + " X \n", + "O \n", + "on move: O\n", + "X O \n", + " X X \n", + "O \n", + "on move: X\n", + "X O O \n", + " X X \n", + "O \n", + "on move: O\n", + "X O O \n", + " X X \n", + "O X \n", + "Episode 84, Total Reward: 1\n", + "Average Reward: 0.27380952380952384\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + "O \n", + "on move: O\n", + " X \n", + " X \n", + "O \n", + "on move: X\n", + " X \n", + " X O \n", + "O \n", + "on move: O\n", + "X X \n", + " X O \n", + "O \n", + "on move: X\n", + "X X \n", + " X O \n", + "O O \n", + "on move: O\n", + "X X \n", + " X O \n", + "O X O \n", + "on move: 
X\n", + "X O X \n", + " X O \n", + "O X O \n", + "on move: O\n", + "X O X \n", + "X X O \n", + "O X O \n", + "Episode 85, Total Reward: 0\n", + "Average Reward: 0.27058823529411763\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + " X \n", + " O \n", + " X \n", + "on move: X\n", + " X \n", + " O O \n", + " X \n", + "on move: O\n", + " X \n", + " O O \n", + "X X \n", + "on move: X\n", + " X O \n", + " O O \n", + "X X \n", + "on move: O\n", + "X X O \n", + " O O \n", + "X X \n", + "on move: X\n", + "X X O \n", + "O O O \n", + "X X \n", + "Episode 86, Total Reward: -1\n", + "Average Reward: 0.2558139534883721\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " O \n", + " \n", + "on move: O\n", + "X X \n", + " O \n", + " \n", + "on move: X\n", + "X X \n", + "O O \n", + " \n", + "on move: O\n", + "X X \n", + "O O \n", + "X \n", + "on move: X\n", + "X X \n", + "O O \n", + "X O \n", + "on move: O\n", + "X X X \n", + "O O \n", + "X O \n", + "Episode 87, Total Reward: 1\n", + "Average Reward: 0.26436781609195403\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " O \n", + " \n", + "on move: O\n", + "X \n", + " O \n", + "X \n", + "on move: X\n", + "X \n", + " O \n", + "X O \n", + "on move: O\n", + "X X \n", + " O \n", + "X O \n", + "on move: X\n", + "X X \n", + "O O \n", + "X O \n", + "on move: O\n", + "X X \n", + "O O X \n", + "X O \n", + "on move: X\n", + "X X O \n", + "O O X \n", + "X O \n", + "on move: O\n", + "X X O \n", + "O O X \n", + "X O X \n", + "Episode 88, Total Reward: 0\n", + "Average Reward: 0.26136363636363635\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + " O \n", + "on move: O\n", + " \n", + " X \n", + "X O \n", + "on move: X\n", + " \n", + " X \n", + "X O O \n", + "on move: O\n", + " X \n", + " X \n", + "X O O \n", + "on move: X\n", + "O X \n", + " X \n", + "X O O \n", + "on move: 
O\n", + "O X \n", + "X X \n", + "X O O \n", + "on move: X\n", + "O X \n", + "X X O \n", + "X O O \n", + "on move: O\n", + "O X X \n", + "X X O \n", + "X O O \n", + "Episode 89, Total Reward: 1\n", + "Average Reward: 0.2696629213483146\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " \n", + "O X \n", + "on move: O\n", + " \n", + " X \n", + "O X \n", + "on move: X\n", + " O \n", + " X \n", + "O X \n", + "on move: O\n", + " O \n", + " X \n", + "O X X \n", + "on move: X\n", + " O \n", + "O X \n", + "O X X \n", + "on move: O\n", + "X O \n", + "O X \n", + "O X X \n", + "on move: X\n", + "X O O \n", + "O X \n", + "O X X \n", + "on move: O\n", + "X O O \n", + "O X X \n", + "O X X \n", + "Episode 90, Total Reward: 1\n", + "Average Reward: 0.2777777777777778\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + "O X \n", + " \n", + " \n", + "on move: O\n", + "O X \n", + " \n", + "X \n", + "on move: X\n", + "O X \n", + " O \n", + "X \n", + "on move: O\n", + "O X \n", + " O \n", + "X X \n", + "on move: X\n", + "O X \n", + " O O \n", + "X X \n", + "on move: O\n", + "O X X \n", + " O O \n", + "X X \n", + "on move: X\n", + "O X X \n", + " O O \n", + "X O X \n", + "on move: O\n", + "O X X \n", + "X O O \n", + "X O X \n", + "Episode 91, Total Reward: 0\n", + "Average Reward: 0.27472527472527475\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + "O \n", + " \n", + " X \n", + "on move: O\n", + "O X \n", + " \n", + " X \n", + "on move: X\n", + "O X \n", + " \n", + "O X \n", + "on move: O\n", + "O X \n", + " X \n", + "O X \n", + "on move: X\n", + "O X O \n", + " X \n", + "O X \n", + "on move: O\n", + "O X O \n", + " X X \n", + "O X \n", + "on move: X\n", + "O X O \n", + "O X X \n", + "O X \n", + "Episode 92, Total Reward: -1\n", + "Average Reward: 0.2608695652173913\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + "O \n", + " X \n", + "on move: O\n", + " \n", + "O \n", + "X X \n", + "on 
move: X\n", + " \n", + "O O \n", + "X X \n", + "on move: O\n", + " X \n", + "O O \n", + "X X \n", + "on move: X\n", + " X O \n", + "O O \n", + "X X \n", + "on move: O\n", + " X O \n", + "O X O \n", + "X X \n", + "on move: X\n", + "O X O \n", + "O X O \n", + "X X \n", + "on move: O\n", + "O X O \n", + "O X O \n", + "X X X \n", + "Episode 93, Total Reward: 1\n", + "Average Reward: 0.26881720430107525\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + "O \n", + "on move: O\n", + "X X \n", + " \n", + "O \n", + "on move: X\n", + "X X \n", + "O \n", + "O \n", + "on move: O\n", + "X X \n", + "O \n", + "O X \n", + "on move: X\n", + "X X O \n", + "O \n", + "O X \n", + "on move: O\n", + "X X O \n", + "O X \n", + "O X \n", + "on move: X\n", + "X X O \n", + "O X \n", + "O X O \n", + "on move: O\n", + "X X O \n", + "O X X \n", + "O X O \n", + "Episode 94, Total Reward: 1\n", + "Average Reward: 0.2765957446808511\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X O \n", + " \n", + " \n", + "on move: O\n", + "X O \n", + " \n", + " X \n", + "on move: X\n", + "X O \n", + " \n", + " X O \n", + "on move: O\n", + "X O \n", + " X \n", + " X O \n", + "on move: X\n", + "X O \n", + " X \n", + "O X O \n", + "on move: O\n", + "X O \n", + " X X \n", + "O X O \n", + "on move: X\n", + "X O O \n", + " X X \n", + "O X O \n", + "on move: O\n", + "X O O \n", + "X X X \n", + "O X O \n", + "Episode 95, Total Reward: 1\n", + "Average Reward: 0.28421052631578947\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + "O \n", + " \n", + "X \n", + "on move: O\n", + "O \n", + " X \n", + "X \n", + "on move: X\n", + "O \n", + " X \n", + "X O \n", + "on move: O\n", + "O X \n", + " X \n", + "X O \n", + "Episode 96, Total Reward: 1\n", + "Average Reward: 0.2916666666666667\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + "O \n", + " X \n", + " \n", + "on move: O\n", + "O \n", + " X \n", + "X \n", + "on move: 
X\n", + "O \n", + " O X \n", + "X \n", + "on move: O\n", + "O \n", + "X O X \n", + "X \n", + "on move: X\n", + "O \n", + "X O X \n", + "X O \n", + "Episode 97, Total Reward: -1\n", + "Average Reward: 0.27835051546391754\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " O \n", + "X \n", + " \n", + "on move: O\n", + " O \n", + "X \n", + "X \n", + "on move: X\n", + " O \n", + "X \n", + "X O \n", + "on move: O\n", + " O X \n", + "X \n", + "X O \n", + "on move: X\n", + "O O X \n", + "X \n", + "X O \n", + "on move: O\n", + "O O X \n", + "X X \n", + "X O \n", + "on move: X\n", + "O O X \n", + "X O X \n", + "X O \n", + "Episode 98, Total Reward: -1\n", + "Average Reward: 0.2653061224489796\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + "O \n", + " \n", + "X \n", + "on move: O\n", + "O \n", + " X \n", + "X \n", + "on move: X\n", + "O O \n", + " X \n", + "X \n", + "on move: O\n", + "O X O \n", + " X \n", + "X \n", + "on move: X\n", + "O X O \n", + " O X \n", + "X \n", + "on move: O\n", + "O X O \n", + " O X \n", + "X X \n", + "on move: X\n", + "O X O \n", + " O X \n", + "X X O \n", + "Episode 99, Total Reward: -1\n", + "Average Reward: 0.25252525252525254\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X O \n", + " \n", + "on move: O\n", + " \n", + "X O \n", + " X \n", + "on move: X\n", + " O \n", + "X O \n", + " X \n", + "on move: O\n", + " X O \n", + "X O \n", + " X \n", + "on move: X\n", + " X O \n", + "X O \n", + "O X \n", + "on move: O\n", + " X O \n", + "X O \n", + "O X X \n", + "on move: X\n", + "O X O \n", + "X O \n", + "O X X \n", + "on move: O\n", + "O X O \n", + "X X O \n", + "O X X \n", + "Episode 100, Total Reward: 1\n", + "Average Reward: 0.26\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + " \n", + "X O \n", + " X \n", + "on move: X\n", + " O \n", + "X O \n", + " X \n", + "on move: O\n", + " O \n", + "X X O \n", + 
" X \n", + "on move: X\n", + " O O \n", + "X X O \n", + " X \n", + "on move: O\n", + " O O \n", + "X X O \n", + " X X \n", + "on move: X\n", + "O O O \n", + "X X O \n", + " X X \n", + "Episode 101, Total Reward: -1\n", + "Average Reward: 0.24752475247524752\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + "O \n", + "X \n", + "on move: O\n", + " \n", + "O \n", + "X X \n", + "on move: X\n", + " \n", + "O \n", + "X X O \n", + "on move: O\n", + "X \n", + "O \n", + "X X O \n", + "on move: X\n", + "X \n", + "O O \n", + "X X O \n", + "on move: O\n", + "X X \n", + "O O \n", + "X X O \n", + "on move: X\n", + "X O X \n", + "O O \n", + "X X O \n", + "on move: O\n", + "X O X \n", + "O O X \n", + "X X O \n", + "Episode 102, Total Reward: 0\n", + "Average Reward: 0.24509803921568626\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + "X \n", + " O \n", + " X \n", + "on move: X\n", + "X \n", + " O O \n", + " X \n", + "on move: O\n", + "X \n", + " O O \n", + "X X \n", + "on move: X\n", + "X O \n", + " O O \n", + "X X \n", + "on move: O\n", + "X O X \n", + " O O \n", + "X X \n", + "on move: X\n", + "X O X \n", + " O O \n", + "X O X \n", + "Episode 103, Total Reward: -1\n", + "Average Reward: 0.23300970873786409\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + "O \n", + " \n", + "on move: O\n", + "X X \n", + "O \n", + " \n", + "on move: X\n", + "X X \n", + "O O \n", + " \n", + "on move: O\n", + "X X \n", + "O O X \n", + " \n", + "on move: X\n", + "X X \n", + "O O X \n", + " O \n", + "on move: O\n", + "X X \n", + "O O X \n", + " X O \n", + "on move: X\n", + "X X \n", + "O O X \n", + "O X O \n", + "on move: O\n", + "X X X \n", + "O O X \n", + "O X O \n", + "Episode 104, Total Reward: 1\n", + "Average Reward: 0.2403846153846154\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + " O \n", + "on move: O\n", + " \n", + " 
X X \n", + " O \n", + "on move: X\n", + " \n", + " X X \n", + " O O \n", + "on move: O\n", + "X \n", + " X X \n", + " O O \n", + "on move: X\n", + "X O \n", + " X X \n", + " O O \n", + "on move: O\n", + "X O \n", + " X X \n", + "X O O \n", + "on move: X\n", + "X O \n", + "O X X \n", + "X O O \n", + "on move: O\n", + "X X O \n", + "O X X \n", + "X O O \n", + "Episode 105, Total Reward: 0\n", + "Average Reward: 0.23809523809523808\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + " O \n", + "on move: O\n", + " X \n", + " X \n", + " O \n", + "on move: X\n", + " X \n", + " X \n", + "O O \n", + "on move: O\n", + "X X \n", + " X \n", + "O O \n", + "on move: X\n", + "X X O \n", + " X \n", + "O O \n", + "on move: O\n", + "X X O \n", + " X \n", + "O X O \n", + "on move: X\n", + "X X O \n", + "O X \n", + "O X O \n", + "on move: O\n", + "X X O \n", + "O X X \n", + "O X O \n", + "Episode 106, Total Reward: 1\n", + "Average Reward: 0.24528301886792453\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + "X X \n", + " O \n", + " \n", + "on move: X\n", + "X O X \n", + " O \n", + " \n", + "on move: O\n", + "X O X \n", + " X O \n", + " \n", + "on move: X\n", + "X O X \n", + " X O \n", + "O \n", + "on move: O\n", + "X O X \n", + " X O \n", + "O X \n", + "on move: X\n", + "X O X \n", + "O X O \n", + "O X \n", + "on move: O\n", + "X O X \n", + "O X O \n", + "O X X \n", + "Episode 107, Total Reward: 1\n", + "Average Reward: 0.2523364485981308\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " O \n", + " \n", + "X \n", + "on move: O\n", + " X O \n", + " \n", + "X \n", + "on move: X\n", + " X O \n", + " \n", + "X O \n", + "on move: O\n", + " X O \n", + " X \n", + "X O \n", + "on move: X\n", + " X O \n", + " X O \n", + "X O \n", + "Episode 108, Total Reward: -1\n", + "Average Reward: 0.24074074074074073\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on 
move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + " X \n", + " X \n", + "on move: X\n", + " O \n", + " X \n", + " X O \n", + "on move: O\n", + " O \n", + "X X \n", + " X O \n", + "on move: X\n", + " O \n", + "X X O \n", + " X O \n", + "on move: O\n", + " O X \n", + "X X O \n", + " X O \n", + "on move: X\n", + "O O X \n", + "X X O \n", + " X O \n", + "on move: O\n", + "O O X \n", + "X X O \n", + "X X O \n", + "Episode 109, Total Reward: 1\n", + "Average Reward: 0.24770642201834864\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + "O \n", + " X \n", + "on move: O\n", + " \n", + "O \n", + "X X \n", + "on move: X\n", + " \n", + "O O \n", + "X X \n", + "on move: O\n", + " X \n", + "O O \n", + "X X \n", + "on move: X\n", + "O X \n", + "O O \n", + "X X \n", + "on move: O\n", + "O X \n", + "O X O \n", + "X X \n", + "on move: X\n", + "O X O \n", + "O X O \n", + "X X \n", + "on move: O\n", + "O X O \n", + "O X O \n", + "X X X \n", + "Episode 110, Total Reward: 1\n", + "Average Reward: 0.2545454545454545\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + "O X \n", + " \n", + " \n", + "on move: O\n", + "O X \n", + " X \n", + " \n", + "on move: X\n", + "O O X \n", + " X \n", + " \n", + "on move: O\n", + "O O X \n", + " X \n", + " X \n", + "on move: X\n", + "O O X \n", + " X \n", + " O X \n", + "on move: O\n", + "O O X \n", + "X X \n", + " O X \n", + "on move: X\n", + "O O X \n", + "X X \n", + "O O X \n", + "on move: O\n", + "O O X \n", + "X X X \n", + "O O X \n", + "Episode 111, Total Reward: 1\n", + "Average Reward: 0.26126126126126126\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + "O \n", + " X \n", + " \n", + "on move: O\n", + "O \n", + " X \n", + "X \n", + "on move: X\n", + "O \n", + " X \n", + "X O \n", + "on move: O\n", + "O \n", + "X X \n", + "X O \n", + "on move: X\n", + "O O \n", + "X X \n", + "X O \n", + "on move: O\n", + "O O \n", + "X X X \n", + "X O \n", + "Episode 
112, Total Reward: 1\n", + "Average Reward: 0.26785714285714285\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X O \n", + " \n", + " \n", + "on move: O\n", + " X O \n", + " \n", + " X \n", + "on move: X\n", + " X O \n", + "O \n", + " X \n", + "on move: O\n", + " X O \n", + "O X \n", + " X \n", + "on move: X\n", + "O X O \n", + "O X \n", + " X \n", + "on move: O\n", + "O X O \n", + "O X X \n", + " X \n", + "Episode 113, Total Reward: 1\n", + "Average Reward: 0.2743362831858407\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + " O \n", + "on move: O\n", + " X \n", + "X \n", + " O \n", + "on move: X\n", + " X \n", + "X O \n", + " O \n", + "on move: O\n", + " X X \n", + "X O \n", + " O \n", + "on move: X\n", + " X X \n", + "X O O \n", + " O \n", + "on move: O\n", + "X X X \n", + "X O O \n", + " O \n", + "Episode 114, Total Reward: 1\n", + "Average Reward: 0.2807017543859649\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + "O \n", + " X \n", + "on move: O\n", + " \n", + "O X \n", + " X \n", + "on move: X\n", + " O \n", + "O X \n", + " X \n", + "on move: O\n", + "X O \n", + "O X \n", + " X \n", + "Episode 115, Total Reward: 1\n", + "Average Reward: 0.28695652173913044\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " \n", + "O X \n", + "on move: O\n", + "X \n", + " \n", + "O X \n", + "on move: X\n", + "X O \n", + " \n", + "O X \n", + "on move: O\n", + "X O \n", + " X \n", + "O X \n", + "on move: X\n", + "X O O \n", + " X \n", + "O X \n", + "on move: O\n", + "X O O \n", + " X \n", + "O X X \n", + "on move: X\n", + "X O O \n", + "O X \n", + "O X X \n", + "on move: O\n", + "X O O \n", + "O X X \n", + "O X X \n", + "Episode 116, Total Reward: 1\n", + "Average Reward: 0.29310344827586204\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + " O \n", + "on move: O\n", + " X \n", + " X \n", + " O \n", + "on move: 
X\n", + " X \n", + " X \n", + " O O \n", + "on move: O\n", + " X X \n", + " X \n", + " O O \n", + "on move: X\n", + " X X \n", + "O X \n", + " O O \n", + "on move: O\n", + " X X \n", + "O X X \n", + " O O \n", + "on move: X\n", + " X X \n", + "O X X \n", + "O O O \n", + "Episode 117, Total Reward: -1\n", + "Average Reward: 0.28205128205128205\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + "O X \n", + " \n", + " \n", + "on move: O\n", + "O X \n", + "X \n", + " \n", + "on move: X\n", + "O X \n", + "X O \n", + " \n", + "on move: O\n", + "O X \n", + "X O \n", + " X \n", + "on move: X\n", + "O X \n", + "X O \n", + "O X \n", + "on move: O\n", + "O X X \n", + "X O \n", + "O X \n", + "on move: X\n", + "O X X \n", + "X O O \n", + "O X \n", + "on move: O\n", + "O X X \n", + "X O O \n", + "O X X \n", + "Episode 118, Total Reward: 0\n", + "Average Reward: 0.2796610169491525\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + " X X \n", + " O \n", + " \n", + "on move: X\n", + "O X X \n", + " O \n", + " \n", + "on move: O\n", + "O X X \n", + " O \n", + " X \n", + "on move: X\n", + "O X X \n", + " O O \n", + " X \n", + "on move: O\n", + "O X X \n", + "X O O \n", + " X \n", + "on move: X\n", + "O X X \n", + "X O O \n", + " X O \n", + "Episode 119, Total Reward: -1\n", + "Average Reward: 0.2689075630252101\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " O \n", + " \n", + "X \n", + "on move: O\n", + " O \n", + " X \n", + "X \n", + "on move: X\n", + " O \n", + " X \n", + "X O \n", + "on move: O\n", + " X O \n", + " X \n", + "X O \n", + "on move: X\n", + " X O \n", + " X O \n", + "X O \n", + "on move: O\n", + "X X O \n", + " X O \n", + "X O \n", + "on move: X\n", + "X X O \n", + "O X O \n", + "X O \n", + "on move: O\n", + "X X O \n", + "O X O \n", + "X O X \n", + "Episode 120, Total Reward: 1\n", + "Average Reward: 0.275\n", + "on move: O\n", + " \n", + " X \n", + " \n", 
+ "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + " X \n", + " X \n", + "on move: X\n", + "O O \n", + " X \n", + " X \n", + "on move: O\n", + "O O \n", + " X X \n", + " X \n", + "on move: X\n", + "O O \n", + "O X X \n", + " X \n", + "on move: O\n", + "O O \n", + "O X X \n", + " X X \n", + "on move: X\n", + "O O \n", + "O X X \n", + "O X X \n", + "Episode 121, Total Reward: -1\n", + "Average Reward: 0.2644628099173554\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + "O \n", + " \n", + "X \n", + "on move: O\n", + "O X \n", + " \n", + "X \n", + "on move: X\n", + "O X \n", + " \n", + "X O \n", + "on move: O\n", + "O X \n", + " X \n", + "X O \n", + "on move: X\n", + "O X \n", + "O X \n", + "X O \n", + "on move: O\n", + "O X X \n", + "O X \n", + "X O \n", + "on move: X\n", + "O X X \n", + "O O X \n", + "X O \n", + "Episode 122, Total Reward: -1\n", + "Average Reward: 0.2540983606557377\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + "O \n", + " \n", + "on move: O\n", + "X X \n", + "O \n", + " \n", + "on move: X\n", + "X X \n", + "O \n", + " O \n", + "on move: O\n", + "X X \n", + "O X \n", + " O \n", + "on move: X\n", + "X X \n", + "O O X \n", + " O \n", + "on move: O\n", + "X X \n", + "O O X \n", + " X O \n", + "on move: X\n", + "X X \n", + "O O X \n", + "O X O \n", + "on move: O\n", + "X X X \n", + "O O X \n", + "O X O \n", + "Episode 123, Total Reward: 1\n", + "Average Reward: 0.2601626016260163\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " O \n", + " \n", + "X \n", + "on move: O\n", + " O \n", + " X \n", + "X \n", + "on move: X\n", + "O O \n", + " X \n", + "X \n", + "on move: O\n", + "O O \n", + " X X \n", + "X \n", + "on move: X\n", + "O O \n", + "O X X \n", + "X \n", + "on move: O\n", + "O O \n", + "O X X \n", + "X X \n", + "on move: X\n", + "O O \n", + "O X X \n", + "X O X \n", + "on move: O\n", + "O X O \n", + "O X X \n", + "X O X \n", + "Episode 124, Total 
Reward: 0\n", + "Average Reward: 0.25806451612903225\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + "O \n", + " X \n", + " \n", + "on move: O\n", + "O X \n", + " X \n", + " \n", + "on move: X\n", + "O X \n", + " X \n", + " O \n", + "on move: O\n", + "O X \n", + " X \n", + "X O \n", + "on move: X\n", + "O X \n", + " X \n", + "X O O \n", + "on move: O\n", + "O X X \n", + " X \n", + "X O O \n", + "Episode 125, Total Reward: 1\n", + "Average Reward: 0.264\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + "O \n", + "on move: O\n", + " X \n", + " X \n", + "O \n", + "on move: X\n", + "O X \n", + " X \n", + "O \n", + "on move: O\n", + "O X \n", + "X X \n", + "O \n", + "on move: X\n", + "O O X \n", + "X X \n", + "O \n", + "on move: O\n", + "O O X \n", + "X X \n", + "O X \n", + "Episode 126, Total Reward: 1\n", + "Average Reward: 0.2698412698412698\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + " O \n", + "on move: O\n", + "X X \n", + " \n", + " O \n", + "on move: X\n", + "X X \n", + "O \n", + " O \n", + "on move: O\n", + "X X \n", + "O \n", + " X O \n", + "on move: X\n", + "X X O \n", + "O \n", + " X O \n", + "on move: O\n", + "X X O \n", + "O \n", + "X X O \n", + "on move: X\n", + "X X O \n", + "O O \n", + "X X O \n", + "on move: O\n", + "X X O \n", + "O O X \n", + "X X O \n", + "Episode 127, Total Reward: 0\n", + "Average Reward: 0.2677165354330709\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " \n", + "O \n", + "on move: O\n", + "X \n", + " \n", + "O X \n", + "on move: X\n", + "X \n", + " O \n", + "O X \n", + "on move: O\n", + "X X \n", + " O \n", + "O X \n", + "on move: X\n", + "X X \n", + "O O \n", + "O X \n", + "on move: O\n", + "X X \n", + "O O X \n", + "O X \n", + "on move: X\n", + "X X O \n", + "O O X \n", + "O X \n", + "Episode 128, Total Reward: -1\n", + "Average Reward: 0.2578125\n", + "on move: O\n", + " \n", + "X \n", + 
" \n", + "on move: X\n", + " O \n", + "X \n", + " \n", + "on move: O\n", + " X O \n", + "X \n", + " \n", + "on move: X\n", + " X O \n", + "X \n", + " O \n", + "on move: O\n", + " X O \n", + "X X \n", + " O \n", + "on move: X\n", + " X O \n", + "X X \n", + " O O \n", + "on move: O\n", + " X O \n", + "X X \n", + "X O O \n", + "on move: X\n", + "O X O \n", + "X X \n", + "X O O \n", + "on move: O\n", + "O X O \n", + "X X X \n", + "X O O \n", + "Episode 129, Total Reward: 1\n", + "Average Reward: 0.26356589147286824\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X O \n", + " \n", + "on move: O\n", + " \n", + " X O \n", + " X \n", + "on move: X\n", + "O \n", + " X O \n", + " X \n", + "on move: O\n", + "O \n", + " X O \n", + " X X \n", + "on move: X\n", + "O O \n", + " X O \n", + " X X \n", + "on move: O\n", + "O O X \n", + " X O \n", + " X X \n", + "on move: X\n", + "O O X \n", + " X O \n", + "O X X \n", + "on move: O\n", + "O O X \n", + "X X O \n", + "O X X \n", + "Episode 130, Total Reward: 0\n", + "Average Reward: 0.26153846153846155\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + "O X \n", + " \n", + " \n", + "on move: O\n", + "O X \n", + " \n", + " X \n", + "on move: X\n", + "O X \n", + " \n", + " O X \n", + "on move: O\n", + "O X \n", + " X \n", + " O X \n", + "on move: X\n", + "O X O \n", + " X \n", + " O X \n", + "on move: O\n", + "O X O \n", + " X \n", + "X O X \n", + "on move: X\n", + "O X O \n", + "O X \n", + "X O X \n", + "on move: O\n", + "O X O \n", + "O X X \n", + "X O X \n", + "Episode 131, Total Reward: 0\n", + "Average Reward: 0.2595419847328244\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + " O \n", + "on move: O\n", + " X \n", + " X \n", + " O \n", + "on move: X\n", + " X \n", + "O X \n", + " O \n", + "on move: O\n", + " X \n", + "O X \n", + "X O \n", + "on move: X\n", + "O X \n", + "O X \n", + "X O \n", + "on move: O\n", + "O X \n", + "O X X \n", + 
"X O \n", + "on move: X\n", + "O X \n", + "O X X \n", + "X O O \n", + "on move: O\n", + "O X X \n", + "O X X \n", + "X O O \n", + "Episode 132, Total Reward: 1\n", + "Average Reward: 0.26515151515151514\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " O \n", + "X \n", + " \n", + "on move: O\n", + " O \n", + "X \n", + " X \n", + "on move: X\n", + "O O \n", + "X \n", + " X \n", + "on move: O\n", + "O O \n", + "X X \n", + " X \n", + "on move: X\n", + "O O \n", + "X X O \n", + " X \n", + "on move: O\n", + "O O \n", + "X X O \n", + "X X \n", + "on move: X\n", + "O O O \n", + "X X O \n", + "X X \n", + "Episode 133, Total Reward: -1\n", + "Average Reward: 0.2556390977443609\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + " O \n", + "on move: O\n", + " \n", + "X X \n", + " O \n", + "on move: X\n", + " O \n", + "X X \n", + " O \n", + "on move: O\n", + " O \n", + "X X \n", + "X O \n", + "on move: X\n", + " O O \n", + "X X \n", + "X O \n", + "on move: O\n", + " O O \n", + "X X X \n", + "X O \n", + "Episode 134, Total Reward: 1\n", + "Average Reward: 0.26119402985074625\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + "O \n", + " \n", + "X \n", + "on move: O\n", + "O X \n", + " \n", + "X \n", + "on move: X\n", + "O X \n", + " O \n", + "X \n", + "on move: O\n", + "O X X \n", + " O \n", + "X \n", + "on move: X\n", + "O X X \n", + " O \n", + "X O \n", + "on move: O\n", + "O X X \n", + " X O \n", + "X O \n", + "Episode 135, Total Reward: 1\n", + "Average Reward: 0.26666666666666666\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X O \n", + " \n", + "on move: O\n", + " X \n", + "X O \n", + " \n", + "on move: X\n", + " X \n", + "X O O \n", + " \n", + "on move: O\n", + " X \n", + "X O O \n", + "X \n", + "on move: X\n", + " X \n", + "X O O \n", + "X O \n", + "on move: O\n", + " X \n", + "X O O \n", + "X O X \n", + "on move: X\n", + " O X \n", + "X O O \n", + "X O X 
\n", + "Episode 136, Total Reward: -1\n", + "Average Reward: 0.25735294117647056\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + "O \n", + " X \n", + " \n", + "on move: O\n", + "O \n", + " X \n", + " X \n", + "on move: X\n", + "O \n", + " O X \n", + " X \n", + "on move: O\n", + "O \n", + " O X \n", + "X X \n", + "on move: X\n", + "O \n", + "O O X \n", + "X X \n", + "on move: O\n", + "O X \n", + "O O X \n", + "X X \n", + "Episode 137, Total Reward: 1\n", + "Average Reward: 0.26277372262773724\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + " O \n", + "X \n", + " X \n", + "on move: X\n", + " O \n", + "X \n", + " X O \n", + "on move: O\n", + "X O \n", + "X \n", + " X O \n", + "on move: X\n", + "X O \n", + "X O \n", + " X O \n", + "on move: O\n", + "X O X \n", + "X O \n", + " X O \n", + "on move: X\n", + "X O X \n", + "X O \n", + "O X O \n", + "on move: O\n", + "X O X \n", + "X X O \n", + "O X O \n", + "Episode 138, Total Reward: 0\n", + "Average Reward: 0.2608695652173913\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X \n", + " O \n", + "on move: O\n", + " X \n", + "X \n", + " O \n", + "on move: X\n", + " X \n", + "X O \n", + " O \n", + "on move: O\n", + " X \n", + "X O X \n", + " O \n", + "on move: X\n", + "O X \n", + "X O X \n", + " O \n", + "on move: O\n", + "O X \n", + "X O X \n", + "X O \n", + "on move: X\n", + "O X \n", + "X O X \n", + "X O O \n", + "Episode 139, Total Reward: -1\n", + "Average Reward: 0.2517985611510791\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + "O \n", + " \n", + " X \n", + "on move: O\n", + "O \n", + " \n", + " X X \n", + "on move: X\n", + "O O \n", + " \n", + " X X \n", + "on move: O\n", + "O O \n", + " X \n", + " X X \n", + "on move: X\n", + "O O \n", + "O X \n", + " X X \n", + "on move: O\n", + "O O \n", + "O X X \n", + " X X \n", + "on move: X\n", + "O O O \n", + "O X X \n", + " X X 
\n", + "Episode 140, Total Reward: -1\n", + "Average Reward: 0.24285714285714285\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + "O \n", + " \n", + "on move: O\n", + " X \n", + "O \n", + " X \n", + "on move: X\n", + " X \n", + "O \n", + " O X \n", + "on move: O\n", + " X \n", + "O X \n", + " O X \n", + "Episode 141, Total Reward: 1\n", + "Average Reward: 0.24822695035460993\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + " O \n", + "on move: O\n", + " \n", + " X X \n", + " O \n", + "on move: X\n", + " O \n", + " X X \n", + " O \n", + "on move: O\n", + "X O \n", + " X X \n", + " O \n", + "on move: X\n", + "X O \n", + " X X \n", + " O O \n", + "on move: O\n", + "X O \n", + "X X X \n", + " O O \n", + "Episode 142, Total Reward: 1\n", + "Average Reward: 0.2535211267605634\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " \n", + "X O \n", + "on move: O\n", + " \n", + "X \n", + "X O \n", + "on move: X\n", + " O \n", + "X \n", + "X O \n", + "on move: O\n", + " O \n", + "X X \n", + "X O \n", + "on move: X\n", + " O \n", + "X X \n", + "X O O \n", + "on move: O\n", + " O \n", + "X X X \n", + "X O O \n", + "Episode 143, Total Reward: 1\n", + "Average Reward: 0.25874125874125875\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X \n", + " O \n", + "on move: O\n", + " \n", + "X \n", + "X O \n", + "on move: X\n", + "O \n", + "X \n", + "X O \n", + "on move: O\n", + "O X \n", + "X \n", + "X O \n", + "on move: X\n", + "O X O \n", + "X \n", + "X O \n", + "on move: O\n", + "O X O \n", + "X X \n", + "X O \n", + "on move: X\n", + "O X O \n", + "X X \n", + "X O O \n", + "on move: O\n", + "O X O \n", + "X X X \n", + "X O O \n", + "Episode 144, Total Reward: 1\n", + "Average Reward: 0.2638888888888889\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " \n", + " O \n", + "on move: O\n", + "X X \n", + " \n", + " O \n", + "on 
move: X\n", + "X X O \n", + " \n", + " O \n", + "on move: O\n", + "X X O \n", + " \n", + " X O \n", + "on move: X\n", + "X X O \n", + "O \n", + " X O \n", + "on move: O\n", + "X X O \n", + "O \n", + "X X O \n", + "on move: X\n", + "X X O \n", + "O O \n", + "X X O \n", + "on move: O\n", + "X X O \n", + "O O X \n", + "X X O \n", + "Episode 145, Total Reward: 0\n", + "Average Reward: 0.2620689655172414\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " O \n", + "X \n", + " \n", + "on move: O\n", + " O \n", + "X X \n", + " \n", + "on move: X\n", + " O \n", + "X X \n", + " O \n", + "on move: O\n", + " O \n", + "X X \n", + "X O \n", + "on move: X\n", + " O \n", + "X O X \n", + "X O \n", + "Episode 146, Total Reward: -1\n", + "Average Reward: 0.2534246575342466\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " \n", + "X O \n", + "on move: O\n", + " \n", + "X \n", + "X O \n", + "on move: X\n", + " O \n", + "X \n", + "X O \n", + "on move: O\n", + " X O \n", + "X \n", + "X O \n", + "on move: X\n", + " X O \n", + "X O \n", + "X O \n", + "Episode 147, Total Reward: -1\n", + "Average Reward: 0.24489795918367346\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " \n", + "O \n", + "on move: O\n", + "X X \n", + " \n", + "O \n", + "on move: X\n", + "X X \n", + " O \n", + "O \n", + "on move: O\n", + "X X X \n", + " O \n", + "O \n", + "Episode 148, Total Reward: 1\n", + "Average Reward: 0.25\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + "X O \n", + " \n", + " X \n", + "on move: X\n", + "X O O \n", + " \n", + " X \n", + "on move: O\n", + "X O O \n", + " \n", + " X X \n", + "on move: X\n", + "X O O \n", + " O \n", + " X X \n", + "on move: O\n", + "X O O \n", + "X O \n", + " X X \n", + "on move: X\n", + "X O O \n", + "X O O \n", + " X X \n", + "on move: O\n", + "X O O \n", + "X O O \n", + "X X X \n", + "Episode 149, Total Reward: 
# Main training loop (drives a standalone Agent object).
#
# Plays `num_episodes` games of tic-tac-toe. After every move the board is
# rendered; after every game the episode's total reward and the running
# average over all episodes so far are printed.
#
# NOTE(review): `agent.get_action` is called on every turn, so the same agent
# picks the moves for both sides, and nothing in this loop passes the reward
# back to the agent. Whether `Agent` learns internally cannot be told from
# this cell -- confirm against the Agent class.

# Environment for the tic-tac-toe game.
environment = TicTacToeEnv()

# Agent created with symbol=1 (the author's note says it plays X).
agent = Agent(symbol=1)

num_episodes = 150      # number of episodes (games) to play
max_moves = 9           # a 3x3 board admits at most nine moves
collected_rewards = []  # total reward of every finished episode

for i in range(num_episodes):
    # Start every episode from a fresh board. The returned (state, info)
    # pair is not needed here: the environment keeps the board and the
    # side to move itself.
    environment.reset()

    # Sum of the rewards returned by `step` during this episode.
    total_reward = 0

    for _ in range(max_moves):
        # presumably the list of moves `step` accepts for the side on move;
        # verify against TicTacToeEnv.move_generator
        moves = environment.move_generator()

        # No moves left: the game is over.
        if not moves:
            break

        # A single remaining move is forced, so play it directly;
        # otherwise let the agent choose among the candidates.
        move = moves[0] if len(moves) == 1 else agent.get_action(moves)

        # Apply the move. `step` returns (state, reward, done, info);
        # only the reward and the termination flag are used.
        _, reward, done, _ = environment.step(move)
        total_reward += reward

        # Show the board after the move.
        environment.render()

        if done:
            break

    collected_rewards.append(total_reward)

    print(f"Episode {i+1}, Total Reward: {total_reward}")
    average_reward = sum(collected_rewards) / len(collected_rewards)
    print(f"Average Reward: {average_reward}")