{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Лабораторная работа 6\n", "\n", "**Крестики-нолики**\thttps://github.com/nczempin/gym-tic-tac-toe/tree/master\n", "\n", "#### **Перевод среды на Gymnasium**" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "import gymnasium as gym\n", "from gymnasium import spaces\n", "\n", "class TicTacToeEnv(gym.Env):\n", " metadata = {'render.modes': ['human']}\n", " \n", " symbols = ['O', ' ', 'X']\n", "\n", " def __init__(self):\n", " super().__init__()\n", " self.action_space = spaces.Discrete(9)\n", " self.observation_space = spaces.Discrete(9 * 3 * 2)\n", " self.reset()\n", "\n", " def step(self, action):\n", " done = False\n", " reward = 0\n", "\n", " p, square = action # p - игрок (1 или -1), square - номер клетки\n", "\n", " board = self.state['board']\n", " proposed = board[square] \n", " om = self.state['on_move'] \n", " if proposed != 0: # Клетка уже занята\n", " print(f\"Незаконный ход: Квадрат {square} уже занят.\")\n", " done = True\n", " reward = -1 * om \n", " if p != om: # Не тот игрок на ходу\n", " print(f\"Незаконный ход: игрок {p} не находится в движении\")\n", " done = True\n", " reward = -1 * om\n", " else:\n", " board[square] = p\n", " self.state['on_move'] = -p\n", "\n", " for i in range(3):\n", " # Горизонтали и вертикали\n", " if (board[i * 3] == p and board[i * 3 + 1] == p and board[i * 3 + 2] == p) or \\\n", " (board[i] == p and board[i + 3] == p and board[i + 6] == p):\n", " reward = p\n", " done = True\n", " break\n", "\n", " # Диагонали\n", " if (board[0] == p and board[4] == p and board[8] == p) or \\\n", " (board[2] == p and board[4] == p and board[6] == p):\n", " reward = p\n", " done = True\n", " \n", " return self.state, reward, done, {}\n", "\n", " def reset(self):\n", " self.state = {}\n", " self.state['board'] = [0, 0, 0, 0, 0, 0, 0, 0, 0] \n", " self.state['on_move'] = 1 \n", " return self.state, {}\n", "\n", " def render(self, close=False):\n", " if close:\n", " return\n", " print(\"on move: \" , self.symbols[self.state['on_move']+1])\n", " for i in range (9):\n", " print (self.symbols[self.state['board'][i]+1], end=\" \");\n", " if ((i % 3) == 2):\n", " print();\n", "\n", " def move_generator(self):\n", " moves = []\n", " for i in range(9):\n", " if self.state['board'][i] == 0:\n", " p = self.state['on_move']\n", " m = [p, i]\n", " moves.append(m)\n", " return moves" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### **Реализация агента**" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "import random\n", "\n", "class RandomTicTacToeAgent:\n", " def __init__(self, symbol):\n", " self.symbol = symbol # Символ игрока (1 - X, -1 - O\n", " \n", " def get_action(self, moves):\n", " return random.choice(moves) # Выбираем случайный ход из доступных\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### **Основной цикл обучения**" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " \n", " X O \n", "on move: O\n", "X \n", " \n", " X O \n", "on move: X\n", "X O \n", " \n", " X O \n", "on move: O\n", "X X O \n", " \n", " X O \n", "on move: X\n", "X X O \n", " \n", "O X O \n", "on move: O\n", "X X O \n", " X \n", "O X O \n", "on move: X\n", "X X O \n", "O X \n", "O X O \n", "on move: O\n", "X X O \n", "O X X \n", "O X O \n", "Episode 1, Total Reward: 1\n", "Average Reward: 1.0\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", " O \n", "on move: O\n", " \n", " X \n", " O X \n", "on move: X\n", " \n", "O X \n", " O X \n", "on move: O\n", " \n", "O X \n", "X O X \n", "on move: X\n", " O \n", "O X \n", "X O X \n", "on move: O\n", " O \n", "O X X \n", "X O X \n", "on move: X\n", " O O \n", "O X X \n", "X O X \n", "on move: O\n", "X O O \n", "O X X \n", "X O X \n", "Episode 2, Total Reward: 1\n", "Average Reward: 1.0\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", "O \n", " X \n", "on move: O\n", " \n", "O X \n", " X \n", "on move: X\n", " \n", "O X \n", "O X \n", "on move: O\n", " X \n", "O X \n", "O X \n", "Episode 3, Total Reward: 1\n", "Average Reward: 1.0\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", " O \n", "on move: O\n", " X \n", " \n", " O X \n", "on move: X\n", " X \n", " \n", "O O X \n", "on move: O\n", " X \n", "X \n", "O O X \n", "on move: X\n", " X O \n", "X \n", "O O X \n", "on move: O\n", " X O \n", "X X \n", "O O X \n", "on move: X\n", " X O \n", "X X O \n", "O O X \n", "on move: O\n", "X X O \n", "X X O \n", "O O X \n", "Episode 4, Total Reward: 1\n", "Average Reward: 1.0\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", " X X \n", " O \n", " \n", "on move: X\n", " X X \n", " O O \n", " \n", "on move: O\n", " X X \n", "X O O \n", " \n", "on move: X\n", " X X \n", "X O O \n", " O \n", "on move: O\n", " X X \n", "X O O \n", " O X \n", "on move: X\n", " X X \n", "X O O \n", "O O X \n", "on move: O\n", "X X X \n", "X O O \n", "O O X \n", "Episode 5, Total Reward: 1\n", "Average Reward: 1.0\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " O \n", "X X \n", " \n", "on move: X\n", " O O \n", "X X \n", " \n", "on move: O\n", " O O \n", "X X \n", " X \n", "on move: X\n", " O O \n", "X X \n", " X O \n", "on move: O\n", "X O O \n", "X X \n", " X O \n", "on move: X\n", "X O O \n", "X X O \n", " X O \n", "Episode 6, Total Reward: -1\n", "Average Reward: 0.6666666666666666\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", " O \n", " X \n", " X \n", "on move: X\n", " O O \n", " X \n", " X \n", "on move: O\n", " O O \n", " X \n", " X X \n", "on move: X\n", " O O \n", " O X \n", " X X \n", "on move: O\n", "X O O \n", " O X \n", " X X \n", "on move: X\n", "X O O \n", "O O X \n", " X X \n", "on move: O\n", "X O O \n", "O O X \n", "X X X \n", "Episode 7, Total Reward: 1\n", "Average Reward: 0.7142857142857143\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", " X \n", " O X \n", " \n", "on move: X\n", " X \n", "O O X \n", " \n", "on move: O\n", " X \n", "O O X \n", " X \n", "on move: X\n", "O X \n", "O O X \n", " X \n", "on move: O\n", "O X \n", "O O X \n", "X X \n", "on move: X\n", "O X O \n", "O O X \n", "X X \n", "on move: O\n", "O X O \n", "O O X \n", "X X X \n", "Episode 8, Total Reward: 1\n", "Average Reward: 0.75\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " O \n", "X \n", " \n", "on move: O\n", " O \n", "X X \n", " \n", "on move: X\n", " O \n", "X X \n", " O \n", "on move: O\n", "X O \n", "X X \n", " O \n", "on move: X\n", "X O \n", "X X \n", " O O \n", "on move: O\n", "X O \n", "X X X \n", " O O \n", "Episode 9, Total Reward: 1\n", "Average Reward: 0.7777777777777778\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", " O \n", "on move: O\n", "X X \n", " \n", " O \n", "on move: X\n", "X X \n", " O \n", " O \n", "on move: O\n", "X X X \n", " O \n", " O \n", "Episode 10, Total Reward: 1\n", "Average Reward: 0.8\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " O \n", " X \n", "on move: O\n", " \n", " X O \n", " X \n", "on move: X\n", " \n", "O X O \n", " X \n", "on move: O\n", "X \n", "O X O \n", " X \n", "on move: X\n", "X O \n", "O X O \n", " X \n", "on move: O\n", "X O \n", "O X O \n", "X X \n", "on move: X\n", "X O O \n", "O X O \n", "X X \n", "on move: O\n", "X O O \n", "O X O \n", "X X X \n", "Episode 11, Total Reward: 1\n", "Average Reward: 0.8181818181818182\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " O X \n", " \n", "on move: O\n", "X \n", " O X \n", " \n", "on move: X\n", "X \n", " O X \n", " O \n", "on move: O\n", "X \n", " O X \n", " O X \n", "on move: X\n", "X \n", " O X \n", "O O X \n", "on move: O\n", "X \n", "X O X \n", "O O X \n", "on move: X\n", "X O \n", "X O X \n", "O O X \n", "Episode 12, Total Reward: -1\n", "Average Reward: 0.6666666666666666\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", "O \n", " \n", "X \n", "on move: O\n", "O X \n", " \n", "X \n", "on move: X\n", "O X \n", " \n", "X O \n", "on move: O\n", "O X \n", " X \n", "X O \n", "on move: X\n", "O O X \n", " X \n", "X O \n", "on move: O\n", "O O X \n", " X \n", "X X O \n", "on move: X\n", "O O X \n", "O X \n", "X X O \n", "on move: O\n", "O O X \n", "O X X \n", "X X O \n", "Episode 13, Total Reward: 1\n", "Average Reward: 0.6923076923076923\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", "O \n", "on move: O\n", " X \n", " X \n", "O \n", "on move: X\n", " X \n", " X \n", "O O \n", "on move: O\n", "X X \n", " X \n", "O O \n", "on move: X\n", "X O X \n", " X \n", "O O \n", "on move: O\n", "X O X \n", " X \n", "O O X \n", "Episode 14, Total Reward: 1\n", "Average Reward: 0.7142857142857143\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " \n", " O \n", "on move: O\n", "X X \n", " \n", " O \n", "on move: X\n", "X X \n", " \n", "O O \n", "on move: O\n", "X X \n", " \n", "O X O \n", "on move: X\n", "X X \n", " O \n", "O X O \n", "on move: O\n", "X X X \n", " O \n", "O X O \n", "Episode 15, Total Reward: 1\n", "Average Reward: 0.7333333333333333\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", "X X \n", " O \n", " \n", "on move: X\n", "X X \n", "O O \n", " \n", "on move: O\n", "X X \n", "O O \n", " X \n", "on move: X\n", "X O X \n", "O O \n", " X \n", "on move: O\n", "X O X \n", "O O X \n", " X \n", "on move: X\n", "X O X \n", "O O X \n", "O X \n", "on move: O\n", "X O X \n", "O O X \n", "O X X \n", "Episode 16, Total Reward: 1\n", "Average Reward: 0.75\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", "O \n", " \n", "on move: O\n", "X \n", "O \n", " X \n", "on move: X\n", "X \n", "O \n", " O X \n", "on move: O\n", "X X \n", "O \n", " O X \n", "on move: X\n", "X X \n", "O O \n", " O X \n", "on move: O\n", "X X X \n", "O O \n", " O X \n", "Episode 17, Total Reward: 1\n", "Average Reward: 0.7647058823529411\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", "O X \n", " \n", " \n", "on move: O\n", "O X X \n", " \n", " \n", "on move: X\n", "O X X \n", "O \n", " \n", "on move: O\n", "O X X \n", "O \n", " X \n", "on move: X\n", "O X X \n", "O O \n", " X \n", "on move: O\n", "O X X \n", "O O \n", " X X \n", "on move: X\n", "O X X \n", "O O O \n", " X X \n", "Episode 18, Total Reward: -1\n", "Average Reward: 0.6666666666666666\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X O \n", " \n", "on move: O\n", " \n", "X O \n", " X \n", "on move: X\n", " O \n", "X O \n", " X \n", "on move: O\n", " O \n", "X O \n", "X X \n", "on move: X\n", " O \n", "X O \n", "X O X \n", "on move: O\n", " X O \n", "X O \n", "X O X \n", "on move: X\n", "O X O \n", "X O \n", "X O X \n", "on move: O\n", "O X O \n", "X O X \n", "X O X \n", "Episode 19, Total Reward: 0\n", "Average Reward: 0.631578947368421\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", " O \n", " X \n", " X \n", "on move: X\n", "O O \n", " X \n", " X \n", "on move: O\n", "O O \n", "X X \n", " X \n", "on move: X\n", "O O \n", "X X O \n", " X \n", "on move: O\n", "O O \n", "X X O \n", "X X \n", "on move: X\n", "O O O \n", "X X O \n", "X X \n", "Episode 20, Total Reward: -1\n", "Average Reward: 0.55\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " O \n", "X \n", "on move: O\n", "X \n", " O \n", "X \n", "on move: X\n", "X \n", " O O \n", "X \n", "on move: O\n", "X \n", " O O \n", "X X \n", "on move: X\n", "X \n", " O O \n", "X X O \n", "on move: O\n", "X X \n", " O O \n", "X X O \n", "on move: X\n", "X X \n", "O O O \n", "X X O \n", "Episode 21, Total Reward: -1\n", "Average Reward: 0.47619047619047616\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " O \n", "X X \n", " \n", "on move: X\n", "O O \n", "X X \n", " \n", "on move: O\n", "O X O \n", "X X \n", " \n", "on move: X\n", "O X O \n", "X X \n", " O \n", "on move: O\n", "O X O \n", "X X \n", "X O \n", "on move: X\n", "O X O \n", "X O X \n", "X O \n", "Episode 22, Total Reward: -1\n", "Average Reward: 0.4090909090909091\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " O \n", " \n", "on move: O\n", "X \n", "X O \n", " \n", "on move: X\n", "X O \n", "X O \n", " \n", "on move: O\n", "X O X \n", "X O \n", " \n", "on move: X\n", "X O X \n", "X O \n", " O \n", "on move: O\n", "X O X \n", "X O \n", "X O \n", "Episode 23, Total Reward: 1\n", "Average Reward: 0.43478260869565216\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", "O \n", "on move: O\n", " \n", "X X \n", "O \n", "on move: X\n", " \n", "X O X \n", "O \n", "on move: O\n", " X \n", "X O X \n", "O \n", "on move: X\n", "O X \n", "X O X \n", "O \n", "on move: O\n", "O X X \n", "X O X \n", "O \n", "on move: X\n", "O X X \n", "X O X \n", "O O \n", "Episode 24, Total Reward: -1\n", "Average Reward: 0.375\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " O \n", " X \n", " X \n", "on move: X\n", " O \n", " X \n", " O X \n", "on move: O\n", "X O \n", " X \n", " O X \n", "Episode 25, Total Reward: 1\n", "Average Reward: 0.4\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", "O X \n", " \n", " \n", "on move: O\n", "O X \n", " X \n", " \n", "on move: X\n", "O X \n", "O X \n", " \n", "on move: O\n", "O X \n", "O X \n", "X \n", "on move: X\n", "O X \n", "O X \n", "X O \n", "on move: O\n", "O X X \n", "O X \n", "X O \n", "on move: X\n", "O X X \n", "O O X \n", "X O \n", "Episode 26, Total Reward: -1\n", "Average Reward: 0.34615384615384615\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", "O \n", " \n", " X \n", "on move: O\n", "O \n", " X \n", " X \n", "on move: X\n", "O O \n", " X \n", " X \n", "on move: O\n", "O O \n", " X \n", "X X \n", "on move: X\n", "O O \n", " X \n", "X O X \n", "on move: O\n", "O O \n", " X X \n", "X O X \n", "on move: X\n", "O O \n", "O X X \n", "X O X \n", "on move: O\n", "O O X \n", "O X X \n", "X O X \n", "Episode 27, Total Reward: 1\n", "Average Reward: 0.37037037037037035\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X O \n", " \n", "on move: O\n", " \n", "X O X \n", " \n", "on move: X\n", " \n", "X O X \n", " O \n", "on move: O\n", " X \n", "X O X \n", " O \n", "on move: X\n", "O X \n", "X O X \n", " O \n", "on move: O\n", "O X \n", "X O X \n", " O X \n", "on move: X\n", "O X O \n", "X O X \n", " O X \n", "on move: O\n", "O X O \n", "X O X \n", "X O X \n", "Episode 28, Total Reward: 0\n", "Average Reward: 0.35714285714285715\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", " O \n", " X \n", " X \n", "on move: X\n", " O O \n", " X \n", " X \n", "on move: O\n", " O O \n", " X \n", "X X \n", "on move: X\n", "O O O \n", " X \n", "X X \n", "Episode 29, Total Reward: -1\n", "Average Reward: 0.3103448275862069\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " O \n", "X \n", " \n", "on move: O\n", "X O \n", "X \n", " \n", "on move: X\n", "X O O \n", "X \n", " \n", "on move: O\n", "X O O \n", "X \n", "X \n", "Episode 30, Total Reward: 1\n", "Average Reward: 0.3333333333333333\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", " O \n", " X \n", " X \n", "on move: X\n", " O \n", " X \n", " O X \n", "on move: O\n", " X O \n", " X \n", " O X \n", "on move: X\n", "O X O \n", " X \n", " O X \n", "on move: O\n", "O X O \n", "X X \n", " O X \n", "on move: X\n", "O X O \n", "X X \n", "O O X \n", "on move: O\n", "O X O \n", "X X X \n", "O O X \n", "Episode 31, Total Reward: 1\n", "Average Reward: 0.3548387096774194\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", " O \n", "on move: O\n", " X \n", " \n", "X O \n", "on move: X\n", " X \n", "O \n", "X O \n", "on move: O\n", " X \n", "O X \n", "X O \n", "on move: X\n", "O X \n", "O X \n", "X O \n", "on move: O\n", "O X X \n", "O X \n", "X O \n", "on move: X\n", "O X X \n", "O O X \n", "X O \n", "Episode 32, Total Reward: -1\n", "Average Reward: 0.3125\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X O \n", " \n", "on move: O\n", " X \n", "X O \n", " \n", "on move: X\n", " X \n", "X O O \n", " \n", "on move: O\n", "X X \n", "X O O \n", " \n", "on move: X\n", "X X O \n", "X O O \n", " \n", "on move: O\n", "X X O \n", "X O O \n", " X \n", "on move: X\n", "X X O \n", "X O O \n", "O X \n", "Episode 33, Total Reward: -1\n", "Average Reward: 0.2727272727272727\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " O \n", " \n", "X \n", "on move: O\n", " O \n", "X \n", "X \n", "on move: X\n", " O \n", "X O \n", "X \n", "on move: O\n", " O X \n", "X O \n", "X \n", "on move: X\n", " O X \n", "X O \n", "X O \n", "Episode 34, Total Reward: -1\n", "Average Reward: 0.23529411764705882\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", "O \n", " X \n", "on move: O\n", " \n", "O \n", "X X \n", "on move: X\n", " O \n", "O \n", "X X \n", "on move: O\n", " O \n", "O X \n", "X X \n", "on move: X\n", " O \n", "O O X \n", "X X \n", "on move: O\n", " O X \n", "O O X \n", "X X \n", "Episode 35, Total Reward: 1\n", "Average Reward: 0.2571428571428571\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " \n", " O \n", "on move: O\n", "X \n", "X \n", " O \n", "on move: X\n", "X \n", "X O \n", " O \n", "on move: O\n", "X \n", "X O \n", "X O \n", "Episode 36, Total Reward: 1\n", "Average Reward: 0.2777777777777778\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X O \n", " \n", "on move: O\n", " \n", " X O \n", " X \n", "on move: X\n", "O \n", " X O \n", " X \n", "on move: O\n", "O \n", "X X O \n", " X \n", "on move: X\n", "O O \n", "X X O \n", " X \n", "on move: O\n", "O X O \n", "X X O \n", " X \n", "Episode 37, Total Reward: 1\n", "Average Reward: 0.2972972972972973\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", "O \n", " \n", "on move: O\n", " X \n", "O \n", "X \n", "on move: X\n", " X \n", "O O \n", "X \n", "on move: O\n", " X \n", "O O \n", "X X \n", "on move: X\n", "O X \n", "O O \n", "X X \n", "on move: O\n", "O X \n", "O X O \n", "X X \n", "Episode 38, Total Reward: 1\n", "Average Reward: 0.3157894736842105\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " \n", "O X \n", "on move: O\n", "X \n", " \n", "O X \n", "on move: X\n", "X O \n", " \n", "O X \n", "on move: O\n", "X X O \n", " \n", "O X \n", "on move: X\n", "X X O \n", " O \n", "O X \n", "on move: O\n", "X X O \n", "X O \n", "O X \n", "on move: X\n", "X X O \n", "X O O \n", "O X \n", "Episode 39, Total Reward: -1\n", "Average Reward: 0.28205128205128205\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " O \n", " X \n", " X \n", "on move: X\n", " O \n", " X \n", "O X \n", "on move: O\n", "X O \n", " X \n", "O X \n", "on move: X\n", "X O O \n", " X \n", "O X \n", "on move: O\n", "X O O \n", " X X \n", "O X \n", "on move: X\n", "X O O \n", "O X X \n", "O X \n", "on move: O\n", "X O O \n", "O X X \n", "O X X \n", "Episode 40, Total Reward: 1\n", "Average Reward: 0.3\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", " O \n", "X \n", " X \n", "on move: X\n", " O \n", "X O \n", " X \n", "on move: O\n", " O \n", "X X O \n", " X \n", "on move: X\n", " O O \n", "X X O \n", " X \n", "on move: O\n", "X O O \n", "X X O \n", " X \n", "on move: X\n", "X O O \n", "X X O \n", "O X \n", "on move: O\n", "X O O \n", "X X O \n", "O X X \n", "Episode 41, Total Reward: 1\n", "Average Reward: 0.3170731707317073\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " O \n", " \n", "X \n", "on move: O\n", "X O \n", " \n", "X \n", "on move: X\n", "X O \n", "O \n", "X \n", "on move: O\n", "X O \n", "O \n", "X X \n", "on move: X\n", "X O \n", "O O \n", "X X \n", "on move: O\n", "X X O \n", "O O \n", "X X \n", "on move: X\n", "X X O \n", "O O O \n", "X X \n", "Episode 42, Total Reward: -1\n", "Average Reward: 0.2857142857142857\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", "O X \n", " \n", "on move: O\n", " \n", "O X \n", "X \n", "on move: X\n", " O \n", "O X \n", "X \n", "on move: O\n", " X O \n", "O X \n", "X \n", "on move: X\n", "O X O \n", "O X \n", "X \n", "on move: O\n", "O X O \n", "O X \n", "X X \n", "on move: X\n", "O X O \n", "O O X \n", "X X \n", "on move: O\n", "O X O \n", "O O X \n", "X X X \n", "Episode 43, Total Reward: 1\n", "Average Reward: 0.3023255813953488\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " O \n", " X \n", "on move: O\n", " X \n", " O \n", " X \n", "on move: X\n", " X \n", " O \n", " O X \n", "on move: O\n", " X \n", " X O \n", " O X \n", "on move: X\n", " X \n", "O X O \n", " O X \n", "on move: O\n", "X X \n", "O X O \n", " O X \n", "Episode 44, Total Reward: 1\n", "Average Reward: 0.3181818181818182\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " O \n", " X \n", " X \n", "on move: X\n", "O O \n", " X \n", " X \n", "on move: O\n", "O O \n", "X X \n", " X \n", "on move: X\n", "O O \n", "X X \n", " X O \n", "on move: O\n", "O X O \n", "X X \n", " X O \n", "on move: X\n", "O X O \n", "X O X \n", " X O \n", "Episode 45, Total Reward: -1\n", "Average Reward: 0.28888888888888886\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", "O \n", "on move: O\n", " X \n", "X \n", "O \n", "on move: X\n", " X \n", "X O \n", "O \n", "on move: O\n", " X X \n", "X O \n", "O \n", "on move: X\n", "O X X \n", "X O \n", "O \n", "on move: O\n", "O X X \n", "X O \n", "O X \n", "on move: X\n", "O X X \n", "X O O \n", "O X \n", "on move: O\n", "O X X \n", "X O O \n", "O X X \n", "Episode 46, Total Reward: 0\n", "Average Reward: 0.2826086956521739\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " O \n", " \n", "X \n", "on move: O\n", "X O \n", " \n", "X \n", "on move: X\n", "X O \n", " O \n", "X \n", "on move: O\n", "X O \n", "X O \n", "X \n", "Episode 47, Total Reward: 1\n", "Average Reward: 0.2978723404255319\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", " X \n", " O \n", " X \n", "on move: X\n", " X \n", " O \n", " O X \n", "on move: O\n", " X \n", " O \n", "X O X \n", "on move: X\n", "O X \n", " O \n", "X O X \n", "on move: O\n", "O X \n", "X O \n", "X O X \n", "on move: X\n", "O O X \n", "X O \n", "X O X \n", "on move: O\n", "O O X \n", "X X O \n", "X O X \n", "Episode 48, Total Reward: 1\n", "Average Reward: 0.3125\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " O \n", " X \n", "on move: O\n", "X \n", " O \n", " X \n", "on move: X\n", "X \n", " O \n", "O X \n", "on move: O\n", "X \n", " X O \n", "O X \n", "Episode 49, Total Reward: 1\n", "Average Reward: 0.32653061224489793\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", "O \n", "on move: O\n", " \n", " X X \n", "O \n", "on move: X\n", " \n", " X X \n", "O O \n", "on move: O\n", " X \n", " X X \n", "O O \n", "on move: X\n", " X \n", "O X X \n", "O O \n", "on move: O\n", " X X \n", "O X X \n", "O O \n", "on move: X\n", " X X \n", "O X X \n", "O O O \n", "Episode 50, Total Reward: -1\n", "Average Reward: 0.3\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X O \n", " \n", "on move: O\n", " X \n", "X O \n", " \n", "on move: X\n", " X \n", "X O O \n", " \n", "on move: O\n", " X \n", "X O O \n", " X \n", "on move: X\n", " X \n", "X O O \n", "O X \n", "on move: O\n", " X X \n", "X O O \n", "O X \n", "on move: X\n", " X X \n", "X O O \n", "O O X \n", "on move: O\n", "X X X \n", "X O O \n", "O O X \n", "Episode 51, Total Reward: 1\n", "Average Reward: 0.3137254901960784\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " \n", "X O \n", "on move: O\n", " X \n", " \n", "X O \n", "on move: X\n", " X \n", " O \n", "X O \n", "on move: O\n", " X \n", " O X \n", "X O \n", "on move: X\n", " X O \n", " O X \n", "X O \n", "on move: O\n", " X O \n", " O X \n", "X X O \n", "on move: X\n", "O X O \n", " O X \n", "X X O \n", "Episode 52, Total Reward: -1\n", "Average Reward: 0.28846153846153844\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " O \n", " \n", "on move: O\n", "X \n", " O \n", "X \n", "on move: X\n", "X \n", "O O \n", "X \n", "on move: O\n", "X \n", "O O \n", "X X \n", "on move: X\n", "X O \n", "O O \n", "X X \n", "on move: O\n", "X O \n", "O O X \n", "X X \n", "on move: X\n", "X O O \n", "O O X \n", "X X \n", "on move: O\n", "X O O \n", "O O X \n", "X X X \n", "Episode 53, Total Reward: 1\n", "Average Reward: 0.3018867924528302\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X O \n", " \n", " \n", "on move: O\n", " X O \n", " X \n", " \n", "on move: X\n", " X O \n", " X \n", "O \n", "on move: O\n", " X O \n", " X \n", "O X \n", "Episode 54, Total Reward: 1\n", "Average Reward: 0.3148148148148148\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " O \n", " X X \n", " \n", "on move: X\n", "O O \n", " X X \n", " \n", "on move: O\n", "O O \n", "X X X \n", " \n", "Episode 55, Total Reward: 1\n", "Average Reward: 0.32727272727272727\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " O \n", "X \n", " \n", "on move: O\n", " O \n", "X \n", "X \n", "on move: X\n", "O O \n", "X \n", "X \n", "on move: O\n", "O O \n", "X X \n", "X \n", "on move: X\n", "O O \n", "X O X \n", "X \n", "on move: O\n", "O X O \n", "X O X \n", "X \n", "on move: X\n", "O X O \n", "X O X \n", "X O \n", "on move: O\n", "O X O \n", "X O X \n", "X O X \n", "Episode 56, Total Reward: 0\n", "Average Reward: 0.32142857142857145\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " O \n", " X \n", "on move: O\n", " \n", " O \n", " X X \n", "on move: X\n", " \n", " O \n", "O X X \n", "on move: O\n", " \n", "X O \n", "O X X \n", "on move: X\n", " \n", "X O O \n", "O X X \n", "on move: O\n", " X \n", "X O O \n", "O X X \n", "on move: X\n", "O X \n", "X O O \n", "O X X \n", "on move: O\n", "O X X \n", "X O O \n", "O X X \n", "Episode 57, Total Reward: 0\n", "Average Reward: 0.3157894736842105\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " O \n", "X \n", "on move: O\n", " \n", " O \n", "X X \n", "on move: X\n", " \n", " O \n", "X O X \n", "on move: O\n", " X \n", " O \n", "X O X \n", "on move: X\n", " X \n", "O O \n", "X O X \n", "on move: O\n", " X X \n", "O O \n", "X O X \n", "on move: X\n", " X X \n", "O O O \n", "X O X \n", "Episode 58, Total Reward: -1\n", "Average Reward: 0.29310344827586204\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", "O \n", "on move: O\n", " X X \n", " \n", "O \n", "on move: X\n", "O X X \n", " \n", "O \n", "on move: O\n", "O X X \n", " X \n", "O \n", "on move: X\n", "O X X \n", " X \n", "O O \n", "on move: O\n", "O X X \n", " X \n", "O O X \n", "on move: X\n", "O X X \n", "O X \n", "O O X \n", "Episode 59, Total Reward: -1\n", "Average Reward: 0.2711864406779661\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " O \n", " X \n", "on move: O\n", " X \n", " O \n", " X \n", "on move: X\n", " O X \n", " O \n", " X \n", "on move: O\n", " O X \n", " O \n", " X X \n", "on move: X\n", " O X \n", "O O \n", " X X \n", "on move: O\n", " O X \n", "O X O \n", " X X \n", "on move: X\n", " O X \n", "O X O \n", "O X X \n", "on move: O\n", "X O X \n", "O X O \n", "O X X \n", "Episode 60, Total Reward: 1\n", "Average Reward: 0.2833333333333333\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", "O \n", "on move: O\n", " \n", "X \n", "O X \n", "on move: X\n", "O \n", "X \n", "O X \n", "on move: O\n", "O \n", "X \n", "O X X \n", "on move: X\n", "O O \n", "X \n", "O X X \n", "on move: O\n", "O X O \n", "X \n", "O X X \n", "on move: X\n", "O X O \n", "X O \n", "O X X \n", "Episode 61, Total Reward: -1\n", "Average Reward: 0.26229508196721313\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", " O \n", "X \n", " X \n", "on move: X\n", " O \n", "X \n", "O X \n", "on move: O\n", " O \n", "X \n", "O X X \n", "on move: X\n", " O \n", "X O \n", "O X X \n", "on move: O\n", "X O \n", "X O \n", "O X X \n", "on move: X\n", "X O \n", "X O O \n", "O X X \n", "on move: O\n", "X O X \n", "X O O \n", "O X X \n", "Episode 62, Total Reward: 0\n", "Average Reward: 0.25806451612903225\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", "O \n", "on move: O\n", " \n", " X \n", "O X \n", "on move: X\n", "O \n", " X \n", "O X \n", "on move: O\n", "O X \n", " X \n", "O X \n", "Episode 63, Total Reward: 1\n", "Average Reward: 0.2698412698412698\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X O \n", " \n", " \n", "on move: O\n", "X O \n", " \n", " X \n", "on move: X\n", "X O \n", " O \n", " X \n", "on move: O\n", "X O \n", " X O \n", " X \n", "Episode 64, Total Reward: 1\n", "Average Reward: 0.28125\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", "O X \n", " \n", "on move: O\n", " \n", "O X \n", " X \n", "on move: X\n", " \n", "O O X \n", " X \n", "on move: O\n", " \n", "O O X \n", "X X \n", "on move: X\n", "O \n", "O O X \n", "X X \n", "on move: O\n", "O X \n", "O O X \n", "X X \n", "on move: X\n", "O X O \n", "O O X \n", "X X \n", "on move: O\n", "O X O \n", "O O X \n", "X X X \n", "Episode 65, Total Reward: 1\n", "Average Reward: 0.2923076923076923\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X O \n", " \n", " \n", "on move: O\n", "X O \n", " X \n", " \n", "on move: X\n", "X O \n", " X \n", " O \n", "on move: O\n", "X O \n", " X \n", "X O \n", "on move: X\n", "X O O \n", " X \n", "X O \n", "on move: O\n", "X O O \n", " X \n", "X X O \n", "on move: X\n", "X O O \n", "O X \n", "X X O \n", "on move: O\n", "X O O \n", "O X X \n", "X X O \n", "Episode 66, Total Reward: 0\n", "Average Reward: 0.2878787878787879\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X O \n", " \n", " \n", "on move: O\n", "X O \n", " \n", " X \n", "on move: X\n", "X O \n", " \n", "O X \n", "on move: O\n", "X O X \n", " \n", "O X \n", "on move: X\n", "X O X \n", " \n", "O O X \n", "on move: O\n", "X O X \n", " X \n", "O O X \n", "Episode 67, Total Reward: 1\n", "Average Reward: 0.29850746268656714\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", "O \n", " \n", "on move: O\n", "X \n", "O \n", " X \n", "on move: X\n", "X \n", "O O \n", " X \n", "on move: O\n", "X \n", "O O \n", "X X \n", "on move: X\n", "X \n", "O O O \n", "X X \n", "Episode 68, Total Reward: -1\n", "Average Reward: 0.27941176470588236\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " \n", "O X \n", "on move: O\n", " \n", " \n", "O X X \n", "on move: X\n", " O \n", " \n", "O X X \n", "on move: O\n", " O \n", " X \n", "O X X \n", "on move: X\n", " O O \n", " X \n", "O X X \n", "on move: O\n", " O O \n", " X X \n", "O X X \n", "on move: X\n", "O O O \n", " X X \n", "O X X \n", "Episode 69, Total Reward: -1\n", "Average Reward: 0.2608695652173913\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X O \n", " \n", " \n", "on move: O\n", "X O \n", " \n", "X \n", "on move: X\n", "X O O \n", " \n", "X \n", "on move: O\n", "X O O \n", " \n", "X X \n", "on move: X\n", "X O O \n", " O \n", "X X \n", "on move: O\n", "X O O \n", "X O \n", "X X \n", "Episode 70, Total Reward: 1\n", "Average Reward: 0.2714285714285714\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", " X \n", " X O \n", " \n", "on move: X\n", " X \n", " X O \n", " O \n", "on move: O\n", " X \n", " X O \n", "X O \n", "on move: X\n", " X \n", " X O \n", "X O O \n", "on move: O\n", " X X \n", " X O \n", "X O O \n", "Episode 71, Total Reward: 1\n", "Average Reward: 0.28169014084507044\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", "X O \n", " \n", " X \n", "on move: X\n", "X O \n", " O \n", " X \n", "on move: O\n", "X X O \n", " O \n", " X \n", "on move: X\n", "X X O \n", "O O \n", " X \n", "on move: O\n", "X X O \n", "O O \n", " X X \n", "on move: X\n", "X X O \n", "O O O \n", " X X \n", "Episode 72, Total Reward: -1\n", "Average Reward: 0.2638888888888889\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " \n", " O X \n", "on move: O\n", " X \n", " \n", " O X \n", "on move: X\n", " X \n", " \n", "O O X \n", "on move: O\n", " X \n", " X \n", "O O X \n", "on move: X\n", " X O \n", " X \n", "O O X \n", "on move: O\n", "X X O \n", " X \n", "O O X \n", "on move: X\n", "X X O \n", "O X \n", "O O X \n", "on move: O\n", "X X O \n", "O X X \n", "O O X \n", "Episode 73, Total Reward: 1\n", "Average Reward: 0.273972602739726\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", "O \n", "on move: O\n", " \n", "X X \n", "O \n", "on move: X\n", " \n", "X X O \n", "O \n", "on move: O\n", " \n", "X X O \n", "O X \n", "on move: X\n", " O \n", "X X O \n", "O X \n", "on move: O\n", " O \n", "X X O \n", "O X X \n", "on move: X\n", " O O \n", "X X O \n", "O X X \n", "on move: O\n", "X O O \n", "X X O \n", "O X X \n", "Episode 74, Total Reward: 1\n", "Average Reward: 0.28378378378378377\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", " O \n", "on move: O\n", " X \n", "X \n", " O \n", "on move: X\n", " X \n", "X \n", " O O \n", "on move: O\n", " X \n", "X X \n", " O O \n", "on move: X\n", "O X \n", "X X \n", " O O \n", "on move: O\n", "O X \n", "X X X \n", " O O \n", "Episode 75, Total Reward: 1\n", "Average Reward: 0.29333333333333333\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", " O \n", "on move: O\n", " X \n", " X \n", " O \n", "on move: X\n", " O X \n", " X \n", " O \n", "on move: O\n", " O X \n", " X X \n", " O \n", "on move: X\n", " O X \n", " X X \n", "O O \n", "on move: O\n", " O X \n", "X X X \n", "O O \n", "Episode 76, Total Reward: 1\n", "Average Reward: 0.3026315789473684\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", "O \n", "on move: O\n", " \n", "X \n", "O X \n", "on move: X\n", "O \n", "X \n", "O X \n", "on move: O\n", "O X \n", "X \n", "O X \n", "on move: X\n", "O X O \n", "X \n", "O X \n", "on move: O\n", "O X O \n", "X X \n", "O X \n", "on move: X\n", "O X O \n", "X O X \n", "O X \n", "Episode 77, Total Reward: -1\n", "Average Reward: 0.2857142857142857\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", " O \n", "on move: O\n", " X \n", "X \n", " O \n", "on move: X\n", " X \n", "X \n", "O O \n", "on move: O\n", " X \n", "X X \n", "O O \n", "on move: X\n", " X \n", "X X O \n", "O O \n", "on move: O\n", " X \n", "X X O \n", "O X O \n", "Episode 78, Total Reward: 1\n", "Average Reward: 0.2948717948717949\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", "O \n", "on move: O\n", " \n", "X X \n", "O \n", "on move: X\n", " \n", "X X \n", "O O \n", "on move: O\n", " \n", "X X X \n", "O O \n", "Episode 79, Total Reward: 1\n", "Average Reward: 0.3037974683544304\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " O \n", " X \n", "on move: O\n", " \n", " X O \n", " X \n", "on move: X\n", " \n", " X O \n", "O X \n", "on move: O\n", " X \n", " X O \n", "O X \n", "on move: X\n", " X \n", "O X O \n", "O X \n", "on move: O\n", " X X \n", "O X O \n", "O X \n", "Episode 80, Total Reward: 1\n", "Average Reward: 0.3125\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " O \n", " \n", "X \n", "on move: O\n", " O \n", " \n", "X X \n", "on move: X\n", "O O \n", " \n", "X X \n", "on move: O\n", "O O \n", " X \n", "X X \n", "on move: X\n", "O O O \n", " X \n", "X X \n", "Episode 81, Total Reward: -1\n", "Average Reward: 0.2962962962962963\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", "O \n", "X \n", "on move: O\n", " \n", "O \n", "X X \n", "on move: X\n", " O \n", "O \n", "X X \n", "on move: O\n", " X O \n", "O \n", "X X \n", "on move: X\n", " X O \n", "O O \n", "X X \n", "on move: O\n", "X X O \n", "O O \n", "X X \n", "on move: X\n", "X X O \n", "O O \n", "X O X \n", "on move: O\n", "X X O \n", "O O X \n", "X O X \n", "Episode 82, Total Reward: 0\n", "Average Reward: 0.2926829268292683\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " O \n", "X \n", " \n", "on move: O\n", " O \n", "X X \n", " \n", "on move: X\n", " O O \n", "X X \n", " \n", "on move: O\n", " O O \n", "X X \n", " X \n", "on move: X\n", " O O \n", "X X \n", " X O \n", "on move: O\n", " O O \n", "X X X \n", " X O \n", "Episode 83, Total Reward: 1\n", "Average Reward: 0.30120481927710846\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", " O \n", "on move: O\n", " \n", " X \n", "X O \n", "on move: X\n", " \n", " X \n", "X O O \n", "on move: O\n", " \n", "X X \n", "X O O \n", "on move: X\n", "O \n", "X X \n", "X O O \n", "on move: O\n", "O X \n", "X X \n", "X O O \n", "on move: X\n", "O X O \n", "X X \n", "X O O \n", "on move: O\n", "O X O \n", "X X X \n", "X O O \n", "Episode 84, Total Reward: 1\n", "Average Reward: 0.30952380952380953\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " O \n", " X \n", " X \n", "on move: X\n", " O \n", " X O \n", " X \n", "on move: O\n", " O \n", "X X O \n", " X \n", "on move: X\n", " O \n", "X X O \n", " O X \n", "on move: O\n", " O \n", "X X O \n", "X O X \n", "on move: X\n", "O O \n", "X X O \n", "X O X \n", "on move: O\n", "O O X \n", "X X O \n", "X O X \n", "Episode 85, Total Reward: 1\n", "Average Reward: 0.3176470588235294\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", " O \n", "on move: O\n", " \n", "X \n", " O X \n", "on move: X\n", "O \n", "X \n", " O X \n", "on move: O\n", "O \n", "X \n", "X O X \n", "on move: X\n", "O \n", "X O \n", "X O X \n", "on move: O\n", "O X \n", "X O \n", "X O X \n", "on move: X\n", "O X \n", "X O O \n", "X O X \n", "on move: O\n", "O X X \n", "X O O \n", "X O X \n", "Episode 86, Total Reward: 0\n", "Average Reward: 0.313953488372093\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " O \n", " X \n", "on move: O\n", " X \n", " O \n", " X \n", "on move: X\n", " X \n", " O O \n", " X \n", "on move: O\n", " X \n", " O O \n", "X X \n", "on move: X\n", "O X \n", " O O \n", "X X \n", "on move: O\n", "O X \n", "X O O \n", "X X \n", "on move: X\n", "O X \n", "X O O \n", "X O X \n", "on move: O\n", "O X X \n", "X O O \n", "X O X \n", "Episode 87, Total Reward: 0\n", "Average Reward: 0.3103448275862069\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " O \n", " X \n", "on move: O\n", " X \n", " O \n", " X \n", "on move: X\n", "O X \n", " O \n", " X \n", "on move: O\n", "O X X \n", " O \n", " X \n", "on move: X\n", "O X X \n", " O \n", "O X \n", "on move: O\n", "O X X \n", "X O \n", "O X \n", "on move: X\n", "O X X \n", "X O \n", "O X O \n", "Episode 88, Total Reward: -1\n", "Average Reward: 0.29545454545454547\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X O \n", " \n", " \n", "on move: O\n", "X O \n", "X \n", " \n", "on move: X\n", "X O \n", "X \n", " O \n", "on move: O\n", "X O X \n", "X \n", " O \n", "on move: X\n", "X O X \n", "X O \n", " O \n", "on move: O\n", "X O X \n", "X X O \n", " O \n", "on move: X\n", "X O X \n", "X X O \n", " O O \n", "on move: O\n", "X O X \n", "X X O \n", "X O O \n", "Episode 89, Total Reward: 1\n", "Average Reward: 0.30337078651685395\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " O \n", " X \n", "on move: O\n", "X \n", " O \n", " X \n", "on move: X\n", "X \n", "O O \n", " X \n", "on move: O\n", "X \n", "O O \n", "X X \n", "on move: X\n", "X O \n", "O O \n", "X X \n", "on move: O\n", "X O X \n", "O O \n", "X X \n", "on move: X\n", "X O X \n", "O O O \n", "X X \n", "Episode 90, Total Reward: -1\n", "Average Reward: 0.28888888888888886\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " O \n", " X \n", "on move: O\n", "X \n", " O \n", " X \n", "on move: X\n", "X O \n", " O \n", " X \n", "on move: O\n", "X O \n", "X O \n", " X \n", "on move: X\n", "X O \n", "X O \n", " X O \n", "on move: O\n", "X O \n", "X X O \n", " X O \n", "on move: X\n", "X O O \n", "X X O \n", " X O \n", "Episode 91, Total Reward: -1\n", "Average Reward: 0.27472527472527475\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", "O \n", " X \n", "on move: O\n", " \n", "O X \n", " X \n", "on move: X\n", " \n", "O X O \n", " X \n", "on move: O\n", " \n", "O X O \n", "X X \n", "on move: X\n", " O \n", "O X O \n", "X X \n", "on move: O\n", " O \n", "O X O \n", "X X X \n", "Episode 92, Total Reward: 1\n", "Average Reward: 0.2826086956521739\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", "O \n", " \n", "on move: O\n", " X \n", "O \n", "X \n", "on move: X\n", " O X \n", "O \n", "X \n", "on move: O\n", " O X \n", "O X \n", "X \n", "Episode 93, Total Reward: 1\n", "Average Reward: 0.2903225806451613\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", "O \n", "on move: O\n", " X \n", " X \n", "O \n", "on move: X\n", "O X \n", " X \n", "O \n", "on move: O\n", "O X \n", "X X \n", "O \n", "on move: X\n", "O X \n", "X X \n", "O O \n", "on move: O\n", "O X X \n", "X X \n", "O O \n", "on move: X\n", "O X X \n", "X X \n", "O O O \n", "Episode 94, Total Reward: -1\n", "Average Reward: 0.2765957446808511\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X O \n", " \n", "on move: O\n", " \n", "X O \n", " X \n", "on move: X\n", " O \n", "X O \n", " X \n", "on move: O\n", " O X \n", "X O \n", " X \n", "on move: X\n", " O X \n", "X O O \n", " X \n", "on move: O\n", " O X \n", "X O O \n", " X X \n", "on move: X\n", " O X \n", "X O O \n", "O X X \n", "on move: O\n", "X O X \n", "X O O \n", "O X X \n", "Episode 95, Total Reward: 0\n", "Average Reward: 0.2736842105263158\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", " O \n", "on move: O\n", " X \n", " X \n", " O \n", "on move: X\n", " X \n", " X \n", "O O \n", "on move: O\n", " X \n", "X X \n", "O O \n", "on move: X\n", " X O \n", "X X \n", "O O \n", "on move: O\n", " X O \n", "X X X \n", "O O \n", "Episode 96, Total Reward: 1\n", "Average Reward: 0.28125\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X O \n", " \n", "on move: O\n", "X \n", "X O \n", " \n", "on move: X\n", "X O \n", "X O \n", " \n", "on move: O\n", "X O \n", "X O \n", "X \n", "Episode 97, Total Reward: 1\n", "Average Reward: 0.28865979381443296\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", "O \n", " \n", " X \n", "on move: O\n", "O \n", " X \n", " X \n", "on move: X\n", "O \n", " X \n", " O X \n", "on move: O\n", "O X \n", " X \n", " O X \n", "on move: X\n", "O X \n", " X \n", "O O X \n", "on move: O\n", "O X \n", "X X \n", "O O X \n", "on move: X\n", "O X \n", "X O X \n", "O O X \n", "on move: O\n", "O X X \n", "X O X \n", "O O X \n", "Episode 98, Total Reward: 1\n", "Average Reward: 0.29591836734693877\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", "O \n", "on move: O\n", " \n", " X \n", "O X \n", "on move: X\n", "O \n", " X \n", "O X \n", "on move: O\n", "O \n", "X X \n", "O X \n", "on move: X\n", "O O \n", "X X \n", "O X \n", "on move: O\n", "O X O \n", "X X \n", "O X \n", "on move: X\n", "O X O \n", "X X O \n", "O X \n", "on move: O\n", "O X O \n", "X X O \n", "O X X \n", "Episode 99, Total Reward: 1\n", "Average Reward: 0.30303030303030304\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " \n", "X O \n", "on move: O\n", " \n", " X \n", "X O \n", "on move: X\n", " \n", "O X \n", "X O \n", "on move: O\n", " \n", "O X \n", "X X O \n", "on move: X\n", " \n", "O O X \n", "X X O \n", "on move: O\n", " X \n", "O O X \n", "X X O \n", "on move: X\n", " O X \n", "O O X \n", "X X O \n", "on move: O\n", "X O X \n", "O O X \n", "X X O \n", "Episode 100, Total Reward: 0\n", "Average Reward: 0.3\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " \n", "O X \n", "on move: O\n", " \n", " X \n", "O X \n", "on move: X\n", " \n", " X O \n", "O X \n", "on move: O\n", " \n", " X O \n", "O X X \n", "on move: X\n", " \n", "O X O \n", "O X X \n", "on move: O\n", "X \n", "O X O \n", "O X X \n", "Episode 101, Total Reward: 1\n", "Average Reward: 0.3069306930693069\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", "O \n", "X \n", "on move: O\n", " \n", "O \n", "X X \n", "on move: X\n", " O \n", "O \n", "X X \n", "on move: O\n", " O \n", "O \n", "X X X \n", "Episode 102, Total Reward: 1\n", "Average Reward: 0.3137254901960784\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", "O \n", " \n", "X \n", "on move: O\n", "O \n", " \n", "X X \n", "on move: X\n", "O O \n", " \n", "X X \n", "on move: O\n", "O O \n", "X \n", "X X \n", "on move: X\n", "O O \n", "X O \n", "X X \n", "on move: O\n", "O X O \n", "X O \n", "X X \n", "on move: X\n", "O X O \n", "X O O \n", "X X \n", "on move: O\n", "O X O \n", "X O O \n", "X X X \n", "Episode 103, Total Reward: 1\n", "Average Reward: 0.32038834951456313\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", "O \n", "X \n", "on move: O\n", " \n", "O \n", "X X \n", "on move: X\n", " O \n", "O \n", "X X \n", "on move: O\n", "X O \n", "O \n", "X X \n", "on move: X\n", "X O \n", "O O \n", "X X \n", "on move: O\n", "X X O \n", "O O \n", "X X \n", "on move: X\n", "X X O \n", "O O O \n", "X X \n", "Episode 104, Total Reward: -1\n", "Average Reward: 0.3076923076923077\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", "O \n", " \n", "on move: O\n", "X X \n", "O \n", " \n", "on move: X\n", "X X \n", "O O \n", " \n", "on move: O\n", "X X X \n", "O O \n", " \n", "Episode 105, Total Reward: 1\n", "Average Reward: 0.3142857142857143\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", "O X \n", " \n", "on move: O\n", "X \n", "O X \n", " \n", "on move: X\n", "X O \n", "O X \n", " \n", "on move: O\n", "X O \n", "O X \n", "X \n", "on move: X\n", "X O O \n", "O X \n", "X \n", "on move: O\n", "X O O \n", "O X \n", "X X \n", "on move: X\n", "X O O \n", "O O X \n", "X X \n", "on move: O\n", "X O O \n", "O O X \n", "X X X \n", "Episode 106, Total Reward: 1\n", "Average Reward: 0.32075471698113206\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X O \n", " \n", " \n", "on move: O\n", "X O \n", " \n", " X \n", "on move: X\n", "X O \n", " O \n", " X \n", "on move: O\n", "X O \n", " O \n", " X X \n", "on move: X\n", "X O \n", "O O \n", " X X \n", "on move: O\n", "X O \n", "O X O \n", " X X \n", "Episode 107, Total Reward: 1\n", "Average Reward: 0.32710280373831774\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", " O \n", "on move: O\n", " \n", "X \n", " X O \n", "on move: X\n", " \n", "X \n", "O X O \n", "on move: O\n", " \n", "X X \n", "O X O \n", "on move: X\n", " O \n", "X X \n", "O X O \n", "on move: O\n", " O \n", "X X X \n", "O X O \n", "Episode 108, Total Reward: 1\n", "Average Reward: 0.3333333333333333\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " O X \n", " \n", " \n", "on move: O\n", " O X \n", " X \n", " \n", "on move: X\n", " O X \n", " X \n", " O \n", "on move: O\n", "X O X \n", " X \n", " O \n", "on move: X\n", "X O X \n", " X \n", "O O \n", "on move: O\n", "X O X \n", "X X \n", "O O \n", "on move: X\n", "X O X \n", "X X O \n", "O O \n", "on move: O\n", "X O X \n", "X X O \n", "O X O \n", "Episode 109, Total Reward: 0\n", "Average Reward: 0.3302752293577982\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X O \n", " \n", " \n", "on move: O\n", "X O \n", " \n", " X \n", "on move: X\n", "X O \n", "O \n", " X \n", "on move: O\n", "X O \n", "O X \n", " X \n", "Episode 110, Total Reward: 1\n", "Average Reward: 0.33636363636363636\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", " X \n", " O \n", "X \n", "on move: X\n", " O X \n", " O \n", "X \n", "on move: O\n", " O X \n", " O \n", "X X \n", "on move: X\n", "O O X \n", " O \n", "X X \n", "on move: O\n", "O O X \n", " X O \n", "X X \n", "Episode 111, Total Reward: 1\n", "Average Reward: 0.34234234234234234\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " O \n", " X \n", " X \n", "on move: X\n", " O \n", "O X \n", " X \n", "on move: O\n", " O \n", "O X \n", " X X \n", "on move: X\n", " O \n", "O O X \n", " X X \n", "on move: O\n", " O \n", "O O X \n", "X X X \n", "Episode 112, Total Reward: 1\n", "Average Reward: 0.3482142857142857\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", "O \n", "on move: O\n", " \n", " X X \n", "O \n", "on move: X\n", " \n", "O X X \n", "O \n", "on move: O\n", " \n", "O X X \n", "O X \n", "on move: X\n", " O \n", "O X X \n", "O X \n", "on move: O\n", " O X \n", "O X X \n", "O X \n", "on move: X\n", " O X \n", "O X X \n", "O X O \n", "on move: O\n", "X O X \n", "O X X \n", "O X O \n", "Episode 113, Total Reward: 0\n", "Average Reward: 0.34513274336283184\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", "O \n", "on move: O\n", " X \n", "X \n", "O \n", "on move: X\n", " X \n", "X \n", "O O \n", "on move: O\n", " X X \n", "X \n", "O O \n", "on move: X\n", " X X \n", "X \n", "O O O \n", "Episode 114, Total Reward: -1\n", "Average Reward: 0.3333333333333333\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", " O \n", "on move: O\n", "X X \n", " \n", " O \n", "on move: X\n", "X X \n", " \n", "O O \n", "on move: O\n", "X X \n", " X \n", "O O \n", "on move: X\n", "X X O \n", " X \n", "O O \n", "on move: O\n", "X X O \n", "X X \n", "O O \n", "on move: X\n", "X X O \n", "X O X \n", "O O \n", "Episode 115, Total Reward: -1\n", "Average Reward: 0.3217391304347826\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", " O \n", "on move: O\n", " X \n", " X \n", " O \n", "on move: X\n", " X \n", " X \n", " O O \n", "on move: O\n", " X \n", " X X \n", " O O \n", "on move: X\n", " X \n", " X X \n", "O O O \n", "Episode 116, Total Reward: -1\n", "Average Reward: 0.3103448275862069\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", " O \n", "on move: O\n", " \n", "X X \n", " O \n", "on move: X\n", "O \n", "X X \n", " O \n", "on move: O\n", "O \n", "X X \n", " X O \n", "on move: X\n", "O O \n", "X X \n", " X O \n", "on move: O\n", "O O X \n", "X X \n", " X O \n", "on move: X\n", "O O X \n", "X X O \n", " X O \n", "on move: O\n", "O O X \n", "X X O \n", "X X O \n", "Episode 117, Total Reward: 1\n", "Average Reward: 0.3162393162393162\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", "O \n", "X \n", " \n", "on move: O\n", "O \n", "X \n", "X \n", "on move: X\n", "O \n", "X \n", "X O \n", "on move: O\n", "O \n", "X X \n", "X O \n", "on move: X\n", "O O \n", "X X \n", "X O \n", "on move: O\n", "O O X \n", "X X \n", "X O \n", "on move: X\n", "O O X \n", "X O X \n", "X O \n", "Episode 118, Total Reward: -1\n", "Average Reward: 0.3050847457627119\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " \n", "O X \n", "on move: O\n", " X \n", " \n", "O X \n", "on move: X\n", " X \n", " O \n", "O X \n", "on move: O\n", "X X \n", " O \n", "O X \n", "on move: X\n", "X X \n", "O O \n", "O X \n", "on move: O\n", "X X \n", "O O \n", "O X X \n", "on move: X\n", "X X O \n", "O O \n", "O X X \n", "Episode 119, Total Reward: -1\n", "Average Reward: 0.29411764705882354\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " \n", "X O \n", "on move: O\n", " X \n", " \n", "X O \n", "on move: X\n", " O X \n", " \n", "X O \n", "on move: O\n", "X O X \n", " \n", "X O \n", "on move: X\n", "X O X \n", "O \n", "X O \n", "on move: O\n", "X O X \n", "O X \n", "X O \n", "on move: X\n", "X O X \n", "O O X \n", "X O \n", "on move: O\n", "X O X \n", "O O X \n", "X X O \n", "Episode 120, Total Reward: 0\n", "Average Reward: 0.2916666666666667\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", "O \n", " X \n", " \n", "on move: O\n", "O \n", " X \n", " X \n", "on move: X\n", "O \n", " X O \n", " X \n", "on move: O\n", "O X \n", " X O \n", " X \n", "on move: X\n", "O X \n", "O X O \n", " X \n", "on move: O\n", "O X \n", "O X O \n", " X X \n", "Episode 121, Total Reward: 1\n", "Average Reward: 0.2975206611570248\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", " X \n", "X O \n", " \n", "on move: X\n", " X \n", "X O \n", " O \n", "on move: O\n", " X \n", "X O \n", "X O \n", "on move: X\n", " X \n", "X O \n", "X O O \n", "on move: O\n", "X X \n", "X O \n", "X O O \n", "Episode 122, Total Reward: 1\n", "Average Reward: 0.30327868852459017\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", "O \n", " \n", "X \n", "on move: O\n", "O \n", "X \n", "X \n", "on move: X\n", "O \n", "X O \n", "X \n", "on move: O\n", "O X \n", "X O \n", "X \n", "on move: X\n", "O X O \n", "X O \n", "X \n", "on move: O\n", "O X O \n", "X O \n", "X X \n", "on move: X\n", "O X O \n", "X O O \n", "X X \n", "on move: O\n", "O X O \n", "X O O \n", "X X X \n", "Episode 123, Total Reward: 1\n", "Average Reward: 0.3089430894308943\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " \n", "X O \n", "on move: O\n", " X \n", " \n", "X O \n", "on move: X\n", " X \n", " O \n", "X O \n", "on move: O\n", " X \n", " O \n", "X O X \n", "on move: X\n", "O X \n", " O \n", "X O X \n", "on move: O\n", "O X \n", " X O \n", "X O X \n", "Episode 124, Total Reward: 1\n", "Average Reward: 0.31451612903225806\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X O \n", " \n", "on move: O\n", " \n", "X O \n", " X \n", "on move: X\n", " \n", "X O \n", " X O \n", "on move: O\n", "X \n", "X O \n", " X O \n", "on move: X\n", "X \n", "X O O \n", " X O \n", "on move: O\n", "X \n", "X O O \n", "X X O \n", "Episode 125, Total Reward: 1\n", "Average Reward: 0.32\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " \n", "X O \n", "on move: O\n", " \n", "X \n", "X O \n", "on move: X\n", " O \n", "X \n", "X O \n", "on move: O\n", " O \n", "X X \n", "X O \n", "on move: X\n", " O \n", "X X \n", "X O O \n", "on move: O\n", "X O \n", "X X \n", "X O O \n", "Episode 126, Total Reward: 1\n", "Average Reward: 0.3253968253968254\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", "X O \n", " \n", " X \n", "on move: X\n", "X O \n", " \n", " O X \n", "on move: O\n", "X O \n", " X \n", " O X \n", "on move: X\n", "X O \n", " X \n", "O O X \n", "on move: O\n", "X O X \n", " X \n", "O O X \n", "Episode 127, Total Reward: 1\n", "Average Reward: 0.33070866141732286\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", " O \n", "on move: O\n", " X \n", "X \n", " O \n", "on move: X\n", " X \n", "X \n", "O O \n", "on move: O\n", "X X \n", "X \n", "O O \n", "on move: X\n", "X X \n", "X O \n", "O O \n", "on move: O\n", "X X X \n", "X O \n", "O O \n", "Episode 128, Total Reward: 1\n", "Average Reward: 0.3359375\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", " X \n", " X O \n", " \n", "on move: X\n", " X \n", "O X O \n", " \n", "on move: O\n", " X \n", "O X O \n", "X \n", "on move: X\n", " X \n", "O X O \n", "X O \n", "on move: O\n", "X X \n", "O X O \n", "X O \n", "on move: X\n", "X X \n", "O X O \n", "X O O \n", "on move: O\n", "X X X \n", "O X O \n", "X O O \n", "Episode 129, Total Reward: 1\n", "Average Reward: 0.34108527131782945\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " O \n", " X X \n", " \n", "on move: X\n", " O O \n", " X X \n", " \n", "on move: O\n", "X O O \n", " X X \n", " \n", "on move: X\n", "X O O \n", " X X \n", " O \n", "on move: O\n", "X O O \n", "X X X \n", " O \n", "Episode 130, Total Reward: 1\n", "Average Reward: 0.34615384615384615\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", " X \n", "X O \n", " \n", "on move: X\n", " X \n", "X O \n", " O \n", "on move: O\n", "X X \n", "X O \n", " O \n", "on move: X\n", "X X \n", "X O \n", "O O \n", "on move: O\n", "X X \n", "X O X \n", "O O \n", "on move: X\n", "X X \n", "X O X \n", "O O O \n", "Episode 131, Total Reward: -1\n", "Average Reward: 0.33587786259541985\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", " O \n", "on move: O\n", " \n", "X \n", "X O \n", "on move: X\n", " \n", "X O \n", "X O \n", "on move: O\n", " X \n", "X O \n", "X O \n", "on move: X\n", " X O \n", "X O \n", "X O \n", "on move: O\n", "X X O \n", "X O \n", "X O \n", "Episode 132, Total Reward: 1\n", "Average Reward: 0.3409090909090909\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X O \n", " \n", "on move: O\n", " \n", " X O \n", " X \n", "on move: X\n", " \n", " X O \n", "O X \n", "on move: O\n", " \n", "X X O \n", "O X \n", "on move: X\n", " \n", "X X O \n", "O X O \n", "on move: O\n", " X \n", "X X O \n", "O X O \n", "on move: X\n", " O X \n", "X X O \n", "O X O \n", "on move: O\n", "X O X \n", "X X O \n", "O X O \n", "Episode 133, Total Reward: 0\n", "Average Reward: 0.3383458646616541\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", "O \n", " \n", " X \n", "on move: O\n", "O \n", "X \n", " X \n", "on move: X\n", "O \n", "X \n", "O X \n", "on move: O\n", "O X \n", "X \n", "O X \n", "on move: X\n", "O X \n", "X O \n", "O X \n", "on move: O\n", "O X X \n", "X O \n", "O X \n", "on move: X\n", "O X X \n", "X O \n", "O X O \n", "Episode 134, Total Reward: -1\n", "Average Reward: 0.3283582089552239\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " \n", " O \n", "on move: O\n", "X \n", "X \n", " O \n", "on move: X\n", "X \n", "X O \n", " O \n", "on move: O\n", "X \n", "X O \n", " X O \n", "on move: X\n", "X \n", "X O \n", "O X O \n", "on move: O\n", "X \n", "X X O \n", "O X O \n", "on move: X\n", "X O \n", "X X O \n", "O X O \n", "Episode 135, Total Reward: -1\n", "Average Reward: 0.31851851851851853\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", "O X \n", " \n", " \n", "on move: O\n", "O X \n", " X \n", " \n", "on move: X\n", "O X \n", " X \n", "O \n", "on move: O\n", "O X \n", "X X \n", "O \n", "on move: X\n", "O X \n", "X X \n", "O O \n", "on move: O\n", "O X \n", "X X \n", "O O X \n", "on move: X\n", "O X O \n", "X X \n", "O O X \n", "on move: O\n", "O X O \n", "X X X \n", "O O X \n", "Episode 136, Total Reward: 1\n", "Average Reward: 0.3235294117647059\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", "O \n", " X \n", " \n", "on move: O\n", "O \n", " X X \n", " \n", "on move: X\n", "O O \n", " X X \n", " \n", "on move: O\n", "O O \n", " X X \n", " X \n", "on move: X\n", "O O O \n", " X X \n", " X \n", "Episode 137, Total Reward: -1\n", "Average Reward: 0.31386861313868614\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " O X \n", " X \n", " \n", "on move: X\n", " O X \n", " X \n", " O \n", "on move: O\n", " O X \n", " X \n", "X O \n", "on move: X\n", "O O X \n", " X \n", "X O \n", "on move: O\n", "O O X \n", "X X \n", "X O \n", "on move: X\n", "O O X \n", "X X \n", "X O O \n", "on move: O\n", "O O X \n", "X X X \n", "X O O \n", "Episode 138, Total Reward: 1\n", "Average Reward: 0.3188405797101449\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " O \n", " \n", "X \n", "on move: O\n", " O \n", " \n", "X X \n", "on move: X\n", " O O \n", " \n", "X X \n", "on move: O\n", " O O \n", "X \n", "X X \n", "on move: X\n", " O O \n", "X O \n", "X X \n", "on move: O\n", " O O \n", "X O \n", "X X X \n", "Episode 139, Total Reward: 1\n", "Average Reward: 0.3237410071942446\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " \n", " O \n", "on move: O\n", "X \n", " \n", "X O \n", "on move: X\n", "X \n", " O \n", "X O \n", "on move: O\n", "X X \n", " O \n", "X O \n", "on move: X\n", "X O X \n", " O \n", "X O \n", "on move: O\n", "X O X \n", "X O \n", "X O \n", "Episode 140, Total Reward: 1\n", "Average Reward: 0.32857142857142857\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", "O \n", "on move: O\n", "X X \n", " \n", "O \n", "on move: X\n", "X X O \n", " \n", "O \n", "on move: O\n", "X X O \n", " X \n", "O \n", "on move: X\n", "X X O \n", " X \n", "O O \n", "on move: O\n", "X X O \n", " X \n", "O O X \n", "Episode 141, Total Reward: 1\n", "Average Reward: 0.3333333333333333\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", " O \n", " \n", "X X \n", "on move: X\n", " O O \n", " \n", "X X \n", "on move: O\n", " O O \n", " \n", "X X X \n", "Episode 142, Total Reward: 1\n", "Average Reward: 0.3380281690140845\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", " O \n", "on move: O\n", " \n", "X \n", "X O \n", "on move: X\n", " \n", "X O \n", "X O \n", "on move: O\n", " X \n", "X O \n", "X O \n", "on move: X\n", " X O \n", "X O \n", "X O \n", "Episode 143, Total Reward: -1\n", "Average Reward: 0.32867132867132864\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " O \n", "X \n", "on move: O\n", " \n", " O \n", "X X \n", "on move: X\n", " O \n", " O \n", "X X \n", "on move: O\n", " O \n", " O \n", "X X X \n", "Episode 144, Total Reward: 1\n", "Average Reward: 0.3333333333333333\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", "O \n", " \n", " X \n", "on move: O\n", "O \n", " \n", " X X \n", "on move: X\n", "O \n", " \n", "O X X \n", "on move: O\n", "O \n", " X \n", "O X X \n", "on move: X\n", "O \n", "O X \n", "O X X \n", "Episode 145, Total Reward: -1\n", "Average Reward: 0.32413793103448274\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", "O X \n", " \n", " \n", "on move: O\n", "O X \n", " X \n", " \n", "on move: X\n", "O X \n", " X \n", "O \n", "on move: O\n", "O X X \n", " X \n", "O \n", "on move: X\n", "O X X \n", " X \n", "O O \n", "on move: O\n", "O X X \n", " X \n", "O X O \n", "Episode 146, Total Reward: 1\n", "Average Reward: 0.3287671232876712\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " O \n", " \n", "on move: O\n", "X \n", " X O \n", " \n", "on move: X\n", "X \n", "O X O \n", " \n", "on move: O\n", "X \n", "O X O \n", " X \n", "on move: X\n", "X O \n", "O X O \n", " X \n", "on move: O\n", "X O X \n", "O X O \n", " X \n", "on move: X\n", "X O X \n", "O X O \n", " X O \n", "on move: O\n", "X O X \n", "O X O \n", "X X O \n", "Episode 147, Total Reward: 1\n", "Average Reward: 0.3333333333333333\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " \n", "X O \n", "on move: O\n", " \n", " X \n", "X O \n", "on move: X\n", " \n", " O X \n", "X O \n", "on move: O\n", " \n", "X O X \n", "X O \n", "on move: X\n", " \n", "X O X \n", "X O O \n", "on move: O\n", " X \n", "X O X \n", "X O O \n", "on move: X\n", "O X \n", "X O X \n", "X O O \n", "Episode 148, Total Reward: -1\n", "Average Reward: 0.32432432432432434\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", "O X \n", " \n", "on move: O\n", " \n", "O X X \n", " \n", "on move: X\n", "O \n", "O X X \n", " \n", "on move: O\n", "O \n", "O X X \n", "X \n", "on move: X\n", "O \n", "O X X \n", "X O \n", "on move: O\n", "O X \n", "O X X \n", "X O \n", "on move: X\n", "O X O \n", "O X X \n", "X O \n", "on move: O\n", "O X O \n", "O X X \n", "X O X \n", "Episode 149, Total Reward: 0\n", "Average Reward: 0.3221476510067114\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " O \n", " X \n", "on move: O\n", "X \n", " O \n", " X \n", "on move: X\n", "X \n", " O \n", "O X \n", "on move: O\n", "X X \n", " O \n", "O X \n", "on move: X\n", "X X \n", " O O \n", "O X \n", "on move: O\n", "X X \n", "X O O \n", "O X \n", "on move: X\n", "X O X \n", "X O O \n", "O X \n", "on move: O\n", "X O X \n", "X O O \n", "O X X \n", "Episode 150, Total Reward: 0\n", "Average Reward: 0.32\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", "O \n", " X \n", " \n", "on move: O\n", "O X \n", " X \n", " \n", "on move: X\n", "O X \n", " X \n", "O \n", "on move: O\n", "O X X \n", " X \n", "O \n", "on move: X\n", "O X X \n", "O X \n", "O \n", "Episode 151, Total Reward: -1\n", "Average Reward: 0.31125827814569534\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", " O \n", " \n", "X X \n", "on move: X\n", "O O \n", " \n", "X X \n", "on move: O\n", "O O \n", "X \n", "X X \n", "on move: X\n", "O O \n", "X O \n", "X X \n", "on move: O\n", "O O X \n", "X O \n", "X X \n", "on move: X\n", "O O X \n", "X O O \n", "X X \n", "on move: O\n", "O O X \n", "X O O \n", "X X X \n", "Episode 152, Total Reward: 1\n", "Average Reward: 0.3157894736842105\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", "O X \n", " \n", "on move: O\n", " X \n", "O X \n", " \n", "on move: X\n", " X O \n", "O X \n", " \n", "on move: O\n", " X O \n", "O X X \n", " \n", "on move: X\n", " X O \n", "O X X \n", " O \n", "on move: O\n", " X O \n", "O X X \n", "X O \n", "on move: X\n", "O X O \n", "O X X \n", "X O \n", "on move: O\n", "O X O \n", "O X X \n", "X X O \n", "Episode 153, Total Reward: 1\n", "Average Reward: 0.3202614379084967\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", "O \n", " \n", " X \n", "on move: O\n", "O \n", " X \n", " X \n", "on move: X\n", "O O \n", " X \n", " X \n", "on move: O\n", "O O \n", " X \n", "X X \n", "on move: X\n", "O O \n", " O X \n", "X X \n", "on move: O\n", "O O \n", "X O X \n", "X X \n", "on move: X\n", "O O \n", "X O X \n", "X X O \n", "Episode 154, Total Reward: -1\n", "Average Reward: 0.3116883116883117\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", "O X \n", " \n", "on move: O\n", " X \n", "O X \n", " \n", "on move: X\n", " X \n", "O X \n", " O \n", "on move: O\n", " X X \n", "O X \n", " O \n", "on move: X\n", " X X \n", "O X \n", "O O \n", "on move: O\n", "X X X \n", "O X \n", "O O \n", "Episode 155, Total Reward: 1\n", "Average Reward: 0.3161290322580645\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", "X O \n", " \n", " X \n", "on move: X\n", "X O O \n", " \n", " X \n", "on move: O\n", "X O O \n", " X \n", " X \n", "on move: X\n", "X O O \n", " X \n", "O X \n", "on move: O\n", "X O O \n", " X X \n", "O X \n", "on move: X\n", "X O O \n", " X X \n", "O X O \n", "on move: O\n", "X O O \n", "X X X \n", "O X O \n", "Episode 156, Total Reward: 1\n", "Average Reward: 0.32051282051282054\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " O \n", " \n", "on move: O\n", "X \n", " O \n", " X \n", "on move: X\n", "X \n", " O O \n", " X \n", "on move: O\n", "X \n", " O O \n", "X X \n", "on move: X\n", "X O \n", " O O \n", "X X \n", "on move: O\n", "X O \n", " O O \n", "X X X \n", "Episode 157, Total Reward: 1\n", "Average Reward: 0.3248407643312102\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " O \n", "X \n", "on move: O\n", " \n", " O X \n", "X \n", "on move: X\n", " O \n", " O X \n", "X \n", "on move: O\n", " O \n", " O X \n", "X X \n", "on move: X\n", "O O \n", " O X \n", "X X \n", "on move: O\n", "O O \n", " O X \n", "X X X \n", "Episode 158, Total Reward: 1\n", "Average Reward: 0.3291139240506329\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " O \n", " X \n", "X \n", "on move: X\n", " O \n", "O X \n", "X \n", "on move: O\n", " O \n", "O X \n", "X X \n", "on move: X\n", " O \n", "O X \n", "X X O \n", "on move: O\n", " O \n", "O X X \n", "X X O \n", "on move: X\n", " O O \n", "O X X \n", "X X O \n", "on move: O\n", "X O O \n", "O X X \n", "X X O \n", "Episode 159, Total Reward: 0\n", "Average Reward: 0.3270440251572327\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", " O \n", "on move: O\n", " \n", "X \n", "X O \n", "on move: X\n", " \n", "X \n", "X O O \n", "on move: O\n", " X \n", "X \n", "X O O \n", "on move: X\n", " X \n", "X O \n", "X O O \n", "on move: O\n", " X X \n", "X O \n", "X O O \n", "on move: X\n", " X X \n", "X O O \n", "X O O \n", "on move: O\n", "X X X \n", "X O O \n", "X O O \n", "Episode 160, Total Reward: 1\n", "Average Reward: 0.33125\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", " X O \n", " \n", " X \n", "on move: X\n", " X O \n", " \n", " O X \n", "on move: O\n", "X X O \n", " \n", " O X \n", "on move: X\n", "X X O \n", "O \n", " O X \n", "on move: O\n", "X X O \n", "O \n", "X O X \n", "on move: X\n", "X X O \n", "O O \n", "X O X \n", "on move: O\n", "X X O \n", "O X O \n", "X O X \n", "Episode 161, Total Reward: 1\n", "Average Reward: 0.33540372670807456\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", "O \n", "on move: O\n", "X X \n", " \n", "O \n", "on move: X\n", "X X \n", " O \n", "O \n", "on move: O\n", "X X \n", " X O \n", "O \n", "on move: X\n", "X X \n", " X O \n", "O O \n", "on move: O\n", "X X \n", "X X O \n", "O O \n", "on move: X\n", "X O X \n", "X X O \n", "O O \n", "on move: O\n", "X O X \n", "X X O \n", "O O X \n", "Episode 162, Total Reward: 1\n", "Average Reward: 0.3395061728395062\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", " O \n", "on move: O\n", " \n", " X X \n", " O \n", "on move: X\n", " \n", "O X X \n", " O \n", "on move: O\n", " \n", "O X X \n", "X O \n", "on move: X\n", " O \n", "O X X \n", "X O \n", "on move: O\n", " O \n", "O X X \n", "X X O \n", "on move: X\n", " O O \n", "O X X \n", "X X O \n", "on move: O\n", "X O O \n", "O X X \n", "X X O \n", "Episode 163, Total Reward: 0\n", "Average Reward: 0.3374233128834356\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", " O \n", " \n", "X X \n", "on move: X\n", " O \n", "O \n", "X X \n", "on move: O\n", " O \n", "O X \n", "X X \n", "on move: X\n", " O \n", "O O X \n", "X X \n", "on move: O\n", " O \n", "O O X \n", "X X X \n", "Episode 164, Total Reward: 1\n", "Average Reward: 0.34146341463414637\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", " O \n", "on move: O\n", " \n", "X X \n", " O \n", "on move: X\n", "O \n", "X X \n", " O \n", "on move: O\n", "O \n", "X X \n", " O X \n", "on move: X\n", "O O \n", "X X \n", " O X \n", "on move: O\n", "O O X \n", "X X \n", " O X \n", "on move: X\n", "O O X \n", "X X \n", "O O X \n", "on move: O\n", "O O X \n", "X X X \n", "O O X \n", "Episode 165, Total Reward: 1\n", "Average Reward: 0.34545454545454546\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", "O \n", " \n", " X \n", "on move: O\n", "O X \n", " \n", " X \n", "on move: X\n", "O X \n", " O \n", " X \n", "on move: O\n", "O X X \n", " O \n", " X \n", "on move: X\n", "O X X \n", " O O \n", " X \n", "on move: O\n", "O X X \n", "X O O \n", " X \n", "on move: X\n", "O X X \n", "X O O \n", "O X \n", "on move: O\n", "O X X \n", "X O O \n", "O X X \n", "Episode 166, Total Reward: 0\n", "Average Reward: 0.3433734939759036\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", " X \n", " O \n", " X \n", "on move: X\n", " X \n", " O O \n", " X \n", "on move: O\n", " X \n", " O O \n", "X X \n", "on move: X\n", " X \n", "O O O \n", "X X \n", "Episode 167, Total Reward: -1\n", "Average Reward: 0.33532934131736525\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " O \n", " X \n", " X \n", "on move: X\n", " O \n", " O X \n", " X \n", "on move: O\n", " O \n", "X O X \n", " X \n", "on move: X\n", " O O \n", "X O X \n", " X \n", "on move: O\n", " O O \n", "X O X \n", " X X \n", "on move: X\n", "O O O \n", "X O X \n", " X X \n", "Episode 168, Total Reward: -1\n", "Average Reward: 0.3273809523809524\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", "O \n", "X \n", " \n", "on move: O\n", "O \n", "X X \n", " \n", "on move: X\n", "O \n", "X X \n", "O \n", "on move: O\n", "O \n", "X X \n", "O X \n", "on move: X\n", "O O \n", "X X \n", "O X \n", "on move: O\n", "O O \n", "X X X \n", "O X \n", "Episode 169, Total Reward: 1\n", "Average Reward: 0.33136094674556216\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", " O \n", "on move: O\n", " X \n", " \n", " X O \n", "on move: X\n", "O X \n", " \n", " X O \n", "on move: O\n", "O X \n", " \n", "X X O \n", "on move: X\n", "O O X \n", " \n", "X X O \n", "on move: O\n", "O O X \n", " X \n", "X X O \n", "on move: X\n", "O O X \n", " O X \n", "X X O \n", "Episode 170, Total Reward: -1\n", "Average Reward: 0.3235294117647059\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", " X O \n", " \n", " X \n", "on move: X\n", " X O \n", "O \n", " X \n", "on move: O\n", " X O \n", "O \n", "X X \n", "on move: X\n", " X O \n", "O O \n", "X X \n", "on move: O\n", "X X O \n", "O O \n", "X X \n", "on move: X\n", "X X O \n", "O O O \n", "X X \n", "Episode 171, Total Reward: -1\n", "Average Reward: 0.3157894736842105\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " \n", " O \n", "on move: O\n", "X \n", " X \n", " O \n", "on move: X\n", "X \n", "O X \n", " O \n", "on move: O\n", "X \n", "O X \n", " X O \n", "on move: X\n", "X O \n", "O X \n", " X O \n", "on move: O\n", "X O \n", "O X \n", "X X O \n", "on move: X\n", "X O O \n", "O X \n", "X X O \n", "on move: O\n", "X O O \n", "O X X \n", "X X O \n", "Episode 172, Total Reward: 0\n", "Average Reward: 0.313953488372093\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X O \n", " \n", " \n", "on move: O\n", "X O \n", " X \n", " \n", "on move: X\n", "X O \n", " X \n", "O \n", "on move: O\n", "X O \n", " X \n", "O X \n", "on move: X\n", "X O \n", "O X \n", "O X \n", "on move: O\n", "X O \n", "O X \n", "O X X \n", "on move: X\n", "X O O \n", "O X \n", "O X X \n", "on move: O\n", "X O O \n", "O X X \n", "O X X \n", "Episode 173, Total Reward: 1\n", "Average Reward: 0.3179190751445087\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " O \n", " \n", "X \n", "on move: O\n", " O \n", "X \n", "X \n", "on move: X\n", " O \n", "X \n", "X O \n", "on move: O\n", "X O \n", "X \n", "X O \n", "Episode 174, Total Reward: 1\n", "Average Reward: 0.3218390804597701\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", "O X \n", " \n", " \n", "on move: O\n", "O X \n", " \n", " X \n", "on move: X\n", "O X \n", " \n", " X O \n", "on move: O\n", "O X \n", " \n", "X X O \n", "on move: X\n", "O X \n", " O \n", "X X O \n", "on move: O\n", "O X \n", "X O \n", "X X O \n", "on move: X\n", "O O X \n", "X O \n", "X X O \n", "on move: O\n", "O O X \n", "X X O \n", "X X O \n", "Episode 175, Total Reward: 1\n", "Average Reward: 0.32571428571428573\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " O \n", " X X \n", " \n", "on move: X\n", "O O \n", " X X \n", " \n", "on move: O\n", "O O \n", "X X X \n", " \n", "Episode 176, Total Reward: 1\n", "Average Reward: 0.32954545454545453\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", "O \n", " \n", "on move: O\n", "X X \n", "O \n", " \n", "on move: X\n", "X X O \n", "O \n", " \n", "on move: O\n", "X X O \n", "O X \n", " \n", "on move: X\n", "X X O \n", "O X \n", "O \n", "on move: O\n", "X X O \n", "O X \n", "O X \n", "Episode 177, Total Reward: 1\n", "Average Reward: 0.3333333333333333\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", "O \n", " X \n", "on move: O\n", "X \n", "O \n", " X \n", "on move: X\n", "X O \n", "O \n", " X \n", "on move: O\n", "X O \n", "O \n", "X X \n", "on move: X\n", "X O \n", "O O \n", "X X \n", "on move: O\n", "X X O \n", "O O \n", "X X \n", "on move: X\n", "X X O \n", "O O \n", "X O X \n", "on move: O\n", "X X O \n", "O X O \n", "X O X \n", "Episode 178, Total Reward: 1\n", "Average Reward: 0.33707865168539325\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " O \n", " X \n", " X \n", "on move: X\n", "O O \n", " X \n", " X \n", "on move: O\n", "O O \n", " X \n", " X X \n", "on move: X\n", "O O \n", " X \n", "O X X \n", "on move: O\n", "O O \n", "X X \n", "O X X \n", "on move: X\n", "O O \n", "X O X \n", "O X X \n", "Episode 179, Total Reward: -1\n", "Average Reward: 0.329608938547486\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", "O X \n", " \n", "on move: O\n", "X \n", "O X \n", " \n", "on move: X\n", "X \n", "O X \n", "O \n", "on move: O\n", "X X \n", "O X \n", "O \n", "on move: X\n", "X X \n", "O X O \n", "O \n", "on move: O\n", "X X \n", "O X O \n", "O X \n", "on move: X\n", "X X \n", "O X O \n", "O X O \n", "on move: O\n", "X X X \n", "O X O \n", "O X O \n", "Episode 180, Total Reward: 1\n", "Average Reward: 0.3333333333333333\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", "X O \n", " \n", " X \n", "on move: X\n", "X O O \n", " \n", " X \n", "on move: O\n", "X O O \n", "X \n", " X \n", "on move: X\n", "X O O \n", "X O \n", " X \n", "on move: O\n", "X O O \n", "X X O \n", " X \n", "Episode 181, Total Reward: 1\n", "Average Reward: 0.3370165745856354\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " O \n", " X \n", "X \n", "on move: X\n", " O \n", " X \n", "X O \n", "on move: O\n", " O X \n", " X \n", "X O \n", "Episode 182, Total Reward: 1\n", "Average Reward: 0.34065934065934067\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " O \n", " X X \n", " \n", "on move: X\n", " O \n", " X X \n", "O \n", "on move: O\n", " O \n", " X X \n", "O X \n", "on move: X\n", " O \n", " X X \n", "O O X \n", "on move: O\n", " O \n", "X X X \n", "O O X \n", "Episode 183, Total Reward: 1\n", "Average Reward: 0.3442622950819672\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", " X \n", " O \n", " X \n", "on move: X\n", " X \n", " O \n", " O X \n", "on move: O\n", " X \n", " O \n", "X O X \n", "on move: X\n", "O X \n", " O \n", "X O X \n", "on move: O\n", "O X \n", "X O \n", "X O X \n", "on move: X\n", "O O X \n", "X O \n", "X O X \n", "Episode 184, Total Reward: -1\n", "Average Reward: 0.33695652173913043\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " O \n", "X \n", " \n", "on move: O\n", " O \n", "X \n", " X \n", "on move: X\n", " O \n", "X \n", "O X \n", "on move: O\n", " O \n", "X \n", "O X X \n", "on move: X\n", " O \n", "X O \n", "O X X \n", "on move: O\n", "X O \n", "X O \n", "O X X \n", "on move: X\n", "X O O \n", "X O \n", "O X X \n", "Episode 185, Total Reward: -1\n", "Average Reward: 0.32972972972972975\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", " O \n", "on move: O\n", " X \n", " X \n", " O \n", "on move: X\n", " X \n", " X \n", "O O \n", "on move: O\n", " X \n", " X \n", "O O X \n", "on move: X\n", " X \n", " X O \n", "O O X \n", "on move: O\n", "X X \n", " X O \n", "O O X \n", "Episode 186, Total Reward: 1\n", "Average Reward: 0.3333333333333333\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", "O \n", " \n", "on move: O\n", " X \n", "O X \n", " \n", "on move: X\n", " O X \n", "O X \n", " \n", "on move: O\n", " O X \n", "O X \n", " X \n", "on move: X\n", " O X \n", "O X \n", " O X \n", "on move: O\n", " O X \n", "O X X \n", " O X \n", "Episode 187, Total Reward: 1\n", "Average Reward: 0.33689839572192515\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", "O \n", "on move: O\n", " X \n", "X \n", "O \n", "on move: X\n", " X O \n", "X \n", "O \n", "on move: O\n", " X O \n", "X \n", "O X \n", "on move: X\n", " X O \n", "X O \n", "O X \n", "Episode 188, Total Reward: -1\n", "Average Reward: 0.32978723404255317\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", " O \n", "on move: O\n", " X \n", " X \n", " O \n", "on move: X\n", " X \n", " X \n", "O O \n", "on move: O\n", "X X \n", " X \n", "O O \n", "on move: X\n", "X X \n", "O X \n", "O O \n", "on move: O\n", "X X \n", "O X \n", "O X O \n", "on move: X\n", "X X O \n", "O X \n", "O X O \n", "on move: O\n", "X X O \n", "O X X \n", "O X O \n", "Episode 189, Total Reward: 1\n", "Average Reward: 0.3333333333333333\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " O \n", " \n", "on move: O\n", "X X \n", " O \n", " \n", "on move: X\n", "X X \n", " O \n", "O \n", "on move: O\n", "X X \n", " O \n", "O X \n", "on move: X\n", "X X \n", "O O \n", "O X \n", "on move: O\n", "X X \n", "O O \n", "O X X \n", "on move: X\n", "X O X \n", "O O \n", "O X X \n", "on move: O\n", "X O X \n", "O O X \n", "O X X \n", "Episode 190, Total Reward: 1\n", "Average Reward: 0.3368421052631579\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", "O \n", " \n", " X \n", "on move: O\n", "O \n", "X \n", " X \n", "on move: X\n", "O \n", "X O \n", " X \n", "on move: O\n", "O \n", "X X O \n", " X \n", "on move: X\n", "O O \n", "X X O \n", " X \n", "on move: O\n", "O O \n", "X X O \n", " X X \n", "on move: X\n", "O O \n", "X X O \n", "O X X \n", "on move: O\n", "O X O \n", "X X O \n", "O X X \n", "Episode 191, Total Reward: 1\n", "Average Reward: 0.3403141361256545\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", " O \n", "on move: O\n", " \n", " X \n", "X O \n", "on move: X\n", "O \n", " X \n", "X O \n", "on move: O\n", "O \n", " X \n", "X X O \n", "on move: X\n", "O O \n", " X \n", "X X O \n", "on move: O\n", "O O \n", "X X \n", "X X O \n", "on move: X\n", "O O O \n", "X X \n", "X X O \n", "Episode 192, Total Reward: -1\n", "Average Reward: 0.3333333333333333\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", "O \n", " \n", " X \n", "on move: O\n", "O \n", "X \n", " X \n", "on move: X\n", "O \n", "X \n", " X O \n", "on move: O\n", "O X \n", "X \n", " X O \n", "on move: X\n", "O X \n", "X O \n", " X O \n", "Episode 193, Total Reward: -1\n", "Average Reward: 0.32642487046632124\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", "O \n", "on move: O\n", " X \n", "X \n", "O \n", "on move: X\n", " X \n", "X O \n", "O \n", "on move: O\n", " X X \n", "X O \n", "O \n", "on move: X\n", " X X \n", "X O O \n", "O \n", "on move: O\n", "X X X \n", "X O O \n", "O \n", "Episode 194, Total Reward: 1\n", "Average Reward: 0.32989690721649484\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " O \n", "X \n", "on move: O\n", " X \n", " O \n", "X \n", "on move: X\n", " X \n", "O O \n", "X \n", "on move: O\n", " X X \n", "O O \n", "X \n", "on move: X\n", "O X X \n", "O O \n", "X \n", "on move: O\n", "O X X \n", "O O X \n", "X \n", "on move: X\n", "O X X \n", "O O X \n", "X O \n", "Episode 195, Total Reward: -1\n", "Average Reward: 0.3230769230769231\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " O \n", " X \n", "on move: O\n", " X \n", " O \n", " X \n", "on move: X\n", " X \n", " O \n", " X O \n", "on move: O\n", " X \n", " O \n", "X X O \n", "on move: X\n", " X \n", " O O \n", "X X O \n", "on move: O\n", "X X \n", " O O \n", "X X O \n", "on move: X\n", "X X O \n", " O O \n", "X X O \n", "Episode 196, Total Reward: -1\n", "Average Reward: 0.3163265306122449\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", "O \n", " \n", "X \n", "on move: O\n", "O \n", " \n", "X X \n", "on move: X\n", "O O \n", " \n", "X X \n", "on move: O\n", "O O \n", "X \n", "X X \n", "on move: X\n", "O O O \n", "X \n", "X X \n", "Episode 197, Total Reward: -1\n", "Average Reward: 0.3096446700507614\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " O X \n", " \n", " \n", "on move: O\n", " O X \n", " X \n", " \n", "on move: X\n", " O X \n", " X \n", " O \n", "on move: O\n", " O X \n", " X \n", "X O \n", "on move: X\n", " O X \n", " O X \n", "X O \n", "Episode 198, Total Reward: -1\n", "Average Reward: 0.30303030303030304\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X O \n", " \n", " \n", "on move: O\n", "X O X \n", " \n", " \n", "on move: X\n", "X O X \n", " \n", " O \n", "on move: O\n", "X O X \n", " \n", " X O \n", "on move: X\n", "X O X \n", "O \n", " X O \n", "on move: O\n", "X O X \n", "O X \n", " X O \n", "on move: X\n", "X O X \n", "O X \n", "O X O \n", "on move: O\n", "X O X \n", "O X X \n", "O X O \n", "Episode 199, Total Reward: 0\n", "Average Reward: 0.3015075376884422\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", "O \n", " \n", "on move: O\n", " X \n", "O \n", " X \n", "on move: X\n", " X \n", "O O \n", " X \n", "on move: O\n", " X \n", "O X O \n", " X \n", "Episode 200, Total Reward: 1\n", "Average Reward: 0.305\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", "X X \n", " O \n", " \n", "on move: X\n", "X X \n", " O \n", " O \n", "on move: O\n", "X X \n", " O \n", "X O \n", "on move: X\n", "X O X \n", " O \n", "X O \n", "on move: O\n", "X O X \n", " O X \n", "X O \n", "on move: X\n", "X O X \n", "O O X \n", "X O \n", "on move: O\n", "X O X \n", "O O X \n", "X X O \n", "Episode 201, Total Reward: 0\n", "Average Reward: 0.3034825870646766\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " \n", "O \n", "on move: O\n", "X \n", "X \n", "O \n", "on move: X\n", "X \n", "X \n", "O O \n", "on move: O\n", "X X \n", "X \n", "O O \n", "on move: X\n", "X X \n", "X O \n", "O O \n", "on move: O\n", "X X \n", "X O \n", "O X O \n", "on move: X\n", "X X O \n", "X O \n", "O X O \n", "Episode 202, Total Reward: -1\n", "Average Reward: 0.297029702970297\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " O \n", "X \n", "on move: O\n", " \n", " O \n", "X X \n", "on move: X\n", " O \n", " O \n", "X X \n", "on move: O\n", " O \n", " X O \n", "X X \n", "on move: X\n", " O O \n", " X O \n", "X X \n", "on move: O\n", " O O \n", " X O \n", "X X X \n", "Episode 203, Total Reward: 1\n", "Average Reward: 0.30049261083743845\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X O \n", " \n", "on move: O\n", "X \n", "X O \n", " \n", "on move: X\n", "X \n", "X O O \n", " \n", "on move: O\n", "X X \n", "X O O \n", " \n", "on move: X\n", "X X \n", "X O O \n", " O \n", "on move: O\n", "X X \n", "X O O \n", "X O \n", "Episode 204, Total Reward: 1\n", "Average Reward: 0.30392156862745096\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", "O \n", "on move: O\n", " X \n", "X \n", "O \n", "on move: X\n", " X \n", "X \n", "O O \n", "on move: O\n", " X \n", "X X \n", "O O \n", "on move: X\n", " O X \n", "X X \n", "O O \n", "on move: O\n", " O X \n", "X X X \n", "O O \n", "Episode 205, Total Reward: 1\n", "Average Reward: 0.3073170731707317\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X O \n", " \n", " \n", "on move: O\n", " X O \n", " \n", "X \n", "on move: X\n", " X O \n", " O \n", "X \n", "on move: O\n", " X O \n", "X O \n", "X \n", "on move: X\n", "O X O \n", "X O \n", "X \n", "on move: O\n", "O X O \n", "X O \n", "X X \n", "on move: X\n", "O X O \n", "X O O \n", "X X \n", "on move: O\n", "O X O \n", "X O O \n", "X X X \n", "Episode 206, Total Reward: 1\n", "Average Reward: 0.3106796116504854\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", "O \n", " \n", " X \n", "on move: O\n", "O \n", " \n", "X X \n", "on move: X\n", "O \n", "O \n", "X X \n", "on move: O\n", "O \n", "O \n", "X X X \n", "Episode 207, Total Reward: 1\n", "Average Reward: 0.3140096618357488\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " \n", "X O \n", "on move: O\n", " \n", "X \n", "X O \n", "on move: X\n", "O \n", "X \n", "X O \n", "on move: O\n", "O X \n", "X \n", "X O \n", "on move: X\n", "O O X \n", "X \n", "X O \n", "on move: O\n", "O O X \n", "X \n", "X X O \n", "on move: X\n", "O O X \n", "X O \n", "X X O \n", "Episode 208, Total Reward: -1\n", "Average Reward: 0.3076923076923077\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X O \n", " \n", " \n", "on move: O\n", "X O \n", " X \n", " \n", "on move: X\n", "X O \n", " X \n", " O \n", "on move: O\n", "X O \n", " X X \n", " O \n", "on move: X\n", "X O \n", "O X X \n", " O \n", "on move: O\n", "X O X \n", "O X X \n", " O \n", "on move: X\n", "X O X \n", "O X X \n", "O O \n", "on move: O\n", "X O X \n", "O X X \n", "O O X \n", "Episode 209, Total Reward: 1\n", "Average Reward: 0.31100478468899523\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", "O \n", " X \n", "on move: O\n", " X \n", "O \n", " X \n", "on move: X\n", " X \n", "O O \n", " X \n", "on move: O\n", " X \n", "O O X \n", " X \n", "on move: X\n", "O X \n", "O O X \n", " X \n", "on move: O\n", "O X \n", "O O X \n", "X X \n", "on move: X\n", "O X \n", "O O X \n", "X X O \n", "Episode 210, Total Reward: -1\n", "Average Reward: 0.3047619047619048\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", "O \n", "on move: O\n", " X \n", " X \n", "O \n", "on move: X\n", " X \n", " X \n", "O O \n", "on move: O\n", " X X \n", " X \n", "O O \n", "on move: X\n", "O X X \n", " X \n", "O O \n", "on move: O\n", "O X X \n", "X X \n", "O O \n", "on move: X\n", "O X X \n", "X X \n", "O O O \n", "Episode 211, Total Reward: -1\n", "Average Reward: 0.2985781990521327\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", "O X \n", " \n", " \n", "on move: O\n", "O X \n", " \n", "X \n", "on move: X\n", "O X \n", " O \n", "X \n", "on move: O\n", "O X \n", " O \n", "X X \n", "on move: X\n", "O X \n", " O O \n", "X X \n", "on move: O\n", "O X \n", " O O \n", "X X X \n", "Episode 212, Total Reward: 1\n", "Average Reward: 0.3018867924528302\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", " O \n", "on move: O\n", " X \n", "X \n", " O \n", "on move: X\n", " O X \n", "X \n", " O \n", "on move: O\n", " O X \n", "X \n", " O X \n", "on move: X\n", " O X \n", "X O \n", " O X \n", "on move: O\n", "X O X \n", "X O \n", " O X \n", "on move: X\n", "X O X \n", "X O O \n", " O X \n", "Episode 213, Total Reward: -1\n", "Average Reward: 0.29577464788732394\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", " O \n", " \n", " X X \n", "on move: X\n", " O \n", " O \n", " X X \n", "on move: O\n", "X O \n", " O \n", " X X \n", "on move: X\n", "X O \n", " O \n", "O X X \n", "on move: O\n", "X O \n", "X O \n", "O X X \n", "on move: X\n", "X O O \n", "X O \n", "O X X \n", "Episode 214, Total Reward: -1\n", "Average Reward: 0.2897196261682243\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", " X \n", " O \n", "X \n", "on move: X\n", " X \n", " O \n", "X O \n", "on move: O\n", " X \n", " O \n", "X O X \n", "on move: X\n", " X \n", "O O \n", "X O X \n", "on move: O\n", " X X \n", "O O \n", "X O X \n", "on move: X\n", " X X \n", "O O O \n", "X O X \n", "Episode 215, Total Reward: -1\n", "Average Reward: 0.2837209302325581\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " O \n", "X \n", "on move: O\n", "X \n", " O \n", "X \n", "on move: X\n", "X O \n", " O \n", "X \n", "on move: O\n", "X O \n", " O \n", "X X \n", "on move: X\n", "X O \n", " O \n", "X X O \n", "on move: O\n", "X O \n", " O X \n", "X X O \n", "on move: X\n", "X O O \n", " O X \n", "X X O \n", "on move: O\n", "X O O \n", "X O X \n", "X X O \n", "Episode 216, Total Reward: 1\n", "Average Reward: 0.28703703703703703\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " \n", "X O \n", "on move: O\n", "X \n", " \n", "X O \n", "on move: X\n", "X \n", "O \n", "X O \n", "on move: O\n", "X \n", "O X \n", "X O \n", "on move: X\n", "X \n", "O X \n", "X O O \n", "on move: O\n", "X X \n", "O X \n", "X O O \n", "on move: X\n", "X X \n", "O X O \n", "X O O \n", "on move: O\n", "X X X \n", "O X O \n", "X O O \n", "Episode 217, Total Reward: 1\n", "Average Reward: 0.2903225806451613\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", " O \n", "on move: O\n", " \n", " X X \n", " O \n", "on move: X\n", " \n", "O X X \n", " O \n", "on move: O\n", " \n", "O X X \n", "X O \n", "on move: X\n", "O \n", "O X X \n", "X O \n", "on move: O\n", "O \n", "O X X \n", "X O X \n", "on move: X\n", "O O \n", "O X X \n", "X O X \n", "on move: O\n", "O X O \n", "O X X \n", "X O X \n", "Episode 218, Total Reward: 0\n", "Average Reward: 0.2889908256880734\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", " O \n", "on move: O\n", " X \n", " \n", " O X \n", "on move: X\n", " X \n", "O \n", " O X \n", "on move: O\n", " X \n", "O X \n", " O X \n", "on move: X\n", " X \n", "O X \n", "O O X \n", "on move: O\n", " X \n", "O X X \n", "O O X \n", "on move: X\n", "O X \n", "O X X \n", "O O X \n", "Episode 219, Total Reward: -1\n", "Average Reward: 0.2831050228310502\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", "O \n", " \n", "on move: O\n", " X \n", "O \n", " X \n", "on move: X\n", " X \n", "O \n", "O X \n", "on move: O\n", "X X \n", "O \n", "O X \n", "on move: X\n", "X X \n", "O \n", "O X O \n", "on move: O\n", "X X \n", "O X \n", "O X O \n", "on move: X\n", "X O X \n", "O X \n", "O X O \n", "on move: O\n", "X O X \n", "O X X \n", "O X O \n", "Episode 220, Total Reward: 0\n", "Average Reward: 0.2818181818181818\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", "O \n", " \n", " X \n", "on move: O\n", "O \n", " \n", " X X \n", "on move: X\n", "O \n", " O \n", " X X \n", "on move: O\n", "O X \n", " O \n", " X X \n", "on move: X\n", "O X \n", "O O \n", " X X \n", "on move: O\n", "O X \n", "O X O \n", " X X \n", "on move: X\n", "O X \n", "O X O \n", "O X X \n", "Episode 221, Total Reward: -1\n", "Average Reward: 0.27601809954751133\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", "O \n", " X \n", "on move: O\n", " \n", "O \n", "X X \n", "on move: X\n", " \n", "O O \n", "X X \n", "on move: O\n", "X \n", "O O \n", "X X \n", "on move: X\n", "X \n", "O O \n", "X X O \n", "on move: O\n", "X X \n", "O O \n", "X X O \n", "on move: X\n", "X X \n", "O O O \n", "X X O \n", "Episode 222, Total Reward: -1\n", "Average Reward: 0.2702702702702703\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", "O \n", " \n", "on move: O\n", "X X \n", "O \n", " \n", "on move: X\n", "X X \n", "O O \n", " \n", "on move: O\n", "X X \n", "O O \n", " X \n", "on move: X\n", "X O X \n", "O O \n", " X \n", "on move: O\n", "X O X \n", "O X O \n", " X \n", "on move: X\n", "X O X \n", "O X O \n", " X O \n", "on move: O\n", "X O X \n", "O X O \n", "X X O \n", "Episode 223, Total Reward: 1\n", "Average Reward: 0.273542600896861\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", " O \n", "on move: O\n", " X \n", " X \n", " O \n", "on move: X\n", " X \n", " X \n", "O O \n", "on move: O\n", " X \n", " X \n", "O X O \n", "on move: X\n", " X \n", " O X \n", "O X O \n", "on move: O\n", " X \n", "X O X \n", "O X O \n", "on move: X\n", " O X \n", "X O X \n", "O X O \n", "on move: O\n", "X O X \n", "X O X \n", "O X O \n", "Episode 224, Total Reward: 0\n", "Average Reward: 0.27232142857142855\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " O \n", "X \n", "on move: O\n", " \n", " X O \n", "X \n", "on move: X\n", " \n", " X O \n", "X O \n", "on move: O\n", " \n", " X O \n", "X O X \n", "on move: X\n", "O \n", " X O \n", "X O X \n", "on move: O\n", "O \n", "X X O \n", "X O X \n", "on move: X\n", "O O \n", "X X O \n", "X O X \n", "on move: O\n", "O O X \n", "X X O \n", "X O X \n", "Episode 225, Total Reward: 1\n", "Average Reward: 0.27555555555555555\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", " O \n", "on move: O\n", " \n", "X X \n", " O \n", "on move: X\n", " \n", "X X \n", "O O \n", "on move: O\n", " \n", "X X X \n", "O O \n", "Episode 226, Total Reward: 1\n", "Average Reward: 0.27876106194690264\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " O X \n", " \n", "on move: O\n", " \n", " O X \n", "X \n", "on move: X\n", " \n", "O O X \n", "X \n", "on move: O\n", " X \n", "O O X \n", "X \n", "on move: X\n", " X \n", "O O X \n", "X O \n", "on move: O\n", "X X \n", "O O X \n", "X O \n", "on move: X\n", "X O X \n", "O O X \n", "X O \n", "Episode 227, Total Reward: -1\n", "Average Reward: 0.27312775330396477\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " O \n", " \n", "X \n", "on move: O\n", " O \n", "X \n", "X \n", "on move: X\n", " O \n", "X \n", "X O \n", "on move: O\n", " O \n", "X X \n", "X O \n", "on move: X\n", "O O \n", "X X \n", "X O \n", "on move: O\n", "O O \n", "X X X \n", "X O \n", "Episode 228, Total Reward: 1\n", "Average Reward: 0.27631578947368424\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", " X \n", " O \n", " X \n", "on move: X\n", " X \n", " O \n", "O X \n", "on move: O\n", " X \n", " O X \n", "O X \n", "on move: X\n", " X \n", "O O X \n", "O X \n", "on move: O\n", " X \n", "O O X \n", "O X X \n", "on move: X\n", " X O \n", "O O X \n", "O X X \n", "Episode 229, Total Reward: -1\n", "Average Reward: 0.27074235807860264\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", "O \n", " X \n", " \n", "on move: O\n", "O \n", " X \n", "X \n", "on move: X\n", "O \n", " X \n", "X O \n", "on move: O\n", "O \n", "X X \n", "X O \n", "on move: X\n", "O O \n", "X X \n", "X O \n", "on move: O\n", "O O \n", "X X \n", "X X O \n", "on move: X\n", "O O \n", "X X O \n", "X X O \n", "Episode 230, Total Reward: -1\n", "Average Reward: 0.26521739130434785\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " O X \n", " \n", " \n", "on move: O\n", " O X \n", " \n", " X \n", "on move: X\n", " O X \n", " O \n", " X \n", "on move: O\n", " O X \n", "X O \n", " X \n", "on move: X\n", "O O X \n", "X O \n", " X \n", "on move: O\n", "O O X \n", "X O \n", " X X \n", "on move: X\n", "O O X \n", "X O O \n", " X X \n", "on move: O\n", "O O X \n", "X O O \n", "X X X \n", "Episode 231, Total Reward: 1\n", "Average Reward: 0.2683982683982684\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", " O \n", "on move: O\n", " X \n", " X \n", " O \n", "on move: X\n", " X \n", " X \n", "O O \n", "on move: O\n", " X \n", " X \n", "O O X \n", "on move: X\n", " X \n", " X O \n", "O O X \n", "on move: O\n", " X \n", "X X O \n", "O O X \n", "on move: X\n", " O X \n", "X X O \n", "O O X \n", "on move: O\n", "X O X \n", "X X O \n", "O O X \n", "Episode 232, Total Reward: 1\n", "Average Reward: 0.27155172413793105\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", " O \n", "on move: O\n", " X \n", " \n", "X O \n", "on move: X\n", " O X \n", " \n", "X O \n", "on move: O\n", " O X \n", " X \n", "X O \n", "Episode 233, Total Reward: 1\n", "Average Reward: 0.27467811158798283\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", "O \n", " \n", "on move: O\n", "X X \n", "O \n", " \n", "on move: X\n", "X X \n", "O \n", " O \n", "on move: O\n", "X X \n", "O X \n", " O \n", "on move: X\n", "X X \n", "O X \n", "O O \n", "on move: O\n", "X X X \n", "O X \n", "O O \n", "Episode 234, Total Reward: 1\n", "Average Reward: 0.2777777777777778\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " O \n", "X X \n", " \n", "on move: X\n", " O O \n", "X X \n", " \n", "on move: O\n", " O O \n", "X X \n", " X \n", "on move: X\n", " O O \n", "X X \n", "O X \n", "on move: O\n", " O O \n", "X X \n", "O X X \n", "on move: X\n", " O O \n", "X X O \n", "O X X \n", "on move: O\n", "X O O \n", "X X O \n", "O X X \n", "Episode 235, Total Reward: 1\n", "Average Reward: 0.28085106382978725\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", "O \n", " \n", " X \n", "on move: O\n", "O \n", "X \n", " X \n", "on move: X\n", "O \n", "X O \n", " X \n", "on move: O\n", "O \n", "X O X \n", " X \n", "on move: X\n", "O O \n", "X O X \n", " X \n", "on move: O\n", "O O X \n", "X O X \n", " X \n", "on move: X\n", "O O X \n", "X O X \n", " X O \n", "Episode 236, Total Reward: -1\n", "Average Reward: 0.2754237288135593\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", "O \n", "X \n", " \n", "on move: O\n", "O \n", "X X \n", " \n", "on move: X\n", "O O \n", "X X \n", " \n", "on move: O\n", "O X O \n", "X X \n", " \n", "on move: X\n", "O X O \n", "X X \n", "O \n", "on move: O\n", "O X O \n", "X X \n", "O X \n", "on move: X\n", "O X O \n", "X X \n", "O X O \n", "on move: O\n", "O X O \n", "X X X \n", "O X O \n", "Episode 237, Total Reward: 1\n", "Average Reward: 0.27848101265822783\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", " O \n", "X \n", " X \n", "on move: X\n", "O O \n", "X \n", " X \n", "on move: O\n", "O O \n", "X X \n", " X \n", "on move: X\n", "O O \n", "X X \n", "O X \n", "on move: O\n", "O O \n", "X X \n", "O X X \n", "on move: X\n", "O O \n", "X O X \n", "O X X \n", "Episode 238, Total Reward: -1\n", "Average Reward: 0.27310924369747897\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " O \n", " X \n", "X \n", "on move: X\n", " O \n", "O X \n", "X \n", "on move: O\n", " O \n", "O X X \n", "X \n", "on move: X\n", " O O \n", "O X X \n", "X \n", "on move: O\n", "X O O \n", "O X X \n", "X \n", "on move: X\n", "X O O \n", "O X X \n", "X O \n", "on move: O\n", "X O O \n", "O X X \n", "X O X \n", "Episode 239, Total Reward: 1\n", "Average Reward: 0.27615062761506276\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", "O \n", "on move: O\n", " X X \n", " \n", "O \n", "on move: X\n", " X X \n", " O \n", "O \n", "on move: O\n", " X X \n", "X O \n", "O \n", "on move: X\n", "O X X \n", "X O \n", "O \n", "on move: O\n", "O X X \n", "X O \n", "O X \n", "on move: X\n", "O X X \n", "X O \n", "O O X \n", "on move: O\n", "O X X \n", "X O X \n", "O O X \n", "Episode 240, Total Reward: 1\n", "Average Reward: 0.2791666666666667\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " O \n", "X \n", "on move: O\n", " X \n", " O \n", "X \n", "on move: X\n", "O X \n", " O \n", "X \n", "on move: O\n", "O X \n", " O X \n", "X \n", "on move: X\n", "O X \n", "O O X \n", "X \n", "on move: O\n", "O X \n", "O O X \n", "X X \n", "on move: X\n", "O X O \n", "O O X \n", "X X \n", "on move: O\n", "O X O \n", "O O X \n", "X X X \n", "Episode 241, Total Reward: 1\n", "Average Reward: 0.2821576763485477\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " \n", " O X \n", "on move: O\n", " \n", " \n", "X O X \n", "on move: X\n", " \n", " O \n", "X O X \n", "on move: O\n", " X \n", " O \n", "X O X \n", "on move: X\n", " X \n", " O O \n", "X O X \n", "on move: O\n", " X X \n", " O O \n", "X O X \n", "on move: X\n", " X X \n", "O O O \n", "X O X \n", "Episode 242, Total Reward: -1\n", "Average Reward: 0.2768595041322314\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", "O X \n", " \n", "on move: O\n", "X \n", "O X \n", " \n", "on move: X\n", "X \n", "O O X \n", " \n", "on move: O\n", "X \n", "O O X \n", " X \n", "on move: X\n", "X O \n", "O O X \n", " X \n", "on move: O\n", "X O \n", "O O X \n", " X X \n", "on move: X\n", "X O \n", "O O X \n", "O X X \n", "Episode 243, Total Reward: -1\n", "Average Reward: 0.2716049382716049\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " O \n", " \n", "on move: O\n", "X \n", " O \n", "X \n", "on move: X\n", "X \n", "O O \n", "X \n", "on move: O\n", "X X \n", "O O \n", "X \n", "on move: X\n", "X X \n", "O O \n", "X O \n", "on move: O\n", "X X \n", "O X O \n", "X O \n", "on move: X\n", "X X \n", "O X O \n", "X O O \n", "on move: O\n", "X X X \n", "O X O \n", "X O O \n", "Episode 244, Total Reward: 1\n", "Average Reward: 0.27459016393442626\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", "O \n", " \n", " X \n", "on move: O\n", "O \n", " X \n", " X \n", "on move: X\n", "O \n", " X O \n", " X \n", "on move: O\n", "O X \n", " X O \n", " X \n", "on move: X\n", "O X \n", "O X O \n", " X \n", "on move: O\n", "O X X \n", "O X O \n", " X \n", "on move: X\n", "O X X \n", "O X O \n", "O X \n", "Episode 245, Total Reward: -1\n", "Average Reward: 0.2693877551020408\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", " O \n", "on move: O\n", " \n", " X \n", "X O \n", "on move: X\n", " \n", " X \n", "X O O \n", "on move: O\n", " X \n", " X \n", "X O O \n", "on move: X\n", " X \n", " X O \n", "X O O \n", "on move: O\n", " X X \n", " X O \n", "X O O \n", "Episode 246, Total Reward: 1\n", "Average Reward: 0.27235772357723576\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " \n", " O \n", "on move: O\n", "X X \n", " \n", " O \n", "on move: X\n", "X X \n", " O \n", " O \n", "on move: O\n", "X X \n", "X O \n", " O \n", "on move: X\n", "X X \n", "X O \n", "O O \n", "on move: O\n", "X X \n", "X O X \n", "O O \n", "on move: X\n", "X X \n", "X O X \n", "O O O \n", "Episode 247, Total Reward: -1\n", "Average Reward: 0.26720647773279355\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", "O X \n", " \n", "on move: O\n", " X \n", "O X \n", " \n", "on move: X\n", " X \n", "O X \n", "O \n", "on move: O\n", "X X \n", "O X \n", "O \n", "on move: X\n", "X X \n", "O O X \n", "O \n", "on move: O\n", "X X \n", "O O X \n", "O X \n", "on move: X\n", "X X \n", "O O X \n", "O X O \n", "on move: O\n", "X X X \n", "O O X \n", "O X O \n", "Episode 248, Total Reward: 1\n", "Average Reward: 0.2701612903225806\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " O \n", " X X \n", " \n", "on move: X\n", "O O \n", " X X \n", " \n", "on move: O\n", "O O \n", "X X X \n", " \n", "Episode 249, Total Reward: 1\n", "Average Reward: 0.27309236947791166\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " \n", " X O \n", "on move: O\n", " \n", " \n", "X X O \n", "on move: X\n", " O \n", " \n", "X X O \n", "on move: O\n", " O \n", " X \n", "X X O \n", "on move: X\n", " O \n", " X O \n", "X X O \n", "Episode 250, Total Reward: -1\n", "Average Reward: 0.268\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", "O \n", "X \n", "on move: O\n", " X \n", "O \n", "X \n", "on move: X\n", " X \n", "O O \n", "X \n", "on move: O\n", "X X \n", "O O \n", "X \n", "on move: X\n", "X X \n", "O O \n", "X O \n", "on move: O\n", "X X \n", "O O \n", "X X O \n", "on move: X\n", "X X O \n", "O O \n", "X X O \n", "on move: O\n", "X X O \n", "O O X \n", "X X O \n", "Episode 251, Total Reward: 0\n", "Average Reward: 0.26693227091633465\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " \n", "X O \n", "on move: O\n", " \n", " \n", "X X O \n", "on move: X\n", " O \n", " \n", "X X O \n", "on move: O\n", " O \n", "X \n", "X X O \n", "on move: X\n", " O \n", "X O \n", "X X O \n", "on move: O\n", "X O \n", "X O \n", "X X O \n", "Episode 252, Total Reward: 1\n", "Average Reward: 0.2698412698412698\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", " O \n", "on move: O\n", "X X \n", " \n", " O \n", "on move: X\n", "X X \n", " \n", "O O \n", "on move: O\n", "X X \n", " \n", "O O X \n", "on move: X\n", "X O X \n", " \n", "O O X \n", "on move: O\n", "X O X \n", " X \n", "O O X \n", "Episode 253, Total Reward: 1\n", "Average Reward: 0.2727272727272727\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", " O \n", "on move: O\n", " X X \n", " \n", " O \n", "on move: X\n", " X X \n", "O \n", " O \n", "on move: O\n", "X X X \n", "O \n", " O \n", "Episode 254, Total Reward: 1\n", "Average Reward: 0.2755905511811024\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", "X O \n", " \n", " X \n", "on move: X\n", "X O \n", " O \n", " X \n", "on move: O\n", "X O \n", " O X \n", " X \n", "on move: X\n", "X O O \n", " O X \n", " X \n", "on move: O\n", "X O O \n", "X O X \n", " X \n", "on move: X\n", "X O O \n", "X O X \n", "O X \n", "Episode 255, Total Reward: -1\n", "Average Reward: 0.27058823529411763\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", " X X \n", " O \n", " \n", "on move: X\n", "O X X \n", " O \n", " \n", "on move: O\n", "O X X \n", " O \n", "X \n", "on move: X\n", "O X X \n", " O \n", "X O \n", "on move: O\n", "O X X \n", "X O \n", "X O \n", "on move: X\n", "O X X \n", "X O \n", "X O O \n", "on move: O\n", "O X X \n", "X X O \n", "X O O \n", "Episode 256, Total Reward: 1\n", "Average Reward: 0.2734375\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " O \n", " \n", "on move: O\n", "X X \n", " O \n", " \n", "on move: X\n", "X X \n", " O \n", " O \n", "on move: O\n", "X X \n", " O \n", " X O \n", "on move: X\n", "X X \n", " O O \n", " X O \n", "on move: O\n", "X X \n", "X O O \n", " X O \n", "on move: X\n", "X O X \n", "X O O \n", " X O \n", "on move: O\n", "X O X \n", "X O O \n", "X X O \n", "Episode 257, Total Reward: 1\n", "Average Reward: 0.27626459143968873\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " \n", " X O \n", "on move: O\n", " \n", " X \n", " X O \n", "on move: X\n", " O \n", " X \n", " X O \n", "on move: O\n", " O \n", "X X \n", " X O \n", "on move: X\n", " O \n", "X X O \n", " X O \n", "on move: O\n", "X O \n", "X X O \n", " X O \n", "on move: X\n", "X O \n", "X X O \n", "O X O \n", "on move: O\n", "X O X \n", "X X O \n", "O X O \n", "Episode 258, Total Reward: 0\n", "Average Reward: 0.2751937984496124\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " O \n", " \n", "on move: O\n", "X X \n", " O \n", " \n", "on move: X\n", "X X \n", " O \n", " O \n", "on move: O\n", "X X \n", "X O \n", " O \n", "on move: X\n", "X X \n", "X O \n", "O O \n", "on move: O\n", "X X X \n", "X O \n", "O O \n", "Episode 259, Total Reward: 1\n", "Average Reward: 0.277992277992278\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", " O \n", "on move: O\n", " X X \n", " \n", " O \n", "on move: X\n", " X X \n", " O \n", " O \n", "on move: O\n", "X X X \n", " O \n", " O \n", "Episode 260, Total Reward: 1\n", "Average Reward: 0.28076923076923077\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", "O X \n", " \n", " \n", "on move: O\n", "O X \n", "X \n", " \n", "on move: X\n", "O O X \n", "X \n", " \n", "on move: O\n", "O O X \n", "X \n", "X \n", "on move: X\n", "O O X \n", "X O \n", "X \n", "on move: O\n", "O O X \n", "X O \n", "X X \n", "on move: X\n", "O O X \n", "X O O \n", "X X \n", "on move: O\n", "O O X \n", "X O O \n", "X X X \n", "Episode 261, Total Reward: 1\n", "Average Reward: 0.2835249042145594\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " O \n", " \n", "on move: O\n", "X X \n", " O \n", " \n", "on move: X\n", "X X \n", "O O \n", " \n", "on move: O\n", "X X \n", "O O \n", "X \n", "on move: X\n", "X X \n", "O O \n", "X O \n", "on move: O\n", "X X \n", "O O \n", "X O X \n", "on move: X\n", "X O X \n", "O O \n", "X O X \n", "on move: O\n", "X O X \n", "O X O \n", "X O X \n", "Episode 262, Total Reward: 1\n", "Average Reward: 0.2862595419847328\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", " O \n", "on move: O\n", " \n", "X X \n", " O \n", "on move: X\n", "O \n", "X X \n", " O \n", "on move: O\n", "O X \n", "X X \n", " O \n", "on move: X\n", "O X \n", "X X O \n", " O \n", "on move: O\n", "O X \n", "X X O \n", "X O \n", "on move: X\n", "O X O \n", "X X O \n", "X O \n", "on move: O\n", "O X O \n", "X X O \n", "X O X \n", "Episode 263, Total Reward: 0\n", "Average Reward: 0.28517110266159695\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " \n", "X O \n", "on move: O\n", " X \n", " \n", "X O \n", "on move: X\n", " X \n", "O \n", "X O \n", "on move: O\n", " X \n", "O \n", "X O X \n", "on move: X\n", "O X \n", "O \n", "X O X \n", "on move: O\n", "O X \n", "O X \n", "X O X \n", "Episode 264, Total Reward: 1\n", "Average Reward: 0.2878787878787879\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", "O \n", " \n", "on move: O\n", "X X \n", "O \n", " \n", "on move: X\n", "X O X \n", "O \n", " \n", "on move: O\n", "X O X \n", "O \n", " X \n", "on move: X\n", "X O X \n", "O O \n", " X \n", "on move: O\n", "X O X \n", "O O \n", " X X \n", "on move: X\n", "X O X \n", "O O \n", "O X X \n", "on move: O\n", "X O X \n", "O O X \n", "O X X \n", "Episode 265, Total Reward: 1\n", "Average Reward: 0.29056603773584905\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", "O \n", " X \n", " \n", "on move: O\n", "O \n", " X \n", " X \n", "on move: X\n", "O \n", " X O \n", " X \n", "on move: O\n", "O X \n", " X O \n", " X \n", "on move: X\n", "O X O \n", " X O \n", " X \n", "on move: O\n", "O X O \n", " X O \n", " X X \n", "Episode 266, Total Reward: 1\n", "Average Reward: 0.2932330827067669\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", "O \n", " X \n", "on move: O\n", " X \n", "O \n", " X \n", "on move: X\n", " X \n", "O \n", "O X \n", "on move: O\n", " X \n", "O X \n", "O X \n", "on move: X\n", "O X \n", "O X \n", "O X \n", "Episode 267, Total Reward: -1\n", "Average Reward: 0.2883895131086142\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", "O \n", " \n", "X \n", "on move: O\n", "O \n", " \n", "X X \n", "on move: X\n", "O O \n", " \n", "X X \n", "on move: O\n", "O O \n", " X \n", "X X \n", "on move: X\n", "O O O \n", " X \n", "X X \n", "Episode 268, Total Reward: -1\n", "Average Reward: 0.2835820895522388\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " O \n", " X \n", " X \n", "on move: X\n", " O O \n", " X \n", " X \n", "on move: O\n", " O O \n", " X \n", " X X \n", "on move: X\n", "O O O \n", " X \n", " X X \n", "Episode 269, Total Reward: -1\n", "Average Reward: 0.2788104089219331\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " \n", "X O \n", "on move: O\n", " \n", " X \n", "X O \n", "on move: X\n", " O \n", " X \n", "X O \n", "on move: O\n", " X O \n", " X \n", "X O \n", "on move: X\n", " X O \n", " X O \n", "X O \n", "on move: O\n", " X O \n", " X O \n", "X O X \n", "on move: X\n", "O X O \n", " X O \n", "X O X \n", "on move: O\n", "O X O \n", "X X O \n", "X O X \n", "Episode 270, Total Reward: 0\n", "Average Reward: 0.2777777777777778\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X O \n", " \n", " \n", "on move: O\n", " X O \n", "X \n", " \n", "on move: X\n", " X O \n", "X O \n", " \n", "on move: O\n", " X O \n", "X O \n", " X \n", "on move: X\n", " X O \n", "X O \n", "O X \n", "Episode 271, Total Reward: -1\n", "Average Reward: 0.2730627306273063\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", "O \n", "on move: O\n", " X \n", "X \n", "O \n", "on move: X\n", " X \n", "X \n", "O O \n", "on move: O\n", "X X \n", "X \n", "O O \n", "on move: X\n", "X X \n", "X O \n", "O O \n", "on move: O\n", "X X \n", "X O X \n", "O O \n", "on move: X\n", "X X \n", "X O X \n", "O O O \n", "Episode 272, Total Reward: -1\n", "Average Reward: 0.26838235294117646\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " \n", "O \n", "on move: O\n", "X X \n", " \n", "O \n", "on move: X\n", "X X \n", "O \n", "O \n", "on move: O\n", "X X X \n", "O \n", "O \n", "Episode 273, Total Reward: 1\n", "Average Reward: 0.27106227106227104\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X O \n", " \n", "on move: O\n", " X \n", "X O \n", " \n", "on move: X\n", " X \n", "X O \n", "O \n", "on move: O\n", " X X \n", "X O \n", "O \n", "on move: X\n", "O X X \n", "X O \n", "O \n", "on move: O\n", "O X X \n", "X O \n", "O X \n", "on move: X\n", "O X X \n", "X O \n", "O O X \n", "on move: O\n", "O X X \n", "X O X \n", "O O X \n", "Episode 274, Total Reward: 1\n", "Average Reward: 0.2737226277372263\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " \n", "X O \n", "on move: O\n", "X \n", " \n", "X O \n", "on move: X\n", "X \n", "O \n", "X O \n", "on move: O\n", "X X \n", "O \n", "X O \n", "on move: X\n", "X X \n", "O \n", "X O O \n", "on move: O\n", "X X \n", "O X \n", "X O O \n", "on move: X\n", "X X O \n", "O X \n", "X O O \n", "on move: O\n", "X X O \n", "O X X \n", "X O O \n", "Episode 275, Total Reward: 0\n", "Average Reward: 0.2727272727272727\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", "O \n", "on move: O\n", "X \n", "X \n", "O \n", "on move: X\n", "X \n", "X \n", "O O \n", "on move: O\n", "X \n", "X X \n", "O O \n", "on move: X\n", "X O \n", "X X \n", "O O \n", "on move: O\n", "X O \n", "X X X \n", "O O \n", "Episode 276, Total Reward: 1\n", "Average Reward: 0.2753623188405797\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X O \n", " \n", "on move: O\n", "X \n", "X O \n", " \n", "on move: X\n", "X O \n", "X O \n", " \n", "on move: O\n", "X O \n", "X O \n", " X \n", "on move: X\n", "X O O \n", "X O \n", " X \n", "on move: O\n", "X O O \n", "X O X \n", " X \n", "on move: X\n", "X O O \n", "X O X \n", "O X \n", "Episode 277, Total Reward: -1\n", "Average Reward: 0.27075812274368233\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " O \n", "X X \n", " \n", "on move: X\n", " O \n", "X X \n", "O \n", "on move: O\n", " O \n", "X X \n", "O X \n", "on move: X\n", " O \n", "X X O \n", "O X \n", "on move: O\n", "X O \n", "X X O \n", "O X \n", "Episode 278, Total Reward: 1\n", "Average Reward: 0.2733812949640288\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", "O X \n", " \n", " \n", "on move: O\n", "O X \n", " X \n", " \n", "on move: X\n", "O X \n", " X \n", "O \n", "on move: O\n", "O X \n", " X \n", "O X \n", "Episode 279, Total Reward: 1\n", "Average Reward: 0.27598566308243727\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " O \n", "X X \n", " \n", "on move: X\n", "O O \n", "X X \n", " \n", "on move: O\n", "O O \n", "X X \n", " X \n", "on move: X\n", "O O \n", "X X \n", " O X \n", "on move: O\n", "O O \n", "X X X \n", " O X \n", "Episode 280, Total Reward: 1\n", "Average Reward: 0.2785714285714286\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " O \n", " X \n", "on move: O\n", " X \n", " O \n", " X \n", "on move: X\n", " X \n", " O \n", "O X \n", "on move: O\n", " X X \n", " O \n", "O X \n", "on move: X\n", "O X X \n", " O \n", "O X \n", "on move: O\n", "O X X \n", " O \n", "O X X \n", "on move: X\n", "O X X \n", "O O \n", "O X X \n", "Episode 281, Total Reward: -1\n", "Average Reward: 0.27402135231316727\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " \n", " O \n", "on move: O\n", "X X \n", " \n", " O \n", "on move: X\n", "X X \n", " \n", " O O \n", "on move: O\n", "X X \n", " X \n", " O O \n", "on move: X\n", "X X \n", " O X \n", " O O \n", "on move: O\n", "X X \n", "X O X \n", " O O \n", "on move: X\n", "X O X \n", "X O X \n", " O O \n", "Episode 282, Total Reward: -1\n", "Average Reward: 0.2695035460992908\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X O \n", " \n", " \n", "on move: O\n", "X O \n", " \n", " X \n", "on move: X\n", "X O \n", "O \n", " X \n", "on move: O\n", "X O \n", "O X \n", " X \n", "on move: X\n", "X O \n", "O X \n", "O X \n", "on move: O\n", "X O \n", "O X X \n", "O X \n", "Episode 283, Total Reward: 1\n", "Average Reward: 0.27208480565371024\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", "X O \n", " \n", " X \n", "on move: X\n", "X O O \n", " \n", " X \n", "on move: O\n", "X O O \n", "X \n", " X \n", "on move: X\n", "X O O \n", "X \n", " X O \n", "on move: O\n", "X O O \n", "X X \n", " X O \n", "on move: X\n", "X O O \n", "X X O \n", " X O \n", "Episode 284, Total Reward: -1\n", "Average Reward: 0.2676056338028169\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", "O \n", " \n", " X \n", "on move: O\n", "O \n", " X \n", " X \n", "on move: X\n", "O \n", " X \n", "O X \n", "on move: O\n", "O \n", "X X \n", "O X \n", "on move: X\n", "O O \n", "X X \n", "O X \n", "on move: O\n", "O O \n", "X X \n", "O X X \n", "on move: X\n", "O O \n", "X O X \n", "O X X \n", "on move: O\n", "O O X \n", "X O X \n", "O X X \n", "Episode 285, Total Reward: 1\n", "Average Reward: 0.27017543859649124\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", "O \n", " \n", "X \n", "on move: O\n", "O \n", " X \n", "X \n", "on move: X\n", "O \n", " X \n", "X O \n", "on move: O\n", "O \n", "X X \n", "X O \n", "on move: X\n", "O O \n", "X X \n", "X O \n", "on move: O\n", "O O \n", "X X \n", "X O X \n", "on move: X\n", "O O \n", "X O X \n", "X O X \n", "on move: O\n", "O X O \n", "X O X \n", "X O X \n", "Episode 286, Total Reward: 0\n", "Average Reward: 0.2692307692307692\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " O \n", "X \n", "on move: O\n", " \n", " O \n", "X X \n", "on move: X\n", " \n", " O O \n", "X X \n", "on move: O\n", " \n", " O O \n", "X X X \n", "Episode 287, Total Reward: 1\n", "Average Reward: 0.27177700348432055\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " \n", "O X \n", "on move: O\n", " \n", " X \n", "O X \n", "on move: X\n", " \n", " X O \n", "O X \n", "on move: O\n", " \n", "X X O \n", "O X \n", "on move: X\n", " O \n", "X X O \n", "O X \n", "on move: O\n", " O \n", "X X O \n", "O X X \n", "on move: X\n", "O O \n", "X X O \n", "O X X \n", "on move: O\n", "O O X \n", "X X O \n", "O X X \n", "Episode 288, Total Reward: 0\n", "Average Reward: 0.2708333333333333\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " \n", " O X \n", "on move: O\n", " X \n", " \n", " O X \n", "on move: X\n", " X \n", "O \n", " O X \n", "on move: O\n", " X \n", "O \n", "X O X \n", "on move: X\n", " X \n", "O O \n", "X O X \n", "on move: O\n", "X X \n", "O O \n", "X O X \n", "on move: X\n", "X X \n", "O O O \n", "X O X \n", "Episode 289, Total Reward: -1\n", "Average Reward: 0.2664359861591695\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", "O \n", "on move: O\n", " X \n", " X \n", "O \n", "on move: X\n", " X \n", " X \n", "O O \n", "on move: O\n", "X X \n", " X \n", "O O \n", "on move: X\n", "X X \n", "O X \n", "O O \n", "on move: O\n", "X X \n", "O X X \n", "O O \n", "on move: X\n", "X X O \n", "O X X \n", "O O \n", "on move: O\n", "X X O \n", "O X X \n", "O O X \n", "Episode 290, Total Reward: 1\n", "Average Reward: 0.2689655172413793\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", " O \n", "on move: O\n", " X \n", " X \n", " O \n", "on move: X\n", " X \n", "O X \n", " O \n", "on move: O\n", "X X \n", "O X \n", " O \n", "on move: X\n", "X O X \n", "O X \n", " O \n", "on move: O\n", "X O X \n", "O X X \n", " O \n", "on move: X\n", "X O X \n", "O X X \n", " O O \n", "on move: O\n", "X O X \n", "O X X \n", "X O O \n", "Episode 291, Total Reward: 1\n", "Average Reward: 0.27147766323024053\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " O X \n", " \n", " \n", "on move: O\n", "X O X \n", " \n", " \n", "on move: X\n", "X O X \n", "O \n", " \n", "on move: O\n", "X O X \n", "O \n", "X \n", "on move: X\n", "X O X \n", "O \n", "X O \n", "on move: O\n", "X O X \n", "O X \n", "X O \n", "on move: X\n", "X O X \n", "O X \n", "X O O \n", "on move: O\n", "X O X \n", "O X X \n", "X O O \n", "Episode 292, Total Reward: 1\n", "Average Reward: 0.273972602739726\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", "O \n", "on move: O\n", " \n", " X X \n", "O \n", "on move: X\n", " \n", " X X \n", "O O \n", "on move: O\n", " X \n", " X X \n", "O O \n", "on move: X\n", " O X \n", " X X \n", "O O \n", "on move: O\n", "X O X \n", " X X \n", "O O \n", "on move: X\n", "X O X \n", " X X \n", "O O O \n", "Episode 293, Total Reward: -1\n", "Average Reward: 0.2696245733788396\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " O \n", " X \n", "on move: O\n", " X \n", " O \n", " X \n", "on move: X\n", " X \n", "O O \n", " X \n", "on move: O\n", " X X \n", "O O \n", " X \n", "on move: X\n", " X X \n", "O O \n", " O X \n", "on move: O\n", "X X X \n", "O O \n", " O X \n", "Episode 294, Total Reward: 1\n", "Average Reward: 0.272108843537415\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X O \n", " \n", "on move: O\n", " X \n", " X O \n", " \n", "on move: X\n", " O X \n", " X O \n", " \n", "on move: O\n", " O X \n", " X O \n", "X \n", "Episode 295, Total Reward: 1\n", "Average Reward: 0.2745762711864407\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", "O \n", " X \n", " \n", "on move: O\n", "O X \n", " X \n", " \n", "on move: X\n", "O X \n", " O X \n", " \n", "on move: O\n", "O X \n", " O X \n", " X \n", "on move: X\n", "O X O \n", " O X \n", " X \n", "on move: O\n", "O X O \n", " O X \n", " X X \n", "on move: X\n", "O X O \n", "O O X \n", " X X \n", "on move: O\n", "O X O \n", "O O X \n", "X X X \n", "Episode 296, Total Reward: 1\n", "Average Reward: 0.27702702702702703\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", "O \n", " X \n", "on move: O\n", " \n", "O \n", "X X \n", "on move: X\n", " O \n", "O \n", "X X \n", "on move: O\n", " O \n", "O X \n", "X X \n", "on move: X\n", " O \n", "O X \n", "X O X \n", "on move: O\n", " X O \n", "O X \n", "X O X \n", "on move: X\n", " X O \n", "O X O \n", "X O X \n", "on move: O\n", "X X O \n", "O X O \n", "X O X \n", "Episode 297, Total Reward: 1\n", "Average Reward: 0.27946127946127947\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " O \n", " \n", "on move: O\n", "X \n", " O \n", " X \n", "on move: X\n", "X O \n", " O \n", " X \n", "on move: O\n", "X X O \n", " O \n", " X \n", "on move: X\n", "X X O \n", " O \n", " O X \n", "on move: O\n", "X X O \n", " X O \n", " O X \n", "Episode 298, Total Reward: 1\n", "Average Reward: 0.28187919463087246\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", " O \n", "on move: O\n", " \n", " X X \n", " O \n", "on move: X\n", " O \n", " X X \n", " O \n", "on move: O\n", " O \n", "X X X \n", " O \n", "Episode 299, Total Reward: 1\n", "Average Reward: 0.2842809364548495\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " O \n", " \n", "X \n", "on move: O\n", " O \n", " X \n", "X \n", "on move: X\n", "O O \n", " X \n", "X \n", "on move: O\n", "O O \n", " X \n", "X X \n", "on move: X\n", "O O \n", " O X \n", "X X \n", "on move: O\n", "O O \n", "X O X \n", "X X \n", "on move: X\n", "O O \n", "X O X \n", "X X O \n", "Episode 300, Total Reward: -1\n", "Average Reward: 0.28\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", " X \n", " O \n", " X \n", "on move: X\n", "O X \n", " O \n", " X \n", "on move: O\n", "O X \n", "X O \n", " X \n", "on move: X\n", "O X O \n", "X O \n", " X \n", "on move: O\n", "O X O \n", "X O \n", "X X \n", "on move: X\n", "O X O \n", "X O O \n", "X X \n", "on move: O\n", "O X O \n", "X O O \n", "X X X \n", "Episode 301, Total Reward: 1\n", "Average Reward: 0.2823920265780731\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", "O \n", " X \n", "on move: O\n", " X \n", "O \n", " X \n", "on move: X\n", "O X \n", "O \n", " X \n", "on move: O\n", "O X \n", "O \n", " X X \n", "on move: X\n", "O X \n", "O O \n", " X X \n", "on move: O\n", "O X X \n", "O O \n", " X X \n", "on move: X\n", "O X X \n", "O O O \n", " X X \n", "Episode 302, Total Reward: -1\n", "Average Reward: 0.2781456953642384\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " \n", " X O \n", "on move: O\n", " \n", " X \n", " X O \n", "on move: X\n", " \n", " X O \n", " X O \n", "on move: O\n", " \n", " X O \n", "X X O \n", "on move: X\n", "O \n", " X O \n", "X X O \n", "on move: O\n", "O \n", "X X O \n", "X X O \n", "on move: X\n", "O O \n", "X X O \n", "X X O \n", "on move: O\n", "O O X \n", "X X O \n", "X X O \n", "Episode 303, Total Reward: 1\n", "Average Reward: 0.28052805280528054\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", "O X \n", " \n", " \n", "on move: O\n", "O X \n", " X \n", " \n", "on move: X\n", "O X \n", " X \n", " O \n", "on move: O\n", "O X X \n", " X \n", " O \n", "on move: X\n", "O X X \n", "O X \n", " O \n", "on move: O\n", "O X X \n", "O X \n", " O X \n", "on move: X\n", "O X X \n", "O X O \n", " O X \n", "on move: O\n", "O X X \n", "O X O \n", "X O X \n", "Episode 304, Total Reward: 1\n", "Average Reward: 0.28289473684210525\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", " X \n", " O \n", " X \n", "on move: X\n", " X \n", "O O \n", " X \n", "on move: O\n", " X \n", "O O \n", " X X \n", "on move: X\n", " X \n", "O O \n", "O X X \n", "on move: O\n", " X \n", "O O X \n", "O X X \n", "Episode 305, Total Reward: 1\n", "Average Reward: 0.28524590163934427\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", "O \n", " \n", "on move: O\n", "X \n", "O X \n", " \n", "on move: X\n", "X \n", "O O X \n", " \n", "on move: O\n", "X \n", "O O X \n", " X \n", "on move: X\n", "X \n", "O O X \n", "O X \n", "on move: O\n", "X X \n", "O O X \n", "O X \n", "on move: X\n", "X O X \n", "O O X \n", "O X \n", "on move: O\n", "X O X \n", "O O X \n", "O X X \n", "Episode 306, Total Reward: 1\n", "Average Reward: 0.2875816993464052\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", "O \n", " \n", "on move: O\n", " X X \n", "O \n", " \n", "on move: X\n", " X X \n", "O \n", " O \n", "on move: O\n", " X X \n", "O \n", "X O \n", "on move: X\n", " X X \n", "O O \n", "X O \n", "on move: O\n", " X X \n", "O O X \n", "X O \n", "on move: X\n", " X X \n", "O O X \n", "X O O \n", "on move: O\n", "X X X \n", "O O X \n", "X O O \n", "Episode 307, Total Reward: 1\n", "Average Reward: 0.2899022801302932\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", "O \n", " \n", "on move: O\n", "X X \n", "O \n", " \n", "on move: X\n", "X X \n", "O O \n", " \n", "on move: O\n", "X X \n", "O O \n", " X \n", "on move: X\n", "X X \n", "O O \n", " X O \n", "on move: O\n", "X X \n", "O O \n", "X X O \n", "on move: X\n", "X X \n", "O O O \n", "X X O \n", "Episode 308, Total Reward: -1\n", "Average Reward: 0.2857142857142857\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X O \n", " \n", "on move: O\n", "X \n", "X O \n", " \n", "on move: X\n", "X O \n", "X O \n", " \n", "on move: O\n", "X O \n", "X O \n", " X \n", "on move: X\n", "X O O \n", "X O \n", " X \n", "on move: O\n", "X O O \n", "X O \n", "X X \n", "Episode 309, Total Reward: 1\n", "Average Reward: 0.28802588996763756\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", " O \n", "X \n", " X \n", "on move: X\n", " O \n", "X O \n", " X \n", "on move: O\n", " O \n", "X X O \n", " X \n", "on move: X\n", " O \n", "X X O \n", "O X \n", "on move: O\n", "X O \n", "X X O \n", "O X \n", "Episode 310, Total Reward: 1\n", "Average Reward: 0.2903225806451613\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " O \n", " \n", "X \n", "on move: O\n", " O \n", " \n", "X X \n", "on move: X\n", " O \n", " O \n", "X X \n", "on move: O\n", " O \n", "X O \n", "X X \n", "on move: X\n", " O O \n", "X O \n", "X X \n", "on move: O\n", " O O \n", "X O \n", "X X X \n", "Episode 311, Total Reward: 1\n", "Average Reward: 0.29260450160771706\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " \n", " O \n", "on move: O\n", "X \n", " \n", "X O \n", "on move: X\n", "X \n", " \n", "X O O \n", "on move: O\n", "X X \n", " \n", "X O O \n", "on move: X\n", "X X \n", " O \n", "X O O \n", "on move: O\n", "X X \n", "X O \n", "X O O \n", "Episode 312, Total Reward: 1\n", "Average Reward: 0.2948717948717949\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", " O \n", "on move: O\n", " X \n", " \n", "X O \n", "on move: X\n", " X \n", "O \n", "X O \n", "on move: O\n", "X X \n", "O \n", "X O \n", "on move: X\n", "X O X \n", "O \n", "X O \n", "on move: O\n", "X O X \n", "O \n", "X X O \n", "on move: X\n", "X O X \n", "O O \n", "X X O \n", "on move: O\n", "X O X \n", "O O X \n", "X X O \n", "Episode 313, Total Reward: 0\n", "Average Reward: 0.2939297124600639\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " \n", "O X \n", "on move: O\n", " X \n", " \n", "O X \n", "on move: X\n", "O X \n", " \n", "O X \n", "on move: O\n", "O X \n", " \n", "O X X \n", "on move: X\n", "O X O \n", " \n", "O X X \n", "on move: O\n", "O X O \n", "X \n", "O X X \n", "on move: X\n", "O X O \n", "X O \n", "O X X \n", "Episode 314, Total Reward: -1\n", "Average Reward: 0.2898089171974522\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " O \n", " X \n", "on move: O\n", " \n", " X O \n", " X \n", "on move: X\n", "O \n", " X O \n", " X \n", "on move: O\n", "O \n", "X X O \n", " X \n", "on move: X\n", "O \n", "X X O \n", "O X \n", "on move: O\n", "O X \n", "X X O \n", "O X \n", "on move: X\n", "O O X \n", "X X O \n", "O X \n", "on move: O\n", "O O X \n", "X X O \n", "O X X \n", "Episode 315, Total Reward: 0\n", "Average Reward: 0.28888888888888886\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", "O \n", " \n", "X \n", "on move: O\n", "O \n", " \n", "X X \n", "on move: X\n", "O \n", " O \n", "X X \n", "on move: O\n", "O \n", " O \n", "X X X \n", "Episode 316, Total Reward: 1\n", "Average Reward: 0.2911392405063291\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", " X \n", "X O \n", " \n", "on move: X\n", " X \n", "X O \n", " O \n", "on move: O\n", " X \n", "X O \n", "X O \n", "on move: X\n", "O X \n", "X O \n", "X O \n", "on move: O\n", "O X \n", "X O \n", "X X O \n", "on move: X\n", "O X O \n", "X O \n", "X X O \n", "Episode 317, Total Reward: -1\n", "Average Reward: 0.2870662460567823\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " \n", " X O \n", "on move: O\n", " \n", " X \n", " X O \n", "on move: X\n", " \n", "O X \n", " X O \n", "on move: O\n", " X \n", "O X \n", " X O \n", "on move: X\n", " X O \n", "O X \n", " X O \n", "on move: O\n", " X O \n", "O X X \n", " X O \n", "Episode 318, Total Reward: 1\n", "Average Reward: 0.2893081761006289\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " O \n", " X \n", "on move: O\n", " \n", " O \n", " X X \n", "on move: X\n", "O \n", " O \n", " X X \n", "on move: O\n", "O \n", " O X \n", " X X \n", "on move: X\n", "O \n", " O X \n", "O X X \n", "on move: O\n", "O X \n", " O X \n", "O X X \n", "on move: X\n", "O X \n", "O O X \n", "O X X \n", "Episode 319, Total Reward: -1\n", "Average Reward: 0.2852664576802508\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", " X \n", "X O \n", " \n", "on move: X\n", " X \n", "X O \n", " O \n", "on move: O\n", " X \n", "X O \n", "X O \n", "on move: X\n", "O X \n", "X O \n", "X O \n", "on move: O\n", "O X X \n", "X O \n", "X O \n", "on move: X\n", "O X X \n", "X O O \n", "X O \n", "Episode 320, Total Reward: -1\n", "Average Reward: 0.28125\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X O \n", " \n", " \n", "on move: O\n", "X X O \n", " \n", " \n", "on move: X\n", "X X O \n", " O \n", " \n", "on move: O\n", "X X O \n", "X O \n", " \n", "on move: X\n", "X X O \n", "X O O \n", " \n", "on move: O\n", "X X O \n", "X O O \n", " X \n", "on move: X\n", "X X O \n", "X O O \n", "O X \n", "Episode 321, Total Reward: -1\n", "Average Reward: 0.2772585669781931\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", "O \n", "on move: O\n", " X \n", " X \n", "O \n", "on move: X\n", " X \n", "O X \n", "O \n", "on move: O\n", " X \n", "O X \n", "O X \n", "on move: X\n", " X \n", "O X \n", "O O X \n", "on move: O\n", " X \n", "O X X \n", "O O X \n", "Episode 322, Total Reward: 1\n", "Average Reward: 0.2795031055900621\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " O \n", "X \n", "on move: O\n", " X \n", " O \n", "X \n", "on move: X\n", "O X \n", " O \n", "X \n", "on move: O\n", "O X \n", "X O \n", "X \n", "on move: X\n", "O X O \n", "X O \n", "X \n", "on move: O\n", "O X O \n", "X X O \n", "X \n", "on move: X\n", "O X O \n", "X X O \n", "X O \n", "Episode 323, Total Reward: -1\n", "Average Reward: 0.2755417956656347\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", "O X \n", " \n", " \n", "on move: O\n", "O X \n", " \n", " X \n", "on move: X\n", "O X \n", " \n", "O X \n", "on move: O\n", "O X X \n", " \n", "O X \n", "on move: X\n", "O X X \n", " O \n", "O X \n", "on move: O\n", "O X X \n", " X O \n", "O X \n", "Episode 324, Total Reward: 1\n", "Average Reward: 0.2777777777777778\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", " O X \n", " \n", " X \n", "on move: X\n", " O X \n", "O \n", " X \n", "on move: O\n", " O X \n", "O \n", " X X \n", "on move: X\n", " O X \n", "O O \n", " X X \n", "on move: O\n", " O X \n", "O O X \n", " X X \n", "Episode 325, Total Reward: 1\n", "Average Reward: 0.28\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X O \n", " \n", "on move: O\n", " \n", " X O \n", " X \n", "on move: X\n", " \n", " X O \n", "O X \n", "on move: O\n", " X \n", " X O \n", "O X \n", "on move: X\n", " O X \n", " X O \n", "O X \n", "on move: O\n", " O X \n", "X X O \n", "O X \n", "on move: X\n", " O X \n", "X X O \n", "O X O \n", "on move: O\n", "X O X \n", "X X O \n", "O X O \n", "Episode 326, Total Reward: 0\n", "Average Reward: 0.2791411042944785\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " \n", "X O \n", "on move: O\n", " \n", " X \n", "X O \n", "on move: X\n", " \n", " X \n", "X O O \n", "on move: O\n", " \n", "X X \n", "X O O \n", "on move: X\n", " O \n", "X X \n", "X O O \n", "on move: O\n", "X O \n", "X X \n", "X O O \n", "Episode 327, Total Reward: 1\n", "Average Reward: 0.28134556574923547\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " O \n", " X \n", "on move: O\n", " \n", " O \n", " X X \n", "on move: X\n", " \n", " O O \n", " X X \n", "on move: O\n", " X \n", " O O \n", " X X \n", "on move: X\n", " X \n", "O O O \n", " X X \n", "Episode 328, Total Reward: -1\n", "Average Reward: 0.2774390243902439\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", " O \n", " X \n", " X \n", "on move: X\n", " O \n", " X \n", "O X \n", "on move: O\n", " O \n", " X X \n", "O X \n", "on move: X\n", " O O \n", " X X \n", "O X \n", "on move: O\n", " O O \n", " X X \n", "O X X \n", "on move: X\n", "O O O \n", " X X \n", "O X X \n", "Episode 329, Total Reward: -1\n", "Average Reward: 0.2735562310030395\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " O \n", " X \n", "on move: O\n", " \n", " X O \n", " X \n", "on move: X\n", " \n", "O X O \n", " X \n", "on move: O\n", " \n", "O X O \n", " X X \n", "on move: X\n", " \n", "O X O \n", "O X X \n", "on move: O\n", "X \n", "O X O \n", "O X X \n", "Episode 330, Total Reward: 1\n", "Average Reward: 0.27575757575757576\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " O \n", " \n", "on move: O\n", "X X \n", " O \n", " \n", "on move: X\n", "X X O \n", " O \n", " \n", "on move: O\n", "X X O \n", " O X \n", " \n", "on move: X\n", "X X O \n", " O X \n", "O \n", "Episode 331, Total Reward: -1\n", "Average Reward: 0.2719033232628399\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " O X \n", " X \n", " \n", "on move: X\n", " O X \n", " X \n", "O \n", "on move: O\n", " O X \n", " X X \n", "O \n", "on move: X\n", "O O X \n", " X X \n", "O \n", "on move: O\n", "O O X \n", " X X \n", "O X \n", "on move: X\n", "O O X \n", " X X \n", "O X O \n", "on move: O\n", "O O X \n", "X X X \n", "O X O \n", "Episode 332, Total Reward: 1\n", "Average Reward: 0.2740963855421687\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", " X X \n", " O \n", " \n", "on move: X\n", " X X \n", " O \n", "O \n", "on move: O\n", " X X \n", " O \n", "O X \n", "on move: X\n", " X X \n", " O \n", "O O X \n", "on move: O\n", "X X X \n", " O \n", "O O X \n", "Episode 333, Total Reward: 1\n", "Average Reward: 0.27627627627627627\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " O X \n", " \n", "on move: O\n", "X \n", " O X \n", " \n", "on move: X\n", "X \n", " O X \n", " O \n", "on move: O\n", "X \n", " O X \n", " X O \n", "on move: X\n", "X O \n", " O X \n", " X O \n", "on move: O\n", "X O \n", "X O X \n", " X O \n", "on move: X\n", "X O \n", "X O X \n", "O X O \n", "on move: O\n", "X O X \n", "X O X \n", "O X O \n", "Episode 334, Total Reward: 0\n", "Average Reward: 0.2754491017964072\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", "X O \n", " X \n", " \n", "on move: X\n", "X O O \n", " X \n", " \n", "on move: O\n", "X O O \n", " X \n", "X \n", "on move: X\n", "X O O \n", " O X \n", "X \n", "on move: O\n", "X O O \n", "X O X \n", "X \n", "Episode 335, Total Reward: 1\n", "Average Reward: 0.27761194029850744\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", "O \n", "on move: O\n", " X X \n", " \n", "O \n", "on move: X\n", " X X \n", " \n", "O O \n", "on move: O\n", " X X \n", " \n", "O O X \n", "on move: X\n", " X X \n", " O \n", "O O X \n", "on move: O\n", "X X X \n", " O \n", "O O X \n", "Episode 336, Total Reward: 1\n", "Average Reward: 0.27976190476190477\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " \n", "O \n", "on move: O\n", "X \n", " X \n", "O \n", "on move: X\n", "X O \n", " X \n", "O \n", "on move: O\n", "X O \n", "X X \n", "O \n", "on move: X\n", "X O \n", "X X \n", "O O \n", "on move: O\n", "X O \n", "X X \n", "O X O \n", "on move: X\n", "X O O \n", "X X \n", "O X O \n", "on move: O\n", "X O O \n", "X X X \n", "O X O \n", "Episode 337, Total Reward: 1\n", "Average Reward: 0.2818991097922849\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", "O \n", "on move: O\n", " X \n", " X \n", "O \n", "on move: X\n", " O X \n", " X \n", "O \n", "on move: O\n", "X O X \n", " X \n", "O \n", "on move: X\n", "X O X \n", " X \n", "O O \n", "on move: O\n", "X O X \n", " X X \n", "O O \n", "on move: X\n", "X O X \n", " X X \n", "O O O \n", "Episode 338, Total Reward: -1\n", "Average Reward: 0.2781065088757396\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " O \n", " X \n", "on move: O\n", " \n", " O \n", "X X \n", "on move: X\n", " O \n", " O \n", "X X \n", "on move: O\n", " O X \n", " O \n", "X X \n", "on move: X\n", " O X \n", " O O \n", "X X \n", "on move: O\n", " O X \n", " O O \n", "X X X \n", "Episode 339, Total Reward: 1\n", "Average Reward: 0.28023598820059\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X O \n", " \n", "on move: O\n", "X \n", "X O \n", " \n", "on move: X\n", "X \n", "X O \n", " O \n", "on move: O\n", "X \n", "X O \n", " X O \n", "on move: X\n", "X O \n", "X O \n", " X O \n", "Episode 340, Total Reward: -1\n", "Average Reward: 0.27647058823529413\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " \n", "O \n", "on move: O\n", "X X \n", " \n", "O \n", "on move: X\n", "X X O \n", " \n", "O \n", "on move: O\n", "X X O \n", " \n", "O X \n", "on move: X\n", "X X O \n", " O \n", "O X \n", "Episode 341, Total Reward: -1\n", "Average Reward: 0.2727272727272727\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", "O X \n", " \n", "on move: O\n", " \n", "O X \n", "X \n", "on move: X\n", " \n", "O X \n", "X O \n", "on move: O\n", " \n", "O X \n", "X X O \n", "on move: X\n", " O \n", "O X \n", "X X O \n", "on move: O\n", "X O \n", "O X \n", "X X O \n", "on move: X\n", "X O \n", "O O X \n", "X X O \n", "on move: O\n", "X X O \n", "O O X \n", "X X O \n", "Episode 342, Total Reward: 0\n", "Average Reward: 0.2719298245614035\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", "O \n", " \n", "on move: O\n", " X X \n", "O \n", " \n", "on move: X\n", " X X \n", "O \n", " O \n", "on move: O\n", " X X \n", "O X \n", " O \n", "on move: X\n", " X X \n", "O X \n", " O O \n", "on move: O\n", " X X \n", "O X \n", "X O O \n", "on move: X\n", "O X X \n", "O X \n", "X O O \n", "on move: O\n", "O X X \n", "O X X \n", "X O O \n", "Episode 343, Total Reward: 1\n", "Average Reward: 0.27405247813411077\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", " O \n", " X \n", " X \n", "on move: X\n", " O \n", " X \n", "O X \n", "on move: O\n", " O \n", " X X \n", "O X \n", "on move: X\n", " O \n", " X X \n", "O X O \n", "on move: O\n", " O \n", "X X X \n", "O X O \n", "Episode 344, Total Reward: 1\n", "Average Reward: 0.2761627906976744\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", "O \n", " \n", "X \n", "on move: O\n", "O X \n", " \n", "X \n", "on move: X\n", "O X \n", "O \n", "X \n", "on move: O\n", "O X \n", "O X \n", "X \n", "on move: X\n", "O X \n", "O X \n", "X O \n", "on move: O\n", "O X \n", "O X X \n", "X O \n", "Episode 345, Total Reward: 1\n", "Average Reward: 0.2782608695652174\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X O \n", " \n", " \n", "on move: O\n", "X O \n", " \n", "X \n", "on move: X\n", "X O \n", "O \n", "X \n", "on move: O\n", "X O \n", "O X \n", "X \n", "on move: X\n", "X O \n", "O X \n", "X O \n", "on move: O\n", "X O \n", "O X X \n", "X O \n", "on move: X\n", "X O O \n", "O X X \n", "X O \n", "on move: O\n", "X O O \n", "O X X \n", "X O X \n", "Episode 346, Total Reward: 1\n", "Average Reward: 0.28034682080924855\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", " O \n", "on move: O\n", "X \n", "X \n", " O \n", "on move: X\n", "X O \n", "X \n", " O \n", "on move: O\n", "X O \n", "X \n", "X O \n", "Episode 347, Total Reward: 1\n", "Average Reward: 0.2824207492795389\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", " O \n", "on move: O\n", " \n", "X \n", "X O \n", "on move: X\n", " \n", "X O \n", "X O \n", "on move: O\n", "X \n", "X O \n", "X O \n", "Episode 348, Total Reward: 1\n", "Average Reward: 0.28448275862068967\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", "O \n", "on move: O\n", " \n", "X X \n", "O \n", "on move: X\n", " O \n", "X X \n", "O \n", "on move: O\n", " O \n", "X X \n", "O X \n", "on move: X\n", " O O \n", "X X \n", "O X \n", "on move: O\n", "X O O \n", "X X \n", "O X \n", "on move: X\n", "X O O \n", "X X \n", "O O X \n", "on move: O\n", "X O O \n", "X X X \n", "O O X \n", "Episode 349, Total Reward: 1\n", "Average Reward: 0.28653295128939826\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " O \n", " \n", "X \n", "on move: O\n", " O \n", "X \n", "X \n", "on move: X\n", " O O \n", "X \n", "X \n", "on move: O\n", "X O O \n", "X \n", "X \n", "Episode 350, Total Reward: 1\n", "Average Reward: 0.2885714285714286\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " O \n", "X \n", "on move: O\n", " X \n", " O \n", "X \n", "on move: X\n", " X O \n", " O \n", "X \n", "on move: O\n", "X X O \n", " O \n", "X \n", "on move: X\n", "X X O \n", " O \n", "X O \n", "on move: O\n", "X X O \n", "X O \n", "X O \n", "Episode 351, Total Reward: 1\n", "Average Reward: 0.2905982905982906\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", " O \n", " X \n", " X \n", "on move: X\n", "O O \n", " X \n", " X \n", "on move: O\n", "O O \n", " X X \n", " X \n", "on move: X\n", "O O \n", " X X \n", " X O \n", "on move: O\n", "O O \n", "X X X \n", " X O \n", "Episode 352, Total Reward: 1\n", "Average Reward: 0.29261363636363635\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", "O \n", "X \n", " \n", "on move: O\n", "O \n", "X \n", " X \n", "on move: X\n", "O O \n", "X \n", " X \n", "on move: O\n", "O O \n", "X X \n", " X \n", "on move: X\n", "O O \n", "X X O \n", " X \n", "on move: O\n", "O X O \n", "X X O \n", " X \n", "Episode 353, Total Reward: 1\n", "Average Reward: 0.29461756373937675\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", "O \n", " \n", "on move: O\n", "X X \n", "O \n", " \n", "on move: X\n", "X X \n", "O \n", " O \n", "on move: O\n", "X X X \n", "O \n", " O \n", "Episode 354, Total Reward: 1\n", "Average Reward: 0.2966101694915254\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", "O X \n", " \n", "on move: O\n", " \n", "O X X \n", " \n", "on move: X\n", " \n", "O X X \n", "O \n", "on move: O\n", " X \n", "O X X \n", "O \n", "on move: X\n", "O X \n", "O X X \n", "O \n", "Episode 355, Total Reward: -1\n", "Average Reward: 0.29295774647887324\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", "O \n", "on move: O\n", " \n", "X X \n", "O \n", "on move: X\n", " \n", "X X \n", "O O \n", "on move: O\n", " \n", "X X \n", "O O X \n", "on move: X\n", "O \n", "X X \n", "O O X \n", "on move: O\n", "O \n", "X X X \n", "O O X \n", "Episode 356, Total Reward: 1\n", "Average Reward: 0.2949438202247191\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " \n", "X O \n", "on move: O\n", " \n", " X \n", "X O \n", "on move: X\n", " O \n", " X \n", "X O \n", "on move: O\n", " O \n", " X X \n", "X O \n", "on move: X\n", "O O \n", " X X \n", "X O \n", "on move: O\n", "O O \n", "X X X \n", "X O \n", "Episode 357, Total Reward: 1\n", "Average Reward: 0.2969187675070028\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", " O \n", "on move: O\n", " X \n", "X \n", " O \n", "on move: X\n", " X \n", "X O \n", " O \n", "on move: O\n", " X \n", "X O \n", "X O \n", "on move: X\n", " X \n", "X O O \n", "X O \n", "on move: O\n", " X \n", "X O O \n", "X X O \n", "on move: X\n", "O X \n", "X O O \n", "X X O \n", "Episode 358, Total Reward: -1\n", "Average Reward: 0.29329608938547486\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X O \n", " \n", "on move: O\n", "X \n", "X O \n", " \n", "on move: X\n", "X \n", "X O \n", "O \n", "on move: O\n", "X \n", "X X O \n", "O \n", "on move: X\n", "X O \n", "X X O \n", "O \n", "on move: O\n", "X O \n", "X X O \n", "O X \n", "Episode 359, Total Reward: 1\n", "Average Reward: 0.29526462395543174\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X O \n", " \n", " \n", "on move: O\n", "X O \n", " X \n", " \n", "on move: X\n", "X O \n", " X \n", " O \n", "on move: O\n", "X X O \n", " X \n", " O \n", "on move: X\n", "X X O \n", "O X \n", " O \n", "on move: O\n", "X X O \n", "O X X \n", " O \n", "on move: X\n", "X X O \n", "O X X \n", "O O \n", "on move: O\n", "X X O \n", "O X X \n", "O X O \n", "Episode 360, Total Reward: 1\n", "Average Reward: 0.2972222222222222\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", "O \n", "on move: O\n", " X \n", " X \n", "O \n", "on move: X\n", " X \n", " O X \n", "O \n", "on move: O\n", " X \n", "X O X \n", "O \n", "on move: X\n", " X \n", "X O X \n", "O O \n", "on move: O\n", " X X \n", "X O X \n", "O O \n", "on move: X\n", "O X X \n", "X O X \n", "O O \n", "on move: O\n", "O X X \n", "X O X \n", "O O X \n", "Episode 361, Total Reward: 1\n", "Average Reward: 0.29916897506925205\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " O \n", " X \n", "on move: O\n", " \n", " O \n", "X X \n", "on move: X\n", " O \n", " O \n", "X X \n", "on move: O\n", " O \n", "X O \n", "X X \n", "on move: X\n", "O O \n", "X O \n", "X X \n", "on move: O\n", "O O \n", "X O X \n", "X X \n", "on move: X\n", "O O \n", "X O X \n", "X X O \n", "Episode 362, Total Reward: -1\n", "Average Reward: 0.2955801104972376\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", "O \n", "on move: O\n", "X X \n", " \n", "O \n", "on move: X\n", "X X \n", " \n", "O O \n", "on move: O\n", "X X X \n", " \n", "O O \n", "Episode 363, Total Reward: 1\n", "Average Reward: 0.2975206611570248\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " O \n", "X \n", " \n", "on move: O\n", " O \n", "X \n", " X \n", "on move: X\n", " O \n", "X O \n", " X \n", "on move: O\n", " O \n", "X O \n", "X X \n", "on move: X\n", " O \n", "X O \n", "X X O \n", "on move: O\n", "X O \n", "X O \n", "X X O \n", "Episode 364, Total Reward: 1\n", "Average Reward: 0.29945054945054944\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", " X X \n", " O \n", " \n", "on move: X\n", " X X \n", " O O \n", " \n", "on move: O\n", " X X \n", " O O \n", " X \n", "on move: X\n", " X X \n", " O O \n", " O X \n", "on move: O\n", " X X \n", "X O O \n", " O X \n", "on move: X\n", "O X X \n", "X O O \n", " O X \n", "on move: O\n", "O X X \n", "X O O \n", "X O X \n", "Episode 365, Total Reward: 0\n", "Average Reward: 0.29863013698630136\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " \n", " X O \n", "on move: O\n", " \n", " X \n", " X O \n", "on move: X\n", " \n", " X \n", "O X O \n", "on move: O\n", " X \n", " X \n", "O X O \n", "on move: X\n", " O X \n", " X \n", "O X O \n", "on move: O\n", " O X \n", "X X \n", "O X O \n", "on move: X\n", "O O X \n", "X X \n", "O X O \n", "on move: O\n", "O O X \n", "X X X \n", "O X O \n", "Episode 366, Total Reward: 1\n", "Average Reward: 0.3005464480874317\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " O \n", "X \n", "on move: O\n", " \n", " O \n", "X X \n", "on move: X\n", " \n", " O O \n", "X X \n", "on move: O\n", " X \n", " O O \n", "X X \n", "on move: X\n", " O X \n", " O O \n", "X X \n", "on move: O\n", " O X \n", " O O \n", "X X X \n", "Episode 367, Total Reward: 1\n", "Average Reward: 0.3024523160762943\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", "O X \n", " \n", " \n", "on move: O\n", "O X \n", " \n", " X \n", "on move: X\n", "O X \n", " O \n", " X \n", "on move: O\n", "O X \n", " O X \n", " X \n", "on move: X\n", "O X \n", "O O X \n", " X \n", "on move: O\n", "O X \n", "O O X \n", " X X \n", "on move: X\n", "O X \n", "O O X \n", "O X X \n", "Episode 368, Total Reward: -1\n", "Average Reward: 0.29891304347826086\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", "O \n", " \n", "on move: O\n", "X \n", "O \n", "X \n", "on move: X\n", "X \n", "O \n", "X O \n", "on move: O\n", "X \n", "O \n", "X O X \n", "on move: X\n", "X \n", "O O \n", "X O X \n", "on move: O\n", "X X \n", "O O \n", "X O X \n", "on move: X\n", "X X O \n", "O O \n", "X O X \n", "on move: O\n", "X X O \n", "O X O \n", "X O X \n", "Episode 369, Total Reward: 1\n", "Average Reward: 0.3008130081300813\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", "O \n", " X \n", "on move: O\n", " \n", "O \n", " X X \n", "on move: X\n", " \n", "O \n", "O X X \n", "on move: O\n", "X \n", "O \n", "O X X \n", "on move: X\n", "X O \n", "O \n", "O X X \n", "on move: O\n", "X X O \n", "O \n", "O X X \n", "on move: X\n", "X X O \n", "O O \n", "O X X \n", "Episode 370, Total Reward: -1\n", "Average Reward: 0.2972972972972973\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " O \n", " X \n", " X \n", "on move: X\n", " O \n", "O X \n", " X \n", "on move: O\n", " X O \n", "O X \n", " X \n", "on move: X\n", "O X O \n", "O X \n", " X \n", "on move: O\n", "O X O \n", "O X \n", "X X \n", "on move: X\n", "O X O \n", "O X O \n", "X X \n", "on move: O\n", "O X O \n", "O X O \n", "X X X \n", "Episode 371, Total Reward: 1\n", "Average Reward: 0.2991913746630728\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " X O \n", " X \n", " \n", "on move: X\n", " X O \n", " O X \n", " \n", "on move: O\n", " X O \n", "X O X \n", " \n", "on move: X\n", "O X O \n", "X O X \n", " \n", "on move: O\n", "O X O \n", "X O X \n", "X \n", "on move: X\n", "O X O \n", "X O X \n", "X O \n", "Episode 372, Total Reward: -1\n", "Average Reward: 0.2956989247311828\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", "O X \n", " \n", "on move: O\n", " \n", "O X \n", " X \n", "on move: X\n", " \n", "O X \n", "O X \n", "on move: O\n", " \n", "O X \n", "O X X \n", "on move: X\n", " O \n", "O X \n", "O X X \n", "on move: O\n", "X O \n", "O X \n", "O X X \n", "on move: X\n", "X O O \n", "O X \n", "O X X \n", "on move: O\n", "X O O \n", "O X X \n", "O X X \n", "Episode 373, Total Reward: 1\n", "Average Reward: 0.2975871313672922\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " O X \n", " \n", "on move: O\n", " \n", " O X \n", " X \n", "on move: X\n", " O \n", " O X \n", " X \n", "on move: O\n", " O X \n", " O X \n", " X \n", "on move: X\n", " O X \n", " O X \n", " X O \n", "on move: O\n", " O X \n", "X O X \n", " X O \n", "on move: X\n", "O O X \n", "X O X \n", " X O \n", "Episode 374, Total Reward: -1\n", "Average Reward: 0.29411764705882354\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", "X X \n", " O \n", " \n", "on move: X\n", "X X \n", " O O \n", " \n", "on move: O\n", "X X \n", " O O \n", " X \n", "on move: X\n", "X X O \n", " O O \n", " X \n", "on move: O\n", "X X O \n", " O O \n", " X X \n", "on move: X\n", "X X O \n", "O O O \n", " X X \n", "Episode 375, Total Reward: -1\n", "Average Reward: 0.2906666666666667\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", " O \n", "on move: O\n", "X X \n", " \n", " O \n", "on move: X\n", "X X \n", "O \n", " O \n", "on move: O\n", "X X \n", "O X \n", " O \n", "on move: X\n", "X X \n", "O X \n", "O O \n", "on move: O\n", "X X \n", "O X \n", "O O X \n", "Episode 376, Total Reward: 1\n", "Average Reward: 0.2925531914893617\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", " O \n", "on move: O\n", " X \n", "X \n", " O \n", "on move: X\n", " X \n", "X O \n", " O \n", "on move: O\n", " X \n", "X O \n", " O X \n", "on move: X\n", "O X \n", "X O \n", " O X \n", "on move: O\n", "O X \n", "X O \n", "X O X \n", "on move: X\n", "O X \n", "X O O \n", "X O X \n", "on move: O\n", "O X X \n", "X O O \n", "X O X \n", "Episode 377, Total Reward: 0\n", "Average Reward: 0.2917771883289125\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " \n", "O \n", "on move: O\n", "X \n", " X \n", "O \n", "on move: X\n", "X O \n", " X \n", "O \n", "on move: O\n", "X O \n", "X X \n", "O \n", "on move: X\n", "X O \n", "X X O \n", "O \n", "on move: O\n", "X O \n", "X X O \n", "O X \n", "on move: X\n", "X O O \n", "X X O \n", "O X \n", "on move: O\n", "X O O \n", "X X O \n", "O X X \n", "Episode 378, Total Reward: 1\n", "Average Reward: 0.29365079365079366\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " O \n", " \n", "X \n", "on move: O\n", " O \n", " \n", "X X \n", "on move: X\n", " O \n", " O \n", "X X \n", "on move: O\n", "X O \n", " O \n", "X X \n", "on move: X\n", "X O O \n", " O \n", "X X \n", "on move: O\n", "X O O \n", "X O \n", "X X \n", "Episode 379, Total Reward: 1\n", "Average Reward: 0.2955145118733509\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " O \n", " \n", "on move: O\n", " X \n", " X O \n", " \n", "on move: X\n", "O X \n", " X O \n", " \n", "on move: O\n", "O X \n", " X O \n", "X \n", "Episode 380, Total Reward: 1\n", "Average Reward: 0.29736842105263156\n", "on move: O\n", "X \n", " \n", " \n", "on move: X\n", "X \n", " O \n", " \n", "on move: O\n", "X \n", " O X \n", " \n", "on move: X\n", "X \n", "O O X \n", " \n", "on move: O\n", "X \n", "O O X \n", " X \n", "on move: X\n", "X O \n", "O O X \n", " X \n", "on move: O\n", "X O \n", "O O X \n", "X X \n", "on move: X\n", "X O \n", "O O X \n", "X X O \n", "on move: O\n", "X O X \n", "O O X \n", "X X O \n", "Episode 381, Total Reward: 0\n", "Average Reward: 0.29658792650918636\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X \n", "O \n", "on move: O\n", " X \n", "X \n", "O \n", "on move: X\n", " X \n", "X O \n", "O \n", "on move: O\n", " X \n", "X O \n", "O X \n", "on move: X\n", " X \n", "X O O \n", "O X \n", "on move: O\n", " X \n", "X O O \n", "O X X \n", "on move: X\n", "O X \n", "X O O \n", "O X X \n", "on move: O\n", "O X X \n", "X O O \n", "O X X \n", "Episode 382, Total Reward: 0\n", "Average Reward: 0.29581151832460734\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", "O \n", " X \n", " \n", "on move: O\n", "O \n", "X X \n", " \n", "on move: X\n", "O O \n", "X X \n", " \n", "on move: O\n", "O O \n", "X X \n", "X \n", "on move: X\n", "O O \n", "X X \n", "X O \n", "on move: O\n", "O O \n", "X X \n", "X O X \n", "on move: X\n", "O O O \n", "X X \n", "X O X \n", "Episode 383, Total Reward: -1\n", "Average Reward: 0.2924281984334204\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", "O \n", "X \n", "on move: O\n", " \n", "O X \n", "X \n", "on move: X\n", " \n", "O X O \n", "X \n", "on move: O\n", " X \n", "O X O \n", "X \n", "Episode 384, Total Reward: 1\n", "Average Reward: 0.2942708333333333\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", "O \n", "on move: O\n", " X \n", " \n", "O X \n", "on move: X\n", " X \n", " O \n", "O X \n", "on move: O\n", " X X \n", " O \n", "O X \n", "on move: X\n", " X X \n", " O \n", "O O X \n", "on move: O\n", " X X \n", " X O \n", "O O X \n", "on move: X\n", " X X \n", "O X O \n", "O O X \n", "on move: O\n", "X X X \n", "O X O \n", "O O X \n", "Episode 385, Total Reward: 1\n", "Average Reward: 0.2961038961038961\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", "O \n", " \n", " X \n", "on move: O\n", "O \n", " \n", " X X \n", "on move: X\n", "O \n", " \n", "O X X \n", "on move: O\n", "O X \n", " \n", "O X X \n", "on move: X\n", "O X \n", " O \n", "O X X \n", "on move: O\n", "O X \n", "X O \n", "O X X \n", "on move: X\n", "O O X \n", "X O \n", "O X X \n", "on move: O\n", "O O X \n", "X O X \n", "O X X \n", "Episode 386, Total Reward: 1\n", "Average Reward: 0.2979274611398964\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " \n", " \n", " X O \n", "on move: O\n", " X \n", " \n", " X O \n", "on move: X\n", " X \n", " O \n", " X O \n", "on move: O\n", "X X \n", " O \n", " X O \n", "on move: X\n", "X X \n", "O O \n", " X O \n", "on move: O\n", "X X \n", "O O X \n", " X O \n", "on move: X\n", "X O X \n", "O O X \n", " X O \n", "on move: O\n", "X O X \n", "O O X \n", "X X O \n", "Episode 387, Total Reward: 0\n", "Average Reward: 0.2971576227390181\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " X \n", " O \n", "on move: O\n", "X \n", " X \n", " O \n", "on move: X\n", "X \n", "O X \n", " O \n", "on move: O\n", "X \n", "O X \n", "X O \n", "on move: X\n", "X O \n", "O X \n", "X O \n", "on move: O\n", "X X O \n", "O X \n", "X O \n", "on move: X\n", "X X O \n", "O X \n", "X O O \n", "on move: O\n", "X X O \n", "O X X \n", "X O O \n", "Episode 388, Total Reward: 0\n", "Average Reward: 0.2963917525773196\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", "O \n", " X \n", " \n", "on move: O\n", "O \n", " X \n", " X \n", "on move: X\n", "O \n", "O X \n", " X \n", "on move: O\n", "O \n", "O X X \n", " X \n", "on move: X\n", "O \n", "O X X \n", " O X \n", "on move: O\n", "O \n", "O X X \n", "X O X \n", "on move: X\n", "O O \n", "O X X \n", "X O X \n", "on move: O\n", "O O X \n", "O X X \n", "X O X \n", "Episode 389, Total Reward: 1\n", "Average Reward: 0.2982005141388175\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " O \n", "X X \n", " \n", "on move: X\n", " O \n", "X X \n", " O \n", "on move: O\n", " O \n", "X X \n", "X O \n", "on move: X\n", " O \n", "X X \n", "X O O \n", "on move: O\n", "X O \n", "X X \n", "X O O \n", "Episode 390, Total Reward: 1\n", "Average Reward: 0.3\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " O \n", " X \n", " \n", "on move: O\n", " O \n", "X X \n", " \n", "on move: X\n", " O O \n", "X X \n", " \n", "on move: O\n", " O O \n", "X X \n", " X \n", "on move: X\n", " O O \n", "X X \n", " X O \n", "on move: O\n", "X O O \n", "X X \n", " X O \n", "on move: X\n", "X O O \n", "X X O \n", " X O \n", "Episode 391, Total Reward: -1\n", "Average Reward: 0.2966751918158568\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X O \n", " \n", "on move: O\n", " \n", "X O \n", " X \n", "on move: X\n", "O \n", "X O \n", " X \n", "on move: O\n", "O X \n", "X O \n", " X \n", "on move: X\n", "O X \n", "X O O \n", " X \n", "on move: O\n", "O X \n", "X O O \n", "X X \n", "on move: X\n", "O X O \n", "X O O \n", "X X \n", "on move: O\n", "O X O \n", "X O O \n", "X X X \n", "Episode 392, Total Reward: 1\n", "Average Reward: 0.29846938775510207\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", " O \n", " \n", "X X \n", "on move: X\n", " O \n", " O \n", "X X \n", "on move: O\n", " O \n", " X O \n", "X X \n", "on move: X\n", " O \n", "O X O \n", "X X \n", "on move: O\n", "X O \n", "O X O \n", "X X \n", "on move: X\n", "X O \n", "O X O \n", "X X O \n", "Episode 393, Total Reward: -1\n", "Average Reward: 0.2951653944020356\n", "on move: O\n", " X \n", " \n", " \n", "on move: X\n", " X \n", " \n", " O \n", "on move: O\n", "X X \n", " \n", " O \n", "on move: X\n", "X X \n", "O \n", " O \n", "on move: O\n", "X X \n", "O \n", "X O \n", "on move: X\n", "X O X \n", "O \n", "X O \n", "on move: O\n", "X O X \n", "O \n", "X X O \n", "on move: X\n", "X O X \n", "O O \n", "X X O \n", "on move: O\n", "X O X \n", "O X O \n", "X X O \n", "Episode 394, Total Reward: 1\n", "Average Reward: 0.2969543147208122\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", "O X \n", " \n", "on move: O\n", " \n", "O X \n", " X \n", "on move: X\n", " O \n", "O X \n", " X \n", "on move: O\n", " X O \n", "O X \n", " X \n", "on move: X\n", " X O \n", "O X \n", " O X \n", "on move: O\n", " X O \n", "O X \n", "X O X \n", "on move: X\n", "O X O \n", "O X \n", "X O X \n", "on move: O\n", "O X O \n", "O X X \n", "X O X \n", "Episode 395, Total Reward: 0\n", "Average Reward: 0.29620253164556964\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " O \n", "X \n", " \n", "on move: O\n", " O \n", "X X \n", " \n", "on move: X\n", " O \n", "X X O \n", " \n", "on move: O\n", " O \n", "X X O \n", " X \n", "on move: X\n", " O \n", "X X O \n", " O X \n", "on move: O\n", "X O \n", "X X O \n", " O X \n", "Episode 396, Total Reward: 1\n", "Average Reward: 0.29797979797979796\n", "on move: O\n", " \n", " X \n", " \n", "on move: X\n", " \n", " O X \n", " \n", "on move: O\n", " \n", " O X \n", "X \n", "on move: X\n", "O \n", " O X \n", "X \n", "on move: O\n", "O \n", " O X \n", "X X \n", "on move: X\n", "O O \n", " O X \n", "X X \n", "on move: O\n", "O O \n", " O X \n", "X X X \n", "Episode 397, Total Reward: 1\n", "Average Reward: 0.29974811083123426\n", "on move: O\n", " \n", " \n", "X \n", "on move: X\n", " \n", " O \n", "X \n", "on move: O\n", " X \n", " O \n", "X \n", "on move: X\n", " X \n", "O O \n", "X \n", "on move: O\n", " X X \n", "O O \n", "X \n", "on move: X\n", " X X \n", "O O \n", "X O \n", "on move: O\n", "X X X \n", "O O \n", "X O \n", "Episode 398, Total Reward: 1\n", "Average Reward: 0.3015075376884422\n", "on move: O\n", " \n", " \n", " X \n", "on move: X\n", " O \n", " \n", " X \n", "on move: O\n", " O \n", " X \n", " X \n", "on move: X\n", " O \n", "O X \n", " X \n", "on move: O\n", " O \n", "O X \n", "X X \n", "on move: X\n", "O O \n", "O X \n", "X X \n", "on move: O\n", "O O \n", "O X \n", "X X X \n", "Episode 399, Total Reward: 1\n", "Average Reward: 0.3032581453634085\n", "on move: O\n", " \n", "X \n", " \n", "on move: X\n", " \n", "X O \n", " \n", "on move: O\n", " \n", "X O \n", " X \n", "on move: X\n", " O \n", "X O \n", " X \n", "on move: O\n", " O \n", "X O X \n", " X \n", "on move: X\n", " O \n", "X O X \n", " X O \n", "on move: O\n", "X O \n", "X O X \n", " X O \n", "on move: X\n", "X O O \n", "X O X \n", " X O \n", "on move: O\n", "X O O \n", "X O X \n", "X X O \n", "Episode 400, Total Reward: 1\n", "Average Reward: 0.305\n" ] } ], "source": [ "env = TicTacToeEnv()\n", "\n", "agent = RandomTicTacToeAgent(symbol=1)\n", "\n", "num_episodes = 400\n", "collected_rewards = []\n", "\n", "oom = 1\n", "\n", "for i in range(num_episodes):\n", " state, _ = env.reset() \n", " total_reward = 0\n", " done = False\n", " om = oom \n", "\n", " for j in range(9): \n", " moves = env.move_generator() \n", "\n", " if not moves:\n", " break\n", "\n", " if len(moves) == 1:\n", " move = moves[0]\n", " else:\n", " move = agent.get_action(moves)\n", "\n", " next_state, reward, done, info = env.step(move)\n", " total_reward += reward\n", " state = next_state\n", "\n", " env.render()\n", "\n", " if done:\n", " break\n", "\n", " om = -om\n", "\n", " collected_rewards.append(total_reward)\n", "\n", " print(f\"Episode {i+1}, Total Reward: {total_reward}\")\n", " average_reward = sum(collected_rewards) / len(collected_rewards)\n", " print(f\"Average Reward: {average_reward}\")\n", "\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.7" } }, "nbformat": 4, "nbformat_minor": 2 }