# Lab 6: Tic-tac-toe.
# Upstream environment: https://github.com/nczempin/gym-tic-tac-toe/tree/master
#
# Part 1: porting the environment to Gymnasium.

import random

import gymnasium as gym
from gymnasium import spaces


class TicTacToeEnv(gym.Env):
    """Two-player tic-tac-toe environment.

    State is a dict with two keys:
      * ``'board'``   - list of 9 ints in row-major order
                        (1 = X, -1 = O, 0 = empty);
      * ``'on_move'`` - the player whose turn it is (1 or -1).

    An action is a pair ``(player, square)`` with ``square`` in 0..8.
    Rewards are expressed from X's point of view: +1 when X wins,
    -1 when O wins, 0 otherwise.
    """

    # Gymnasium reads the 'render_modes' key; 'render.modes' was the legacy
    # Gym spelling.
    metadata = {'render_modes': ['human']}

    # Indexed by (cell value + 1): -1 -> 'O', 0 -> ' ', 1 -> 'X'.
    symbols = ['O', ' ', 'X']

    # Every index triple that wins the game: rows, columns, diagonals.
    _WIN_LINES = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 4, 8), (2, 4, 6),
    )

    def __init__(self):
        super().__init__()
        self.action_space = spaces.Discrete(9)
        # NOTE(review): the observation actually returned is the state dict,
        # which this Discrete space does not describe. Left unchanged because
        # code outside this block may rely on it.
        self.observation_space = spaces.Discrete(9 * 3 * 2)
        self.reset()

    def step(self, action):
        """Apply one move and report the outcome.

        Args:
            action: pair ``(player, square)``; ``player`` is 1 or -1 and
                ``square`` is the board index to mark.

        Returns:
            ``(state, reward, done, info)``. An illegal move (occupied
            square or wrong player) ends the episode with reward
            ``-1 * on_move`` and leaves the board untouched. A winning move
            ends the episode with reward equal to the mover. A move that
            fills the board without a winner ends the episode with reward 0.

        NOTE(review): Gymnasium's ``Env.step`` contract is the 5-tuple
        ``(obs, reward, terminated, truncated, info)``. The 4-tuple is kept
        here so existing callers that unpack four values keep working.
        """
        p, square = action  # p - player (1 or -1), square - cell index

        board = self.state['board']
        om = self.state['on_move']

        illegal = False
        if board[square] != 0:  # the cell is already taken
            print(f"Незаконный ход: Квадрат {square} уже занят.")
            illegal = True
        if p != om:  # it is not this player's turn
            print(f"Незаконный ход: игрок {p} не находится в движении")
            illegal = True
        if illegal:
            # Return before touching the board: previously an occupied
            # square could still be overwritten when the right player moved,
            # and the win check below could replace the penalty.
            return self.state, -1 * om, True, {}

        board[square] = p
        self.state['on_move'] = -p

        if any(all(board[i] == p for i in line) for line in self._WIN_LINES):
            return self.state, p, True, {}

        # No empty cell left and nobody won: the game is a draw.
        done = 0 not in board
        return self.state, 0, done, {}

    def reset(self, *, seed=None, options=None):
        """Start a new game with an empty board and X (1) on move.

        Args:
            seed: optional seed forwarded to ``gym.Env.reset``.
            options: accepted for Gymnasium API compatibility; unused.

        Returns:
            ``(state, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        self.state = {
            'board': [0] * 9,
            'on_move': 1,
        }
        return self.state, {}

    def render(self, close=False):
        """Print the side to move and the 3x3 board to stdout.

        Args:
            close: when true, do nothing (kept from the legacy Gym API).
        """
        if close:
            return
        print("on move: " , self.symbols[self.state['on_move']+1])
        cells = [self.symbols[value + 1] for value in self.state['board']]
        for start in range(0, 9, 3):
            # Each cell is followed by one space, then the row is terminated.
            print(*cells[start:start + 3], end=" \n")

    def move_generator(self):
        """Return every legal move as ``[player_on_move, square]``."""
        p = self.state['on_move']
        return [[p, i] for i, cell in enumerate(self.state['board']) if cell == 0]


# Part 2: the agent.


class RandomTicTacToeAgent:
    """Agent that plays a uniformly random move from the ones offered."""

    def __init__(self, symbol):
        self.symbol = symbol  # player symbol (1 - X, -1 - O)

    def get_action(self, moves):
        """Pick one move at random.

        Args:
            moves: non-empty sequence of candidate moves.

        Returns:
            One element of ``moves``.

        Raises:
            IndexError: if ``moves`` is empty (raised by ``random.choice``).
        """
        return random.choice(moves)
X\n", + " \n", + " \n", + " X O \n", + "on move: O\n", + "X \n", + " \n", + " X O \n", + "on move: X\n", + "X O \n", + " \n", + " X O \n", + "on move: O\n", + "X X O \n", + " \n", + " X O \n", + "on move: X\n", + "X X O \n", + " \n", + "O X O \n", + "on move: O\n", + "X X O \n", + " X \n", + "O X O \n", + "on move: X\n", + "X X O \n", + "O X \n", + "O X O \n", + "on move: O\n", + "X X O \n", + "O X X \n", + "O X O \n", + "Episode 1, Total Reward: 1\n", + "Average Reward: 1.0\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + " O \n", + "on move: O\n", + " \n", + " X \n", + " O X \n", + "on move: X\n", + " \n", + "O X \n", + " O X \n", + "on move: O\n", + " \n", + "O X \n", + "X O X \n", + "on move: X\n", + " O \n", + "O X \n", + "X O X \n", + "on move: O\n", + " O \n", + "O X X \n", + "X O X \n", + "on move: X\n", + " O O \n", + "O X X \n", + "X O X \n", + "on move: O\n", + "X O O \n", + "O X X \n", + "X O X \n", + "Episode 2, Total Reward: 1\n", + "Average Reward: 1.0\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + "O \n", + " X \n", + "on move: O\n", + " \n", + "O X \n", + " X \n", + "on move: X\n", + " \n", + "O X \n", + "O X \n", + "on move: O\n", + " X \n", + "O X \n", + "O X \n", + "Episode 3, Total Reward: 1\n", + "Average Reward: 1.0\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + " O \n", + "on move: O\n", + " X \n", + " \n", + " O X \n", + "on move: X\n", + " X \n", + " \n", + "O O X \n", + "on move: O\n", + " X \n", + "X \n", + "O O X \n", + "on move: X\n", + " X O \n", + "X \n", + "O O X \n", + "on move: O\n", + " X O \n", + "X X \n", + "O O X \n", + "on move: X\n", + " X O \n", + "X X O \n", + "O O X \n", + "on move: O\n", + "X X O \n", + "X X O \n", + "O O X \n", + "Episode 4, Total Reward: 1\n", + "Average Reward: 1.0\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + " 
X X \n", + " O \n", + " \n", + "on move: X\n", + " X X \n", + " O O \n", + " \n", + "on move: O\n", + " X X \n", + "X O O \n", + " \n", + "on move: X\n", + " X X \n", + "X O O \n", + " O \n", + "on move: O\n", + " X X \n", + "X O O \n", + " O X \n", + "on move: X\n", + " X X \n", + "X O O \n", + "O O X \n", + "on move: O\n", + "X X X \n", + "X O O \n", + "O O X \n", + "Episode 5, Total Reward: 1\n", + "Average Reward: 1.0\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + "X X \n", + " \n", + "on move: X\n", + " O O \n", + "X X \n", + " \n", + "on move: O\n", + " O O \n", + "X X \n", + " X \n", + "on move: X\n", + " O O \n", + "X X \n", + " X O \n", + "on move: O\n", + "X O O \n", + "X X \n", + " X O \n", + "on move: X\n", + "X O O \n", + "X X O \n", + " X O \n", + "Episode 6, Total Reward: -1\n", + "Average Reward: 0.6666666666666666\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + " O \n", + " X \n", + " X \n", + "on move: X\n", + " O O \n", + " X \n", + " X \n", + "on move: O\n", + " O O \n", + " X \n", + " X X \n", + "on move: X\n", + " O O \n", + " O X \n", + " X X \n", + "on move: O\n", + "X O O \n", + " O X \n", + " X X \n", + "on move: X\n", + "X O O \n", + "O O X \n", + " X X \n", + "on move: O\n", + "X O O \n", + "O O X \n", + "X X X \n", + "Episode 7, Total Reward: 1\n", + "Average Reward: 0.7142857142857143\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + " X \n", + " O X \n", + " \n", + "on move: X\n", + " X \n", + "O O X \n", + " \n", + "on move: O\n", + " X \n", + "O O X \n", + " X \n", + "on move: X\n", + "O X \n", + "O O X \n", + " X \n", + "on move: O\n", + "O X \n", + "O O X \n", + "X X \n", + "on move: X\n", + "O X O \n", + "O O X \n", + "X X \n", + "on move: O\n", + "O X O \n", + "O O X \n", + "X X X \n", + "Episode 8, Total 
Reward: 1\n", + "Average Reward: 0.75\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " O \n", + "X \n", + " \n", + "on move: O\n", + " O \n", + "X X \n", + " \n", + "on move: X\n", + " O \n", + "X X \n", + " O \n", + "on move: O\n", + "X O \n", + "X X \n", + " O \n", + "on move: X\n", + "X O \n", + "X X \n", + " O O \n", + "on move: O\n", + "X O \n", + "X X X \n", + " O O \n", + "Episode 9, Total Reward: 1\n", + "Average Reward: 0.7777777777777778\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + " O \n", + "on move: O\n", + "X X \n", + " \n", + " O \n", + "on move: X\n", + "X X \n", + " O \n", + " O \n", + "on move: O\n", + "X X X \n", + " O \n", + " O \n", + "Episode 10, Total Reward: 1\n", + "Average Reward: 0.8\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + " \n", + " X O \n", + " X \n", + "on move: X\n", + " \n", + "O X O \n", + " X \n", + "on move: O\n", + "X \n", + "O X O \n", + " X \n", + "on move: X\n", + "X O \n", + "O X O \n", + " X \n", + "on move: O\n", + "X O \n", + "O X O \n", + "X X \n", + "on move: X\n", + "X O O \n", + "O X O \n", + "X X \n", + "on move: O\n", + "X O O \n", + "O X O \n", + "X X X \n", + "Episode 11, Total Reward: 1\n", + "Average Reward: 0.8181818181818182\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " O X \n", + " \n", + "on move: O\n", + "X \n", + " O X \n", + " \n", + "on move: X\n", + "X \n", + " O X \n", + " O \n", + "on move: O\n", + "X \n", + " O X \n", + " O X \n", + "on move: X\n", + "X \n", + " O X \n", + "O O X \n", + "on move: O\n", + "X \n", + "X O X \n", + "O O X \n", + "on move: X\n", + "X O \n", + "X O X \n", + "O O X \n", + "Episode 12, Total Reward: -1\n", + "Average Reward: 0.6666666666666666\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + "O \n", + " \n", + "X \n", + "on move: O\n", + "O X \n", + " \n", + "X \n", + "on 
move: X\n", + "O X \n", + " \n", + "X O \n", + "on move: O\n", + "O X \n", + " X \n", + "X O \n", + "on move: X\n", + "O O X \n", + " X \n", + "X O \n", + "on move: O\n", + "O O X \n", + " X \n", + "X X O \n", + "on move: X\n", + "O O X \n", + "O X \n", + "X X O \n", + "on move: O\n", + "O O X \n", + "O X X \n", + "X X O \n", + "Episode 13, Total Reward: 1\n", + "Average Reward: 0.6923076923076923\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + "O \n", + "on move: O\n", + " X \n", + " X \n", + "O \n", + "on move: X\n", + " X \n", + " X \n", + "O O \n", + "on move: O\n", + "X X \n", + " X \n", + "O O \n", + "on move: X\n", + "X O X \n", + " X \n", + "O O \n", + "on move: O\n", + "X O X \n", + " X \n", + "O O X \n", + "Episode 14, Total Reward: 1\n", + "Average Reward: 0.7142857142857143\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " \n", + " O \n", + "on move: O\n", + "X X \n", + " \n", + " O \n", + "on move: X\n", + "X X \n", + " \n", + "O O \n", + "on move: O\n", + "X X \n", + " \n", + "O X O \n", + "on move: X\n", + "X X \n", + " O \n", + "O X O \n", + "on move: O\n", + "X X X \n", + " O \n", + "O X O \n", + "Episode 15, Total Reward: 1\n", + "Average Reward: 0.7333333333333333\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + "X X \n", + " O \n", + " \n", + "on move: X\n", + "X X \n", + "O O \n", + " \n", + "on move: O\n", + "X X \n", + "O O \n", + " X \n", + "on move: X\n", + "X O X \n", + "O O \n", + " X \n", + "on move: O\n", + "X O X \n", + "O O X \n", + " X \n", + "on move: X\n", + "X O X \n", + "O O X \n", + "O X \n", + "on move: O\n", + "X O X \n", + "O O X \n", + "O X X \n", + "Episode 16, Total Reward: 1\n", + "Average Reward: 0.75\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + "O \n", + " \n", + "on move: O\n", + "X \n", + "O \n", + " X \n", + "on move: X\n", + "X \n", + "O 
\n", + " O X \n", + "on move: O\n", + "X X \n", + "O \n", + " O X \n", + "on move: X\n", + "X X \n", + "O O \n", + " O X \n", + "on move: O\n", + "X X X \n", + "O O \n", + " O X \n", + "Episode 17, Total Reward: 1\n", + "Average Reward: 0.7647058823529411\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + "O X \n", + " \n", + " \n", + "on move: O\n", + "O X X \n", + " \n", + " \n", + "on move: X\n", + "O X X \n", + "O \n", + " \n", + "on move: O\n", + "O X X \n", + "O \n", + " X \n", + "on move: X\n", + "O X X \n", + "O O \n", + " X \n", + "on move: O\n", + "O X X \n", + "O O \n", + " X X \n", + "on move: X\n", + "O X X \n", + "O O O \n", + " X X \n", + "Episode 18, Total Reward: -1\n", + "Average Reward: 0.6666666666666666\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X O \n", + " \n", + "on move: O\n", + " \n", + "X O \n", + " X \n", + "on move: X\n", + " O \n", + "X O \n", + " X \n", + "on move: O\n", + " O \n", + "X O \n", + "X X \n", + "on move: X\n", + " O \n", + "X O \n", + "X O X \n", + "on move: O\n", + " X O \n", + "X O \n", + "X O X \n", + "on move: X\n", + "O X O \n", + "X O \n", + "X O X \n", + "on move: O\n", + "O X O \n", + "X O X \n", + "X O X \n", + "Episode 19, Total Reward: 0\n", + "Average Reward: 0.631578947368421\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + " O \n", + " X \n", + " X \n", + "on move: X\n", + "O O \n", + " X \n", + " X \n", + "on move: O\n", + "O O \n", + "X X \n", + " X \n", + "on move: X\n", + "O O \n", + "X X O \n", + " X \n", + "on move: O\n", + "O O \n", + "X X O \n", + "X X \n", + "on move: X\n", + "O O O \n", + "X X O \n", + "X X \n", + "Episode 20, Total Reward: -1\n", + "Average Reward: 0.55\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " O \n", + "X \n", + "on move: O\n", + "X \n", + " O \n", + "X \n", + "on move: X\n", + "X \n", + " O O \n", + "X \n", + "on 
move: O\n", + "X \n", + " O O \n", + "X X \n", + "on move: X\n", + "X \n", + " O O \n", + "X X O \n", + "on move: O\n", + "X X \n", + " O O \n", + "X X O \n", + "on move: X\n", + "X X \n", + "O O O \n", + "X X O \n", + "Episode 21, Total Reward: -1\n", + "Average Reward: 0.47619047619047616\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + "X X \n", + " \n", + "on move: X\n", + "O O \n", + "X X \n", + " \n", + "on move: O\n", + "O X O \n", + "X X \n", + " \n", + "on move: X\n", + "O X O \n", + "X X \n", + " O \n", + "on move: O\n", + "O X O \n", + "X X \n", + "X O \n", + "on move: X\n", + "O X O \n", + "X O X \n", + "X O \n", + "Episode 22, Total Reward: -1\n", + "Average Reward: 0.4090909090909091\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " O \n", + " \n", + "on move: O\n", + "X \n", + "X O \n", + " \n", + "on move: X\n", + "X O \n", + "X O \n", + " \n", + "on move: O\n", + "X O X \n", + "X O \n", + " \n", + "on move: X\n", + "X O X \n", + "X O \n", + " O \n", + "on move: O\n", + "X O X \n", + "X O \n", + "X O \n", + "Episode 23, Total Reward: 1\n", + "Average Reward: 0.43478260869565216\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + "O \n", + "on move: O\n", + " \n", + "X X \n", + "O \n", + "on move: X\n", + " \n", + "X O X \n", + "O \n", + "on move: O\n", + " X \n", + "X O X \n", + "O \n", + "on move: X\n", + "O X \n", + "X O X \n", + "O \n", + "on move: O\n", + "O X X \n", + "X O X \n", + "O \n", + "on move: X\n", + "O X X \n", + "X O X \n", + "O O \n", + "Episode 24, Total Reward: -1\n", + "Average Reward: 0.375\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + " X \n", + " X \n", + "on move: X\n", + " O \n", + " X \n", + " O X \n", + "on move: O\n", + "X O \n", + " X \n", + " O X \n", + "Episode 25, Total Reward: 1\n", 
+ "Average Reward: 0.4\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + "O X \n", + " \n", + " \n", + "on move: O\n", + "O X \n", + " X \n", + " \n", + "on move: X\n", + "O X \n", + "O X \n", + " \n", + "on move: O\n", + "O X \n", + "O X \n", + "X \n", + "on move: X\n", + "O X \n", + "O X \n", + "X O \n", + "on move: O\n", + "O X X \n", + "O X \n", + "X O \n", + "on move: X\n", + "O X X \n", + "O O X \n", + "X O \n", + "Episode 26, Total Reward: -1\n", + "Average Reward: 0.34615384615384615\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + "O \n", + " \n", + " X \n", + "on move: O\n", + "O \n", + " X \n", + " X \n", + "on move: X\n", + "O O \n", + " X \n", + " X \n", + "on move: O\n", + "O O \n", + " X \n", + "X X \n", + "on move: X\n", + "O O \n", + " X \n", + "X O X \n", + "on move: O\n", + "O O \n", + " X X \n", + "X O X \n", + "on move: X\n", + "O O \n", + "O X X \n", + "X O X \n", + "on move: O\n", + "O O X \n", + "O X X \n", + "X O X \n", + "Episode 27, Total Reward: 1\n", + "Average Reward: 0.37037037037037035\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X O \n", + " \n", + "on move: O\n", + " \n", + "X O X \n", + " \n", + "on move: X\n", + " \n", + "X O X \n", + " O \n", + "on move: O\n", + " X \n", + "X O X \n", + " O \n", + "on move: X\n", + "O X \n", + "X O X \n", + " O \n", + "on move: O\n", + "O X \n", + "X O X \n", + " O X \n", + "on move: X\n", + "O X O \n", + "X O X \n", + " O X \n", + "on move: O\n", + "O X O \n", + "X O X \n", + "X O X \n", + "Episode 28, Total Reward: 0\n", + "Average Reward: 0.35714285714285715\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + " O \n", + " X \n", + " X \n", + "on move: X\n", + " O O \n", + " X \n", + " X \n", + "on move: O\n", + " O O \n", + " X \n", + "X X \n", + "on move: X\n", + "O O O \n", + " X \n", + "X X \n", + "Episode 29, Total Reward: -1\n", + "Average 
Reward: 0.3103448275862069\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " O \n", + "X \n", + " \n", + "on move: O\n", + "X O \n", + "X \n", + " \n", + "on move: X\n", + "X O O \n", + "X \n", + " \n", + "on move: O\n", + "X O O \n", + "X \n", + "X \n", + "Episode 30, Total Reward: 1\n", + "Average Reward: 0.3333333333333333\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + " O \n", + " X \n", + " X \n", + "on move: X\n", + " O \n", + " X \n", + " O X \n", + "on move: O\n", + " X O \n", + " X \n", + " O X \n", + "on move: X\n", + "O X O \n", + " X \n", + " O X \n", + "on move: O\n", + "O X O \n", + "X X \n", + " O X \n", + "on move: X\n", + "O X O \n", + "X X \n", + "O O X \n", + "on move: O\n", + "O X O \n", + "X X X \n", + "O O X \n", + "Episode 31, Total Reward: 1\n", + "Average Reward: 0.3548387096774194\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + " O \n", + "on move: O\n", + " X \n", + " \n", + "X O \n", + "on move: X\n", + " X \n", + "O \n", + "X O \n", + "on move: O\n", + " X \n", + "O X \n", + "X O \n", + "on move: X\n", + "O X \n", + "O X \n", + "X O \n", + "on move: O\n", + "O X X \n", + "O X \n", + "X O \n", + "on move: X\n", + "O X X \n", + "O O X \n", + "X O \n", + "Episode 32, Total Reward: -1\n", + "Average Reward: 0.3125\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X O \n", + " \n", + "on move: O\n", + " X \n", + "X O \n", + " \n", + "on move: X\n", + " X \n", + "X O O \n", + " \n", + "on move: O\n", + "X X \n", + "X O O \n", + " \n", + "on move: X\n", + "X X O \n", + "X O O \n", + " \n", + "on move: O\n", + "X X O \n", + "X O O \n", + " X \n", + "on move: X\n", + "X X O \n", + "X O O \n", + "O X \n", + "Episode 33, Total Reward: -1\n", + "Average Reward: 0.2727272727272727\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " O \n", + " \n", + "X \n", + "on 
move: O\n", + " O \n", + "X \n", + "X \n", + "on move: X\n", + " O \n", + "X O \n", + "X \n", + "on move: O\n", + " O X \n", + "X O \n", + "X \n", + "on move: X\n", + " O X \n", + "X O \n", + "X O \n", + "Episode 34, Total Reward: -1\n", + "Average Reward: 0.23529411764705882\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + "O \n", + " X \n", + "on move: O\n", + " \n", + "O \n", + "X X \n", + "on move: X\n", + " O \n", + "O \n", + "X X \n", + "on move: O\n", + " O \n", + "O X \n", + "X X \n", + "on move: X\n", + " O \n", + "O O X \n", + "X X \n", + "on move: O\n", + " O X \n", + "O O X \n", + "X X \n", + "Episode 35, Total Reward: 1\n", + "Average Reward: 0.2571428571428571\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " \n", + " O \n", + "on move: O\n", + "X \n", + "X \n", + " O \n", + "on move: X\n", + "X \n", + "X O \n", + " O \n", + "on move: O\n", + "X \n", + "X O \n", + "X O \n", + "Episode 36, Total Reward: 1\n", + "Average Reward: 0.2777777777777778\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X O \n", + " \n", + "on move: O\n", + " \n", + " X O \n", + " X \n", + "on move: X\n", + "O \n", + " X O \n", + " X \n", + "on move: O\n", + "O \n", + "X X O \n", + " X \n", + "on move: X\n", + "O O \n", + "X X O \n", + " X \n", + "on move: O\n", + "O X O \n", + "X X O \n", + " X \n", + "Episode 37, Total Reward: 1\n", + "Average Reward: 0.2972972972972973\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + "O \n", + " \n", + "on move: O\n", + " X \n", + "O \n", + "X \n", + "on move: X\n", + " X \n", + "O O \n", + "X \n", + "on move: O\n", + " X \n", + "O O \n", + "X X \n", + "on move: X\n", + "O X \n", + "O O \n", + "X X \n", + "on move: O\n", + "O X \n", + "O X O \n", + "X X \n", + "Episode 38, Total Reward: 1\n", + "Average Reward: 0.3157894736842105\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " \n", 
+ "O X \n", + "on move: O\n", + "X \n", + " \n", + "O X \n", + "on move: X\n", + "X O \n", + " \n", + "O X \n", + "on move: O\n", + "X X O \n", + " \n", + "O X \n", + "on move: X\n", + "X X O \n", + " O \n", + "O X \n", + "on move: O\n", + "X X O \n", + "X O \n", + "O X \n", + "on move: X\n", + "X X O \n", + "X O O \n", + "O X \n", + "Episode 39, Total Reward: -1\n", + "Average Reward: 0.28205128205128205\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + " X \n", + " X \n", + "on move: X\n", + " O \n", + " X \n", + "O X \n", + "on move: O\n", + "X O \n", + " X \n", + "O X \n", + "on move: X\n", + "X O O \n", + " X \n", + "O X \n", + "on move: O\n", + "X O O \n", + " X X \n", + "O X \n", + "on move: X\n", + "X O O \n", + "O X X \n", + "O X \n", + "on move: O\n", + "X O O \n", + "O X X \n", + "O X X \n", + "Episode 40, Total Reward: 1\n", + "Average Reward: 0.3\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + " O \n", + "X \n", + " X \n", + "on move: X\n", + " O \n", + "X O \n", + " X \n", + "on move: O\n", + " O \n", + "X X O \n", + " X \n", + "on move: X\n", + " O O \n", + "X X O \n", + " X \n", + "on move: O\n", + "X O O \n", + "X X O \n", + " X \n", + "on move: X\n", + "X O O \n", + "X X O \n", + "O X \n", + "on move: O\n", + "X O O \n", + "X X O \n", + "O X X \n", + "Episode 41, Total Reward: 1\n", + "Average Reward: 0.3170731707317073\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " O \n", + " \n", + "X \n", + "on move: O\n", + "X O \n", + " \n", + "X \n", + "on move: X\n", + "X O \n", + "O \n", + "X \n", + "on move: O\n", + "X O \n", + "O \n", + "X X \n", + "on move: X\n", + "X O \n", + "O O \n", + "X X \n", + "on move: O\n", + "X X O \n", + "O O \n", + "X X \n", + "on move: X\n", + "X X O \n", + "O O O \n", + "X X \n", + "Episode 42, Total Reward: -1\n", + "Average Reward: 
0.2857142857142857\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + "O X \n", + " \n", + "on move: O\n", + " \n", + "O X \n", + "X \n", + "on move: X\n", + " O \n", + "O X \n", + "X \n", + "on move: O\n", + " X O \n", + "O X \n", + "X \n", + "on move: X\n", + "O X O \n", + "O X \n", + "X \n", + "on move: O\n", + "O X O \n", + "O X \n", + "X X \n", + "on move: X\n", + "O X O \n", + "O O X \n", + "X X \n", + "on move: O\n", + "O X O \n", + "O O X \n", + "X X X \n", + "Episode 43, Total Reward: 1\n", + "Average Reward: 0.3023255813953488\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + " X \n", + " O \n", + " X \n", + "on move: X\n", + " X \n", + " O \n", + " O X \n", + "on move: O\n", + " X \n", + " X O \n", + " O X \n", + "on move: X\n", + " X \n", + "O X O \n", + " O X \n", + "on move: O\n", + "X X \n", + "O X O \n", + " O X \n", + "Episode 44, Total Reward: 1\n", + "Average Reward: 0.3181818181818182\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + " X \n", + " X \n", + "on move: X\n", + "O O \n", + " X \n", + " X \n", + "on move: O\n", + "O O \n", + "X X \n", + " X \n", + "on move: X\n", + "O O \n", + "X X \n", + " X O \n", + "on move: O\n", + "O X O \n", + "X X \n", + " X O \n", + "on move: X\n", + "O X O \n", + "X O X \n", + " X O \n", + "Episode 45, Total Reward: -1\n", + "Average Reward: 0.28888888888888886\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X \n", + "O \n", + "on move: O\n", + " X \n", + "X \n", + "O \n", + "on move: X\n", + " X \n", + "X O \n", + "O \n", + "on move: O\n", + " X X \n", + "X O \n", + "O \n", + "on move: X\n", + "O X X \n", + "X O \n", + "O \n", + "on move: O\n", + "O X X \n", + "X O \n", + "O X \n", + "on move: X\n", + "O X X \n", + "X O O \n", + "O X \n", + "on move: O\n", + "O X X \n", + "X O O \n", + "O X X \n", + 
"Episode 46, Total Reward: 0\n", + "Average Reward: 0.2826086956521739\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " O \n", + " \n", + "X \n", + "on move: O\n", + "X O \n", + " \n", + "X \n", + "on move: X\n", + "X O \n", + " O \n", + "X \n", + "on move: O\n", + "X O \n", + "X O \n", + "X \n", + "Episode 47, Total Reward: 1\n", + "Average Reward: 0.2978723404255319\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + " X \n", + " O \n", + " X \n", + "on move: X\n", + " X \n", + " O \n", + " O X \n", + "on move: O\n", + " X \n", + " O \n", + "X O X \n", + "on move: X\n", + "O X \n", + " O \n", + "X O X \n", + "on move: O\n", + "O X \n", + "X O \n", + "X O X \n", + "on move: X\n", + "O O X \n", + "X O \n", + "X O X \n", + "on move: O\n", + "O O X \n", + "X X O \n", + "X O X \n", + "Episode 48, Total Reward: 1\n", + "Average Reward: 0.3125\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + "X \n", + " O \n", + " X \n", + "on move: X\n", + "X \n", + " O \n", + "O X \n", + "on move: O\n", + "X \n", + " X O \n", + "O X \n", + "Episode 49, Total Reward: 1\n", + "Average Reward: 0.32653061224489793\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + "O \n", + "on move: O\n", + " \n", + " X X \n", + "O \n", + "on move: X\n", + " \n", + " X X \n", + "O O \n", + "on move: O\n", + " X \n", + " X X \n", + "O O \n", + "on move: X\n", + " X \n", + "O X X \n", + "O O \n", + "on move: O\n", + " X X \n", + "O X X \n", + "O O \n", + "on move: X\n", + " X X \n", + "O X X \n", + "O O O \n", + "Episode 50, Total Reward: -1\n", + "Average Reward: 0.3\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X O \n", + " \n", + "on move: O\n", + " X \n", + "X O \n", + " \n", + "on move: X\n", + " X \n", + "X O O \n", + " \n", + "on move: O\n", + " X \n", + "X O O \n", + 
" X \n", + "on move: X\n", + " X \n", + "X O O \n", + "O X \n", + "on move: O\n", + " X X \n", + "X O O \n", + "O X \n", + "on move: X\n", + " X X \n", + "X O O \n", + "O O X \n", + "on move: O\n", + "X X X \n", + "X O O \n", + "O O X \n", + "Episode 51, Total Reward: 1\n", + "Average Reward: 0.3137254901960784\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " \n", + "X O \n", + "on move: O\n", + " X \n", + " \n", + "X O \n", + "on move: X\n", + " X \n", + " O \n", + "X O \n", + "on move: O\n", + " X \n", + " O X \n", + "X O \n", + "on move: X\n", + " X O \n", + " O X \n", + "X O \n", + "on move: O\n", + " X O \n", + " O X \n", + "X X O \n", + "on move: X\n", + "O X O \n", + " O X \n", + "X X O \n", + "Episode 52, Total Reward: -1\n", + "Average Reward: 0.28846153846153844\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " O \n", + " \n", + "on move: O\n", + "X \n", + " O \n", + "X \n", + "on move: X\n", + "X \n", + "O O \n", + "X \n", + "on move: O\n", + "X \n", + "O O \n", + "X X \n", + "on move: X\n", + "X O \n", + "O O \n", + "X X \n", + "on move: O\n", + "X O \n", + "O O X \n", + "X X \n", + "on move: X\n", + "X O O \n", + "O O X \n", + "X X \n", + "on move: O\n", + "X O O \n", + "O O X \n", + "X X X \n", + "Episode 53, Total Reward: 1\n", + "Average Reward: 0.3018867924528302\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X O \n", + " \n", + " \n", + "on move: O\n", + " X O \n", + " X \n", + " \n", + "on move: X\n", + " X O \n", + " X \n", + "O \n", + "on move: O\n", + " X O \n", + " X \n", + "O X \n", + "Episode 54, Total Reward: 1\n", + "Average Reward: 0.3148148148148148\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + " X X \n", + " \n", + "on move: X\n", + "O O \n", + " X X \n", + " \n", + "on move: O\n", + "O O \n", + "X X X \n", + " \n", + "Episode 55, Total Reward: 1\n", + "Average 
Reward: 0.32727272727272727\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " O \n", + "X \n", + " \n", + "on move: O\n", + " O \n", + "X \n", + "X \n", + "on move: X\n", + "O O \n", + "X \n", + "X \n", + "on move: O\n", + "O O \n", + "X X \n", + "X \n", + "on move: X\n", + "O O \n", + "X O X \n", + "X \n", + "on move: O\n", + "O X O \n", + "X O X \n", + "X \n", + "on move: X\n", + "O X O \n", + "X O X \n", + "X O \n", + "on move: O\n", + "O X O \n", + "X O X \n", + "X O X \n", + "Episode 56, Total Reward: 0\n", + "Average Reward: 0.32142857142857145\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + " \n", + " O \n", + " X X \n", + "on move: X\n", + " \n", + " O \n", + "O X X \n", + "on move: O\n", + " \n", + "X O \n", + "O X X \n", + "on move: X\n", + " \n", + "X O O \n", + "O X X \n", + "on move: O\n", + " X \n", + "X O O \n", + "O X X \n", + "on move: X\n", + "O X \n", + "X O O \n", + "O X X \n", + "on move: O\n", + "O X X \n", + "X O O \n", + "O X X \n", + "Episode 57, Total Reward: 0\n", + "Average Reward: 0.3157894736842105\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " O \n", + "X \n", + "on move: O\n", + " \n", + " O \n", + "X X \n", + "on move: X\n", + " \n", + " O \n", + "X O X \n", + "on move: O\n", + " X \n", + " O \n", + "X O X \n", + "on move: X\n", + " X \n", + "O O \n", + "X O X \n", + "on move: O\n", + " X X \n", + "O O \n", + "X O X \n", + "on move: X\n", + " X X \n", + "O O O \n", + "X O X \n", + "Episode 58, Total Reward: -1\n", + "Average Reward: 0.29310344827586204\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + "O \n", + "on move: O\n", + " X X \n", + " \n", + "O \n", + "on move: X\n", + "O X X \n", + " \n", + "O \n", + "on move: O\n", + "O X X \n", + " X \n", + "O \n", + "on move: X\n", + "O X X \n", + " X \n", + "O O \n", + "on move: O\n", + "O X X \n", + " X \n", + "O O X 
\n", + "on move: X\n", + "O X X \n", + "O X \n", + "O O X \n", + "Episode 59, Total Reward: -1\n", + "Average Reward: 0.2711864406779661\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + " X \n", + " O \n", + " X \n", + "on move: X\n", + " O X \n", + " O \n", + " X \n", + "on move: O\n", + " O X \n", + " O \n", + " X X \n", + "on move: X\n", + " O X \n", + "O O \n", + " X X \n", + "on move: O\n", + " O X \n", + "O X O \n", + " X X \n", + "on move: X\n", + " O X \n", + "O X O \n", + "O X X \n", + "on move: O\n", + "X O X \n", + "O X O \n", + "O X X \n", + "Episode 60, Total Reward: 1\n", + "Average Reward: 0.2833333333333333\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X \n", + "O \n", + "on move: O\n", + " \n", + "X \n", + "O X \n", + "on move: X\n", + "O \n", + "X \n", + "O X \n", + "on move: O\n", + "O \n", + "X \n", + "O X X \n", + "on move: X\n", + "O O \n", + "X \n", + "O X X \n", + "on move: O\n", + "O X O \n", + "X \n", + "O X X \n", + "on move: X\n", + "O X O \n", + "X O \n", + "O X X \n", + "Episode 61, Total Reward: -1\n", + "Average Reward: 0.26229508196721313\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + " O \n", + "X \n", + " X \n", + "on move: X\n", + " O \n", + "X \n", + "O X \n", + "on move: O\n", + " O \n", + "X \n", + "O X X \n", + "on move: X\n", + " O \n", + "X O \n", + "O X X \n", + "on move: O\n", + "X O \n", + "X O \n", + "O X X \n", + "on move: X\n", + "X O \n", + "X O O \n", + "O X X \n", + "on move: O\n", + "X O X \n", + "X O O \n", + "O X X \n", + "Episode 62, Total Reward: 0\n", + "Average Reward: 0.25806451612903225\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + "O \n", + "on move: O\n", + " \n", + " X \n", + "O X \n", + "on move: X\n", + "O \n", + " X \n", + "O X \n", + "on move: O\n", + "O X \n", + " X \n", + "O X 
\n", + "Episode 63, Total Reward: 1\n", + "Average Reward: 0.2698412698412698\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X O \n", + " \n", + " \n", + "on move: O\n", + "X O \n", + " \n", + " X \n", + "on move: X\n", + "X O \n", + " O \n", + " X \n", + "on move: O\n", + "X O \n", + " X O \n", + " X \n", + "Episode 64, Total Reward: 1\n", + "Average Reward: 0.28125\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + "O X \n", + " \n", + "on move: O\n", + " \n", + "O X \n", + " X \n", + "on move: X\n", + " \n", + "O O X \n", + " X \n", + "on move: O\n", + " \n", + "O O X \n", + "X X \n", + "on move: X\n", + "O \n", + "O O X \n", + "X X \n", + "on move: O\n", + "O X \n", + "O O X \n", + "X X \n", + "on move: X\n", + "O X O \n", + "O O X \n", + "X X \n", + "on move: O\n", + "O X O \n", + "O O X \n", + "X X X \n", + "Episode 65, Total Reward: 1\n", + "Average Reward: 0.2923076923076923\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X O \n", + " \n", + " \n", + "on move: O\n", + "X O \n", + " X \n", + " \n", + "on move: X\n", + "X O \n", + " X \n", + " O \n", + "on move: O\n", + "X O \n", + " X \n", + "X O \n", + "on move: X\n", + "X O O \n", + " X \n", + "X O \n", + "on move: O\n", + "X O O \n", + " X \n", + "X X O \n", + "on move: X\n", + "X O O \n", + "O X \n", + "X X O \n", + "on move: O\n", + "X O O \n", + "O X X \n", + "X X O \n", + "Episode 66, Total Reward: 0\n", + "Average Reward: 0.2878787878787879\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X O \n", + " \n", + " \n", + "on move: O\n", + "X O \n", + " \n", + " X \n", + "on move: X\n", + "X O \n", + " \n", + "O X \n", + "on move: O\n", + "X O X \n", + " \n", + "O X \n", + "on move: X\n", + "X O X \n", + " \n", + "O O X \n", + "on move: O\n", + "X O X \n", + " X \n", + "O O X \n", + "Episode 67, Total Reward: 1\n", + "Average Reward: 0.29850746268656714\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on 
move: X\n", + "X \n", + "O \n", + " \n", + "on move: O\n", + "X \n", + "O \n", + " X \n", + "on move: X\n", + "X \n", + "O O \n", + " X \n", + "on move: O\n", + "X \n", + "O O \n", + "X X \n", + "on move: X\n", + "X \n", + "O O O \n", + "X X \n", + "Episode 68, Total Reward: -1\n", + "Average Reward: 0.27941176470588236\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " \n", + "O X \n", + "on move: O\n", + " \n", + " \n", + "O X X \n", + "on move: X\n", + " O \n", + " \n", + "O X X \n", + "on move: O\n", + " O \n", + " X \n", + "O X X \n", + "on move: X\n", + " O O \n", + " X \n", + "O X X \n", + "on move: O\n", + " O O \n", + " X X \n", + "O X X \n", + "on move: X\n", + "O O O \n", + " X X \n", + "O X X \n", + "Episode 69, Total Reward: -1\n", + "Average Reward: 0.2608695652173913\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X O \n", + " \n", + " \n", + "on move: O\n", + "X O \n", + " \n", + "X \n", + "on move: X\n", + "X O O \n", + " \n", + "X \n", + "on move: O\n", + "X O O \n", + " \n", + "X X \n", + "on move: X\n", + "X O O \n", + " O \n", + "X X \n", + "on move: O\n", + "X O O \n", + "X O \n", + "X X \n", + "Episode 70, Total Reward: 1\n", + "Average Reward: 0.2714285714285714\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + " X \n", + " X O \n", + " \n", + "on move: X\n", + " X \n", + " X O \n", + " O \n", + "on move: O\n", + " X \n", + " X O \n", + "X O \n", + "on move: X\n", + " X \n", + " X O \n", + "X O O \n", + "on move: O\n", + " X X \n", + " X O \n", + "X O O \n", + "Episode 71, Total Reward: 1\n", + "Average Reward: 0.28169014084507044\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + "X O \n", + " \n", + " X \n", + "on move: X\n", + "X O \n", + " O \n", + " X \n", + "on move: O\n", + "X X O \n", + " O \n", + " X \n", + "on move: X\n", + "X X O \n", + "O O 
\n", + " X \n", + "on move: O\n", + "X X O \n", + "O O \n", + " X X \n", + "on move: X\n", + "X X O \n", + "O O O \n", + " X X \n", + "Episode 72, Total Reward: -1\n", + "Average Reward: 0.2638888888888889\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " \n", + " O X \n", + "on move: O\n", + " X \n", + " \n", + " O X \n", + "on move: X\n", + " X \n", + " \n", + "O O X \n", + "on move: O\n", + " X \n", + " X \n", + "O O X \n", + "on move: X\n", + " X O \n", + " X \n", + "O O X \n", + "on move: O\n", + "X X O \n", + " X \n", + "O O X \n", + "on move: X\n", + "X X O \n", + "O X \n", + "O O X \n", + "on move: O\n", + "X X O \n", + "O X X \n", + "O O X \n", + "Episode 73, Total Reward: 1\n", + "Average Reward: 0.273972602739726\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + "O \n", + "on move: O\n", + " \n", + "X X \n", + "O \n", + "on move: X\n", + " \n", + "X X O \n", + "O \n", + "on move: O\n", + " \n", + "X X O \n", + "O X \n", + "on move: X\n", + " O \n", + "X X O \n", + "O X \n", + "on move: O\n", + " O \n", + "X X O \n", + "O X X \n", + "on move: X\n", + " O O \n", + "X X O \n", + "O X X \n", + "on move: O\n", + "X O O \n", + "X X O \n", + "O X X \n", + "Episode 74, Total Reward: 1\n", + "Average Reward: 0.28378378378378377\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + " O \n", + "on move: O\n", + " X \n", + "X \n", + " O \n", + "on move: X\n", + " X \n", + "X \n", + " O O \n", + "on move: O\n", + " X \n", + "X X \n", + " O O \n", + "on move: X\n", + "O X \n", + "X X \n", + " O O \n", + "on move: O\n", + "O X \n", + "X X X \n", + " O O \n", + "Episode 75, Total Reward: 1\n", + "Average Reward: 0.29333333333333333\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + " O \n", + "on move: O\n", + " X \n", + " X \n", + " O \n", + "on move: X\n", + " O X \n", + " X \n", + " O \n", + "on move: O\n", + " O X \n", 
+ " X X \n", + " O \n", + "on move: X\n", + " O X \n", + " X X \n", + "O O \n", + "on move: O\n", + " O X \n", + "X X X \n", + "O O \n", + "Episode 76, Total Reward: 1\n", + "Average Reward: 0.3026315789473684\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X \n", + "O \n", + "on move: O\n", + " \n", + "X \n", + "O X \n", + "on move: X\n", + "O \n", + "X \n", + "O X \n", + "on move: O\n", + "O X \n", + "X \n", + "O X \n", + "on move: X\n", + "O X O \n", + "X \n", + "O X \n", + "on move: O\n", + "O X O \n", + "X X \n", + "O X \n", + "on move: X\n", + "O X O \n", + "X O X \n", + "O X \n", + "Episode 77, Total Reward: -1\n", + "Average Reward: 0.2857142857142857\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X \n", + " O \n", + "on move: O\n", + " X \n", + "X \n", + " O \n", + "on move: X\n", + " X \n", + "X \n", + "O O \n", + "on move: O\n", + " X \n", + "X X \n", + "O O \n", + "on move: X\n", + " X \n", + "X X O \n", + "O O \n", + "on move: O\n", + " X \n", + "X X O \n", + "O X O \n", + "Episode 78, Total Reward: 1\n", + "Average Reward: 0.2948717948717949\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + "O \n", + "on move: O\n", + " \n", + "X X \n", + "O \n", + "on move: X\n", + " \n", + "X X \n", + "O O \n", + "on move: O\n", + " \n", + "X X X \n", + "O O \n", + "Episode 79, Total Reward: 1\n", + "Average Reward: 0.3037974683544304\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + " \n", + " X O \n", + " X \n", + "on move: X\n", + " \n", + " X O \n", + "O X \n", + "on move: O\n", + " X \n", + " X O \n", + "O X \n", + "on move: X\n", + " X \n", + "O X O \n", + "O X \n", + "on move: O\n", + " X X \n", + "O X O \n", + "O X \n", + "Episode 80, Total Reward: 1\n", + "Average Reward: 0.3125\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " O \n", + " \n", + "X \n", + "on 
move: O\n", + " O \n", + " \n", + "X X \n", + "on move: X\n", + "O O \n", + " \n", + "X X \n", + "on move: O\n", + "O O \n", + " X \n", + "X X \n", + "on move: X\n", + "O O O \n", + " X \n", + "X X \n", + "Episode 81, Total Reward: -1\n", + "Average Reward: 0.2962962962962963\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + "O \n", + "X \n", + "on move: O\n", + " \n", + "O \n", + "X X \n", + "on move: X\n", + " O \n", + "O \n", + "X X \n", + "on move: O\n", + " X O \n", + "O \n", + "X X \n", + "on move: X\n", + " X O \n", + "O O \n", + "X X \n", + "on move: O\n", + "X X O \n", + "O O \n", + "X X \n", + "on move: X\n", + "X X O \n", + "O O \n", + "X O X \n", + "on move: O\n", + "X X O \n", + "O O X \n", + "X O X \n", + "Episode 82, Total Reward: 0\n", + "Average Reward: 0.2926829268292683\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " O \n", + "X \n", + " \n", + "on move: O\n", + " O \n", + "X X \n", + " \n", + "on move: X\n", + " O O \n", + "X X \n", + " \n", + "on move: O\n", + " O O \n", + "X X \n", + " X \n", + "on move: X\n", + " O O \n", + "X X \n", + " X O \n", + "on move: O\n", + " O O \n", + "X X X \n", + " X O \n", + "Episode 83, Total Reward: 1\n", + "Average Reward: 0.30120481927710846\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + " O \n", + "on move: O\n", + " \n", + " X \n", + "X O \n", + "on move: X\n", + " \n", + " X \n", + "X O O \n", + "on move: O\n", + " \n", + "X X \n", + "X O O \n", + "on move: X\n", + "O \n", + "X X \n", + "X O O \n", + "on move: O\n", + "O X \n", + "X X \n", + "X O O \n", + "on move: X\n", + "O X O \n", + "X X \n", + "X O O \n", + "on move: O\n", + "O X O \n", + "X X X \n", + "X O O \n", + "Episode 84, Total Reward: 1\n", + "Average Reward: 0.30952380952380953\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + " X \n", + " X \n", + "on move: X\n", + 
" O \n", + " X O \n", + " X \n", + "on move: O\n", + " O \n", + "X X O \n", + " X \n", + "on move: X\n", + " O \n", + "X X O \n", + " O X \n", + "on move: O\n", + " O \n", + "X X O \n", + "X O X \n", + "on move: X\n", + "O O \n", + "X X O \n", + "X O X \n", + "on move: O\n", + "O O X \n", + "X X O \n", + "X O X \n", + "Episode 85, Total Reward: 1\n", + "Average Reward: 0.3176470588235294\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X \n", + " O \n", + "on move: O\n", + " \n", + "X \n", + " O X \n", + "on move: X\n", + "O \n", + "X \n", + " O X \n", + "on move: O\n", + "O \n", + "X \n", + "X O X \n", + "on move: X\n", + "O \n", + "X O \n", + "X O X \n", + "on move: O\n", + "O X \n", + "X O \n", + "X O X \n", + "on move: X\n", + "O X \n", + "X O O \n", + "X O X \n", + "on move: O\n", + "O X X \n", + "X O O \n", + "X O X \n", + "Episode 86, Total Reward: 0\n", + "Average Reward: 0.313953488372093\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + " X \n", + " O \n", + " X \n", + "on move: X\n", + " X \n", + " O O \n", + " X \n", + "on move: O\n", + " X \n", + " O O \n", + "X X \n", + "on move: X\n", + "O X \n", + " O O \n", + "X X \n", + "on move: O\n", + "O X \n", + "X O O \n", + "X X \n", + "on move: X\n", + "O X \n", + "X O O \n", + "X O X \n", + "on move: O\n", + "O X X \n", + "X O O \n", + "X O X \n", + "Episode 87, Total Reward: 0\n", + "Average Reward: 0.3103448275862069\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + " X \n", + " O \n", + " X \n", + "on move: X\n", + "O X \n", + " O \n", + " X \n", + "on move: O\n", + "O X X \n", + " O \n", + " X \n", + "on move: X\n", + "O X X \n", + " O \n", + "O X \n", + "on move: O\n", + "O X X \n", + "X O \n", + "O X \n", + "on move: X\n", + "O X X \n", + "X O \n", + "O X O \n", + "Episode 88, Total Reward: -1\n", + "Average Reward: 
0.29545454545454547\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X O \n", + " \n", + " \n", + "on move: O\n", + "X O \n", + "X \n", + " \n", + "on move: X\n", + "X O \n", + "X \n", + " O \n", + "on move: O\n", + "X O X \n", + "X \n", + " O \n", + "on move: X\n", + "X O X \n", + "X O \n", + " O \n", + "on move: O\n", + "X O X \n", + "X X O \n", + " O \n", + "on move: X\n", + "X O X \n", + "X X O \n", + " O O \n", + "on move: O\n", + "X O X \n", + "X X O \n", + "X O O \n", + "Episode 89, Total Reward: 1\n", + "Average Reward: 0.30337078651685395\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + "X \n", + " O \n", + " X \n", + "on move: X\n", + "X \n", + "O O \n", + " X \n", + "on move: O\n", + "X \n", + "O O \n", + "X X \n", + "on move: X\n", + "X O \n", + "O O \n", + "X X \n", + "on move: O\n", + "X O X \n", + "O O \n", + "X X \n", + "on move: X\n", + "X O X \n", + "O O O \n", + "X X \n", + "Episode 90, Total Reward: -1\n", + "Average Reward: 0.28888888888888886\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + "X \n", + " O \n", + " X \n", + "on move: X\n", + "X O \n", + " O \n", + " X \n", + "on move: O\n", + "X O \n", + "X O \n", + " X \n", + "on move: X\n", + "X O \n", + "X O \n", + " X O \n", + "on move: O\n", + "X O \n", + "X X O \n", + " X O \n", + "on move: X\n", + "X O O \n", + "X X O \n", + " X O \n", + "Episode 91, Total Reward: -1\n", + "Average Reward: 0.27472527472527475\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + "O \n", + " X \n", + "on move: O\n", + " \n", + "O X \n", + " X \n", + "on move: X\n", + " \n", + "O X O \n", + " X \n", + "on move: O\n", + " \n", + "O X O \n", + "X X \n", + "on move: X\n", + " O \n", + "O X O \n", + "X X \n", + "on move: O\n", + " O \n", + "O X O \n", + "X X X \n", + "Episode 92, Total Reward: 1\n", + "Average Reward: 
0.2826086956521739\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + "O \n", + " \n", + "on move: O\n", + " X \n", + "O \n", + "X \n", + "on move: X\n", + " O X \n", + "O \n", + "X \n", + "on move: O\n", + " O X \n", + "O X \n", + "X \n", + "Episode 93, Total Reward: 1\n", + "Average Reward: 0.2903225806451613\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + "O \n", + "on move: O\n", + " X \n", + " X \n", + "O \n", + "on move: X\n", + "O X \n", + " X \n", + "O \n", + "on move: O\n", + "O X \n", + "X X \n", + "O \n", + "on move: X\n", + "O X \n", + "X X \n", + "O O \n", + "on move: O\n", + "O X X \n", + "X X \n", + "O O \n", + "on move: X\n", + "O X X \n", + "X X \n", + "O O O \n", + "Episode 94, Total Reward: -1\n", + "Average Reward: 0.2765957446808511\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X O \n", + " \n", + "on move: O\n", + " \n", + "X O \n", + " X \n", + "on move: X\n", + " O \n", + "X O \n", + " X \n", + "on move: O\n", + " O X \n", + "X O \n", + " X \n", + "on move: X\n", + " O X \n", + "X O O \n", + " X \n", + "on move: O\n", + " O X \n", + "X O O \n", + " X X \n", + "on move: X\n", + " O X \n", + "X O O \n", + "O X X \n", + "on move: O\n", + "X O X \n", + "X O O \n", + "O X X \n", + "Episode 95, Total Reward: 0\n", + "Average Reward: 0.2736842105263158\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + " O \n", + "on move: O\n", + " X \n", + " X \n", + " O \n", + "on move: X\n", + " X \n", + " X \n", + "O O \n", + "on move: O\n", + " X \n", + "X X \n", + "O O \n", + "on move: X\n", + " X O \n", + "X X \n", + "O O \n", + "on move: O\n", + " X O \n", + "X X X \n", + "O O \n", + "Episode 96, Total Reward: 1\n", + "Average Reward: 0.28125\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X O \n", + " \n", + "on move: O\n", + "X \n", + "X O \n", + " \n", + "on move: X\n", + "X O 
\n", + "X O \n", + " \n", + "on move: O\n", + "X O \n", + "X O \n", + "X \n", + "Episode 97, Total Reward: 1\n", + "Average Reward: 0.28865979381443296\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + "O \n", + " \n", + " X \n", + "on move: O\n", + "O \n", + " X \n", + " X \n", + "on move: X\n", + "O \n", + " X \n", + " O X \n", + "on move: O\n", + "O X \n", + " X \n", + " O X \n", + "on move: X\n", + "O X \n", + " X \n", + "O O X \n", + "on move: O\n", + "O X \n", + "X X \n", + "O O X \n", + "on move: X\n", + "O X \n", + "X O X \n", + "O O X \n", + "on move: O\n", + "O X X \n", + "X O X \n", + "O O X \n", + "Episode 98, Total Reward: 1\n", + "Average Reward: 0.29591836734693877\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + "O \n", + "on move: O\n", + " \n", + " X \n", + "O X \n", + "on move: X\n", + "O \n", + " X \n", + "O X \n", + "on move: O\n", + "O \n", + "X X \n", + "O X \n", + "on move: X\n", + "O O \n", + "X X \n", + "O X \n", + "on move: O\n", + "O X O \n", + "X X \n", + "O X \n", + "on move: X\n", + "O X O \n", + "X X O \n", + "O X \n", + "on move: O\n", + "O X O \n", + "X X O \n", + "O X X \n", + "Episode 99, Total Reward: 1\n", + "Average Reward: 0.30303030303030304\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " \n", + "X O \n", + "on move: O\n", + " \n", + " X \n", + "X O \n", + "on move: X\n", + " \n", + "O X \n", + "X O \n", + "on move: O\n", + " \n", + "O X \n", + "X X O \n", + "on move: X\n", + " \n", + "O O X \n", + "X X O \n", + "on move: O\n", + " X \n", + "O O X \n", + "X X O \n", + "on move: X\n", + " O X \n", + "O O X \n", + "X X O \n", + "on move: O\n", + "X O X \n", + "O O X \n", + "X X O \n", + "Episode 100, Total Reward: 0\n", + "Average Reward: 0.3\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " \n", + "O X \n", + "on move: O\n", + " \n", + " X \n", + "O X \n", + "on move: X\n", + " \n", + " X O \n", + "O 
X \n", + "on move: O\n", + " \n", + " X O \n", + "O X X \n", + "on move: X\n", + " \n", + "O X O \n", + "O X X \n", + "on move: O\n", + "X \n", + "O X O \n", + "O X X \n", + "Episode 101, Total Reward: 1\n", + "Average Reward: 0.3069306930693069\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + "O \n", + "X \n", + "on move: O\n", + " \n", + "O \n", + "X X \n", + "on move: X\n", + " O \n", + "O \n", + "X X \n", + "on move: O\n", + " O \n", + "O \n", + "X X X \n", + "Episode 102, Total Reward: 1\n", + "Average Reward: 0.3137254901960784\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + "O \n", + " \n", + "X \n", + "on move: O\n", + "O \n", + " \n", + "X X \n", + "on move: X\n", + "O O \n", + " \n", + "X X \n", + "on move: O\n", + "O O \n", + "X \n", + "X X \n", + "on move: X\n", + "O O \n", + "X O \n", + "X X \n", + "on move: O\n", + "O X O \n", + "X O \n", + "X X \n", + "on move: X\n", + "O X O \n", + "X O O \n", + "X X \n", + "on move: O\n", + "O X O \n", + "X O O \n", + "X X X \n", + "Episode 103, Total Reward: 1\n", + "Average Reward: 0.32038834951456313\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + "O \n", + "X \n", + "on move: O\n", + " \n", + "O \n", + "X X \n", + "on move: X\n", + " O \n", + "O \n", + "X X \n", + "on move: O\n", + "X O \n", + "O \n", + "X X \n", + "on move: X\n", + "X O \n", + "O O \n", + "X X \n", + "on move: O\n", + "X X O \n", + "O O \n", + "X X \n", + "on move: X\n", + "X X O \n", + "O O O \n", + "X X \n", + "Episode 104, Total Reward: -1\n", + "Average Reward: 0.3076923076923077\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + "O \n", + " \n", + "on move: O\n", + "X X \n", + "O \n", + " \n", + "on move: X\n", + "X X \n", + "O O \n", + " \n", + "on move: O\n", + "X X X \n", + "O O \n", + " \n", + "Episode 105, Total Reward: 1\n", + "Average Reward: 0.3142857142857143\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on 
move: X\n", + " \n", + "O X \n", + " \n", + "on move: O\n", + "X \n", + "O X \n", + " \n", + "on move: X\n", + "X O \n", + "O X \n", + " \n", + "on move: O\n", + "X O \n", + "O X \n", + "X \n", + "on move: X\n", + "X O O \n", + "O X \n", + "X \n", + "on move: O\n", + "X O O \n", + "O X \n", + "X X \n", + "on move: X\n", + "X O O \n", + "O O X \n", + "X X \n", + "on move: O\n", + "X O O \n", + "O O X \n", + "X X X \n", + "Episode 106, Total Reward: 1\n", + "Average Reward: 0.32075471698113206\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X O \n", + " \n", + " \n", + "on move: O\n", + "X O \n", + " \n", + " X \n", + "on move: X\n", + "X O \n", + " O \n", + " X \n", + "on move: O\n", + "X O \n", + " O \n", + " X X \n", + "on move: X\n", + "X O \n", + "O O \n", + " X X \n", + "on move: O\n", + "X O \n", + "O X O \n", + " X X \n", + "Episode 107, Total Reward: 1\n", + "Average Reward: 0.32710280373831774\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X \n", + " O \n", + "on move: O\n", + " \n", + "X \n", + " X O \n", + "on move: X\n", + " \n", + "X \n", + "O X O \n", + "on move: O\n", + " \n", + "X X \n", + "O X O \n", + "on move: X\n", + " O \n", + "X X \n", + "O X O \n", + "on move: O\n", + " O \n", + "X X X \n", + "O X O \n", + "Episode 108, Total Reward: 1\n", + "Average Reward: 0.3333333333333333\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " O X \n", + " \n", + " \n", + "on move: O\n", + " O X \n", + " X \n", + " \n", + "on move: X\n", + " O X \n", + " X \n", + " O \n", + "on move: O\n", + "X O X \n", + " X \n", + " O \n", + "on move: X\n", + "X O X \n", + " X \n", + "O O \n", + "on move: O\n", + "X O X \n", + "X X \n", + "O O \n", + "on move: X\n", + "X O X \n", + "X X O \n", + "O O \n", + "on move: O\n", + "X O X \n", + "X X O \n", + "O X O \n", + "Episode 109, Total Reward: 0\n", + "Average Reward: 0.3302752293577982\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: 
X\n", + "X O \n", + " \n", + " \n", + "on move: O\n", + "X O \n", + " \n", + " X \n", + "on move: X\n", + "X O \n", + "O \n", + " X \n", + "on move: O\n", + "X O \n", + "O X \n", + " X \n", + "Episode 110, Total Reward: 1\n", + "Average Reward: 0.33636363636363636\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + " X \n", + " O \n", + "X \n", + "on move: X\n", + " O X \n", + " O \n", + "X \n", + "on move: O\n", + " O X \n", + " O \n", + "X X \n", + "on move: X\n", + "O O X \n", + " O \n", + "X X \n", + "on move: O\n", + "O O X \n", + " X O \n", + "X X \n", + "Episode 111, Total Reward: 1\n", + "Average Reward: 0.34234234234234234\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + " X \n", + " X \n", + "on move: X\n", + " O \n", + "O X \n", + " X \n", + "on move: O\n", + " O \n", + "O X \n", + " X X \n", + "on move: X\n", + " O \n", + "O O X \n", + " X X \n", + "on move: O\n", + " O \n", + "O O X \n", + "X X X \n", + "Episode 112, Total Reward: 1\n", + "Average Reward: 0.3482142857142857\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + "O \n", + "on move: O\n", + " \n", + " X X \n", + "O \n", + "on move: X\n", + " \n", + "O X X \n", + "O \n", + "on move: O\n", + " \n", + "O X X \n", + "O X \n", + "on move: X\n", + " O \n", + "O X X \n", + "O X \n", + "on move: O\n", + " O X \n", + "O X X \n", + "O X \n", + "on move: X\n", + " O X \n", + "O X X \n", + "O X O \n", + "on move: O\n", + "X O X \n", + "O X X \n", + "O X O \n", + "Episode 113, Total Reward: 0\n", + "Average Reward: 0.34513274336283184\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + "O \n", + "on move: O\n", + " X \n", + "X \n", + "O \n", + "on move: X\n", + " X \n", + "X \n", + "O O \n", + "on move: O\n", + " X X \n", + "X \n", + "O O \n", + "on move: X\n", + " X X \n", + "X 
\n", + "O O O \n", + "Episode 114, Total Reward: -1\n", + "Average Reward: 0.3333333333333333\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + " O \n", + "on move: O\n", + "X X \n", + " \n", + " O \n", + "on move: X\n", + "X X \n", + " \n", + "O O \n", + "on move: O\n", + "X X \n", + " X \n", + "O O \n", + "on move: X\n", + "X X O \n", + " X \n", + "O O \n", + "on move: O\n", + "X X O \n", + "X X \n", + "O O \n", + "on move: X\n", + "X X O \n", + "X O X \n", + "O O \n", + "Episode 115, Total Reward: -1\n", + "Average Reward: 0.3217391304347826\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + " O \n", + "on move: O\n", + " X \n", + " X \n", + " O \n", + "on move: X\n", + " X \n", + " X \n", + " O O \n", + "on move: O\n", + " X \n", + " X X \n", + " O O \n", + "on move: X\n", + " X \n", + " X X \n", + "O O O \n", + "Episode 116, Total Reward: -1\n", + "Average Reward: 0.3103448275862069\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + " O \n", + "on move: O\n", + " \n", + "X X \n", + " O \n", + "on move: X\n", + "O \n", + "X X \n", + " O \n", + "on move: O\n", + "O \n", + "X X \n", + " X O \n", + "on move: X\n", + "O O \n", + "X X \n", + " X O \n", + "on move: O\n", + "O O X \n", + "X X \n", + " X O \n", + "on move: X\n", + "O O X \n", + "X X O \n", + " X O \n", + "on move: O\n", + "O O X \n", + "X X O \n", + "X X O \n", + "Episode 117, Total Reward: 1\n", + "Average Reward: 0.3162393162393162\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + "O \n", + "X \n", + " \n", + "on move: O\n", + "O \n", + "X \n", + "X \n", + "on move: X\n", + "O \n", + "X \n", + "X O \n", + "on move: O\n", + "O \n", + "X X \n", + "X O \n", + "on move: X\n", + "O O \n", + "X X \n", + "X O \n", + "on move: O\n", + "O O X \n", + "X X \n", + "X O \n", + "on move: X\n", + "O O X \n", + "X O X \n", + "X O \n", + "Episode 118, Total Reward: -1\n", + 
"Average Reward: 0.3050847457627119\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " \n", + "O X \n", + "on move: O\n", + " X \n", + " \n", + "O X \n", + "on move: X\n", + " X \n", + " O \n", + "O X \n", + "on move: O\n", + "X X \n", + " O \n", + "O X \n", + "on move: X\n", + "X X \n", + "O O \n", + "O X \n", + "on move: O\n", + "X X \n", + "O O \n", + "O X X \n", + "on move: X\n", + "X X O \n", + "O O \n", + "O X X \n", + "Episode 119, Total Reward: -1\n", + "Average Reward: 0.29411764705882354\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " \n", + "X O \n", + "on move: O\n", + " X \n", + " \n", + "X O \n", + "on move: X\n", + " O X \n", + " \n", + "X O \n", + "on move: O\n", + "X O X \n", + " \n", + "X O \n", + "on move: X\n", + "X O X \n", + "O \n", + "X O \n", + "on move: O\n", + "X O X \n", + "O X \n", + "X O \n", + "on move: X\n", + "X O X \n", + "O O X \n", + "X O \n", + "on move: O\n", + "X O X \n", + "O O X \n", + "X X O \n", + "Episode 120, Total Reward: 0\n", + "Average Reward: 0.2916666666666667\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + "O \n", + " X \n", + " \n", + "on move: O\n", + "O \n", + " X \n", + " X \n", + "on move: X\n", + "O \n", + " X O \n", + " X \n", + "on move: O\n", + "O X \n", + " X O \n", + " X \n", + "on move: X\n", + "O X \n", + "O X O \n", + " X \n", + "on move: O\n", + "O X \n", + "O X O \n", + " X X \n", + "Episode 121, Total Reward: 1\n", + "Average Reward: 0.2975206611570248\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + " X \n", + "X O \n", + " \n", + "on move: X\n", + " X \n", + "X O \n", + " O \n", + "on move: O\n", + " X \n", + "X O \n", + "X O \n", + "on move: X\n", + " X \n", + "X O \n", + "X O O \n", + "on move: O\n", + "X X \n", + "X O \n", + "X O O \n", + "Episode 122, Total Reward: 1\n", + "Average Reward: 0.30327868852459017\n", + "on move: O\n", + " 
\n", + " \n", + "X \n", + "on move: X\n", + "O \n", + " \n", + "X \n", + "on move: O\n", + "O \n", + "X \n", + "X \n", + "on move: X\n", + "O \n", + "X O \n", + "X \n", + "on move: O\n", + "O X \n", + "X O \n", + "X \n", + "on move: X\n", + "O X O \n", + "X O \n", + "X \n", + "on move: O\n", + "O X O \n", + "X O \n", + "X X \n", + "on move: X\n", + "O X O \n", + "X O O \n", + "X X \n", + "on move: O\n", + "O X O \n", + "X O O \n", + "X X X \n", + "Episode 123, Total Reward: 1\n", + "Average Reward: 0.3089430894308943\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " \n", + "X O \n", + "on move: O\n", + " X \n", + " \n", + "X O \n", + "on move: X\n", + " X \n", + " O \n", + "X O \n", + "on move: O\n", + " X \n", + " O \n", + "X O X \n", + "on move: X\n", + "O X \n", + " O \n", + "X O X \n", + "on move: O\n", + "O X \n", + " X O \n", + "X O X \n", + "Episode 124, Total Reward: 1\n", + "Average Reward: 0.31451612903225806\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X O \n", + " \n", + "on move: O\n", + " \n", + "X O \n", + " X \n", + "on move: X\n", + " \n", + "X O \n", + " X O \n", + "on move: O\n", + "X \n", + "X O \n", + " X O \n", + "on move: X\n", + "X \n", + "X O O \n", + " X O \n", + "on move: O\n", + "X \n", + "X O O \n", + "X X O \n", + "Episode 125, Total Reward: 1\n", + "Average Reward: 0.32\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " \n", + "X O \n", + "on move: O\n", + " \n", + "X \n", + "X O \n", + "on move: X\n", + " O \n", + "X \n", + "X O \n", + "on move: O\n", + " O \n", + "X X \n", + "X O \n", + "on move: X\n", + " O \n", + "X X \n", + "X O O \n", + "on move: O\n", + "X O \n", + "X X \n", + "X O O \n", + "Episode 126, Total Reward: 1\n", + "Average Reward: 0.3253968253968254\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + "X O \n", + " \n", + " X \n", + "on move: X\n", + "X O 
\n", + " \n", + " O X \n", + "on move: O\n", + "X O \n", + " X \n", + " O X \n", + "on move: X\n", + "X O \n", + " X \n", + "O O X \n", + "on move: O\n", + "X O X \n", + " X \n", + "O O X \n", + "Episode 127, Total Reward: 1\n", + "Average Reward: 0.33070866141732286\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X \n", + " O \n", + "on move: O\n", + " X \n", + "X \n", + " O \n", + "on move: X\n", + " X \n", + "X \n", + "O O \n", + "on move: O\n", + "X X \n", + "X \n", + "O O \n", + "on move: X\n", + "X X \n", + "X O \n", + "O O \n", + "on move: O\n", + "X X X \n", + "X O \n", + "O O \n", + "Episode 128, Total Reward: 1\n", + "Average Reward: 0.3359375\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + " X \n", + " X O \n", + " \n", + "on move: X\n", + " X \n", + "O X O \n", + " \n", + "on move: O\n", + " X \n", + "O X O \n", + "X \n", + "on move: X\n", + " X \n", + "O X O \n", + "X O \n", + "on move: O\n", + "X X \n", + "O X O \n", + "X O \n", + "on move: X\n", + "X X \n", + "O X O \n", + "X O O \n", + "on move: O\n", + "X X X \n", + "O X O \n", + "X O O \n", + "Episode 129, Total Reward: 1\n", + "Average Reward: 0.34108527131782945\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + " X X \n", + " \n", + "on move: X\n", + " O O \n", + " X X \n", + " \n", + "on move: O\n", + "X O O \n", + " X X \n", + " \n", + "on move: X\n", + "X O O \n", + " X X \n", + " O \n", + "on move: O\n", + "X O O \n", + "X X X \n", + " O \n", + "Episode 130, Total Reward: 1\n", + "Average Reward: 0.34615384615384615\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + " X \n", + "X O \n", + " \n", + "on move: X\n", + " X \n", + "X O \n", + " O \n", + "on move: O\n", + "X X \n", + "X O \n", + " O \n", + "on move: X\n", + "X X \n", + "X O \n", + 
"O O \n", + "on move: O\n", + "X X \n", + "X O X \n", + "O O \n", + "on move: X\n", + "X X \n", + "X O X \n", + "O O O \n", + "Episode 131, Total Reward: -1\n", + "Average Reward: 0.33587786259541985\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X \n", + " O \n", + "on move: O\n", + " \n", + "X \n", + "X O \n", + "on move: X\n", + " \n", + "X O \n", + "X O \n", + "on move: O\n", + " X \n", + "X O \n", + "X O \n", + "on move: X\n", + " X O \n", + "X O \n", + "X O \n", + "on move: O\n", + "X X O \n", + "X O \n", + "X O \n", + "Episode 132, Total Reward: 1\n", + "Average Reward: 0.3409090909090909\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X O \n", + " \n", + "on move: O\n", + " \n", + " X O \n", + " X \n", + "on move: X\n", + " \n", + " X O \n", + "O X \n", + "on move: O\n", + " \n", + "X X O \n", + "O X \n", + "on move: X\n", + " \n", + "X X O \n", + "O X O \n", + "on move: O\n", + " X \n", + "X X O \n", + "O X O \n", + "on move: X\n", + " O X \n", + "X X O \n", + "O X O \n", + "on move: O\n", + "X O X \n", + "X X O \n", + "O X O \n", + "Episode 133, Total Reward: 0\n", + "Average Reward: 0.3383458646616541\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + "O \n", + " \n", + " X \n", + "on move: O\n", + "O \n", + "X \n", + " X \n", + "on move: X\n", + "O \n", + "X \n", + "O X \n", + "on move: O\n", + "O X \n", + "X \n", + "O X \n", + "on move: X\n", + "O X \n", + "X O \n", + "O X \n", + "on move: O\n", + "O X X \n", + "X O \n", + "O X \n", + "on move: X\n", + "O X X \n", + "X O \n", + "O X O \n", + "Episode 134, Total Reward: -1\n", + "Average Reward: 0.3283582089552239\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " \n", + " O \n", + "on move: O\n", + "X \n", + "X \n", + " O \n", + "on move: X\n", + "X \n", + "X O \n", + " O \n", + "on move: O\n", + "X \n", + "X O \n", + " X O \n", + "on move: X\n", + "X \n", + "X O \n", + "O X O \n", + 
"on move: O\n", + "X \n", + "X X O \n", + "O X O \n", + "on move: X\n", + "X O \n", + "X X O \n", + "O X O \n", + "Episode 135, Total Reward: -1\n", + "Average Reward: 0.31851851851851853\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + "O X \n", + " \n", + " \n", + "on move: O\n", + "O X \n", + " X \n", + " \n", + "on move: X\n", + "O X \n", + " X \n", + "O \n", + "on move: O\n", + "O X \n", + "X X \n", + "O \n", + "on move: X\n", + "O X \n", + "X X \n", + "O O \n", + "on move: O\n", + "O X \n", + "X X \n", + "O O X \n", + "on move: X\n", + "O X O \n", + "X X \n", + "O O X \n", + "on move: O\n", + "O X O \n", + "X X X \n", + "O O X \n", + "Episode 136, Total Reward: 1\n", + "Average Reward: 0.3235294117647059\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + "O \n", + " X \n", + " \n", + "on move: O\n", + "O \n", + " X X \n", + " \n", + "on move: X\n", + "O O \n", + " X X \n", + " \n", + "on move: O\n", + "O O \n", + " X X \n", + " X \n", + "on move: X\n", + "O O O \n", + " X X \n", + " X \n", + "Episode 137, Total Reward: -1\n", + "Average Reward: 0.31386861313868614\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O X \n", + " X \n", + " \n", + "on move: X\n", + " O X \n", + " X \n", + " O \n", + "on move: O\n", + " O X \n", + " X \n", + "X O \n", + "on move: X\n", + "O O X \n", + " X \n", + "X O \n", + "on move: O\n", + "O O X \n", + "X X \n", + "X O \n", + "on move: X\n", + "O O X \n", + "X X \n", + "X O O \n", + "on move: O\n", + "O O X \n", + "X X X \n", + "X O O \n", + "Episode 138, Total Reward: 1\n", + "Average Reward: 0.3188405797101449\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " O \n", + " \n", + "X \n", + "on move: O\n", + " O \n", + " \n", + "X X \n", + "on move: X\n", + " O O \n", + " \n", + "X X \n", + "on move: O\n", + " O O \n", + "X \n", + "X X \n", + "on move: X\n", + " O O \n", + "X O \n", + "X X \n", + 
"on move: O\n", + " O O \n", + "X O \n", + "X X X \n", + "Episode 139, Total Reward: 1\n", + "Average Reward: 0.3237410071942446\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " \n", + " O \n", + "on move: O\n", + "X \n", + " \n", + "X O \n", + "on move: X\n", + "X \n", + " O \n", + "X O \n", + "on move: O\n", + "X X \n", + " O \n", + "X O \n", + "on move: X\n", + "X O X \n", + " O \n", + "X O \n", + "on move: O\n", + "X O X \n", + "X O \n", + "X O \n", + "Episode 140, Total Reward: 1\n", + "Average Reward: 0.32857142857142857\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + "O \n", + "on move: O\n", + "X X \n", + " \n", + "O \n", + "on move: X\n", + "X X O \n", + " \n", + "O \n", + "on move: O\n", + "X X O \n", + " X \n", + "O \n", + "on move: X\n", + "X X O \n", + " X \n", + "O O \n", + "on move: O\n", + "X X O \n", + " X \n", + "O O X \n", + "Episode 141, Total Reward: 1\n", + "Average Reward: 0.3333333333333333\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + " O \n", + " \n", + "X X \n", + "on move: X\n", + " O O \n", + " \n", + "X X \n", + "on move: O\n", + " O O \n", + " \n", + "X X X \n", + "Episode 142, Total Reward: 1\n", + "Average Reward: 0.3380281690140845\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X \n", + " O \n", + "on move: O\n", + " \n", + "X \n", + "X O \n", + "on move: X\n", + " \n", + "X O \n", + "X O \n", + "on move: O\n", + " X \n", + "X O \n", + "X O \n", + "on move: X\n", + " X O \n", + "X O \n", + "X O \n", + "Episode 143, Total Reward: -1\n", + "Average Reward: 0.32867132867132864\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " O \n", + "X \n", + "on move: O\n", + " \n", + " O \n", + "X X \n", + "on move: X\n", + " O \n", + " O \n", + "X X \n", + "on move: O\n", + " O \n", + " O \n", + "X X X \n", + "Episode 144, Total Reward: 
1\n", + "Average Reward: 0.3333333333333333\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + "O \n", + " \n", + " X \n", + "on move: O\n", + "O \n", + " \n", + " X X \n", + "on move: X\n", + "O \n", + " \n", + "O X X \n", + "on move: O\n", + "O \n", + " X \n", + "O X X \n", + "on move: X\n", + "O \n", + "O X \n", + "O X X \n", + "Episode 145, Total Reward: -1\n", + "Average Reward: 0.32413793103448274\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + "O X \n", + " \n", + " \n", + "on move: O\n", + "O X \n", + " X \n", + " \n", + "on move: X\n", + "O X \n", + " X \n", + "O \n", + "on move: O\n", + "O X X \n", + " X \n", + "O \n", + "on move: X\n", + "O X X \n", + " X \n", + "O O \n", + "on move: O\n", + "O X X \n", + " X \n", + "O X O \n", + "Episode 146, Total Reward: 1\n", + "Average Reward: 0.3287671232876712\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " O \n", + " \n", + "on move: O\n", + "X \n", + " X O \n", + " \n", + "on move: X\n", + "X \n", + "O X O \n", + " \n", + "on move: O\n", + "X \n", + "O X O \n", + " X \n", + "on move: X\n", + "X O \n", + "O X O \n", + " X \n", + "on move: O\n", + "X O X \n", + "O X O \n", + " X \n", + "on move: X\n", + "X O X \n", + "O X O \n", + " X O \n", + "on move: O\n", + "X O X \n", + "O X O \n", + "X X O \n", + "Episode 147, Total Reward: 1\n", + "Average Reward: 0.3333333333333333\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " \n", + "X O \n", + "on move: O\n", + " \n", + " X \n", + "X O \n", + "on move: X\n", + " \n", + " O X \n", + "X O \n", + "on move: O\n", + " \n", + "X O X \n", + "X O \n", + "on move: X\n", + " \n", + "X O X \n", + "X O O \n", + "on move: O\n", + " X \n", + "X O X \n", + "X O O \n", + "on move: X\n", + "O X \n", + "X O X \n", + "X O O \n", + "Episode 148, Total Reward: -1\n", + "Average Reward: 0.32432432432432434\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", 
+ "O X \n", + " \n", + "on move: O\n", + " \n", + "O X X \n", + " \n", + "on move: X\n", + "O \n", + "O X X \n", + " \n", + "on move: O\n", + "O \n", + "O X X \n", + "X \n", + "on move: X\n", + "O \n", + "O X X \n", + "X O \n", + "on move: O\n", + "O X \n", + "O X X \n", + "X O \n", + "on move: X\n", + "O X O \n", + "O X X \n", + "X O \n", + "on move: O\n", + "O X O \n", + "O X X \n", + "X O X \n", + "Episode 149, Total Reward: 0\n", + "Average Reward: 0.3221476510067114\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + "X \n", + " O \n", + " X \n", + "on move: X\n", + "X \n", + " O \n", + "O X \n", + "on move: O\n", + "X X \n", + " O \n", + "O X \n", + "on move: X\n", + "X X \n", + " O O \n", + "O X \n", + "on move: O\n", + "X X \n", + "X O O \n", + "O X \n", + "on move: X\n", + "X O X \n", + "X O O \n", + "O X \n", + "on move: O\n", + "X O X \n", + "X O O \n", + "O X X \n", + "Episode 150, Total Reward: 0\n", + "Average Reward: 0.32\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + "O \n", + " X \n", + " \n", + "on move: O\n", + "O X \n", + " X \n", + " \n", + "on move: X\n", + "O X \n", + " X \n", + "O \n", + "on move: O\n", + "O X X \n", + " X \n", + "O \n", + "on move: X\n", + "O X X \n", + "O X \n", + "O \n", + "Episode 151, Total Reward: -1\n", + "Average Reward: 0.31125827814569534\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + " O \n", + " \n", + "X X \n", + "on move: X\n", + "O O \n", + " \n", + "X X \n", + "on move: O\n", + "O O \n", + "X \n", + "X X \n", + "on move: X\n", + "O O \n", + "X O \n", + "X X \n", + "on move: O\n", + "O O X \n", + "X O \n", + "X X \n", + "on move: X\n", + "O O X \n", + "X O O \n", + "X X \n", + "on move: O\n", + "O O X \n", + "X O O \n", + "X X X \n", + "Episode 152, Total Reward: 1\n", + "Average Reward: 0.3157894736842105\n", + "on move: O\n", + " \n", + " X \n", 
+ " \n", + "on move: X\n", + " \n", + "O X \n", + " \n", + "on move: O\n", + " X \n", + "O X \n", + " \n", + "on move: X\n", + " X O \n", + "O X \n", + " \n", + "on move: O\n", + " X O \n", + "O X X \n", + " \n", + "on move: X\n", + " X O \n", + "O X X \n", + " O \n", + "on move: O\n", + " X O \n", + "O X X \n", + "X O \n", + "on move: X\n", + "O X O \n", + "O X X \n", + "X O \n", + "on move: O\n", + "O X O \n", + "O X X \n", + "X X O \n", + "Episode 153, Total Reward: 1\n", + "Average Reward: 0.3202614379084967\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + "O \n", + " \n", + " X \n", + "on move: O\n", + "O \n", + " X \n", + " X \n", + "on move: X\n", + "O O \n", + " X \n", + " X \n", + "on move: O\n", + "O O \n", + " X \n", + "X X \n", + "on move: X\n", + "O O \n", + " O X \n", + "X X \n", + "on move: O\n", + "O O \n", + "X O X \n", + "X X \n", + "on move: X\n", + "O O \n", + "X O X \n", + "X X O \n", + "Episode 154, Total Reward: -1\n", + "Average Reward: 0.3116883116883117\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + "O X \n", + " \n", + "on move: O\n", + " X \n", + "O X \n", + " \n", + "on move: X\n", + " X \n", + "O X \n", + " O \n", + "on move: O\n", + " X X \n", + "O X \n", + " O \n", + "on move: X\n", + " X X \n", + "O X \n", + "O O \n", + "on move: O\n", + "X X X \n", + "O X \n", + "O O \n", + "Episode 155, Total Reward: 1\n", + "Average Reward: 0.3161290322580645\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + "X O \n", + " \n", + " X \n", + "on move: X\n", + "X O O \n", + " \n", + " X \n", + "on move: O\n", + "X O O \n", + " X \n", + " X \n", + "on move: X\n", + "X O O \n", + " X \n", + "O X \n", + "on move: O\n", + "X O O \n", + " X X \n", + "O X \n", + "on move: X\n", + "X O O \n", + " X X \n", + "O X O \n", + "on move: O\n", + "X O O \n", + "X X X \n", + "O X O \n", + "Episode 156, Total Reward: 1\n", + "Average Reward: 
0.32051282051282054\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " O \n", + " \n", + "on move: O\n", + "X \n", + " O \n", + " X \n", + "on move: X\n", + "X \n", + " O O \n", + " X \n", + "on move: O\n", + "X \n", + " O O \n", + "X X \n", + "on move: X\n", + "X O \n", + " O O \n", + "X X \n", + "on move: O\n", + "X O \n", + " O O \n", + "X X X \n", + "Episode 157, Total Reward: 1\n", + "Average Reward: 0.3248407643312102\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " O \n", + "X \n", + "on move: O\n", + " \n", + " O X \n", + "X \n", + "on move: X\n", + " O \n", + " O X \n", + "X \n", + "on move: O\n", + " O \n", + " O X \n", + "X X \n", + "on move: X\n", + "O O \n", + " O X \n", + "X X \n", + "on move: O\n", + "O O \n", + " O X \n", + "X X X \n", + "Episode 158, Total Reward: 1\n", + "Average Reward: 0.3291139240506329\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + " X \n", + "X \n", + "on move: X\n", + " O \n", + "O X \n", + "X \n", + "on move: O\n", + " O \n", + "O X \n", + "X X \n", + "on move: X\n", + " O \n", + "O X \n", + "X X O \n", + "on move: O\n", + " O \n", + "O X X \n", + "X X O \n", + "on move: X\n", + " O O \n", + "O X X \n", + "X X O \n", + "on move: O\n", + "X O O \n", + "O X X \n", + "X X O \n", + "Episode 159, Total Reward: 0\n", + "Average Reward: 0.3270440251572327\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X \n", + " O \n", + "on move: O\n", + " \n", + "X \n", + "X O \n", + "on move: X\n", + " \n", + "X \n", + "X O O \n", + "on move: O\n", + " X \n", + "X \n", + "X O O \n", + "on move: X\n", + " X \n", + "X O \n", + "X O O \n", + "on move: O\n", + " X X \n", + "X O \n", + "X O O \n", + "on move: X\n", + " X X \n", + "X O O \n", + "X O O \n", + "on move: O\n", + "X X X \n", + "X O O \n", + "X O O \n", + "Episode 160, Total Reward: 1\n", + "Average Reward: 
0.33125\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + " X O \n", + " \n", + " X \n", + "on move: X\n", + " X O \n", + " \n", + " O X \n", + "on move: O\n", + "X X O \n", + " \n", + " O X \n", + "on move: X\n", + "X X O \n", + "O \n", + " O X \n", + "on move: O\n", + "X X O \n", + "O \n", + "X O X \n", + "on move: X\n", + "X X O \n", + "O O \n", + "X O X \n", + "on move: O\n", + "X X O \n", + "O X O \n", + "X O X \n", + "Episode 161, Total Reward: 1\n", + "Average Reward: 0.33540372670807456\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + "O \n", + "on move: O\n", + "X X \n", + " \n", + "O \n", + "on move: X\n", + "X X \n", + " O \n", + "O \n", + "on move: O\n", + "X X \n", + " X O \n", + "O \n", + "on move: X\n", + "X X \n", + " X O \n", + "O O \n", + "on move: O\n", + "X X \n", + "X X O \n", + "O O \n", + "on move: X\n", + "X O X \n", + "X X O \n", + "O O \n", + "on move: O\n", + "X O X \n", + "X X O \n", + "O O X \n", + "Episode 162, Total Reward: 1\n", + "Average Reward: 0.3395061728395062\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + " O \n", + "on move: O\n", + " \n", + " X X \n", + " O \n", + "on move: X\n", + " \n", + "O X X \n", + " O \n", + "on move: O\n", + " \n", + "O X X \n", + "X O \n", + "on move: X\n", + " O \n", + "O X X \n", + "X O \n", + "on move: O\n", + " O \n", + "O X X \n", + "X X O \n", + "on move: X\n", + " O O \n", + "O X X \n", + "X X O \n", + "on move: O\n", + "X O O \n", + "O X X \n", + "X X O \n", + "Episode 163, Total Reward: 0\n", + "Average Reward: 0.3374233128834356\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + " O \n", + " \n", + "X X \n", + "on move: X\n", + " O \n", + "O \n", + "X X \n", + "on move: O\n", + " O \n", + "O X \n", + "X X \n", + "on move: X\n", + " O \n", + "O O X \n", + "X X \n", + 
"on move: O\n", + " O \n", + "O O X \n", + "X X X \n", + "Episode 164, Total Reward: 1\n", + "Average Reward: 0.34146341463414637\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X \n", + " O \n", + "on move: O\n", + " \n", + "X X \n", + " O \n", + "on move: X\n", + "O \n", + "X X \n", + " O \n", + "on move: O\n", + "O \n", + "X X \n", + " O X \n", + "on move: X\n", + "O O \n", + "X X \n", + " O X \n", + "on move: O\n", + "O O X \n", + "X X \n", + " O X \n", + "on move: X\n", + "O O X \n", + "X X \n", + "O O X \n", + "on move: O\n", + "O O X \n", + "X X X \n", + "O O X \n", + "Episode 165, Total Reward: 1\n", + "Average Reward: 0.34545454545454546\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + "O \n", + " \n", + " X \n", + "on move: O\n", + "O X \n", + " \n", + " X \n", + "on move: X\n", + "O X \n", + " O \n", + " X \n", + "on move: O\n", + "O X X \n", + " O \n", + " X \n", + "on move: X\n", + "O X X \n", + " O O \n", + " X \n", + "on move: O\n", + "O X X \n", + "X O O \n", + " X \n", + "on move: X\n", + "O X X \n", + "X O O \n", + "O X \n", + "on move: O\n", + "O X X \n", + "X O O \n", + "O X X \n", + "Episode 166, Total Reward: 0\n", + "Average Reward: 0.3433734939759036\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + " X \n", + " O \n", + " X \n", + "on move: X\n", + " X \n", + " O O \n", + " X \n", + "on move: O\n", + " X \n", + " O O \n", + "X X \n", + "on move: X\n", + " X \n", + "O O O \n", + "X X \n", + "Episode 167, Total Reward: -1\n", + "Average Reward: 0.33532934131736525\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + " X \n", + " X \n", + "on move: X\n", + " O \n", + " O X \n", + " X \n", + "on move: O\n", + " O \n", + "X O X \n", + " X \n", + "on move: X\n", + " O O \n", + "X O X \n", + " X \n", + "on move: O\n", + " O O \n", + "X O X \n", + " 
X X \n", + "on move: X\n", + "O O O \n", + "X O X \n", + " X X \n", + "Episode 168, Total Reward: -1\n", + "Average Reward: 0.3273809523809524\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + "O \n", + "X \n", + " \n", + "on move: O\n", + "O \n", + "X X \n", + " \n", + "on move: X\n", + "O \n", + "X X \n", + "O \n", + "on move: O\n", + "O \n", + "X X \n", + "O X \n", + "on move: X\n", + "O O \n", + "X X \n", + "O X \n", + "on move: O\n", + "O O \n", + "X X X \n", + "O X \n", + "Episode 169, Total Reward: 1\n", + "Average Reward: 0.33136094674556216\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + " O \n", + "on move: O\n", + " X \n", + " \n", + " X O \n", + "on move: X\n", + "O X \n", + " \n", + " X O \n", + "on move: O\n", + "O X \n", + " \n", + "X X O \n", + "on move: X\n", + "O O X \n", + " \n", + "X X O \n", + "on move: O\n", + "O O X \n", + " X \n", + "X X O \n", + "on move: X\n", + "O O X \n", + " O X \n", + "X X O \n", + "Episode 170, Total Reward: -1\n", + "Average Reward: 0.3235294117647059\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + " X O \n", + " \n", + " X \n", + "on move: X\n", + " X O \n", + "O \n", + " X \n", + "on move: O\n", + " X O \n", + "O \n", + "X X \n", + "on move: X\n", + " X O \n", + "O O \n", + "X X \n", + "on move: O\n", + "X X O \n", + "O O \n", + "X X \n", + "on move: X\n", + "X X O \n", + "O O O \n", + "X X \n", + "Episode 171, Total Reward: -1\n", + "Average Reward: 0.3157894736842105\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " \n", + " O \n", + "on move: O\n", + "X \n", + " X \n", + " O \n", + "on move: X\n", + "X \n", + "O X \n", + " O \n", + "on move: O\n", + "X \n", + "O X \n", + " X O \n", + "on move: X\n", + "X O \n", + "O X \n", + " X O \n", + "on move: O\n", + "X O \n", + "O X \n", + "X X O \n", + "on move: X\n", + "X O O \n", + "O X \n", + "X X O \n", 
+ "on move: O\n", + "X O O \n", + "O X X \n", + "X X O \n", + "Episode 172, Total Reward: 0\n", + "Average Reward: 0.313953488372093\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X O \n", + " \n", + " \n", + "on move: O\n", + "X O \n", + " X \n", + " \n", + "on move: X\n", + "X O \n", + " X \n", + "O \n", + "on move: O\n", + "X O \n", + " X \n", + "O X \n", + "on move: X\n", + "X O \n", + "O X \n", + "O X \n", + "on move: O\n", + "X O \n", + "O X \n", + "O X X \n", + "on move: X\n", + "X O O \n", + "O X \n", + "O X X \n", + "on move: O\n", + "X O O \n", + "O X X \n", + "O X X \n", + "Episode 173, Total Reward: 1\n", + "Average Reward: 0.3179190751445087\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " O \n", + " \n", + "X \n", + "on move: O\n", + " O \n", + "X \n", + "X \n", + "on move: X\n", + " O \n", + "X \n", + "X O \n", + "on move: O\n", + "X O \n", + "X \n", + "X O \n", + "Episode 174, Total Reward: 1\n", + "Average Reward: 0.3218390804597701\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + "O X \n", + " \n", + " \n", + "on move: O\n", + "O X \n", + " \n", + " X \n", + "on move: X\n", + "O X \n", + " \n", + " X O \n", + "on move: O\n", + "O X \n", + " \n", + "X X O \n", + "on move: X\n", + "O X \n", + " O \n", + "X X O \n", + "on move: O\n", + "O X \n", + "X O \n", + "X X O \n", + "on move: X\n", + "O O X \n", + "X O \n", + "X X O \n", + "on move: O\n", + "O O X \n", + "X X O \n", + "X X O \n", + "Episode 175, Total Reward: 1\n", + "Average Reward: 0.32571428571428573\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + " X X \n", + " \n", + "on move: X\n", + "O O \n", + " X X \n", + " \n", + "on move: O\n", + "O O \n", + "X X X \n", + " \n", + "Episode 176, Total Reward: 1\n", + "Average Reward: 0.32954545454545453\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + "O \n", + " \n", 
+ "on move: O\n", + "X X \n", + "O \n", + " \n", + "on move: X\n", + "X X O \n", + "O \n", + " \n", + "on move: O\n", + "X X O \n", + "O X \n", + " \n", + "on move: X\n", + "X X O \n", + "O X \n", + "O \n", + "on move: O\n", + "X X O \n", + "O X \n", + "O X \n", + "Episode 177, Total Reward: 1\n", + "Average Reward: 0.3333333333333333\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + "O \n", + " X \n", + "on move: O\n", + "X \n", + "O \n", + " X \n", + "on move: X\n", + "X O \n", + "O \n", + " X \n", + "on move: O\n", + "X O \n", + "O \n", + "X X \n", + "on move: X\n", + "X O \n", + "O O \n", + "X X \n", + "on move: O\n", + "X X O \n", + "O O \n", + "X X \n", + "on move: X\n", + "X X O \n", + "O O \n", + "X O X \n", + "on move: O\n", + "X X O \n", + "O X O \n", + "X O X \n", + "Episode 178, Total Reward: 1\n", + "Average Reward: 0.33707865168539325\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + " X \n", + " X \n", + "on move: X\n", + "O O \n", + " X \n", + " X \n", + "on move: O\n", + "O O \n", + " X \n", + " X X \n", + "on move: X\n", + "O O \n", + " X \n", + "O X X \n", + "on move: O\n", + "O O \n", + "X X \n", + "O X X \n", + "on move: X\n", + "O O \n", + "X O X \n", + "O X X \n", + "Episode 179, Total Reward: -1\n", + "Average Reward: 0.329608938547486\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + "O X \n", + " \n", + "on move: O\n", + "X \n", + "O X \n", + " \n", + "on move: X\n", + "X \n", + "O X \n", + "O \n", + "on move: O\n", + "X X \n", + "O X \n", + "O \n", + "on move: X\n", + "X X \n", + "O X O \n", + "O \n", + "on move: O\n", + "X X \n", + "O X O \n", + "O X \n", + "on move: X\n", + "X X \n", + "O X O \n", + "O X O \n", + "on move: O\n", + "X X X \n", + "O X O \n", + "O X O \n", + "Episode 180, Total Reward: 1\n", + "Average Reward: 0.3333333333333333\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on 
move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + "X O \n", + " \n", + " X \n", + "on move: X\n", + "X O O \n", + " \n", + " X \n", + "on move: O\n", + "X O O \n", + "X \n", + " X \n", + "on move: X\n", + "X O O \n", + "X O \n", + " X \n", + "on move: O\n", + "X O O \n", + "X X O \n", + " X \n", + "Episode 181, Total Reward: 1\n", + "Average Reward: 0.3370165745856354\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + " X \n", + "X \n", + "on move: X\n", + " O \n", + " X \n", + "X O \n", + "on move: O\n", + " O X \n", + " X \n", + "X O \n", + "Episode 182, Total Reward: 1\n", + "Average Reward: 0.34065934065934067\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + " X X \n", + " \n", + "on move: X\n", + " O \n", + " X X \n", + "O \n", + "on move: O\n", + " O \n", + " X X \n", + "O X \n", + "on move: X\n", + " O \n", + " X X \n", + "O O X \n", + "on move: O\n", + " O \n", + "X X X \n", + "O O X \n", + "Episode 183, Total Reward: 1\n", + "Average Reward: 0.3442622950819672\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + " X \n", + " O \n", + " X \n", + "on move: X\n", + " X \n", + " O \n", + " O X \n", + "on move: O\n", + " X \n", + " O \n", + "X O X \n", + "on move: X\n", + "O X \n", + " O \n", + "X O X \n", + "on move: O\n", + "O X \n", + "X O \n", + "X O X \n", + "on move: X\n", + "O O X \n", + "X O \n", + "X O X \n", + "Episode 184, Total Reward: -1\n", + "Average Reward: 0.33695652173913043\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " O \n", + "X \n", + " \n", + "on move: O\n", + " O \n", + "X \n", + " X \n", + "on move: X\n", + " O \n", + "X \n", + "O X \n", + "on move: O\n", + " O \n", + "X \n", + "O X X \n", + "on move: X\n", + " O \n", + "X O \n", + "O X X \n", + "on move: O\n", + "X O \n", 
+ "X O \n", + "O X X \n", + "on move: X\n", + "X O O \n", + "X O \n", + "O X X \n", + "Episode 185, Total Reward: -1\n", + "Average Reward: 0.32972972972972975\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + " O \n", + "on move: O\n", + " X \n", + " X \n", + " O \n", + "on move: X\n", + " X \n", + " X \n", + "O O \n", + "on move: O\n", + " X \n", + " X \n", + "O O X \n", + "on move: X\n", + " X \n", + " X O \n", + "O O X \n", + "on move: O\n", + "X X \n", + " X O \n", + "O O X \n", + "Episode 186, Total Reward: 1\n", + "Average Reward: 0.3333333333333333\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + "O \n", + " \n", + "on move: O\n", + " X \n", + "O X \n", + " \n", + "on move: X\n", + " O X \n", + "O X \n", + " \n", + "on move: O\n", + " O X \n", + "O X \n", + " X \n", + "on move: X\n", + " O X \n", + "O X \n", + " O X \n", + "on move: O\n", + " O X \n", + "O X X \n", + " O X \n", + "Episode 187, Total Reward: 1\n", + "Average Reward: 0.33689839572192515\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X \n", + "O \n", + "on move: O\n", + " X \n", + "X \n", + "O \n", + "on move: X\n", + " X O \n", + "X \n", + "O \n", + "on move: O\n", + " X O \n", + "X \n", + "O X \n", + "on move: X\n", + " X O \n", + "X O \n", + "O X \n", + "Episode 188, Total Reward: -1\n", + "Average Reward: 0.32978723404255317\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + " O \n", + "on move: O\n", + " X \n", + " X \n", + " O \n", + "on move: X\n", + " X \n", + " X \n", + "O O \n", + "on move: O\n", + "X X \n", + " X \n", + "O O \n", + "on move: X\n", + "X X \n", + "O X \n", + "O O \n", + "on move: O\n", + "X X \n", + "O X \n", + "O X O \n", + "on move: X\n", + "X X O \n", + "O X \n", + "O X O \n", + "on move: O\n", + "X X O \n", + "O X X \n", + "O X O \n", + "Episode 189, Total Reward: 1\n", + "Average Reward: 0.3333333333333333\n", + "on 
move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " O \n", + " \n", + "on move: O\n", + "X X \n", + " O \n", + " \n", + "on move: X\n", + "X X \n", + " O \n", + "O \n", + "on move: O\n", + "X X \n", + " O \n", + "O X \n", + "on move: X\n", + "X X \n", + "O O \n", + "O X \n", + "on move: O\n", + "X X \n", + "O O \n", + "O X X \n", + "on move: X\n", + "X O X \n", + "O O \n", + "O X X \n", + "on move: O\n", + "X O X \n", + "O O X \n", + "O X X \n", + "Episode 190, Total Reward: 1\n", + "Average Reward: 0.3368421052631579\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + "O \n", + " \n", + " X \n", + "on move: O\n", + "O \n", + "X \n", + " X \n", + "on move: X\n", + "O \n", + "X O \n", + " X \n", + "on move: O\n", + "O \n", + "X X O \n", + " X \n", + "on move: X\n", + "O O \n", + "X X O \n", + " X \n", + "on move: O\n", + "O O \n", + "X X O \n", + " X X \n", + "on move: X\n", + "O O \n", + "X X O \n", + "O X X \n", + "on move: O\n", + "O X O \n", + "X X O \n", + "O X X \n", + "Episode 191, Total Reward: 1\n", + "Average Reward: 0.3403141361256545\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + " O \n", + "on move: O\n", + " \n", + " X \n", + "X O \n", + "on move: X\n", + "O \n", + " X \n", + "X O \n", + "on move: O\n", + "O \n", + " X \n", + "X X O \n", + "on move: X\n", + "O O \n", + " X \n", + "X X O \n", + "on move: O\n", + "O O \n", + "X X \n", + "X X O \n", + "on move: X\n", + "O O O \n", + "X X \n", + "X X O \n", + "Episode 192, Total Reward: -1\n", + "Average Reward: 0.3333333333333333\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + "O \n", + " \n", + " X \n", + "on move: O\n", + "O \n", + "X \n", + " X \n", + "on move: X\n", + "O \n", + "X \n", + " X O \n", + "on move: O\n", + "O X \n", + "X \n", + " X O \n", + "on move: X\n", + "O X \n", + "X O \n", + " X O \n", + "Episode 193, Total Reward: -1\n", + "Average Reward: 0.32642487046632124\n", + "on move: 
O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X \n", + "O \n", + "on move: O\n", + " X \n", + "X \n", + "O \n", + "on move: X\n", + " X \n", + "X O \n", + "O \n", + "on move: O\n", + " X X \n", + "X O \n", + "O \n", + "on move: X\n", + " X X \n", + "X O O \n", + "O \n", + "on move: O\n", + "X X X \n", + "X O O \n", + "O \n", + "Episode 194, Total Reward: 1\n", + "Average Reward: 0.32989690721649484\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " O \n", + "X \n", + "on move: O\n", + " X \n", + " O \n", + "X \n", + "on move: X\n", + " X \n", + "O O \n", + "X \n", + "on move: O\n", + " X X \n", + "O O \n", + "X \n", + "on move: X\n", + "O X X \n", + "O O \n", + "X \n", + "on move: O\n", + "O X X \n", + "O O X \n", + "X \n", + "on move: X\n", + "O X X \n", + "O O X \n", + "X O \n", + "Episode 195, Total Reward: -1\n", + "Average Reward: 0.3230769230769231\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + " X \n", + " O \n", + " X \n", + "on move: X\n", + " X \n", + " O \n", + " X O \n", + "on move: O\n", + " X \n", + " O \n", + "X X O \n", + "on move: X\n", + " X \n", + " O O \n", + "X X O \n", + "on move: O\n", + "X X \n", + " O O \n", + "X X O \n", + "on move: X\n", + "X X O \n", + " O O \n", + "X X O \n", + "Episode 196, Total Reward: -1\n", + "Average Reward: 0.3163265306122449\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + "O \n", + " \n", + "X \n", + "on move: O\n", + "O \n", + " \n", + "X X \n", + "on move: X\n", + "O O \n", + " \n", + "X X \n", + "on move: O\n", + "O O \n", + "X \n", + "X X \n", + "on move: X\n", + "O O O \n", + "X \n", + "X X \n", + "Episode 197, Total Reward: -1\n", + "Average Reward: 0.3096446700507614\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " O X \n", + " \n", + " \n", + "on move: O\n", + " O X \n", + " X \n", + " \n", + "on move: X\n", + " O X \n", + " X \n", + " O 
\n", + "on move: O\n", + " O X \n", + " X \n", + "X O \n", + "on move: X\n", + " O X \n", + " O X \n", + "X O \n", + "Episode 198, Total Reward: -1\n", + "Average Reward: 0.30303030303030304\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X O \n", + " \n", + " \n", + "on move: O\n", + "X O X \n", + " \n", + " \n", + "on move: X\n", + "X O X \n", + " \n", + " O \n", + "on move: O\n", + "X O X \n", + " \n", + " X O \n", + "on move: X\n", + "X O X \n", + "O \n", + " X O \n", + "on move: O\n", + "X O X \n", + "O X \n", + " X O \n", + "on move: X\n", + "X O X \n", + "O X \n", + "O X O \n", + "on move: O\n", + "X O X \n", + "O X X \n", + "O X O \n", + "Episode 199, Total Reward: 0\n", + "Average Reward: 0.3015075376884422\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + "O \n", + " \n", + "on move: O\n", + " X \n", + "O \n", + " X \n", + "on move: X\n", + " X \n", + "O O \n", + " X \n", + "on move: O\n", + " X \n", + "O X O \n", + " X \n", + "Episode 200, Total Reward: 1\n", + "Average Reward: 0.305\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + "X X \n", + " O \n", + " \n", + "on move: X\n", + "X X \n", + " O \n", + " O \n", + "on move: O\n", + "X X \n", + " O \n", + "X O \n", + "on move: X\n", + "X O X \n", + " O \n", + "X O \n", + "on move: O\n", + "X O X \n", + " O X \n", + "X O \n", + "on move: X\n", + "X O X \n", + "O O X \n", + "X O \n", + "on move: O\n", + "X O X \n", + "O O X \n", + "X X O \n", + "Episode 201, Total Reward: 0\n", + "Average Reward: 0.3034825870646766\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " \n", + "O \n", + "on move: O\n", + "X \n", + "X \n", + "O \n", + "on move: X\n", + "X \n", + "X \n", + "O O \n", + "on move: O\n", + "X X \n", + "X \n", + "O O \n", + "on move: X\n", + "X X \n", + "X O \n", + "O O \n", + "on move: O\n", + "X X \n", + "X O \n", + "O X O \n", + "on move: X\n", 
+ "X X O \n", + "X O \n", + "O X O \n", + "Episode 202, Total Reward: -1\n", + "Average Reward: 0.297029702970297\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " O \n", + "X \n", + "on move: O\n", + " \n", + " O \n", + "X X \n", + "on move: X\n", + " O \n", + " O \n", + "X X \n", + "on move: O\n", + " O \n", + " X O \n", + "X X \n", + "on move: X\n", + " O O \n", + " X O \n", + "X X \n", + "on move: O\n", + " O O \n", + " X O \n", + "X X X \n", + "Episode 203, Total Reward: 1\n", + "Average Reward: 0.30049261083743845\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X O \n", + " \n", + "on move: O\n", + "X \n", + "X O \n", + " \n", + "on move: X\n", + "X \n", + "X O O \n", + " \n", + "on move: O\n", + "X X \n", + "X O O \n", + " \n", + "on move: X\n", + "X X \n", + "X O O \n", + " O \n", + "on move: O\n", + "X X \n", + "X O O \n", + "X O \n", + "Episode 204, Total Reward: 1\n", + "Average Reward: 0.30392156862745096\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + "O \n", + "on move: O\n", + " X \n", + "X \n", + "O \n", + "on move: X\n", + " X \n", + "X \n", + "O O \n", + "on move: O\n", + " X \n", + "X X \n", + "O O \n", + "on move: X\n", + " O X \n", + "X X \n", + "O O \n", + "on move: O\n", + " O X \n", + "X X X \n", + "O O \n", + "Episode 205, Total Reward: 1\n", + "Average Reward: 0.3073170731707317\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X O \n", + " \n", + " \n", + "on move: O\n", + " X O \n", + " \n", + "X \n", + "on move: X\n", + " X O \n", + " O \n", + "X \n", + "on move: O\n", + " X O \n", + "X O \n", + "X \n", + "on move: X\n", + "O X O \n", + "X O \n", + "X \n", + "on move: O\n", + "O X O \n", + "X O \n", + "X X \n", + "on move: X\n", + "O X O \n", + "X O O \n", + "X X \n", + "on move: O\n", + "O X O \n", + "X O O \n", + "X X X \n", + "Episode 206, Total Reward: 1\n", + "Average Reward: 0.3106796116504854\n", + "on 
move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + "O \n", + " \n", + " X \n", + "on move: O\n", + "O \n", + " \n", + "X X \n", + "on move: X\n", + "O \n", + "O \n", + "X X \n", + "on move: O\n", + "O \n", + "O \n", + "X X X \n", + "Episode 207, Total Reward: 1\n", + "Average Reward: 0.3140096618357488\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " \n", + "X O \n", + "on move: O\n", + " \n", + "X \n", + "X O \n", + "on move: X\n", + "O \n", + "X \n", + "X O \n", + "on move: O\n", + "O X \n", + "X \n", + "X O \n", + "on move: X\n", + "O O X \n", + "X \n", + "X O \n", + "on move: O\n", + "O O X \n", + "X \n", + "X X O \n", + "on move: X\n", + "O O X \n", + "X O \n", + "X X O \n", + "Episode 208, Total Reward: -1\n", + "Average Reward: 0.3076923076923077\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X O \n", + " \n", + " \n", + "on move: O\n", + "X O \n", + " X \n", + " \n", + "on move: X\n", + "X O \n", + " X \n", + " O \n", + "on move: O\n", + "X O \n", + " X X \n", + " O \n", + "on move: X\n", + "X O \n", + "O X X \n", + " O \n", + "on move: O\n", + "X O X \n", + "O X X \n", + " O \n", + "on move: X\n", + "X O X \n", + "O X X \n", + "O O \n", + "on move: O\n", + "X O X \n", + "O X X \n", + "O O X \n", + "Episode 209, Total Reward: 1\n", + "Average Reward: 0.31100478468899523\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + "O \n", + " X \n", + "on move: O\n", + " X \n", + "O \n", + " X \n", + "on move: X\n", + " X \n", + "O O \n", + " X \n", + "on move: O\n", + " X \n", + "O O X \n", + " X \n", + "on move: X\n", + "O X \n", + "O O X \n", + " X \n", + "on move: O\n", + "O X \n", + "O O X \n", + "X X \n", + "on move: X\n", + "O X \n", + "O O X \n", + "X X O \n", + "Episode 210, Total Reward: -1\n", + "Average Reward: 0.3047619047619048\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + "O \n", + "on move: O\n", + " X \n", + " X 
\n", + "O \n", + "on move: X\n", + " X \n", + " X \n", + "O O \n", + "on move: O\n", + " X X \n", + " X \n", + "O O \n", + "on move: X\n", + "O X X \n", + " X \n", + "O O \n", + "on move: O\n", + "O X X \n", + "X X \n", + "O O \n", + "on move: X\n", + "O X X \n", + "X X \n", + "O O O \n", + "Episode 211, Total Reward: -1\n", + "Average Reward: 0.2985781990521327\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + "O X \n", + " \n", + " \n", + "on move: O\n", + "O X \n", + " \n", + "X \n", + "on move: X\n", + "O X \n", + " O \n", + "X \n", + "on move: O\n", + "O X \n", + " O \n", + "X X \n", + "on move: X\n", + "O X \n", + " O O \n", + "X X \n", + "on move: O\n", + "O X \n", + " O O \n", + "X X X \n", + "Episode 212, Total Reward: 1\n", + "Average Reward: 0.3018867924528302\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X \n", + " O \n", + "on move: O\n", + " X \n", + "X \n", + " O \n", + "on move: X\n", + " O X \n", + "X \n", + " O \n", + "on move: O\n", + " O X \n", + "X \n", + " O X \n", + "on move: X\n", + " O X \n", + "X O \n", + " O X \n", + "on move: O\n", + "X O X \n", + "X O \n", + " O X \n", + "on move: X\n", + "X O X \n", + "X O O \n", + " O X \n", + "Episode 213, Total Reward: -1\n", + "Average Reward: 0.29577464788732394\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + " O \n", + " \n", + " X X \n", + "on move: X\n", + " O \n", + " O \n", + " X X \n", + "on move: O\n", + "X O \n", + " O \n", + " X X \n", + "on move: X\n", + "X O \n", + " O \n", + "O X X \n", + "on move: O\n", + "X O \n", + "X O \n", + "O X X \n", + "on move: X\n", + "X O O \n", + "X O \n", + "O X X \n", + "Episode 214, Total Reward: -1\n", + "Average Reward: 0.2897196261682243\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + " X \n", + " O \n", + "X \n", + "on move: X\n", + " X \n", + " O \n", + 
"X O \n", + "on move: O\n", + " X \n", + " O \n", + "X O X \n", + "on move: X\n", + " X \n", + "O O \n", + "X O X \n", + "on move: O\n", + " X X \n", + "O O \n", + "X O X \n", + "on move: X\n", + " X X \n", + "O O O \n", + "X O X \n", + "Episode 215, Total Reward: -1\n", + "Average Reward: 0.2837209302325581\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " O \n", + "X \n", + "on move: O\n", + "X \n", + " O \n", + "X \n", + "on move: X\n", + "X O \n", + " O \n", + "X \n", + "on move: O\n", + "X O \n", + " O \n", + "X X \n", + "on move: X\n", + "X O \n", + " O \n", + "X X O \n", + "on move: O\n", + "X O \n", + " O X \n", + "X X O \n", + "on move: X\n", + "X O O \n", + " O X \n", + "X X O \n", + "on move: O\n", + "X O O \n", + "X O X \n", + "X X O \n", + "Episode 216, Total Reward: 1\n", + "Average Reward: 0.28703703703703703\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " \n", + "X O \n", + "on move: O\n", + "X \n", + " \n", + "X O \n", + "on move: X\n", + "X \n", + "O \n", + "X O \n", + "on move: O\n", + "X \n", + "O X \n", + "X O \n", + "on move: X\n", + "X \n", + "O X \n", + "X O O \n", + "on move: O\n", + "X X \n", + "O X \n", + "X O O \n", + "on move: X\n", + "X X \n", + "O X O \n", + "X O O \n", + "on move: O\n", + "X X X \n", + "O X O \n", + "X O O \n", + "Episode 217, Total Reward: 1\n", + "Average Reward: 0.2903225806451613\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + " O \n", + "on move: O\n", + " \n", + " X X \n", + " O \n", + "on move: X\n", + " \n", + "O X X \n", + " O \n", + "on move: O\n", + " \n", + "O X X \n", + "X O \n", + "on move: X\n", + "O \n", + "O X X \n", + "X O \n", + "on move: O\n", + "O \n", + "O X X \n", + "X O X \n", + "on move: X\n", + "O O \n", + "O X X \n", + "X O X \n", + "on move: O\n", + "O X O \n", + "O X X \n", + "X O X \n", + "Episode 218, Total Reward: 0\n", + "Average Reward: 0.2889908256880734\n", + "on move: O\n", + 
" X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + " O \n", + "on move: O\n", + " X \n", + " \n", + " O X \n", + "on move: X\n", + " X \n", + "O \n", + " O X \n", + "on move: O\n", + " X \n", + "O X \n", + " O X \n", + "on move: X\n", + " X \n", + "O X \n", + "O O X \n", + "on move: O\n", + " X \n", + "O X X \n", + "O O X \n", + "on move: X\n", + "O X \n", + "O X X \n", + "O O X \n", + "Episode 219, Total Reward: -1\n", + "Average Reward: 0.2831050228310502\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + "O \n", + " \n", + "on move: O\n", + " X \n", + "O \n", + " X \n", + "on move: X\n", + " X \n", + "O \n", + "O X \n", + "on move: O\n", + "X X \n", + "O \n", + "O X \n", + "on move: X\n", + "X X \n", + "O \n", + "O X O \n", + "on move: O\n", + "X X \n", + "O X \n", + "O X O \n", + "on move: X\n", + "X O X \n", + "O X \n", + "O X O \n", + "on move: O\n", + "X O X \n", + "O X X \n", + "O X O \n", + "Episode 220, Total Reward: 0\n", + "Average Reward: 0.2818181818181818\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + "O \n", + " \n", + " X \n", + "on move: O\n", + "O \n", + " \n", + " X X \n", + "on move: X\n", + "O \n", + " O \n", + " X X \n", + "on move: O\n", + "O X \n", + " O \n", + " X X \n", + "on move: X\n", + "O X \n", + "O O \n", + " X X \n", + "on move: O\n", + "O X \n", + "O X O \n", + " X X \n", + "on move: X\n", + "O X \n", + "O X O \n", + "O X X \n", + "Episode 221, Total Reward: -1\n", + "Average Reward: 0.27601809954751133\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + "O \n", + " X \n", + "on move: O\n", + " \n", + "O \n", + "X X \n", + "on move: X\n", + " \n", + "O O \n", + "X X \n", + "on move: O\n", + "X \n", + "O O \n", + "X X \n", + "on move: X\n", + "X \n", + "O O \n", + "X X O \n", + "on move: O\n", + "X X \n", + "O O \n", + "X X O \n", + "on move: X\n", + "X X \n", + "O O O \n", + "X X O \n", + "Episode 222, Total Reward: -1\n", + "Average 
Reward: 0.2702702702702703\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + "O \n", + " \n", + "on move: O\n", + "X X \n", + "O \n", + " \n", + "on move: X\n", + "X X \n", + "O O \n", + " \n", + "on move: O\n", + "X X \n", + "O O \n", + " X \n", + "on move: X\n", + "X O X \n", + "O O \n", + " X \n", + "on move: O\n", + "X O X \n", + "O X O \n", + " X \n", + "on move: X\n", + "X O X \n", + "O X O \n", + " X O \n", + "on move: O\n", + "X O X \n", + "O X O \n", + "X X O \n", + "Episode 223, Total Reward: 1\n", + "Average Reward: 0.273542600896861\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + " O \n", + "on move: O\n", + " X \n", + " X \n", + " O \n", + "on move: X\n", + " X \n", + " X \n", + "O O \n", + "on move: O\n", + " X \n", + " X \n", + "O X O \n", + "on move: X\n", + " X \n", + " O X \n", + "O X O \n", + "on move: O\n", + " X \n", + "X O X \n", + "O X O \n", + "on move: X\n", + " O X \n", + "X O X \n", + "O X O \n", + "on move: O\n", + "X O X \n", + "X O X \n", + "O X O \n", + "Episode 224, Total Reward: 0\n", + "Average Reward: 0.27232142857142855\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " O \n", + "X \n", + "on move: O\n", + " \n", + " X O \n", + "X \n", + "on move: X\n", + " \n", + " X O \n", + "X O \n", + "on move: O\n", + " \n", + " X O \n", + "X O X \n", + "on move: X\n", + "O \n", + " X O \n", + "X O X \n", + "on move: O\n", + "O \n", + "X X O \n", + "X O X \n", + "on move: X\n", + "O O \n", + "X X O \n", + "X O X \n", + "on move: O\n", + "O O X \n", + "X X O \n", + "X O X \n", + "Episode 225, Total Reward: 1\n", + "Average Reward: 0.27555555555555555\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + " O \n", + "on move: O\n", + " \n", + "X X \n", + " O \n", + "on move: X\n", + " \n", + "X X \n", + "O O \n", + "on move: O\n", + " \n", + "X X X \n", + "O O \n", + "Episode 226, Total Reward: 1\n", + 
"Average Reward: 0.27876106194690264\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " O X \n", + " \n", + "on move: O\n", + " \n", + " O X \n", + "X \n", + "on move: X\n", + " \n", + "O O X \n", + "X \n", + "on move: O\n", + " X \n", + "O O X \n", + "X \n", + "on move: X\n", + " X \n", + "O O X \n", + "X O \n", + "on move: O\n", + "X X \n", + "O O X \n", + "X O \n", + "on move: X\n", + "X O X \n", + "O O X \n", + "X O \n", + "Episode 227, Total Reward: -1\n", + "Average Reward: 0.27312775330396477\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " O \n", + " \n", + "X \n", + "on move: O\n", + " O \n", + "X \n", + "X \n", + "on move: X\n", + " O \n", + "X \n", + "X O \n", + "on move: O\n", + " O \n", + "X X \n", + "X O \n", + "on move: X\n", + "O O \n", + "X X \n", + "X O \n", + "on move: O\n", + "O O \n", + "X X X \n", + "X O \n", + "Episode 228, Total Reward: 1\n", + "Average Reward: 0.27631578947368424\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + " X \n", + " O \n", + " X \n", + "on move: X\n", + " X \n", + " O \n", + "O X \n", + "on move: O\n", + " X \n", + " O X \n", + "O X \n", + "on move: X\n", + " X \n", + "O O X \n", + "O X \n", + "on move: O\n", + " X \n", + "O O X \n", + "O X X \n", + "on move: X\n", + " X O \n", + "O O X \n", + "O X X \n", + "Episode 229, Total Reward: -1\n", + "Average Reward: 0.27074235807860264\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + "O \n", + " X \n", + " \n", + "on move: O\n", + "O \n", + " X \n", + "X \n", + "on move: X\n", + "O \n", + " X \n", + "X O \n", + "on move: O\n", + "O \n", + "X X \n", + "X O \n", + "on move: X\n", + "O O \n", + "X X \n", + "X O \n", + "on move: O\n", + "O O \n", + "X X \n", + "X X O \n", + "on move: X\n", + "O O \n", + "X X O \n", + "X X O \n", + "Episode 230, Total Reward: -1\n", + "Average Reward: 0.26521739130434785\n", + "on move: O\n", + " X 
\n", + " \n", + " \n", + "on move: X\n", + " O X \n", + " \n", + " \n", + "on move: O\n", + " O X \n", + " \n", + " X \n", + "on move: X\n", + " O X \n", + " O \n", + " X \n", + "on move: O\n", + " O X \n", + "X O \n", + " X \n", + "on move: X\n", + "O O X \n", + "X O \n", + " X \n", + "on move: O\n", + "O O X \n", + "X O \n", + " X X \n", + "on move: X\n", + "O O X \n", + "X O O \n", + " X X \n", + "on move: O\n", + "O O X \n", + "X O O \n", + "X X X \n", + "Episode 231, Total Reward: 1\n", + "Average Reward: 0.2683982683982684\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + " O \n", + "on move: O\n", + " X \n", + " X \n", + " O \n", + "on move: X\n", + " X \n", + " X \n", + "O O \n", + "on move: O\n", + " X \n", + " X \n", + "O O X \n", + "on move: X\n", + " X \n", + " X O \n", + "O O X \n", + "on move: O\n", + " X \n", + "X X O \n", + "O O X \n", + "on move: X\n", + " O X \n", + "X X O \n", + "O O X \n", + "on move: O\n", + "X O X \n", + "X X O \n", + "O O X \n", + "Episode 232, Total Reward: 1\n", + "Average Reward: 0.27155172413793105\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + " O \n", + "on move: O\n", + " X \n", + " \n", + "X O \n", + "on move: X\n", + " O X \n", + " \n", + "X O \n", + "on move: O\n", + " O X \n", + " X \n", + "X O \n", + "Episode 233, Total Reward: 1\n", + "Average Reward: 0.27467811158798283\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + "O \n", + " \n", + "on move: O\n", + "X X \n", + "O \n", + " \n", + "on move: X\n", + "X X \n", + "O \n", + " O \n", + "on move: O\n", + "X X \n", + "O X \n", + " O \n", + "on move: X\n", + "X X \n", + "O X \n", + "O O \n", + "on move: O\n", + "X X X \n", + "O X \n", + "O O \n", + "Episode 234, Total Reward: 1\n", + "Average Reward: 0.2777777777777778\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + "X 
X \n", + " \n", + "on move: X\n", + " O O \n", + "X X \n", + " \n", + "on move: O\n", + " O O \n", + "X X \n", + " X \n", + "on move: X\n", + " O O \n", + "X X \n", + "O X \n", + "on move: O\n", + " O O \n", + "X X \n", + "O X X \n", + "on move: X\n", + " O O \n", + "X X O \n", + "O X X \n", + "on move: O\n", + "X O O \n", + "X X O \n", + "O X X \n", + "Episode 235, Total Reward: 1\n", + "Average Reward: 0.28085106382978725\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + "O \n", + " \n", + " X \n", + "on move: O\n", + "O \n", + "X \n", + " X \n", + "on move: X\n", + "O \n", + "X O \n", + " X \n", + "on move: O\n", + "O \n", + "X O X \n", + " X \n", + "on move: X\n", + "O O \n", + "X O X \n", + " X \n", + "on move: O\n", + "O O X \n", + "X O X \n", + " X \n", + "on move: X\n", + "O O X \n", + "X O X \n", + " X O \n", + "Episode 236, Total Reward: -1\n", + "Average Reward: 0.2754237288135593\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + "O \n", + "X \n", + " \n", + "on move: O\n", + "O \n", + "X X \n", + " \n", + "on move: X\n", + "O O \n", + "X X \n", + " \n", + "on move: O\n", + "O X O \n", + "X X \n", + " \n", + "on move: X\n", + "O X O \n", + "X X \n", + "O \n", + "on move: O\n", + "O X O \n", + "X X \n", + "O X \n", + "on move: X\n", + "O X O \n", + "X X \n", + "O X O \n", + "on move: O\n", + "O X O \n", + "X X X \n", + "O X O \n", + "Episode 237, Total Reward: 1\n", + "Average Reward: 0.27848101265822783\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + " O \n", + "X \n", + " X \n", + "on move: X\n", + "O O \n", + "X \n", + " X \n", + "on move: O\n", + "O O \n", + "X X \n", + " X \n", + "on move: X\n", + "O O \n", + "X X \n", + "O X \n", + "on move: O\n", + "O O \n", + "X X \n", + "O X X \n", + "on move: X\n", + "O O \n", + "X O X \n", + "O X X \n", + "Episode 238, Total Reward: -1\n", + "Average Reward: 0.27310924369747897\n", + "on move: O\n", 
+ " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + " X \n", + "X \n", + "on move: X\n", + " O \n", + "O X \n", + "X \n", + "on move: O\n", + " O \n", + "O X X \n", + "X \n", + "on move: X\n", + " O O \n", + "O X X \n", + "X \n", + "on move: O\n", + "X O O \n", + "O X X \n", + "X \n", + "on move: X\n", + "X O O \n", + "O X X \n", + "X O \n", + "on move: O\n", + "X O O \n", + "O X X \n", + "X O X \n", + "Episode 239, Total Reward: 1\n", + "Average Reward: 0.27615062761506276\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + "O \n", + "on move: O\n", + " X X \n", + " \n", + "O \n", + "on move: X\n", + " X X \n", + " O \n", + "O \n", + "on move: O\n", + " X X \n", + "X O \n", + "O \n", + "on move: X\n", + "O X X \n", + "X O \n", + "O \n", + "on move: O\n", + "O X X \n", + "X O \n", + "O X \n", + "on move: X\n", + "O X X \n", + "X O \n", + "O O X \n", + "on move: O\n", + "O X X \n", + "X O X \n", + "O O X \n", + "Episode 240, Total Reward: 1\n", + "Average Reward: 0.2791666666666667\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " O \n", + "X \n", + "on move: O\n", + " X \n", + " O \n", + "X \n", + "on move: X\n", + "O X \n", + " O \n", + "X \n", + "on move: O\n", + "O X \n", + " O X \n", + "X \n", + "on move: X\n", + "O X \n", + "O O X \n", + "X \n", + "on move: O\n", + "O X \n", + "O O X \n", + "X X \n", + "on move: X\n", + "O X O \n", + "O O X \n", + "X X \n", + "on move: O\n", + "O X O \n", + "O O X \n", + "X X X \n", + "Episode 241, Total Reward: 1\n", + "Average Reward: 0.2821576763485477\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " \n", + " O X \n", + "on move: O\n", + " \n", + " \n", + "X O X \n", + "on move: X\n", + " \n", + " O \n", + "X O X \n", + "on move: O\n", + " X \n", + " O \n", + "X O X \n", + "on move: X\n", + " X \n", + " O O \n", + "X O X \n", + "on move: O\n", + " X X \n", + " O O 
\n", + "X O X \n", + "on move: X\n", + " X X \n", + "O O O \n", + "X O X \n", + "Episode 242, Total Reward: -1\n", + "Average Reward: 0.2768595041322314\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + "O X \n", + " \n", + "on move: O\n", + "X \n", + "O X \n", + " \n", + "on move: X\n", + "X \n", + "O O X \n", + " \n", + "on move: O\n", + "X \n", + "O O X \n", + " X \n", + "on move: X\n", + "X O \n", + "O O X \n", + " X \n", + "on move: O\n", + "X O \n", + "O O X \n", + " X X \n", + "on move: X\n", + "X O \n", + "O O X \n", + "O X X \n", + "Episode 243, Total Reward: -1\n", + "Average Reward: 0.2716049382716049\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " O \n", + " \n", + "on move: O\n", + "X \n", + " O \n", + "X \n", + "on move: X\n", + "X \n", + "O O \n", + "X \n", + "on move: O\n", + "X X \n", + "O O \n", + "X \n", + "on move: X\n", + "X X \n", + "O O \n", + "X O \n", + "on move: O\n", + "X X \n", + "O X O \n", + "X O \n", + "on move: X\n", + "X X \n", + "O X O \n", + "X O O \n", + "on move: O\n", + "X X X \n", + "O X O \n", + "X O O \n", + "Episode 244, Total Reward: 1\n", + "Average Reward: 0.27459016393442626\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + "O \n", + " \n", + " X \n", + "on move: O\n", + "O \n", + " X \n", + " X \n", + "on move: X\n", + "O \n", + " X O \n", + " X \n", + "on move: O\n", + "O X \n", + " X O \n", + " X \n", + "on move: X\n", + "O X \n", + "O X O \n", + " X \n", + "on move: O\n", + "O X X \n", + "O X O \n", + " X \n", + "on move: X\n", + "O X X \n", + "O X O \n", + "O X \n", + "Episode 245, Total Reward: -1\n", + "Average Reward: 0.2693877551020408\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + " O \n", + "on move: O\n", + " \n", + " X \n", + "X O \n", + "on move: X\n", + " \n", + " X \n", + "X O O \n", + "on move: O\n", + " X \n", + " X \n", + "X O O \n", + "on move: X\n", + " X \n", + " X O \n", 
+ "X O O \n", + "on move: O\n", + " X X \n", + " X O \n", + "X O O \n", + "Episode 246, Total Reward: 1\n", + "Average Reward: 0.27235772357723576\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " \n", + " O \n", + "on move: O\n", + "X X \n", + " \n", + " O \n", + "on move: X\n", + "X X \n", + " O \n", + " O \n", + "on move: O\n", + "X X \n", + "X O \n", + " O \n", + "on move: X\n", + "X X \n", + "X O \n", + "O O \n", + "on move: O\n", + "X X \n", + "X O X \n", + "O O \n", + "on move: X\n", + "X X \n", + "X O X \n", + "O O O \n", + "Episode 247, Total Reward: -1\n", + "Average Reward: 0.26720647773279355\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + "O X \n", + " \n", + "on move: O\n", + " X \n", + "O X \n", + " \n", + "on move: X\n", + " X \n", + "O X \n", + "O \n", + "on move: O\n", + "X X \n", + "O X \n", + "O \n", + "on move: X\n", + "X X \n", + "O O X \n", + "O \n", + "on move: O\n", + "X X \n", + "O O X \n", + "O X \n", + "on move: X\n", + "X X \n", + "O O X \n", + "O X O \n", + "on move: O\n", + "X X X \n", + "O O X \n", + "O X O \n", + "Episode 248, Total Reward: 1\n", + "Average Reward: 0.2701612903225806\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + " X X \n", + " \n", + "on move: X\n", + "O O \n", + " X X \n", + " \n", + "on move: O\n", + "O O \n", + "X X X \n", + " \n", + "Episode 249, Total Reward: 1\n", + "Average Reward: 0.27309236947791166\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " \n", + " X O \n", + "on move: O\n", + " \n", + " \n", + "X X O \n", + "on move: X\n", + " O \n", + " \n", + "X X O \n", + "on move: O\n", + " O \n", + " X \n", + "X X O \n", + "on move: X\n", + " O \n", + " X O \n", + "X X O \n", + "Episode 250, Total Reward: -1\n", + "Average Reward: 0.268\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + "O \n", + "X \n", 
+ "on move: O\n", + " X \n", + "O \n", + "X \n", + "on move: X\n", + " X \n", + "O O \n", + "X \n", + "on move: O\n", + "X X \n", + "O O \n", + "X \n", + "on move: X\n", + "X X \n", + "O O \n", + "X O \n", + "on move: O\n", + "X X \n", + "O O \n", + "X X O \n", + "on move: X\n", + "X X O \n", + "O O \n", + "X X O \n", + "on move: O\n", + "X X O \n", + "O O X \n", + "X X O \n", + "Episode 251, Total Reward: 0\n", + "Average Reward: 0.26693227091633465\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " \n", + "X O \n", + "on move: O\n", + " \n", + " \n", + "X X O \n", + "on move: X\n", + " O \n", + " \n", + "X X O \n", + "on move: O\n", + " O \n", + "X \n", + "X X O \n", + "on move: X\n", + " O \n", + "X O \n", + "X X O \n", + "on move: O\n", + "X O \n", + "X O \n", + "X X O \n", + "Episode 252, Total Reward: 1\n", + "Average Reward: 0.2698412698412698\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + " O \n", + "on move: O\n", + "X X \n", + " \n", + " O \n", + "on move: X\n", + "X X \n", + " \n", + "O O \n", + "on move: O\n", + "X X \n", + " \n", + "O O X \n", + "on move: X\n", + "X O X \n", + " \n", + "O O X \n", + "on move: O\n", + "X O X \n", + " X \n", + "O O X \n", + "Episode 253, Total Reward: 1\n", + "Average Reward: 0.2727272727272727\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + " O \n", + "on move: O\n", + " X X \n", + " \n", + " O \n", + "on move: X\n", + " X X \n", + "O \n", + " O \n", + "on move: O\n", + "X X X \n", + "O \n", + " O \n", + "Episode 254, Total Reward: 1\n", + "Average Reward: 0.2755905511811024\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + "X O \n", + " \n", + " X \n", + "on move: X\n", + "X O \n", + " O \n", + " X \n", + "on move: O\n", + "X O \n", + " O X \n", + " X \n", + "on move: X\n", + "X O O \n", + " O X \n", + " X \n", + "on move: O\n", + "X O 
O \n", + "X O X \n", + " X \n", + "on move: X\n", + "X O O \n", + "X O X \n", + "O X \n", + "Episode 255, Total Reward: -1\n", + "Average Reward: 0.27058823529411763\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + " X X \n", + " O \n", + " \n", + "on move: X\n", + "O X X \n", + " O \n", + " \n", + "on move: O\n", + "O X X \n", + " O \n", + "X \n", + "on move: X\n", + "O X X \n", + " O \n", + "X O \n", + "on move: O\n", + "O X X \n", + "X O \n", + "X O \n", + "on move: X\n", + "O X X \n", + "X O \n", + "X O O \n", + "on move: O\n", + "O X X \n", + "X X O \n", + "X O O \n", + "Episode 256, Total Reward: 1\n", + "Average Reward: 0.2734375\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " O \n", + " \n", + "on move: O\n", + "X X \n", + " O \n", + " \n", + "on move: X\n", + "X X \n", + " O \n", + " O \n", + "on move: O\n", + "X X \n", + " O \n", + " X O \n", + "on move: X\n", + "X X \n", + " O O \n", + " X O \n", + "on move: O\n", + "X X \n", + "X O O \n", + " X O \n", + "on move: X\n", + "X O X \n", + "X O O \n", + " X O \n", + "on move: O\n", + "X O X \n", + "X O O \n", + "X X O \n", + "Episode 257, Total Reward: 1\n", + "Average Reward: 0.27626459143968873\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " \n", + " X O \n", + "on move: O\n", + " \n", + " X \n", + " X O \n", + "on move: X\n", + " O \n", + " X \n", + " X O \n", + "on move: O\n", + " O \n", + "X X \n", + " X O \n", + "on move: X\n", + " O \n", + "X X O \n", + " X O \n", + "on move: O\n", + "X O \n", + "X X O \n", + " X O \n", + "on move: X\n", + "X O \n", + "X X O \n", + "O X O \n", + "on move: O\n", + "X O X \n", + "X X O \n", + "O X O \n", + "Episode 258, Total Reward: 0\n", + "Average Reward: 0.2751937984496124\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " O \n", + " \n", + "on move: O\n", + "X X \n", + " O \n", + " \n", + "on move: 
X\n", + "X X \n", + " O \n", + " O \n", + "on move: O\n", + "X X \n", + "X O \n", + " O \n", + "on move: X\n", + "X X \n", + "X O \n", + "O O \n", + "on move: O\n", + "X X X \n", + "X O \n", + "O O \n", + "Episode 259, Total Reward: 1\n", + "Average Reward: 0.277992277992278\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + " O \n", + "on move: O\n", + " X X \n", + " \n", + " O \n", + "on move: X\n", + " X X \n", + " O \n", + " O \n", + "on move: O\n", + "X X X \n", + " O \n", + " O \n", + "Episode 260, Total Reward: 1\n", + "Average Reward: 0.28076923076923077\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + "O X \n", + " \n", + " \n", + "on move: O\n", + "O X \n", + "X \n", + " \n", + "on move: X\n", + "O O X \n", + "X \n", + " \n", + "on move: O\n", + "O O X \n", + "X \n", + "X \n", + "on move: X\n", + "O O X \n", + "X O \n", + "X \n", + "on move: O\n", + "O O X \n", + "X O \n", + "X X \n", + "on move: X\n", + "O O X \n", + "X O O \n", + "X X \n", + "on move: O\n", + "O O X \n", + "X O O \n", + "X X X \n", + "Episode 261, Total Reward: 1\n", + "Average Reward: 0.2835249042145594\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " O \n", + " \n", + "on move: O\n", + "X X \n", + " O \n", + " \n", + "on move: X\n", + "X X \n", + "O O \n", + " \n", + "on move: O\n", + "X X \n", + "O O \n", + "X \n", + "on move: X\n", + "X X \n", + "O O \n", + "X O \n", + "on move: O\n", + "X X \n", + "O O \n", + "X O X \n", + "on move: X\n", + "X O X \n", + "O O \n", + "X O X \n", + "on move: O\n", + "X O X \n", + "O X O \n", + "X O X \n", + "Episode 262, Total Reward: 1\n", + "Average Reward: 0.2862595419847328\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X \n", + " O \n", + "on move: O\n", + " \n", + "X X \n", + " O \n", + "on move: X\n", + "O \n", + "X X \n", + " O \n", + "on move: O\n", + "O X \n", + "X X \n", + " O \n", + "on move: X\n", + "O X \n", + 
"X X O \n", + " O \n", + "on move: O\n", + "O X \n", + "X X O \n", + "X O \n", + "on move: X\n", + "O X O \n", + "X X O \n", + "X O \n", + "on move: O\n", + "O X O \n", + "X X O \n", + "X O X \n", + "Episode 263, Total Reward: 0\n", + "Average Reward: 0.28517110266159695\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " \n", + "X O \n", + "on move: O\n", + " X \n", + " \n", + "X O \n", + "on move: X\n", + " X \n", + "O \n", + "X O \n", + "on move: O\n", + " X \n", + "O \n", + "X O X \n", + "on move: X\n", + "O X \n", + "O \n", + "X O X \n", + "on move: O\n", + "O X \n", + "O X \n", + "X O X \n", + "Episode 264, Total Reward: 1\n", + "Average Reward: 0.2878787878787879\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + "O \n", + " \n", + "on move: O\n", + "X X \n", + "O \n", + " \n", + "on move: X\n", + "X O X \n", + "O \n", + " \n", + "on move: O\n", + "X O X \n", + "O \n", + " X \n", + "on move: X\n", + "X O X \n", + "O O \n", + " X \n", + "on move: O\n", + "X O X \n", + "O O \n", + " X X \n", + "on move: X\n", + "X O X \n", + "O O \n", + "O X X \n", + "on move: O\n", + "X O X \n", + "O O X \n", + "O X X \n", + "Episode 265, Total Reward: 1\n", + "Average Reward: 0.29056603773584905\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + "O \n", + " X \n", + " \n", + "on move: O\n", + "O \n", + " X \n", + " X \n", + "on move: X\n", + "O \n", + " X O \n", + " X \n", + "on move: O\n", + "O X \n", + " X O \n", + " X \n", + "on move: X\n", + "O X O \n", + " X O \n", + " X \n", + "on move: O\n", + "O X O \n", + " X O \n", + " X X \n", + "Episode 266, Total Reward: 1\n", + "Average Reward: 0.2932330827067669\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + "O \n", + " X \n", + "on move: O\n", + " X \n", + "O \n", + " X \n", + "on move: X\n", + " X \n", + "O \n", + "O X \n", + "on move: O\n", + " X \n", + "O X \n", + "O X \n", + "on move: X\n", + "O X \n", + "O X 
\n", + "O X \n", + "Episode 267, Total Reward: -1\n", + "Average Reward: 0.2883895131086142\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + "O \n", + " \n", + "X \n", + "on move: O\n", + "O \n", + " \n", + "X X \n", + "on move: X\n", + "O O \n", + " \n", + "X X \n", + "on move: O\n", + "O O \n", + " X \n", + "X X \n", + "on move: X\n", + "O O O \n", + " X \n", + "X X \n", + "Episode 268, Total Reward: -1\n", + "Average Reward: 0.2835820895522388\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + " X \n", + " X \n", + "on move: X\n", + " O O \n", + " X \n", + " X \n", + "on move: O\n", + " O O \n", + " X \n", + " X X \n", + "on move: X\n", + "O O O \n", + " X \n", + " X X \n", + "Episode 269, Total Reward: -1\n", + "Average Reward: 0.2788104089219331\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " \n", + "X O \n", + "on move: O\n", + " \n", + " X \n", + "X O \n", + "on move: X\n", + " O \n", + " X \n", + "X O \n", + "on move: O\n", + " X O \n", + " X \n", + "X O \n", + "on move: X\n", + " X O \n", + " X O \n", + "X O \n", + "on move: O\n", + " X O \n", + " X O \n", + "X O X \n", + "on move: X\n", + "O X O \n", + " X O \n", + "X O X \n", + "on move: O\n", + "O X O \n", + "X X O \n", + "X O X \n", + "Episode 270, Total Reward: 0\n", + "Average Reward: 0.2777777777777778\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X O \n", + " \n", + " \n", + "on move: O\n", + " X O \n", + "X \n", + " \n", + "on move: X\n", + " X O \n", + "X O \n", + " \n", + "on move: O\n", + " X O \n", + "X O \n", + " X \n", + "on move: X\n", + " X O \n", + "X O \n", + "O X \n", + "Episode 271, Total Reward: -1\n", + "Average Reward: 0.2730627306273063\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X \n", + "O \n", + "on move: O\n", + " X \n", + "X \n", + "O \n", + "on move: X\n", + " X \n", + "X \n", + "O 
O \n", + "on move: O\n", + "X X \n", + "X \n", + "O O \n", + "on move: X\n", + "X X \n", + "X O \n", + "O O \n", + "on move: O\n", + "X X \n", + "X O X \n", + "O O \n", + "on move: X\n", + "X X \n", + "X O X \n", + "O O O \n", + "Episode 272, Total Reward: -1\n", + "Average Reward: 0.26838235294117646\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " \n", + "O \n", + "on move: O\n", + "X X \n", + " \n", + "O \n", + "on move: X\n", + "X X \n", + "O \n", + "O \n", + "on move: O\n", + "X X X \n", + "O \n", + "O \n", + "Episode 273, Total Reward: 1\n", + "Average Reward: 0.27106227106227104\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X O \n", + " \n", + "on move: O\n", + " X \n", + "X O \n", + " \n", + "on move: X\n", + " X \n", + "X O \n", + "O \n", + "on move: O\n", + " X X \n", + "X O \n", + "O \n", + "on move: X\n", + "O X X \n", + "X O \n", + "O \n", + "on move: O\n", + "O X X \n", + "X O \n", + "O X \n", + "on move: X\n", + "O X X \n", + "X O \n", + "O O X \n", + "on move: O\n", + "O X X \n", + "X O X \n", + "O O X \n", + "Episode 274, Total Reward: 1\n", + "Average Reward: 0.2737226277372263\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " \n", + "X O \n", + "on move: O\n", + "X \n", + " \n", + "X O \n", + "on move: X\n", + "X \n", + "O \n", + "X O \n", + "on move: O\n", + "X X \n", + "O \n", + "X O \n", + "on move: X\n", + "X X \n", + "O \n", + "X O O \n", + "on move: O\n", + "X X \n", + "O X \n", + "X O O \n", + "on move: X\n", + "X X O \n", + "O X \n", + "X O O \n", + "on move: O\n", + "X X O \n", + "O X X \n", + "X O O \n", + "Episode 275, Total Reward: 0\n", + "Average Reward: 0.2727272727272727\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X \n", + "O \n", + "on move: O\n", + "X \n", + "X \n", + "O \n", + "on move: X\n", + "X \n", + "X \n", + "O O \n", + "on move: O\n", + "X \n", + "X X \n", + "O O \n", + "on move: X\n", + 
"X O \n", + "X X \n", + "O O \n", + "on move: O\n", + "X O \n", + "X X X \n", + "O O \n", + "Episode 276, Total Reward: 1\n", + "Average Reward: 0.2753623188405797\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X O \n", + " \n", + "on move: O\n", + "X \n", + "X O \n", + " \n", + "on move: X\n", + "X O \n", + "X O \n", + " \n", + "on move: O\n", + "X O \n", + "X O \n", + " X \n", + "on move: X\n", + "X O O \n", + "X O \n", + " X \n", + "on move: O\n", + "X O O \n", + "X O X \n", + " X \n", + "on move: X\n", + "X O O \n", + "X O X \n", + "O X \n", + "Episode 277, Total Reward: -1\n", + "Average Reward: 0.27075812274368233\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + "X X \n", + " \n", + "on move: X\n", + " O \n", + "X X \n", + "O \n", + "on move: O\n", + " O \n", + "X X \n", + "O X \n", + "on move: X\n", + " O \n", + "X X O \n", + "O X \n", + "on move: O\n", + "X O \n", + "X X O \n", + "O X \n", + "Episode 278, Total Reward: 1\n", + "Average Reward: 0.2733812949640288\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + "O X \n", + " \n", + " \n", + "on move: O\n", + "O X \n", + " X \n", + " \n", + "on move: X\n", + "O X \n", + " X \n", + "O \n", + "on move: O\n", + "O X \n", + " X \n", + "O X \n", + "Episode 279, Total Reward: 1\n", + "Average Reward: 0.27598566308243727\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + "X X \n", + " \n", + "on move: X\n", + "O O \n", + "X X \n", + " \n", + "on move: O\n", + "O O \n", + "X X \n", + " X \n", + "on move: X\n", + "O O \n", + "X X \n", + " O X \n", + "on move: O\n", + "O O \n", + "X X X \n", + " O X \n", + "Episode 280, Total Reward: 1\n", + "Average Reward: 0.2785714285714286\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + " X \n", + 
" O \n", + " X \n", + "on move: X\n", + " X \n", + " O \n", + "O X \n", + "on move: O\n", + " X X \n", + " O \n", + "O X \n", + "on move: X\n", + "O X X \n", + " O \n", + "O X \n", + "on move: O\n", + "O X X \n", + " O \n", + "O X X \n", + "on move: X\n", + "O X X \n", + "O O \n", + "O X X \n", + "Episode 281, Total Reward: -1\n", + "Average Reward: 0.27402135231316727\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " \n", + " O \n", + "on move: O\n", + "X X \n", + " \n", + " O \n", + "on move: X\n", + "X X \n", + " \n", + " O O \n", + "on move: O\n", + "X X \n", + " X \n", + " O O \n", + "on move: X\n", + "X X \n", + " O X \n", + " O O \n", + "on move: O\n", + "X X \n", + "X O X \n", + " O O \n", + "on move: X\n", + "X O X \n", + "X O X \n", + " O O \n", + "Episode 282, Total Reward: -1\n", + "Average Reward: 0.2695035460992908\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X O \n", + " \n", + " \n", + "on move: O\n", + "X O \n", + " \n", + " X \n", + "on move: X\n", + "X O \n", + "O \n", + " X \n", + "on move: O\n", + "X O \n", + "O X \n", + " X \n", + "on move: X\n", + "X O \n", + "O X \n", + "O X \n", + "on move: O\n", + "X O \n", + "O X X \n", + "O X \n", + "Episode 283, Total Reward: 1\n", + "Average Reward: 0.27208480565371024\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + "X O \n", + " \n", + " X \n", + "on move: X\n", + "X O O \n", + " \n", + " X \n", + "on move: O\n", + "X O O \n", + "X \n", + " X \n", + "on move: X\n", + "X O O \n", + "X \n", + " X O \n", + "on move: O\n", + "X O O \n", + "X X \n", + " X O \n", + "on move: X\n", + "X O O \n", + "X X O \n", + " X O \n", + "Episode 284, Total Reward: -1\n", + "Average Reward: 0.2676056338028169\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + "O \n", + " \n", + " X \n", + "on move: O\n", + "O \n", + " X \n", + " X \n", + "on move: X\n", + "O \n", + " X \n", 
+ "O X \n", + "on move: O\n", + "O \n", + "X X \n", + "O X \n", + "on move: X\n", + "O O \n", + "X X \n", + "O X \n", + "on move: O\n", + "O O \n", + "X X \n", + "O X X \n", + "on move: X\n", + "O O \n", + "X O X \n", + "O X X \n", + "on move: O\n", + "O O X \n", + "X O X \n", + "O X X \n", + "Episode 285, Total Reward: 1\n", + "Average Reward: 0.27017543859649124\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + "O \n", + " \n", + "X \n", + "on move: O\n", + "O \n", + " X \n", + "X \n", + "on move: X\n", + "O \n", + " X \n", + "X O \n", + "on move: O\n", + "O \n", + "X X \n", + "X O \n", + "on move: X\n", + "O O \n", + "X X \n", + "X O \n", + "on move: O\n", + "O O \n", + "X X \n", + "X O X \n", + "on move: X\n", + "O O \n", + "X O X \n", + "X O X \n", + "on move: O\n", + "O X O \n", + "X O X \n", + "X O X \n", + "Episode 286, Total Reward: 0\n", + "Average Reward: 0.2692307692307692\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " O \n", + "X \n", + "on move: O\n", + " \n", + " O \n", + "X X \n", + "on move: X\n", + " \n", + " O O \n", + "X X \n", + "on move: O\n", + " \n", + " O O \n", + "X X X \n", + "Episode 287, Total Reward: 1\n", + "Average Reward: 0.27177700348432055\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " \n", + "O X \n", + "on move: O\n", + " \n", + " X \n", + "O X \n", + "on move: X\n", + " \n", + " X O \n", + "O X \n", + "on move: O\n", + " \n", + "X X O \n", + "O X \n", + "on move: X\n", + " O \n", + "X X O \n", + "O X \n", + "on move: O\n", + " O \n", + "X X O \n", + "O X X \n", + "on move: X\n", + "O O \n", + "X X O \n", + "O X X \n", + "on move: O\n", + "O O X \n", + "X X O \n", + "O X X \n", + "Episode 288, Total Reward: 0\n", + "Average Reward: 0.2708333333333333\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " \n", + " O X \n", + "on move: O\n", + " X \n", + " \n", + " O X \n", + "on move: X\n", + " X \n", + "O \n", + " 
O X \n", + "on move: O\n", + " X \n", + "O \n", + "X O X \n", + "on move: X\n", + " X \n", + "O O \n", + "X O X \n", + "on move: O\n", + "X X \n", + "O O \n", + "X O X \n", + "on move: X\n", + "X X \n", + "O O O \n", + "X O X \n", + "Episode 289, Total Reward: -1\n", + "Average Reward: 0.2664359861591695\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + "O \n", + "on move: O\n", + " X \n", + " X \n", + "O \n", + "on move: X\n", + " X \n", + " X \n", + "O O \n", + "on move: O\n", + "X X \n", + " X \n", + "O O \n", + "on move: X\n", + "X X \n", + "O X \n", + "O O \n", + "on move: O\n", + "X X \n", + "O X X \n", + "O O \n", + "on move: X\n", + "X X O \n", + "O X X \n", + "O O \n", + "on move: O\n", + "X X O \n", + "O X X \n", + "O O X \n", + "Episode 290, Total Reward: 1\n", + "Average Reward: 0.2689655172413793\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + " O \n", + "on move: O\n", + " X \n", + " X \n", + " O \n", + "on move: X\n", + " X \n", + "O X \n", + " O \n", + "on move: O\n", + "X X \n", + "O X \n", + " O \n", + "on move: X\n", + "X O X \n", + "O X \n", + " O \n", + "on move: O\n", + "X O X \n", + "O X X \n", + " O \n", + "on move: X\n", + "X O X \n", + "O X X \n", + " O O \n", + "on move: O\n", + "X O X \n", + "O X X \n", + "X O O \n", + "Episode 291, Total Reward: 1\n", + "Average Reward: 0.27147766323024053\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " O X \n", + " \n", + " \n", + "on move: O\n", + "X O X \n", + " \n", + " \n", + "on move: X\n", + "X O X \n", + "O \n", + " \n", + "on move: O\n", + "X O X \n", + "O \n", + "X \n", + "on move: X\n", + "X O X \n", + "O \n", + "X O \n", + "on move: O\n", + "X O X \n", + "O X \n", + "X O \n", + "on move: X\n", + "X O X \n", + "O X \n", + "X O O \n", + "on move: O\n", + "X O X \n", + "O X X \n", + "X O O \n", + "Episode 292, Total Reward: 1\n", + "Average Reward: 0.273972602739726\n", + "on move: O\n", 
+ " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + "O \n", + "on move: O\n", + " \n", + " X X \n", + "O \n", + "on move: X\n", + " \n", + " X X \n", + "O O \n", + "on move: O\n", + " X \n", + " X X \n", + "O O \n", + "on move: X\n", + " O X \n", + " X X \n", + "O O \n", + "on move: O\n", + "X O X \n", + " X X \n", + "O O \n", + "on move: X\n", + "X O X \n", + " X X \n", + "O O O \n", + "Episode 293, Total Reward: -1\n", + "Average Reward: 0.2696245733788396\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + " X \n", + " O \n", + " X \n", + "on move: X\n", + " X \n", + "O O \n", + " X \n", + "on move: O\n", + " X X \n", + "O O \n", + " X \n", + "on move: X\n", + " X X \n", + "O O \n", + " O X \n", + "on move: O\n", + "X X X \n", + "O O \n", + " O X \n", + "Episode 294, Total Reward: 1\n", + "Average Reward: 0.272108843537415\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X O \n", + " \n", + "on move: O\n", + " X \n", + " X O \n", + " \n", + "on move: X\n", + " O X \n", + " X O \n", + " \n", + "on move: O\n", + " O X \n", + " X O \n", + "X \n", + "Episode 295, Total Reward: 1\n", + "Average Reward: 0.2745762711864407\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + "O \n", + " X \n", + " \n", + "on move: O\n", + "O X \n", + " X \n", + " \n", + "on move: X\n", + "O X \n", + " O X \n", + " \n", + "on move: O\n", + "O X \n", + " O X \n", + " X \n", + "on move: X\n", + "O X O \n", + " O X \n", + " X \n", + "on move: O\n", + "O X O \n", + " O X \n", + " X X \n", + "on move: X\n", + "O X O \n", + "O O X \n", + " X X \n", + "on move: O\n", + "O X O \n", + "O O X \n", + "X X X \n", + "Episode 296, Total Reward: 1\n", + "Average Reward: 0.27702702702702703\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + "O \n", + " X \n", + "on move: O\n", + " \n", + "O \n", + "X X \n", + "on move: X\n", + " O \n", + "O 
\n", + "X X \n", + "on move: O\n", + " O \n", + "O X \n", + "X X \n", + "on move: X\n", + " O \n", + "O X \n", + "X O X \n", + "on move: O\n", + " X O \n", + "O X \n", + "X O X \n", + "on move: X\n", + " X O \n", + "O X O \n", + "X O X \n", + "on move: O\n", + "X X O \n", + "O X O \n", + "X O X \n", + "Episode 297, Total Reward: 1\n", + "Average Reward: 0.27946127946127947\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " O \n", + " \n", + "on move: O\n", + "X \n", + " O \n", + " X \n", + "on move: X\n", + "X O \n", + " O \n", + " X \n", + "on move: O\n", + "X X O \n", + " O \n", + " X \n", + "on move: X\n", + "X X O \n", + " O \n", + " O X \n", + "on move: O\n", + "X X O \n", + " X O \n", + " O X \n", + "Episode 298, Total Reward: 1\n", + "Average Reward: 0.28187919463087246\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + " O \n", + "on move: O\n", + " \n", + " X X \n", + " O \n", + "on move: X\n", + " O \n", + " X X \n", + " O \n", + "on move: O\n", + " O \n", + "X X X \n", + " O \n", + "Episode 299, Total Reward: 1\n", + "Average Reward: 0.2842809364548495\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " O \n", + " \n", + "X \n", + "on move: O\n", + " O \n", + " X \n", + "X \n", + "on move: X\n", + "O O \n", + " X \n", + "X \n", + "on move: O\n", + "O O \n", + " X \n", + "X X \n", + "on move: X\n", + "O O \n", + " O X \n", + "X X \n", + "on move: O\n", + "O O \n", + "X O X \n", + "X X \n", + "on move: X\n", + "O O \n", + "X O X \n", + "X X O \n", + "Episode 300, Total Reward: -1\n", + "Average Reward: 0.28\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + " X \n", + " O \n", + " X \n", + "on move: X\n", + "O X \n", + " O \n", + " X \n", + "on move: O\n", + "O X \n", + "X O \n", + " X \n", + "on move: X\n", + "O X O \n", + "X O \n", + " X \n", + "on move: O\n", + "O X O \n", + "X O \n", + "X X \n", 
+ "on move: X\n", + "O X O \n", + "X O O \n", + "X X \n", + "on move: O\n", + "O X O \n", + "X O O \n", + "X X X \n", + "Episode 301, Total Reward: 1\n", + "Average Reward: 0.2823920265780731\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + "O \n", + " X \n", + "on move: O\n", + " X \n", + "O \n", + " X \n", + "on move: X\n", + "O X \n", + "O \n", + " X \n", + "on move: O\n", + "O X \n", + "O \n", + " X X \n", + "on move: X\n", + "O X \n", + "O O \n", + " X X \n", + "on move: O\n", + "O X X \n", + "O O \n", + " X X \n", + "on move: X\n", + "O X X \n", + "O O O \n", + " X X \n", + "Episode 302, Total Reward: -1\n", + "Average Reward: 0.2781456953642384\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " \n", + " X O \n", + "on move: O\n", + " \n", + " X \n", + " X O \n", + "on move: X\n", + " \n", + " X O \n", + " X O \n", + "on move: O\n", + " \n", + " X O \n", + "X X O \n", + "on move: X\n", + "O \n", + " X O \n", + "X X O \n", + "on move: O\n", + "O \n", + "X X O \n", + "X X O \n", + "on move: X\n", + "O O \n", + "X X O \n", + "X X O \n", + "on move: O\n", + "O O X \n", + "X X O \n", + "X X O \n", + "Episode 303, Total Reward: 1\n", + "Average Reward: 0.28052805280528054\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + "O X \n", + " \n", + " \n", + "on move: O\n", + "O X \n", + " X \n", + " \n", + "on move: X\n", + "O X \n", + " X \n", + " O \n", + "on move: O\n", + "O X X \n", + " X \n", + " O \n", + "on move: X\n", + "O X X \n", + "O X \n", + " O \n", + "on move: O\n", + "O X X \n", + "O X \n", + " O X \n", + "on move: X\n", + "O X X \n", + "O X O \n", + " O X \n", + "on move: O\n", + "O X X \n", + "O X O \n", + "X O X \n", + "Episode 304, Total Reward: 1\n", + "Average Reward: 0.28289473684210525\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + " X \n", + " O \n", + " X \n", + "on move: X\n", + " X \n", + "O 
O \n", + " X \n", + "on move: O\n", + " X \n", + "O O \n", + " X X \n", + "on move: X\n", + " X \n", + "O O \n", + "O X X \n", + "on move: O\n", + " X \n", + "O O X \n", + "O X X \n", + "Episode 305, Total Reward: 1\n", + "Average Reward: 0.28524590163934427\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + "O \n", + " \n", + "on move: O\n", + "X \n", + "O X \n", + " \n", + "on move: X\n", + "X \n", + "O O X \n", + " \n", + "on move: O\n", + "X \n", + "O O X \n", + " X \n", + "on move: X\n", + "X \n", + "O O X \n", + "O X \n", + "on move: O\n", + "X X \n", + "O O X \n", + "O X \n", + "on move: X\n", + "X O X \n", + "O O X \n", + "O X \n", + "on move: O\n", + "X O X \n", + "O O X \n", + "O X X \n", + "Episode 306, Total Reward: 1\n", + "Average Reward: 0.2875816993464052\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + "O \n", + " \n", + "on move: O\n", + " X X \n", + "O \n", + " \n", + "on move: X\n", + " X X \n", + "O \n", + " O \n", + "on move: O\n", + " X X \n", + "O \n", + "X O \n", + "on move: X\n", + " X X \n", + "O O \n", + "X O \n", + "on move: O\n", + " X X \n", + "O O X \n", + "X O \n", + "on move: X\n", + " X X \n", + "O O X \n", + "X O O \n", + "on move: O\n", + "X X X \n", + "O O X \n", + "X O O \n", + "Episode 307, Total Reward: 1\n", + "Average Reward: 0.2899022801302932\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + "O \n", + " \n", + "on move: O\n", + "X X \n", + "O \n", + " \n", + "on move: X\n", + "X X \n", + "O O \n", + " \n", + "on move: O\n", + "X X \n", + "O O \n", + " X \n", + "on move: X\n", + "X X \n", + "O O \n", + " X O \n", + "on move: O\n", + "X X \n", + "O O \n", + "X X O \n", + "on move: X\n", + "X X \n", + "O O O \n", + "X X O \n", + "Episode 308, Total Reward: -1\n", + "Average Reward: 0.2857142857142857\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X O \n", + " \n", + "on move: O\n", + "X \n", + "X O 
\n", + " \n", + "on move: X\n", + "X O \n", + "X O \n", + " \n", + "on move: O\n", + "X O \n", + "X O \n", + " X \n", + "on move: X\n", + "X O O \n", + "X O \n", + " X \n", + "on move: O\n", + "X O O \n", + "X O \n", + "X X \n", + "Episode 309, Total Reward: 1\n", + "Average Reward: 0.28802588996763756\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + " O \n", + "X \n", + " X \n", + "on move: X\n", + " O \n", + "X O \n", + " X \n", + "on move: O\n", + " O \n", + "X X O \n", + " X \n", + "on move: X\n", + " O \n", + "X X O \n", + "O X \n", + "on move: O\n", + "X O \n", + "X X O \n", + "O X \n", + "Episode 310, Total Reward: 1\n", + "Average Reward: 0.2903225806451613\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " O \n", + " \n", + "X \n", + "on move: O\n", + " O \n", + " \n", + "X X \n", + "on move: X\n", + " O \n", + " O \n", + "X X \n", + "on move: O\n", + " O \n", + "X O \n", + "X X \n", + "on move: X\n", + " O O \n", + "X O \n", + "X X \n", + "on move: O\n", + " O O \n", + "X O \n", + "X X X \n", + "Episode 311, Total Reward: 1\n", + "Average Reward: 0.29260450160771706\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " \n", + " O \n", + "on move: O\n", + "X \n", + " \n", + "X O \n", + "on move: X\n", + "X \n", + " \n", + "X O O \n", + "on move: O\n", + "X X \n", + " \n", + "X O O \n", + "on move: X\n", + "X X \n", + " O \n", + "X O O \n", + "on move: O\n", + "X X \n", + "X O \n", + "X O O \n", + "Episode 312, Total Reward: 1\n", + "Average Reward: 0.2948717948717949\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + " O \n", + "on move: O\n", + " X \n", + " \n", + "X O \n", + "on move: X\n", + " X \n", + "O \n", + "X O \n", + "on move: O\n", + "X X \n", + "O \n", + "X O \n", + "on move: X\n", + "X O X \n", + "O \n", + "X O \n", + "on move: O\n", + "X O X \n", + "O \n", + "X X O \n", + "on move: 
X\n", + "X O X \n", + "O O \n", + "X X O \n", + "on move: O\n", + "X O X \n", + "O O X \n", + "X X O \n", + "Episode 313, Total Reward: 0\n", + "Average Reward: 0.2939297124600639\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " \n", + "O X \n", + "on move: O\n", + " X \n", + " \n", + "O X \n", + "on move: X\n", + "O X \n", + " \n", + "O X \n", + "on move: O\n", + "O X \n", + " \n", + "O X X \n", + "on move: X\n", + "O X O \n", + " \n", + "O X X \n", + "on move: O\n", + "O X O \n", + "X \n", + "O X X \n", + "on move: X\n", + "O X O \n", + "X O \n", + "O X X \n", + "Episode 314, Total Reward: -1\n", + "Average Reward: 0.2898089171974522\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + " \n", + " X O \n", + " X \n", + "on move: X\n", + "O \n", + " X O \n", + " X \n", + "on move: O\n", + "O \n", + "X X O \n", + " X \n", + "on move: X\n", + "O \n", + "X X O \n", + "O X \n", + "on move: O\n", + "O X \n", + "X X O \n", + "O X \n", + "on move: X\n", + "O O X \n", + "X X O \n", + "O X \n", + "on move: O\n", + "O O X \n", + "X X O \n", + "O X X \n", + "Episode 315, Total Reward: 0\n", + "Average Reward: 0.28888888888888886\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + "O \n", + " \n", + "X \n", + "on move: O\n", + "O \n", + " \n", + "X X \n", + "on move: X\n", + "O \n", + " O \n", + "X X \n", + "on move: O\n", + "O \n", + " O \n", + "X X X \n", + "Episode 316, Total Reward: 1\n", + "Average Reward: 0.2911392405063291\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + " X \n", + "X O \n", + " \n", + "on move: X\n", + " X \n", + "X O \n", + " O \n", + "on move: O\n", + " X \n", + "X O \n", + "X O \n", + "on move: X\n", + "O X \n", + "X O \n", + "X O \n", + "on move: O\n", + "O X \n", + "X O \n", + "X X O \n", + "on move: X\n", + "O X O \n", + "X O \n", + "X X O \n", + "Episode 317, 
Total Reward: -1\n", + "Average Reward: 0.2870662460567823\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " \n", + " X O \n", + "on move: O\n", + " \n", + " X \n", + " X O \n", + "on move: X\n", + " \n", + "O X \n", + " X O \n", + "on move: O\n", + " X \n", + "O X \n", + " X O \n", + "on move: X\n", + " X O \n", + "O X \n", + " X O \n", + "on move: O\n", + " X O \n", + "O X X \n", + " X O \n", + "Episode 318, Total Reward: 1\n", + "Average Reward: 0.2893081761006289\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + " \n", + " O \n", + " X X \n", + "on move: X\n", + "O \n", + " O \n", + " X X \n", + "on move: O\n", + "O \n", + " O X \n", + " X X \n", + "on move: X\n", + "O \n", + " O X \n", + "O X X \n", + "on move: O\n", + "O X \n", + " O X \n", + "O X X \n", + "on move: X\n", + "O X \n", + "O O X \n", + "O X X \n", + "Episode 319, Total Reward: -1\n", + "Average Reward: 0.2852664576802508\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + " X \n", + "X O \n", + " \n", + "on move: X\n", + " X \n", + "X O \n", + " O \n", + "on move: O\n", + " X \n", + "X O \n", + "X O \n", + "on move: X\n", + "O X \n", + "X O \n", + "X O \n", + "on move: O\n", + "O X X \n", + "X O \n", + "X O \n", + "on move: X\n", + "O X X \n", + "X O O \n", + "X O \n", + "Episode 320, Total Reward: -1\n", + "Average Reward: 0.28125\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X O \n", + " \n", + " \n", + "on move: O\n", + "X X O \n", + " \n", + " \n", + "on move: X\n", + "X X O \n", + " O \n", + " \n", + "on move: O\n", + "X X O \n", + "X O \n", + " \n", + "on move: X\n", + "X X O \n", + "X O O \n", + " \n", + "on move: O\n", + "X X O \n", + "X O O \n", + " X \n", + "on move: X\n", + "X X O \n", + "X O O \n", + "O X \n", + "Episode 321, Total Reward: -1\n", + "Average Reward: 0.2772585669781931\n", + 
"on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + "O \n", + "on move: O\n", + " X \n", + " X \n", + "O \n", + "on move: X\n", + " X \n", + "O X \n", + "O \n", + "on move: O\n", + " X \n", + "O X \n", + "O X \n", + "on move: X\n", + " X \n", + "O X \n", + "O O X \n", + "on move: O\n", + " X \n", + "O X X \n", + "O O X \n", + "Episode 322, Total Reward: 1\n", + "Average Reward: 0.2795031055900621\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " O \n", + "X \n", + "on move: O\n", + " X \n", + " O \n", + "X \n", + "on move: X\n", + "O X \n", + " O \n", + "X \n", + "on move: O\n", + "O X \n", + "X O \n", + "X \n", + "on move: X\n", + "O X O \n", + "X O \n", + "X \n", + "on move: O\n", + "O X O \n", + "X X O \n", + "X \n", + "on move: X\n", + "O X O \n", + "X X O \n", + "X O \n", + "Episode 323, Total Reward: -1\n", + "Average Reward: 0.2755417956656347\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + "O X \n", + " \n", + " \n", + "on move: O\n", + "O X \n", + " \n", + " X \n", + "on move: X\n", + "O X \n", + " \n", + "O X \n", + "on move: O\n", + "O X X \n", + " \n", + "O X \n", + "on move: X\n", + "O X X \n", + " O \n", + "O X \n", + "on move: O\n", + "O X X \n", + " X O \n", + "O X \n", + "Episode 324, Total Reward: 1\n", + "Average Reward: 0.2777777777777778\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + " O X \n", + " \n", + " X \n", + "on move: X\n", + " O X \n", + "O \n", + " X \n", + "on move: O\n", + " O X \n", + "O \n", + " X X \n", + "on move: X\n", + " O X \n", + "O O \n", + " X X \n", + "on move: O\n", + " O X \n", + "O O X \n", + " X X \n", + "Episode 325, Total Reward: 1\n", + "Average Reward: 0.28\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X O \n", + " \n", + "on move: O\n", + " \n", + " X O \n", + " X \n", + "on move: X\n", + " \n", + " X O \n", + "O X \n", 
+ "on move: O\n", + " X \n", + " X O \n", + "O X \n", + "on move: X\n", + " O X \n", + " X O \n", + "O X \n", + "on move: O\n", + " O X \n", + "X X O \n", + "O X \n", + "on move: X\n", + " O X \n", + "X X O \n", + "O X O \n", + "on move: O\n", + "X O X \n", + "X X O \n", + "O X O \n", + "Episode 326, Total Reward: 0\n", + "Average Reward: 0.2791411042944785\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " \n", + "X O \n", + "on move: O\n", + " \n", + " X \n", + "X O \n", + "on move: X\n", + " \n", + " X \n", + "X O O \n", + "on move: O\n", + " \n", + "X X \n", + "X O O \n", + "on move: X\n", + " O \n", + "X X \n", + "X O O \n", + "on move: O\n", + "X O \n", + "X X \n", + "X O O \n", + "Episode 327, Total Reward: 1\n", + "Average Reward: 0.28134556574923547\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + " \n", + " O \n", + " X X \n", + "on move: X\n", + " \n", + " O O \n", + " X X \n", + "on move: O\n", + " X \n", + " O O \n", + " X X \n", + "on move: X\n", + " X \n", + "O O O \n", + " X X \n", + "Episode 328, Total Reward: -1\n", + "Average Reward: 0.2774390243902439\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + " O \n", + " X \n", + " X \n", + "on move: X\n", + " O \n", + " X \n", + "O X \n", + "on move: O\n", + " O \n", + " X X \n", + "O X \n", + "on move: X\n", + " O O \n", + " X X \n", + "O X \n", + "on move: O\n", + " O O \n", + " X X \n", + "O X X \n", + "on move: X\n", + "O O O \n", + " X X \n", + "O X X \n", + "Episode 329, Total Reward: -1\n", + "Average Reward: 0.2735562310030395\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + " \n", + " X O \n", + " X \n", + "on move: X\n", + " \n", + "O X O \n", + " X \n", + "on move: O\n", + " \n", + "O X O \n", + " X X \n", + "on move: X\n", + " \n", + "O X O \n", + "O X 
X \n", + "on move: O\n", + "X \n", + "O X O \n", + "O X X \n", + "Episode 330, Total Reward: 1\n", + "Average Reward: 0.27575757575757576\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " O \n", + " \n", + "on move: O\n", + "X X \n", + " O \n", + " \n", + "on move: X\n", + "X X O \n", + " O \n", + " \n", + "on move: O\n", + "X X O \n", + " O X \n", + " \n", + "on move: X\n", + "X X O \n", + " O X \n", + "O \n", + "Episode 331, Total Reward: -1\n", + "Average Reward: 0.2719033232628399\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O X \n", + " X \n", + " \n", + "on move: X\n", + " O X \n", + " X \n", + "O \n", + "on move: O\n", + " O X \n", + " X X \n", + "O \n", + "on move: X\n", + "O O X \n", + " X X \n", + "O \n", + "on move: O\n", + "O O X \n", + " X X \n", + "O X \n", + "on move: X\n", + "O O X \n", + " X X \n", + "O X O \n", + "on move: O\n", + "O O X \n", + "X X X \n", + "O X O \n", + "Episode 332, Total Reward: 1\n", + "Average Reward: 0.2740963855421687\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + " X X \n", + " O \n", + " \n", + "on move: X\n", + " X X \n", + " O \n", + "O \n", + "on move: O\n", + " X X \n", + " O \n", + "O X \n", + "on move: X\n", + " X X \n", + " O \n", + "O O X \n", + "on move: O\n", + "X X X \n", + " O \n", + "O O X \n", + "Episode 333, Total Reward: 1\n", + "Average Reward: 0.27627627627627627\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " O X \n", + " \n", + "on move: O\n", + "X \n", + " O X \n", + " \n", + "on move: X\n", + "X \n", + " O X \n", + " O \n", + "on move: O\n", + "X \n", + " O X \n", + " X O \n", + "on move: X\n", + "X O \n", + " O X \n", + " X O \n", + "on move: O\n", + "X O \n", + "X O X \n", + " X O \n", + "on move: X\n", + "X O \n", + "X O X \n", + "O X O \n", + "on move: O\n", + "X O X \n", + "X O X 
\n", + "O X O \n", + "Episode 334, Total Reward: 0\n", + "Average Reward: 0.2754491017964072\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + "X O \n", + " X \n", + " \n", + "on move: X\n", + "X O O \n", + " X \n", + " \n", + "on move: O\n", + "X O O \n", + " X \n", + "X \n", + "on move: X\n", + "X O O \n", + " O X \n", + "X \n", + "on move: O\n", + "X O O \n", + "X O X \n", + "X \n", + "Episode 335, Total Reward: 1\n", + "Average Reward: 0.27761194029850744\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + "O \n", + "on move: O\n", + " X X \n", + " \n", + "O \n", + "on move: X\n", + " X X \n", + " \n", + "O O \n", + "on move: O\n", + " X X \n", + " \n", + "O O X \n", + "on move: X\n", + " X X \n", + " O \n", + "O O X \n", + "on move: O\n", + "X X X \n", + " O \n", + "O O X \n", + "Episode 336, Total Reward: 1\n", + "Average Reward: 0.27976190476190477\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " \n", + "O \n", + "on move: O\n", + "X \n", + " X \n", + "O \n", + "on move: X\n", + "X O \n", + " X \n", + "O \n", + "on move: O\n", + "X O \n", + "X X \n", + "O \n", + "on move: X\n", + "X O \n", + "X X \n", + "O O \n", + "on move: O\n", + "X O \n", + "X X \n", + "O X O \n", + "on move: X\n", + "X O O \n", + "X X \n", + "O X O \n", + "on move: O\n", + "X O O \n", + "X X X \n", + "O X O \n", + "Episode 337, Total Reward: 1\n", + "Average Reward: 0.2818991097922849\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + "O \n", + "on move: O\n", + " X \n", + " X \n", + "O \n", + "on move: X\n", + " O X \n", + " X \n", + "O \n", + "on move: O\n", + "X O X \n", + " X \n", + "O \n", + "on move: X\n", + "X O X \n", + " X \n", + "O O \n", + "on move: O\n", + "X O X \n", + " X X \n", + "O O \n", + "on move: X\n", + "X O X \n", + " X X \n", + "O O O \n", + "Episode 338, Total Reward: -1\n", + 
"Average Reward: 0.2781065088757396\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + " \n", + " O \n", + "X X \n", + "on move: X\n", + " O \n", + " O \n", + "X X \n", + "on move: O\n", + " O X \n", + " O \n", + "X X \n", + "on move: X\n", + " O X \n", + " O O \n", + "X X \n", + "on move: O\n", + " O X \n", + " O O \n", + "X X X \n", + "Episode 339, Total Reward: 1\n", + "Average Reward: 0.28023598820059\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X O \n", + " \n", + "on move: O\n", + "X \n", + "X O \n", + " \n", + "on move: X\n", + "X \n", + "X O \n", + " O \n", + "on move: O\n", + "X \n", + "X O \n", + " X O \n", + "on move: X\n", + "X O \n", + "X O \n", + " X O \n", + "Episode 340, Total Reward: -1\n", + "Average Reward: 0.27647058823529413\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " \n", + "O \n", + "on move: O\n", + "X X \n", + " \n", + "O \n", + "on move: X\n", + "X X O \n", + " \n", + "O \n", + "on move: O\n", + "X X O \n", + " \n", + "O X \n", + "on move: X\n", + "X X O \n", + " O \n", + "O X \n", + "Episode 341, Total Reward: -1\n", + "Average Reward: 0.2727272727272727\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + "O X \n", + " \n", + "on move: O\n", + " \n", + "O X \n", + "X \n", + "on move: X\n", + " \n", + "O X \n", + "X O \n", + "on move: O\n", + " \n", + "O X \n", + "X X O \n", + "on move: X\n", + " O \n", + "O X \n", + "X X O \n", + "on move: O\n", + "X O \n", + "O X \n", + "X X O \n", + "on move: X\n", + "X O \n", + "O O X \n", + "X X O \n", + "on move: O\n", + "X X O \n", + "O O X \n", + "X X O \n", + "Episode 342, Total Reward: 0\n", + "Average Reward: 0.2719298245614035\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + "O \n", + " \n", + "on move: O\n", + " X X \n", + "O \n", + " \n", + "on move: X\n", + " X X \n", + "O \n", + " O \n", + 
"on move: O\n", + " X X \n", + "O X \n", + " O \n", + "on move: X\n", + " X X \n", + "O X \n", + " O O \n", + "on move: O\n", + " X X \n", + "O X \n", + "X O O \n", + "on move: X\n", + "O X X \n", + "O X \n", + "X O O \n", + "on move: O\n", + "O X X \n", + "O X X \n", + "X O O \n", + "Episode 343, Total Reward: 1\n", + "Average Reward: 0.27405247813411077\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + " O \n", + " X \n", + " X \n", + "on move: X\n", + " O \n", + " X \n", + "O X \n", + "on move: O\n", + " O \n", + " X X \n", + "O X \n", + "on move: X\n", + " O \n", + " X X \n", + "O X O \n", + "on move: O\n", + " O \n", + "X X X \n", + "O X O \n", + "Episode 344, Total Reward: 1\n", + "Average Reward: 0.2761627906976744\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + "O \n", + " \n", + "X \n", + "on move: O\n", + "O X \n", + " \n", + "X \n", + "on move: X\n", + "O X \n", + "O \n", + "X \n", + "on move: O\n", + "O X \n", + "O X \n", + "X \n", + "on move: X\n", + "O X \n", + "O X \n", + "X O \n", + "on move: O\n", + "O X \n", + "O X X \n", + "X O \n", + "Episode 345, Total Reward: 1\n", + "Average Reward: 0.2782608695652174\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X O \n", + " \n", + " \n", + "on move: O\n", + "X O \n", + " \n", + "X \n", + "on move: X\n", + "X O \n", + "O \n", + "X \n", + "on move: O\n", + "X O \n", + "O X \n", + "X \n", + "on move: X\n", + "X O \n", + "O X \n", + "X O \n", + "on move: O\n", + "X O \n", + "O X X \n", + "X O \n", + "on move: X\n", + "X O O \n", + "O X X \n", + "X O \n", + "on move: O\n", + "X O O \n", + "O X X \n", + "X O X \n", + "Episode 346, Total Reward: 1\n", + "Average Reward: 0.28034682080924855\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X \n", + " O \n", + "on move: O\n", + "X \n", + "X \n", + " O \n", + "on move: X\n", + "X O \n", + "X \n", + " O \n", + "on move: 
O\n", + "X O \n", + "X \n", + "X O \n", + "Episode 347, Total Reward: 1\n", + "Average Reward: 0.2824207492795389\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X \n", + " O \n", + "on move: O\n", + " \n", + "X \n", + "X O \n", + "on move: X\n", + " \n", + "X O \n", + "X O \n", + "on move: O\n", + "X \n", + "X O \n", + "X O \n", + "Episode 348, Total Reward: 1\n", + "Average Reward: 0.28448275862068967\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + "O \n", + "on move: O\n", + " \n", + "X X \n", + "O \n", + "on move: X\n", + " O \n", + "X X \n", + "O \n", + "on move: O\n", + " O \n", + "X X \n", + "O X \n", + "on move: X\n", + " O O \n", + "X X \n", + "O X \n", + "on move: O\n", + "X O O \n", + "X X \n", + "O X \n", + "on move: X\n", + "X O O \n", + "X X \n", + "O O X \n", + "on move: O\n", + "X O O \n", + "X X X \n", + "O O X \n", + "Episode 349, Total Reward: 1\n", + "Average Reward: 0.28653295128939826\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " O \n", + " \n", + "X \n", + "on move: O\n", + " O \n", + "X \n", + "X \n", + "on move: X\n", + " O O \n", + "X \n", + "X \n", + "on move: O\n", + "X O O \n", + "X \n", + "X \n", + "Episode 350, Total Reward: 1\n", + "Average Reward: 0.2885714285714286\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " O \n", + "X \n", + "on move: O\n", + " X \n", + " O \n", + "X \n", + "on move: X\n", + " X O \n", + " O \n", + "X \n", + "on move: O\n", + "X X O \n", + " O \n", + "X \n", + "on move: X\n", + "X X O \n", + " O \n", + "X O \n", + "on move: O\n", + "X X O \n", + "X O \n", + "X O \n", + "Episode 351, Total Reward: 1\n", + "Average Reward: 0.2905982905982906\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + " O \n", + " X \n", + " X \n", + "on move: X\n", + "O O \n", + " X \n", + " X \n", + "on move: O\n", + "O O \n", + " X X 
\n", + " X \n", + "on move: X\n", + "O O \n", + " X X \n", + " X O \n", + "on move: O\n", + "O O \n", + "X X X \n", + " X O \n", + "Episode 352, Total Reward: 1\n", + "Average Reward: 0.29261363636363635\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + "O \n", + "X \n", + " \n", + "on move: O\n", + "O \n", + "X \n", + " X \n", + "on move: X\n", + "O O \n", + "X \n", + " X \n", + "on move: O\n", + "O O \n", + "X X \n", + " X \n", + "on move: X\n", + "O O \n", + "X X O \n", + " X \n", + "on move: O\n", + "O X O \n", + "X X O \n", + " X \n", + "Episode 353, Total Reward: 1\n", + "Average Reward: 0.29461756373937675\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + "O \n", + " \n", + "on move: O\n", + "X X \n", + "O \n", + " \n", + "on move: X\n", + "X X \n", + "O \n", + " O \n", + "on move: O\n", + "X X X \n", + "O \n", + " O \n", + "Episode 354, Total Reward: 1\n", + "Average Reward: 0.2966101694915254\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + "O X \n", + " \n", + "on move: O\n", + " \n", + "O X X \n", + " \n", + "on move: X\n", + " \n", + "O X X \n", + "O \n", + "on move: O\n", + " X \n", + "O X X \n", + "O \n", + "on move: X\n", + "O X \n", + "O X X \n", + "O \n", + "Episode 355, Total Reward: -1\n", + "Average Reward: 0.29295774647887324\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X \n", + "O \n", + "on move: O\n", + " \n", + "X X \n", + "O \n", + "on move: X\n", + " \n", + "X X \n", + "O O \n", + "on move: O\n", + " \n", + "X X \n", + "O O X \n", + "on move: X\n", + "O \n", + "X X \n", + "O O X \n", + "on move: O\n", + "O \n", + "X X X \n", + "O O X \n", + "Episode 356, Total Reward: 1\n", + "Average Reward: 0.2949438202247191\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " \n", + "X O \n", + "on move: O\n", + " \n", + " X \n", + "X O \n", + "on move: X\n", + " O \n", + " X \n", + "X O \n", + "on move: 
O\n", + " O \n", + " X X \n", + "X O \n", + "on move: X\n", + "O O \n", + " X X \n", + "X O \n", + "on move: O\n", + "O O \n", + "X X X \n", + "X O \n", + "Episode 357, Total Reward: 1\n", + "Average Reward: 0.2969187675070028\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X \n", + " O \n", + "on move: O\n", + " X \n", + "X \n", + " O \n", + "on move: X\n", + " X \n", + "X O \n", + " O \n", + "on move: O\n", + " X \n", + "X O \n", + "X O \n", + "on move: X\n", + " X \n", + "X O O \n", + "X O \n", + "on move: O\n", + " X \n", + "X O O \n", + "X X O \n", + "on move: X\n", + "O X \n", + "X O O \n", + "X X O \n", + "Episode 358, Total Reward: -1\n", + "Average Reward: 0.29329608938547486\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X O \n", + " \n", + "on move: O\n", + "X \n", + "X O \n", + " \n", + "on move: X\n", + "X \n", + "X O \n", + "O \n", + "on move: O\n", + "X \n", + "X X O \n", + "O \n", + "on move: X\n", + "X O \n", + "X X O \n", + "O \n", + "on move: O\n", + "X O \n", + "X X O \n", + "O X \n", + "Episode 359, Total Reward: 1\n", + "Average Reward: 0.29526462395543174\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X O \n", + " \n", + " \n", + "on move: O\n", + "X O \n", + " X \n", + " \n", + "on move: X\n", + "X O \n", + " X \n", + " O \n", + "on move: O\n", + "X X O \n", + " X \n", + " O \n", + "on move: X\n", + "X X O \n", + "O X \n", + " O \n", + "on move: O\n", + "X X O \n", + "O X X \n", + " O \n", + "on move: X\n", + "X X O \n", + "O X X \n", + "O O \n", + "on move: O\n", + "X X O \n", + "O X X \n", + "O X O \n", + "Episode 360, Total Reward: 1\n", + "Average Reward: 0.2972222222222222\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + "O \n", + "on move: O\n", + " X \n", + " X \n", + "O \n", + "on move: X\n", + " X \n", + " O X \n", + "O \n", + "on move: O\n", + " X \n", + "X O X \n", + "O \n", + "on move: X\n", + " X 
\n", + "X O X \n", + "O O \n", + "on move: O\n", + " X X \n", + "X O X \n", + "O O \n", + "on move: X\n", + "O X X \n", + "X O X \n", + "O O \n", + "on move: O\n", + "O X X \n", + "X O X \n", + "O O X \n", + "Episode 361, Total Reward: 1\n", + "Average Reward: 0.29916897506925205\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + " \n", + " O \n", + "X X \n", + "on move: X\n", + " O \n", + " O \n", + "X X \n", + "on move: O\n", + " O \n", + "X O \n", + "X X \n", + "on move: X\n", + "O O \n", + "X O \n", + "X X \n", + "on move: O\n", + "O O \n", + "X O X \n", + "X X \n", + "on move: X\n", + "O O \n", + "X O X \n", + "X X O \n", + "Episode 362, Total Reward: -1\n", + "Average Reward: 0.2955801104972376\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + "O \n", + "on move: O\n", + "X X \n", + " \n", + "O \n", + "on move: X\n", + "X X \n", + " \n", + "O O \n", + "on move: O\n", + "X X X \n", + " \n", + "O O \n", + "Episode 363, Total Reward: 1\n", + "Average Reward: 0.2975206611570248\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " O \n", + "X \n", + " \n", + "on move: O\n", + " O \n", + "X \n", + " X \n", + "on move: X\n", + " O \n", + "X O \n", + " X \n", + "on move: O\n", + " O \n", + "X O \n", + "X X \n", + "on move: X\n", + " O \n", + "X O \n", + "X X O \n", + "on move: O\n", + "X O \n", + "X O \n", + "X X O \n", + "Episode 364, Total Reward: 1\n", + "Average Reward: 0.29945054945054944\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + " X X \n", + " O \n", + " \n", + "on move: X\n", + " X X \n", + " O O \n", + " \n", + "on move: O\n", + " X X \n", + " O O \n", + " X \n", + "on move: X\n", + " X X \n", + " O O \n", + " O X \n", + "on move: O\n", + " X X \n", + "X O O \n", + " O X \n", + "on move: X\n", + "O X X \n", + "X O O \n", + " O X \n", + "on move: O\n", + "O 
X X \n", + "X O O \n", + "X O X \n", + "Episode 365, Total Reward: 0\n", + "Average Reward: 0.29863013698630136\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " \n", + " X O \n", + "on move: O\n", + " \n", + " X \n", + " X O \n", + "on move: X\n", + " \n", + " X \n", + "O X O \n", + "on move: O\n", + " X \n", + " X \n", + "O X O \n", + "on move: X\n", + " O X \n", + " X \n", + "O X O \n", + "on move: O\n", + " O X \n", + "X X \n", + "O X O \n", + "on move: X\n", + "O O X \n", + "X X \n", + "O X O \n", + "on move: O\n", + "O O X \n", + "X X X \n", + "O X O \n", + "Episode 366, Total Reward: 1\n", + "Average Reward: 0.3005464480874317\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " O \n", + "X \n", + "on move: O\n", + " \n", + " O \n", + "X X \n", + "on move: X\n", + " \n", + " O O \n", + "X X \n", + "on move: O\n", + " X \n", + " O O \n", + "X X \n", + "on move: X\n", + " O X \n", + " O O \n", + "X X \n", + "on move: O\n", + " O X \n", + " O O \n", + "X X X \n", + "Episode 367, Total Reward: 1\n", + "Average Reward: 0.3024523160762943\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + "O X \n", + " \n", + " \n", + "on move: O\n", + "O X \n", + " \n", + " X \n", + "on move: X\n", + "O X \n", + " O \n", + " X \n", + "on move: O\n", + "O X \n", + " O X \n", + " X \n", + "on move: X\n", + "O X \n", + "O O X \n", + " X \n", + "on move: O\n", + "O X \n", + "O O X \n", + " X X \n", + "on move: X\n", + "O X \n", + "O O X \n", + "O X X \n", + "Episode 368, Total Reward: -1\n", + "Average Reward: 0.29891304347826086\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + "O \n", + " \n", + "on move: O\n", + "X \n", + "O \n", + "X \n", + "on move: X\n", + "X \n", + "O \n", + "X O \n", + "on move: O\n", + "X \n", + "O \n", + "X O X \n", + "on move: X\n", + "X \n", + "O O \n", + "X O X \n", + "on move: O\n", + "X X \n", + "O O \n", + "X O X \n", + "on move: X\n", + "X X 
O \n", + "O O \n", + "X O X \n", + "on move: O\n", + "X X O \n", + "O X O \n", + "X O X \n", + "Episode 369, Total Reward: 1\n", + "Average Reward: 0.3008130081300813\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + "O \n", + " X \n", + "on move: O\n", + " \n", + "O \n", + " X X \n", + "on move: X\n", + " \n", + "O \n", + "O X X \n", + "on move: O\n", + "X \n", + "O \n", + "O X X \n", + "on move: X\n", + "X O \n", + "O \n", + "O X X \n", + "on move: O\n", + "X X O \n", + "O \n", + "O X X \n", + "on move: X\n", + "X X O \n", + "O O \n", + "O X X \n", + "Episode 370, Total Reward: -1\n", + "Average Reward: 0.2972972972972973\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + " X \n", + " X \n", + "on move: X\n", + " O \n", + "O X \n", + " X \n", + "on move: O\n", + " X O \n", + "O X \n", + " X \n", + "on move: X\n", + "O X O \n", + "O X \n", + " X \n", + "on move: O\n", + "O X O \n", + "O X \n", + "X X \n", + "on move: X\n", + "O X O \n", + "O X O \n", + "X X \n", + "on move: O\n", + "O X O \n", + "O X O \n", + "X X X \n", + "Episode 371, Total Reward: 1\n", + "Average Reward: 0.2991913746630728\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " X O \n", + " X \n", + " \n", + "on move: X\n", + " X O \n", + " O X \n", + " \n", + "on move: O\n", + " X O \n", + "X O X \n", + " \n", + "on move: X\n", + "O X O \n", + "X O X \n", + " \n", + "on move: O\n", + "O X O \n", + "X O X \n", + "X \n", + "on move: X\n", + "O X O \n", + "X O X \n", + "X O \n", + "Episode 372, Total Reward: -1\n", + "Average Reward: 0.2956989247311828\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + "O X \n", + " \n", + "on move: O\n", + " \n", + "O X \n", + " X \n", + "on move: X\n", + " \n", + "O X \n", + "O X \n", + "on move: O\n", + " \n", + "O X \n", + "O X X \n", + "on move: X\n", + " 
O \n", + "O X \n", + "O X X \n", + "on move: O\n", + "X O \n", + "O X \n", + "O X X \n", + "on move: X\n", + "X O O \n", + "O X \n", + "O X X \n", + "on move: O\n", + "X O O \n", + "O X X \n", + "O X X \n", + "Episode 373, Total Reward: 1\n", + "Average Reward: 0.2975871313672922\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " O X \n", + " \n", + "on move: O\n", + " \n", + " O X \n", + " X \n", + "on move: X\n", + " O \n", + " O X \n", + " X \n", + "on move: O\n", + " O X \n", + " O X \n", + " X \n", + "on move: X\n", + " O X \n", + " O X \n", + " X O \n", + "on move: O\n", + " O X \n", + "X O X \n", + " X O \n", + "on move: X\n", + "O O X \n", + "X O X \n", + " X O \n", + "Episode 374, Total Reward: -1\n", + "Average Reward: 0.29411764705882354\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + "X X \n", + " O \n", + " \n", + "on move: X\n", + "X X \n", + " O O \n", + " \n", + "on move: O\n", + "X X \n", + " O O \n", + " X \n", + "on move: X\n", + "X X O \n", + " O O \n", + " X \n", + "on move: O\n", + "X X O \n", + " O O \n", + " X X \n", + "on move: X\n", + "X X O \n", + "O O O \n", + " X X \n", + "Episode 375, Total Reward: -1\n", + "Average Reward: 0.2906666666666667\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + " O \n", + "on move: O\n", + "X X \n", + " \n", + " O \n", + "on move: X\n", + "X X \n", + "O \n", + " O \n", + "on move: O\n", + "X X \n", + "O X \n", + " O \n", + "on move: X\n", + "X X \n", + "O X \n", + "O O \n", + "on move: O\n", + "X X \n", + "O X \n", + "O O X \n", + "Episode 376, Total Reward: 1\n", + "Average Reward: 0.2925531914893617\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + " O \n", + "on move: O\n", + " X \n", + "X \n", + " O \n", + "on move: X\n", + " X \n", + "X O \n", + " O \n", + "on move: O\n", + " X \n", + "X O \n", + " O X \n", + "on move: 
X\n", + "O X \n", + "X O \n", + " O X \n", + "on move: O\n", + "O X \n", + "X O \n", + "X O X \n", + "on move: X\n", + "O X \n", + "X O O \n", + "X O X \n", + "on move: O\n", + "O X X \n", + "X O O \n", + "X O X \n", + "Episode 377, Total Reward: 0\n", + "Average Reward: 0.2917771883289125\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " \n", + "O \n", + "on move: O\n", + "X \n", + " X \n", + "O \n", + "on move: X\n", + "X O \n", + " X \n", + "O \n", + "on move: O\n", + "X O \n", + "X X \n", + "O \n", + "on move: X\n", + "X O \n", + "X X O \n", + "O \n", + "on move: O\n", + "X O \n", + "X X O \n", + "O X \n", + "on move: X\n", + "X O O \n", + "X X O \n", + "O X \n", + "on move: O\n", + "X O O \n", + "X X O \n", + "O X X \n", + "Episode 378, Total Reward: 1\n", + "Average Reward: 0.29365079365079366\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " O \n", + " \n", + "X \n", + "on move: O\n", + " O \n", + " \n", + "X X \n", + "on move: X\n", + " O \n", + " O \n", + "X X \n", + "on move: O\n", + "X O \n", + " O \n", + "X X \n", + "on move: X\n", + "X O O \n", + " O \n", + "X X \n", + "on move: O\n", + "X O O \n", + "X O \n", + "X X \n", + "Episode 379, Total Reward: 1\n", + "Average Reward: 0.2955145118733509\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + " X \n", + " X O \n", + " \n", + "on move: X\n", + "O X \n", + " X O \n", + " \n", + "on move: O\n", + "O X \n", + " X O \n", + "X \n", + "Episode 380, Total Reward: 1\n", + "Average Reward: 0.29736842105263156\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " O \n", + " \n", + "on move: O\n", + "X \n", + " O X \n", + " \n", + "on move: X\n", + "X \n", + "O O X \n", + " \n", + "on move: O\n", + "X \n", + "O O X \n", + " X \n", + "on move: X\n", + "X O \n", + "O O X \n", + " X \n", + "on move: O\n", + "X O \n", + "O O X \n", + "X X \n", + "on move: X\n", + "X O 
\n", + "O O X \n", + "X X O \n", + "on move: O\n", + "X O X \n", + "O O X \n", + "X X O \n", + "Episode 381, Total Reward: 0\n", + "Average Reward: 0.29658792650918636\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X \n", + "O \n", + "on move: O\n", + " X \n", + "X \n", + "O \n", + "on move: X\n", + " X \n", + "X O \n", + "O \n", + "on move: O\n", + " X \n", + "X O \n", + "O X \n", + "on move: X\n", + " X \n", + "X O O \n", + "O X \n", + "on move: O\n", + " X \n", + "X O O \n", + "O X X \n", + "on move: X\n", + "O X \n", + "X O O \n", + "O X X \n", + "on move: O\n", + "O X X \n", + "X O O \n", + "O X X \n", + "Episode 382, Total Reward: 0\n", + "Average Reward: 0.29581151832460734\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + "O \n", + " X \n", + " \n", + "on move: O\n", + "O \n", + "X X \n", + " \n", + "on move: X\n", + "O O \n", + "X X \n", + " \n", + "on move: O\n", + "O O \n", + "X X \n", + "X \n", + "on move: X\n", + "O O \n", + "X X \n", + "X O \n", + "on move: O\n", + "O O \n", + "X X \n", + "X O X \n", + "on move: X\n", + "O O O \n", + "X X \n", + "X O X \n", + "Episode 383, Total Reward: -1\n", + "Average Reward: 0.2924281984334204\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + "O \n", + "X \n", + "on move: O\n", + " \n", + "O X \n", + "X \n", + "on move: X\n", + " \n", + "O X O \n", + "X \n", + "on move: O\n", + " X \n", + "O X O \n", + "X \n", + "Episode 384, Total Reward: 1\n", + "Average Reward: 0.2942708333333333\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + "O \n", + "on move: O\n", + " X \n", + " \n", + "O X \n", + "on move: X\n", + " X \n", + " O \n", + "O X \n", + "on move: O\n", + " X X \n", + " O \n", + "O X \n", + "on move: X\n", + " X X \n", + " O \n", + "O O X \n", + "on move: O\n", + " X X \n", + " X O \n", + "O O X \n", + "on move: X\n", + " X X \n", + "O X O \n", + "O O X \n", + "on move: O\n", + "X X X 
\n", + "O X O \n", + "O O X \n", + "Episode 385, Total Reward: 1\n", + "Average Reward: 0.2961038961038961\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + "O \n", + " \n", + " X \n", + "on move: O\n", + "O \n", + " \n", + " X X \n", + "on move: X\n", + "O \n", + " \n", + "O X X \n", + "on move: O\n", + "O X \n", + " \n", + "O X X \n", + "on move: X\n", + "O X \n", + " O \n", + "O X X \n", + "on move: O\n", + "O X \n", + "X O \n", + "O X X \n", + "on move: X\n", + "O O X \n", + "X O \n", + "O X X \n", + "on move: O\n", + "O O X \n", + "X O X \n", + "O X X \n", + "Episode 386, Total Reward: 1\n", + "Average Reward: 0.2979274611398964\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " \n", + " X O \n", + "on move: O\n", + " X \n", + " \n", + " X O \n", + "on move: X\n", + " X \n", + " O \n", + " X O \n", + "on move: O\n", + "X X \n", + " O \n", + " X O \n", + "on move: X\n", + "X X \n", + "O O \n", + " X O \n", + "on move: O\n", + "X X \n", + "O O X \n", + " X O \n", + "on move: X\n", + "X O X \n", + "O O X \n", + " X O \n", + "on move: O\n", + "X O X \n", + "O O X \n", + "X X O \n", + "Episode 387, Total Reward: 0\n", + "Average Reward: 0.2971576227390181\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + " O \n", + "on move: O\n", + "X \n", + " X \n", + " O \n", + "on move: X\n", + "X \n", + "O X \n", + " O \n", + "on move: O\n", + "X \n", + "O X \n", + "X O \n", + "on move: X\n", + "X O \n", + "O X \n", + "X O \n", + "on move: O\n", + "X X O \n", + "O X \n", + "X O \n", + "on move: X\n", + "X X O \n", + "O X \n", + "X O O \n", + "on move: O\n", + "X X O \n", + "O X X \n", + "X O O \n", + "Episode 388, Total Reward: 0\n", + "Average Reward: 0.2963917525773196\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + "O \n", + " X \n", + " \n", + "on move: O\n", + "O \n", + " X \n", + " X \n", + "on move: X\n", + "O \n", + "O X \n", + " X \n", + "on move: O\n", 
+ "O \n", + "O X X \n", + " X \n", + "on move: X\n", + "O \n", + "O X X \n", + " O X \n", + "on move: O\n", + "O \n", + "O X X \n", + "X O X \n", + "on move: X\n", + "O O \n", + "O X X \n", + "X O X \n", + "on move: O\n", + "O O X \n", + "O X X \n", + "X O X \n", + "Episode 389, Total Reward: 1\n", + "Average Reward: 0.2982005141388175\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + "X X \n", + " \n", + "on move: X\n", + " O \n", + "X X \n", + " O \n", + "on move: O\n", + " O \n", + "X X \n", + "X O \n", + "on move: X\n", + " O \n", + "X X \n", + "X O O \n", + "on move: O\n", + "X O \n", + "X X \n", + "X O O \n", + "Episode 390, Total Reward: 1\n", + "Average Reward: 0.3\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + "X X \n", + " \n", + "on move: X\n", + " O O \n", + "X X \n", + " \n", + "on move: O\n", + " O O \n", + "X X \n", + " X \n", + "on move: X\n", + " O O \n", + "X X \n", + " X O \n", + "on move: O\n", + "X O O \n", + "X X \n", + " X O \n", + "on move: X\n", + "X O O \n", + "X X O \n", + " X O \n", + "Episode 391, Total Reward: -1\n", + "Average Reward: 0.2966751918158568\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X O \n", + " \n", + "on move: O\n", + " \n", + "X O \n", + " X \n", + "on move: X\n", + "O \n", + "X O \n", + " X \n", + "on move: O\n", + "O X \n", + "X O \n", + " X \n", + "on move: X\n", + "O X \n", + "X O O \n", + " X \n", + "on move: O\n", + "O X \n", + "X O O \n", + "X X \n", + "on move: X\n", + "O X O \n", + "X O O \n", + "X X \n", + "on move: O\n", + "O X O \n", + "X O O \n", + "X X X \n", + "Episode 392, Total Reward: 1\n", + "Average Reward: 0.29846938775510207\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + " O \n", + " \n", + "X X \n", + "on move: X\n", + " O 
\n", + " O \n", + "X X \n", + "on move: O\n", + " O \n", + " X O \n", + "X X \n", + "on move: X\n", + " O \n", + "O X O \n", + "X X \n", + "on move: O\n", + "X O \n", + "O X O \n", + "X X \n", + "on move: X\n", + "X O \n", + "O X O \n", + "X X O \n", + "Episode 393, Total Reward: -1\n", + "Average Reward: 0.2951653944020356\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + " O \n", + "on move: O\n", + "X X \n", + " \n", + " O \n", + "on move: X\n", + "X X \n", + "O \n", + " O \n", + "on move: O\n", + "X X \n", + "O \n", + "X O \n", + "on move: X\n", + "X O X \n", + "O \n", + "X O \n", + "on move: O\n", + "X O X \n", + "O \n", + "X X O \n", + "on move: X\n", + "X O X \n", + "O O \n", + "X X O \n", + "on move: O\n", + "X O X \n", + "O X O \n", + "X X O \n", + "Episode 394, Total Reward: 1\n", + "Average Reward: 0.2969543147208122\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + "O X \n", + " \n", + "on move: O\n", + " \n", + "O X \n", + " X \n", + "on move: X\n", + " O \n", + "O X \n", + " X \n", + "on move: O\n", + " X O \n", + "O X \n", + " X \n", + "on move: X\n", + " X O \n", + "O X \n", + " O X \n", + "on move: O\n", + " X O \n", + "O X \n", + "X O X \n", + "on move: X\n", + "O X O \n", + "O X \n", + "X O X \n", + "on move: O\n", + "O X O \n", + "O X X \n", + "X O X \n", + "Episode 395, Total Reward: 0\n", + "Average Reward: 0.29620253164556964\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " O \n", + "X \n", + " \n", + "on move: O\n", + " O \n", + "X X \n", + " \n", + "on move: X\n", + " O \n", + "X X O \n", + " \n", + "on move: O\n", + " O \n", + "X X O \n", + " X \n", + "on move: X\n", + " O \n", + "X X O \n", + " O X \n", + "on move: O\n", + "X O \n", + "X X O \n", + " O X \n", + "Episode 396, Total Reward: 1\n", + "Average Reward: 0.29797979797979796\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " O X \n", + " \n", + "on move: 
O\n", + " \n", + " O X \n", + "X \n", + "on move: X\n", + "O \n", + " O X \n", + "X \n", + "on move: O\n", + "O \n", + " O X \n", + "X X \n", + "on move: X\n", + "O O \n", + " O X \n", + "X X \n", + "on move: O\n", + "O O \n", + " O X \n", + "X X X \n", + "Episode 397, Total Reward: 1\n", + "Average Reward: 0.29974811083123426\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " O \n", + "X \n", + "on move: O\n", + " X \n", + " O \n", + "X \n", + "on move: X\n", + " X \n", + "O O \n", + "X \n", + "on move: O\n", + " X X \n", + "O O \n", + "X \n", + "on move: X\n", + " X X \n", + "O O \n", + "X O \n", + "on move: O\n", + "X X X \n", + "O O \n", + "X O \n", + "Episode 398, Total Reward: 1\n", + "Average Reward: 0.3015075376884422\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " O \n", + " \n", + " X \n", + "on move: O\n", + " O \n", + " X \n", + " X \n", + "on move: X\n", + " O \n", + "O X \n", + " X \n", + "on move: O\n", + " O \n", + "O X \n", + "X X \n", + "on move: X\n", + "O O \n", + "O X \n", + "X X \n", + "on move: O\n", + "O O \n", + "O X \n", + "X X X \n", + "Episode 399, Total Reward: 1\n", + "Average Reward: 0.3032581453634085\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X O \n", + " \n", + "on move: O\n", + " \n", + "X O \n", + " X \n", + "on move: X\n", + " O \n", + "X O \n", + " X \n", + "on move: O\n", + " O \n", + "X O X \n", + " X \n", + "on move: X\n", + " O \n", + "X O X \n", + " X O \n", + "on move: O\n", + "X O \n", + "X O X \n", + " X O \n", + "on move: X\n", + "X O O \n", + "X O X \n", + " X O \n", + "on move: O\n", + "X O O \n", + "X O X \n", + "X X O \n", + "Episode 400, Total Reward: 1\n", + "Average Reward: 0.305\n" + ] + } + ], + "source": [ + "env = TicTacToeEnv()\n", + "\n", + "agent = RandomTicTacToeAgent(symbol=1)\n", + "\n", + "num_episodes = 400\n", + "collected_rewards = []\n", + "\n", + "oom = 1\n", + "\n", + "for i in range(num_episodes):\n", + 
" state, _ = env.reset() \n", + " total_reward = 0\n", + " done = False\n", + " om = oom \n", + "\n", + " for j in range(9): \n", + " moves = env.move_generator() \n", + "\n", + " if not moves:\n", + " break\n", + "\n", + " if len(moves) == 1:\n", + " move = moves[0]\n", + " else:\n", + " move = agent.get_action(moves)\n", + "\n", + " next_state, reward, done, info = env.step(move)\n", + " total_reward += reward\n", + " state = next_state\n", + "\n", + " env.render()\n", + "\n", + " if done:\n", + " break\n", + "\n", + " om = -om\n", + "\n", + " collected_rewards.append(total_reward)\n", + "\n", + " print(f\"Episode {i+1}, Total Reward: {total_reward}\")\n", + " average_reward = sum(collected_rewards) / len(collected_rewards)\n", + " print(f\"Average Reward: {average_reward}\")\n", + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}