MII/mai/lab6.ipynb

13287 lines
270 KiB
Plaintext
Raw Permalink Normal View History

2024-11-30 09:53:42 +04:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Лабораторная работа 6\n",
"\n",
"**Крестики-нолики.** Исходная среда: <https://github.com/nczempin/gym-tic-tac-toe/tree/master>\n",
"\n",
"#### **Перевод среды на Gymnasium**"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"import gymnasium as gym\n",
"from gymnasium import spaces\n",
"\n",
"class TicTacToeEnv(gym.Env):\n",
"    \"\"\"Two-player tic-tac-toe environment built on ``gymnasium.Env``.\n",
"\n",
"    ``self.state`` is a dict with two keys:\n",
"\n",
"    * ``'board'``: list of 9 ints in row-major order (1 = X, -1 = O, 0 = empty);\n",
"    * ``'on_move'``: 1 or -1, the player whose turn it is (X moves first).\n",
"\n",
"    An action is a pair ``(player, square)`` with ``square`` in 0..8.\n",
"    \"\"\"\n",
"\n",
"    # Gymnasium reads the 'render_modes' key; the legacy Gym key\n",
"    # 'render.modes' is kept for code that still looks it up.\n",
"    metadata = {'render_modes': ['human'], 'render.modes': ['human']}\n",
"\n",
"    # Display characters indexed by cell value + 1 (-1 -> 'O', 0 -> ' ', 1 -> 'X').\n",
"    symbols = ['O', ' ', 'X']\n",
"\n",
"    # Index triples that form a winning line on the row-major board.\n",
"    _WIN_LINES = (\n",
"        (0, 1, 2), (3, 4, 5), (6, 7, 8),  # rows\n",
"        (0, 3, 6), (1, 4, 7), (2, 5, 8),  # columns\n",
"        (0, 4, 8), (2, 4, 6),             # diagonals\n",
"    )\n",
"\n",
"    def __init__(self):\n",
"        super().__init__()\n",
"        # NOTE(review): actions are (player, square) pairs and observations are\n",
"        # the state dict, so neither matches these Discrete spaces - confirm\n",
"        # whether Tuple/Dict spaces were intended.\n",
"        self.action_space = spaces.Discrete(9)\n",
"        self.observation_space = spaces.Discrete(9 * 3 * 2)\n",
"        self.reset()\n",
"\n",
"    def step(self, action):\n",
"        \"\"\"Apply one move and report the outcome.\n",
"\n",
"        Args:\n",
"            action: pair ``(p, square)``; ``p`` is the moving player (1 or -1)\n",
"                and ``square`` is the board index (0..8).\n",
"\n",
"        Returns:\n",
"            ``(state, reward, done, info)`` where\n",
"\n",
"            * a legal winning move gives ``reward = p`` and ``done = True``;\n",
"            * a legal move that fills the last empty square without a win\n",
"              gives ``reward = 0`` and ``done = True`` (draw);\n",
"            * any other legal move gives ``reward = 0`` and ``done = False``;\n",
"            * an illegal move (occupied square, or player not on move) leaves\n",
"              the board and the turn untouched and gives\n",
"              ``reward = -1 * on_move`` and ``done = True``.\n",
"\n",
"        Note:\n",
"            Gymnasium's ``Env.step`` contract is a 5-tuple with separate\n",
"            ``terminated``/``truncated`` flags; the 4-tuple is kept here so\n",
"            callers that unpack four values keep working.\n",
"        \"\"\"\n",
"        p, square = action  # p: moving player (1 or -1); square: board index\n",
"        board = self.state['board']\n",
"        om = self.state['on_move']\n",
"\n",
"        # Check both legality rules so that each violation prints its own message.\n",
"        illegal = False\n",
"        if board[square] != 0:  # square already taken\n",
"            print(f\"Незаконный ход: Квадрат {square} уже занят.\")\n",
"            illegal = True\n",
"        if p != om:  # wrong player tried to move\n",
"            print(f\"Незаконный ход: игрок {p} не находится в движении\")\n",
"            illegal = True\n",
"        if illegal:\n",
"            # An illegal move ends the episode and must not alter the board or\n",
"            # the turn (previously an occupied square could be overwritten).\n",
"            return self.state, -1 * om, True, {}\n",
"\n",
"        board[square] = p\n",
"        self.state['on_move'] = -p\n",
"\n",
"        if any(all(board[i] == p for i in line) for line in self._WIN_LINES):\n",
"            return self.state, p, True, {}\n",
"\n",
"        # No winner: the game is over only when no empty square is left (draw).\n",
"        return self.state, 0, 0 not in board, {}\n",
"\n",
"    def reset(self, *, seed=None, options=None):\n",
"        \"\"\"Start a new game with an empty board and X (1) on move.\n",
"\n",
"        Args:\n",
"            seed: optional seed forwarded to ``gymnasium.Env.reset``.\n",
"            options: accepted for Gymnasium API compatibility; unused.\n",
"\n",
"        Returns:\n",
"            ``(state, info)`` with an empty ``info`` dict.\n",
"        \"\"\"\n",
"        super().reset(seed=seed)\n",
"        self.state = {'board': [0] * 9, 'on_move': 1}\n",
"        return self.state, {}\n",
"\n",
"    def render(self, close=False):\n",
"        \"\"\"Print the player on move and the 3x3 board; do nothing if ``close`` is true.\"\"\"\n",
"        if close:\n",
"            return\n",
"        print('on move: ', self.symbols[self.state['on_move'] + 1])\n",
"        board = self.state['board']\n",
"        for row_start in range(0, 9, 3):\n",
"            # Each symbol is followed by one space, then the row is terminated.\n",
"            row = board[row_start:row_start + 3]\n",
"            print(*(self.symbols[cell + 1] for cell in row), end=' \\n')\n",
"\n",
"    def move_generator(self):\n",
"        \"\"\"Return ``[player_on_move, square]`` for every empty square, in board order.\"\"\"\n",
"        player = self.state['on_move']\n",
"        return [[player, i] for i, cell in enumerate(self.state['board']) if cell == 0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### **Реализация агента**"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"import random\n",
"\n",
"class RandomTicTacToeAgent:\n",
"    \"\"\"Agent that plays by picking one of the offered moves at random.\"\"\"\n",
"\n",
"    def __init__(self, symbol):\n",
"        # Player marker: 1 plays X, -1 plays O.\n",
"        self.symbol = symbol\n",
"\n",
"    def get_action(self, moves):\n",
"        \"\"\"Return one element of ``moves`` selected with ``random.choice``.\"\"\"\n",
"        chosen = random.choice(moves)\n",
"        return chosen\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### **Основной цикл обучения**"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" \n",
" X O \n",
"on move: O\n",
"X \n",
" \n",
" X O \n",
"on move: X\n",
"X O \n",
" \n",
" X O \n",
"on move: O\n",
"X X O \n",
" \n",
" X O \n",
"on move: X\n",
"X X O \n",
" \n",
"O X O \n",
"on move: O\n",
"X X O \n",
" X \n",
"O X O \n",
"on move: X\n",
"X X O \n",
"O X \n",
"O X O \n",
"on move: O\n",
"X X O \n",
"O X X \n",
"O X O \n",
"Episode 1, Total Reward: 1\n",
"Average Reward: 1.0\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
" O \n",
"on move: O\n",
" \n",
" X \n",
" O X \n",
"on move: X\n",
" \n",
"O X \n",
" O X \n",
"on move: O\n",
" \n",
"O X \n",
"X O X \n",
"on move: X\n",
" O \n",
"O X \n",
"X O X \n",
"on move: O\n",
" O \n",
"O X X \n",
"X O X \n",
"on move: X\n",
" O O \n",
"O X X \n",
"X O X \n",
"on move: O\n",
"X O O \n",
"O X X \n",
"X O X \n",
"Episode 2, Total Reward: 1\n",
"Average Reward: 1.0\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
"O \n",
" X \n",
"on move: O\n",
" \n",
"O X \n",
" X \n",
"on move: X\n",
" \n",
"O X \n",
"O X \n",
"on move: O\n",
" X \n",
"O X \n",
"O X \n",
"Episode 3, Total Reward: 1\n",
"Average Reward: 1.0\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
" O \n",
"on move: O\n",
" X \n",
" \n",
" O X \n",
"on move: X\n",
" X \n",
" \n",
"O O X \n",
"on move: O\n",
" X \n",
"X \n",
"O O X \n",
"on move: X\n",
" X O \n",
"X \n",
"O O X \n",
"on move: O\n",
" X O \n",
"X X \n",
"O O X \n",
"on move: X\n",
" X O \n",
"X X O \n",
"O O X \n",
"on move: O\n",
"X X O \n",
"X X O \n",
"O O X \n",
"Episode 4, Total Reward: 1\n",
"Average Reward: 1.0\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" O \n",
" \n",
"on move: O\n",
" X X \n",
" O \n",
" \n",
"on move: X\n",
" X X \n",
" O O \n",
" \n",
"on move: O\n",
" X X \n",
"X O O \n",
" \n",
"on move: X\n",
" X X \n",
"X O O \n",
" O \n",
"on move: O\n",
" X X \n",
"X O O \n",
" O X \n",
"on move: X\n",
" X X \n",
"X O O \n",
"O O X \n",
"on move: O\n",
"X X X \n",
"X O O \n",
"O O X \n",
"Episode 5, Total Reward: 1\n",
"Average Reward: 1.0\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
"X X \n",
" \n",
"on move: X\n",
" O O \n",
"X X \n",
" \n",
"on move: O\n",
" O O \n",
"X X \n",
" X \n",
"on move: X\n",
" O O \n",
"X X \n",
" X O \n",
"on move: O\n",
"X O O \n",
"X X \n",
" X O \n",
"on move: X\n",
"X O O \n",
"X X O \n",
" X O \n",
"Episode 6, Total Reward: -1\n",
"Average Reward: 0.6666666666666666\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
" O \n",
" X \n",
" X \n",
"on move: X\n",
" O O \n",
" X \n",
" X \n",
"on move: O\n",
" O O \n",
" X \n",
" X X \n",
"on move: X\n",
" O O \n",
" O X \n",
" X X \n",
"on move: O\n",
"X O O \n",
" O X \n",
" X X \n",
"on move: X\n",
"X O O \n",
"O O X \n",
" X X \n",
"on move: O\n",
"X O O \n",
"O O X \n",
"X X X \n",
"Episode 7, Total Reward: 1\n",
"Average Reward: 0.7142857142857143\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" O \n",
" \n",
"on move: O\n",
" X \n",
" O X \n",
" \n",
"on move: X\n",
" X \n",
"O O X \n",
" \n",
"on move: O\n",
" X \n",
"O O X \n",
" X \n",
"on move: X\n",
"O X \n",
"O O X \n",
" X \n",
"on move: O\n",
"O X \n",
"O O X \n",
"X X \n",
"on move: X\n",
"O X O \n",
"O O X \n",
"X X \n",
"on move: O\n",
"O X O \n",
"O O X \n",
"X X X \n",
"Episode 8, Total Reward: 1\n",
"Average Reward: 0.75\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" O \n",
"X \n",
" \n",
"on move: O\n",
" O \n",
"X X \n",
" \n",
"on move: X\n",
" O \n",
"X X \n",
" O \n",
"on move: O\n",
"X O \n",
"X X \n",
" O \n",
"on move: X\n",
"X O \n",
"X X \n",
" O O \n",
"on move: O\n",
"X O \n",
"X X X \n",
" O O \n",
"Episode 9, Total Reward: 1\n",
"Average Reward: 0.7777777777777778\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
" O \n",
"on move: O\n",
"X X \n",
" \n",
" O \n",
"on move: X\n",
"X X \n",
" O \n",
" O \n",
"on move: O\n",
"X X X \n",
" O \n",
" O \n",
"Episode 10, Total Reward: 1\n",
"Average Reward: 0.8\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
" \n",
" X O \n",
" X \n",
"on move: X\n",
" \n",
"O X O \n",
" X \n",
"on move: O\n",
"X \n",
"O X O \n",
" X \n",
"on move: X\n",
"X O \n",
"O X O \n",
" X \n",
"on move: O\n",
"X O \n",
"O X O \n",
"X X \n",
"on move: X\n",
"X O O \n",
"O X O \n",
"X X \n",
"on move: O\n",
"X O O \n",
"O X O \n",
"X X X \n",
"Episode 11, Total Reward: 1\n",
"Average Reward: 0.8181818181818182\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" O X \n",
" \n",
"on move: O\n",
"X \n",
" O X \n",
" \n",
"on move: X\n",
"X \n",
" O X \n",
" O \n",
"on move: O\n",
"X \n",
" O X \n",
" O X \n",
"on move: X\n",
"X \n",
" O X \n",
"O O X \n",
"on move: O\n",
"X \n",
"X O X \n",
"O O X \n",
"on move: X\n",
"X O \n",
"X O X \n",
"O O X \n",
"Episode 12, Total Reward: -1\n",
"Average Reward: 0.6666666666666666\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
"O \n",
" \n",
"X \n",
"on move: O\n",
"O X \n",
" \n",
"X \n",
"on move: X\n",
"O X \n",
" \n",
"X O \n",
"on move: O\n",
"O X \n",
" X \n",
"X O \n",
"on move: X\n",
"O O X \n",
" X \n",
"X O \n",
"on move: O\n",
"O O X \n",
" X \n",
"X X O \n",
"on move: X\n",
"O O X \n",
"O X \n",
"X X O \n",
"on move: O\n",
"O O X \n",
"O X X \n",
"X X O \n",
"Episode 13, Total Reward: 1\n",
"Average Reward: 0.6923076923076923\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
"O \n",
"on move: O\n",
" X \n",
" X \n",
"O \n",
"on move: X\n",
" X \n",
" X \n",
"O O \n",
"on move: O\n",
"X X \n",
" X \n",
"O O \n",
"on move: X\n",
"X O X \n",
" X \n",
"O O \n",
"on move: O\n",
"X O X \n",
" X \n",
"O O X \n",
"Episode 14, Total Reward: 1\n",
"Average Reward: 0.7142857142857143\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" \n",
" O \n",
"on move: O\n",
"X X \n",
" \n",
" O \n",
"on move: X\n",
"X X \n",
" \n",
"O O \n",
"on move: O\n",
"X X \n",
" \n",
"O X O \n",
"on move: X\n",
"X X \n",
" O \n",
"O X O \n",
"on move: O\n",
"X X X \n",
" O \n",
"O X O \n",
"Episode 15, Total Reward: 1\n",
"Average Reward: 0.7333333333333333\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" O \n",
" \n",
"on move: O\n",
"X X \n",
" O \n",
" \n",
"on move: X\n",
"X X \n",
"O O \n",
" \n",
"on move: O\n",
"X X \n",
"O O \n",
" X \n",
"on move: X\n",
"X O X \n",
"O O \n",
" X \n",
"on move: O\n",
"X O X \n",
"O O X \n",
" X \n",
"on move: X\n",
"X O X \n",
"O O X \n",
"O X \n",
"on move: O\n",
"X O X \n",
"O O X \n",
"O X X \n",
"Episode 16, Total Reward: 1\n",
"Average Reward: 0.75\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
"O \n",
" \n",
"on move: O\n",
"X \n",
"O \n",
" X \n",
"on move: X\n",
"X \n",
"O \n",
" O X \n",
"on move: O\n",
"X X \n",
"O \n",
" O X \n",
"on move: X\n",
"X X \n",
"O O \n",
" O X \n",
"on move: O\n",
"X X X \n",
"O O \n",
" O X \n",
"Episode 17, Total Reward: 1\n",
"Average Reward: 0.7647058823529411\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
"O X \n",
" \n",
" \n",
"on move: O\n",
"O X X \n",
" \n",
" \n",
"on move: X\n",
"O X X \n",
"O \n",
" \n",
"on move: O\n",
"O X X \n",
"O \n",
" X \n",
"on move: X\n",
"O X X \n",
"O O \n",
" X \n",
"on move: O\n",
"O X X \n",
"O O \n",
" X X \n",
"on move: X\n",
"O X X \n",
"O O O \n",
" X X \n",
"Episode 18, Total Reward: -1\n",
"Average Reward: 0.6666666666666666\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X O \n",
" \n",
"on move: O\n",
" \n",
"X O \n",
" X \n",
"on move: X\n",
" O \n",
"X O \n",
" X \n",
"on move: O\n",
" O \n",
"X O \n",
"X X \n",
"on move: X\n",
" O \n",
"X O \n",
"X O X \n",
"on move: O\n",
" X O \n",
"X O \n",
"X O X \n",
"on move: X\n",
"O X O \n",
"X O \n",
"X O X \n",
"on move: O\n",
"O X O \n",
"X O X \n",
"X O X \n",
"Episode 19, Total Reward: 0\n",
"Average Reward: 0.631578947368421\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
" O \n",
" X \n",
" X \n",
"on move: X\n",
"O O \n",
" X \n",
" X \n",
"on move: O\n",
"O O \n",
"X X \n",
" X \n",
"on move: X\n",
"O O \n",
"X X O \n",
" X \n",
"on move: O\n",
"O O \n",
"X X O \n",
"X X \n",
"on move: X\n",
"O O O \n",
"X X O \n",
"X X \n",
"Episode 20, Total Reward: -1\n",
"Average Reward: 0.55\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" O \n",
"X \n",
"on move: O\n",
"X \n",
" O \n",
"X \n",
"on move: X\n",
"X \n",
" O O \n",
"X \n",
"on move: O\n",
"X \n",
" O O \n",
"X X \n",
"on move: X\n",
"X \n",
" O O \n",
"X X O \n",
"on move: O\n",
"X X \n",
" O O \n",
"X X O \n",
"on move: X\n",
"X X \n",
"O O O \n",
"X X O \n",
"Episode 21, Total Reward: -1\n",
"Average Reward: 0.47619047619047616\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
"X X \n",
" \n",
"on move: X\n",
"O O \n",
"X X \n",
" \n",
"on move: O\n",
"O X O \n",
"X X \n",
" \n",
"on move: X\n",
"O X O \n",
"X X \n",
" O \n",
"on move: O\n",
"O X O \n",
"X X \n",
"X O \n",
"on move: X\n",
"O X O \n",
"X O X \n",
"X O \n",
"Episode 22, Total Reward: -1\n",
"Average Reward: 0.4090909090909091\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" O \n",
" \n",
"on move: O\n",
"X \n",
"X O \n",
" \n",
"on move: X\n",
"X O \n",
"X O \n",
" \n",
"on move: O\n",
"X O X \n",
"X O \n",
" \n",
"on move: X\n",
"X O X \n",
"X O \n",
" O \n",
"on move: O\n",
"X O X \n",
"X O \n",
"X O \n",
"Episode 23, Total Reward: 1\n",
"Average Reward: 0.43478260869565216\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
"O \n",
"on move: O\n",
" \n",
"X X \n",
"O \n",
"on move: X\n",
" \n",
"X O X \n",
"O \n",
"on move: O\n",
" X \n",
"X O X \n",
"O \n",
"on move: X\n",
"O X \n",
"X O X \n",
"O \n",
"on move: O\n",
"O X X \n",
"X O X \n",
"O \n",
"on move: X\n",
"O X X \n",
"X O X \n",
"O O \n",
"Episode 24, Total Reward: -1\n",
"Average Reward: 0.375\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
" X \n",
" X \n",
"on move: X\n",
" O \n",
" X \n",
" O X \n",
"on move: O\n",
"X O \n",
" X \n",
" O X \n",
"Episode 25, Total Reward: 1\n",
"Average Reward: 0.4\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
"O X \n",
" \n",
" \n",
"on move: O\n",
"O X \n",
" X \n",
" \n",
"on move: X\n",
"O X \n",
"O X \n",
" \n",
"on move: O\n",
"O X \n",
"O X \n",
"X \n",
"on move: X\n",
"O X \n",
"O X \n",
"X O \n",
"on move: O\n",
"O X X \n",
"O X \n",
"X O \n",
"on move: X\n",
"O X X \n",
"O O X \n",
"X O \n",
"Episode 26, Total Reward: -1\n",
"Average Reward: 0.34615384615384615\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
"O \n",
" \n",
" X \n",
"on move: O\n",
"O \n",
" X \n",
" X \n",
"on move: X\n",
"O O \n",
" X \n",
" X \n",
"on move: O\n",
"O O \n",
" X \n",
"X X \n",
"on move: X\n",
"O O \n",
" X \n",
"X O X \n",
"on move: O\n",
"O O \n",
" X X \n",
"X O X \n",
"on move: X\n",
"O O \n",
"O X X \n",
"X O X \n",
"on move: O\n",
"O O X \n",
"O X X \n",
"X O X \n",
"Episode 27, Total Reward: 1\n",
"Average Reward: 0.37037037037037035\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X O \n",
" \n",
"on move: O\n",
" \n",
"X O X \n",
" \n",
"on move: X\n",
" \n",
"X O X \n",
" O \n",
"on move: O\n",
" X \n",
"X O X \n",
" O \n",
"on move: X\n",
"O X \n",
"X O X \n",
" O \n",
"on move: O\n",
"O X \n",
"X O X \n",
" O X \n",
"on move: X\n",
"O X O \n",
"X O X \n",
" O X \n",
"on move: O\n",
"O X O \n",
"X O X \n",
"X O X \n",
"Episode 28, Total Reward: 0\n",
"Average Reward: 0.35714285714285715\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
" O \n",
" X \n",
" X \n",
"on move: X\n",
" O O \n",
" X \n",
" X \n",
"on move: O\n",
" O O \n",
" X \n",
"X X \n",
"on move: X\n",
"O O O \n",
" X \n",
"X X \n",
"Episode 29, Total Reward: -1\n",
"Average Reward: 0.3103448275862069\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" O \n",
"X \n",
" \n",
"on move: O\n",
"X O \n",
"X \n",
" \n",
"on move: X\n",
"X O O \n",
"X \n",
" \n",
"on move: O\n",
"X O O \n",
"X \n",
"X \n",
"Episode 30, Total Reward: 1\n",
"Average Reward: 0.3333333333333333\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
" O \n",
" X \n",
" X \n",
"on move: X\n",
" O \n",
" X \n",
" O X \n",
"on move: O\n",
" X O \n",
" X \n",
" O X \n",
"on move: X\n",
"O X O \n",
" X \n",
" O X \n",
"on move: O\n",
"O X O \n",
"X X \n",
" O X \n",
"on move: X\n",
"O X O \n",
"X X \n",
"O O X \n",
"on move: O\n",
"O X O \n",
"X X X \n",
"O O X \n",
"Episode 31, Total Reward: 1\n",
"Average Reward: 0.3548387096774194\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
" O \n",
"on move: O\n",
" X \n",
" \n",
"X O \n",
"on move: X\n",
" X \n",
"O \n",
"X O \n",
"on move: O\n",
" X \n",
"O X \n",
"X O \n",
"on move: X\n",
"O X \n",
"O X \n",
"X O \n",
"on move: O\n",
"O X X \n",
"O X \n",
"X O \n",
"on move: X\n",
"O X X \n",
"O O X \n",
"X O \n",
"Episode 32, Total Reward: -1\n",
"Average Reward: 0.3125\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X O \n",
" \n",
"on move: O\n",
" X \n",
"X O \n",
" \n",
"on move: X\n",
" X \n",
"X O O \n",
" \n",
"on move: O\n",
"X X \n",
"X O O \n",
" \n",
"on move: X\n",
"X X O \n",
"X O O \n",
" \n",
"on move: O\n",
"X X O \n",
"X O O \n",
" X \n",
"on move: X\n",
"X X O \n",
"X O O \n",
"O X \n",
"Episode 33, Total Reward: -1\n",
"Average Reward: 0.2727272727272727\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" O \n",
" \n",
"X \n",
"on move: O\n",
" O \n",
"X \n",
"X \n",
"on move: X\n",
" O \n",
"X O \n",
"X \n",
"on move: O\n",
" O X \n",
"X O \n",
"X \n",
"on move: X\n",
" O X \n",
"X O \n",
"X O \n",
"Episode 34, Total Reward: -1\n",
"Average Reward: 0.23529411764705882\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
"O \n",
" X \n",
"on move: O\n",
" \n",
"O \n",
"X X \n",
"on move: X\n",
" O \n",
"O \n",
"X X \n",
"on move: O\n",
" O \n",
"O X \n",
"X X \n",
"on move: X\n",
" O \n",
"O O X \n",
"X X \n",
"on move: O\n",
" O X \n",
"O O X \n",
"X X \n",
"Episode 35, Total Reward: 1\n",
"Average Reward: 0.2571428571428571\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" \n",
" O \n",
"on move: O\n",
"X \n",
"X \n",
" O \n",
"on move: X\n",
"X \n",
"X O \n",
" O \n",
"on move: O\n",
"X \n",
"X O \n",
"X O \n",
"Episode 36, Total Reward: 1\n",
"Average Reward: 0.2777777777777778\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X O \n",
" \n",
"on move: O\n",
" \n",
" X O \n",
" X \n",
"on move: X\n",
"O \n",
" X O \n",
" X \n",
"on move: O\n",
"O \n",
"X X O \n",
" X \n",
"on move: X\n",
"O O \n",
"X X O \n",
" X \n",
"on move: O\n",
"O X O \n",
"X X O \n",
" X \n",
"Episode 37, Total Reward: 1\n",
"Average Reward: 0.2972972972972973\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
"O \n",
" \n",
"on move: O\n",
" X \n",
"O \n",
"X \n",
"on move: X\n",
" X \n",
"O O \n",
"X \n",
"on move: O\n",
" X \n",
"O O \n",
"X X \n",
"on move: X\n",
"O X \n",
"O O \n",
"X X \n",
"on move: O\n",
"O X \n",
"O X O \n",
"X X \n",
"Episode 38, Total Reward: 1\n",
"Average Reward: 0.3157894736842105\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" \n",
"O X \n",
"on move: O\n",
"X \n",
" \n",
"O X \n",
"on move: X\n",
"X O \n",
" \n",
"O X \n",
"on move: O\n",
"X X O \n",
" \n",
"O X \n",
"on move: X\n",
"X X O \n",
" O \n",
"O X \n",
"on move: O\n",
"X X O \n",
"X O \n",
"O X \n",
"on move: X\n",
"X X O \n",
"X O O \n",
"O X \n",
"Episode 39, Total Reward: -1\n",
"Average Reward: 0.28205128205128205\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
" X \n",
" X \n",
"on move: X\n",
" O \n",
" X \n",
"O X \n",
"on move: O\n",
"X O \n",
" X \n",
"O X \n",
"on move: X\n",
"X O O \n",
" X \n",
"O X \n",
"on move: O\n",
"X O O \n",
" X X \n",
"O X \n",
"on move: X\n",
"X O O \n",
"O X X \n",
"O X \n",
"on move: O\n",
"X O O \n",
"O X X \n",
"O X X \n",
"Episode 40, Total Reward: 1\n",
"Average Reward: 0.3\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
" O \n",
"X \n",
" X \n",
"on move: X\n",
" O \n",
"X O \n",
" X \n",
"on move: O\n",
" O \n",
"X X O \n",
" X \n",
"on move: X\n",
" O O \n",
"X X O \n",
" X \n",
"on move: O\n",
"X O O \n",
"X X O \n",
" X \n",
"on move: X\n",
"X O O \n",
"X X O \n",
"O X \n",
"on move: O\n",
"X O O \n",
"X X O \n",
"O X X \n",
"Episode 41, Total Reward: 1\n",
"Average Reward: 0.3170731707317073\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" O \n",
" \n",
"X \n",
"on move: O\n",
"X O \n",
" \n",
"X \n",
"on move: X\n",
"X O \n",
"O \n",
"X \n",
"on move: O\n",
"X O \n",
"O \n",
"X X \n",
"on move: X\n",
"X O \n",
"O O \n",
"X X \n",
"on move: O\n",
"X X O \n",
"O O \n",
"X X \n",
"on move: X\n",
"X X O \n",
"O O O \n",
"X X \n",
"Episode 42, Total Reward: -1\n",
"Average Reward: 0.2857142857142857\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
"O X \n",
" \n",
"on move: O\n",
" \n",
"O X \n",
"X \n",
"on move: X\n",
" O \n",
"O X \n",
"X \n",
"on move: O\n",
" X O \n",
"O X \n",
"X \n",
"on move: X\n",
"O X O \n",
"O X \n",
"X \n",
"on move: O\n",
"O X O \n",
"O X \n",
"X X \n",
"on move: X\n",
"O X O \n",
"O O X \n",
"X X \n",
"on move: O\n",
"O X O \n",
"O O X \n",
"X X X \n",
"Episode 43, Total Reward: 1\n",
"Average Reward: 0.3023255813953488\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
" X \n",
" O \n",
" X \n",
"on move: X\n",
" X \n",
" O \n",
" O X \n",
"on move: O\n",
" X \n",
" X O \n",
" O X \n",
"on move: X\n",
" X \n",
"O X O \n",
" O X \n",
"on move: O\n",
"X X \n",
"O X O \n",
" O X \n",
"Episode 44, Total Reward: 1\n",
"Average Reward: 0.3181818181818182\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
" X \n",
" X \n",
"on move: X\n",
"O O \n",
" X \n",
" X \n",
"on move: O\n",
"O O \n",
"X X \n",
" X \n",
"on move: X\n",
"O O \n",
"X X \n",
" X O \n",
"on move: O\n",
"O X O \n",
"X X \n",
" X O \n",
"on move: X\n",
"O X O \n",
"X O X \n",
" X O \n",
"Episode 45, Total Reward: -1\n",
"Average Reward: 0.28888888888888886\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
"O \n",
"on move: O\n",
" X \n",
"X \n",
"O \n",
"on move: X\n",
" X \n",
"X O \n",
"O \n",
"on move: O\n",
" X X \n",
"X O \n",
"O \n",
"on move: X\n",
"O X X \n",
"X O \n",
"O \n",
"on move: O\n",
"O X X \n",
"X O \n",
"O X \n",
"on move: X\n",
"O X X \n",
"X O O \n",
"O X \n",
"on move: O\n",
"O X X \n",
"X O O \n",
"O X X \n",
"Episode 46, Total Reward: 0\n",
"Average Reward: 0.2826086956521739\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" O \n",
" \n",
"X \n",
"on move: O\n",
"X O \n",
" \n",
"X \n",
"on move: X\n",
"X O \n",
" O \n",
"X \n",
"on move: O\n",
"X O \n",
"X O \n",
"X \n",
"Episode 47, Total Reward: 1\n",
"Average Reward: 0.2978723404255319\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" O \n",
" \n",
"on move: O\n",
" X \n",
" O \n",
" X \n",
"on move: X\n",
" X \n",
" O \n",
" O X \n",
"on move: O\n",
" X \n",
" O \n",
"X O X \n",
"on move: X\n",
"O X \n",
" O \n",
"X O X \n",
"on move: O\n",
"O X \n",
"X O \n",
"X O X \n",
"on move: X\n",
"O O X \n",
"X O \n",
"X O X \n",
"on move: O\n",
"O O X \n",
"X X O \n",
"X O X \n",
"Episode 48, Total Reward: 1\n",
"Average Reward: 0.3125\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
"X \n",
" O \n",
" X \n",
"on move: X\n",
"X \n",
" O \n",
"O X \n",
"on move: O\n",
"X \n",
" X O \n",
"O X \n",
"Episode 49, Total Reward: 1\n",
"Average Reward: 0.32653061224489793\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
"O \n",
"on move: O\n",
" \n",
" X X \n",
"O \n",
"on move: X\n",
" \n",
" X X \n",
"O O \n",
"on move: O\n",
" X \n",
" X X \n",
"O O \n",
"on move: X\n",
" X \n",
"O X X \n",
"O O \n",
"on move: O\n",
" X X \n",
"O X X \n",
"O O \n",
"on move: X\n",
" X X \n",
"O X X \n",
"O O O \n",
"Episode 50, Total Reward: -1\n",
"Average Reward: 0.3\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X O \n",
" \n",
"on move: O\n",
" X \n",
"X O \n",
" \n",
"on move: X\n",
" X \n",
"X O O \n",
" \n",
"on move: O\n",
" X \n",
"X O O \n",
" X \n",
"on move: X\n",
" X \n",
"X O O \n",
"O X \n",
"on move: O\n",
" X X \n",
"X O O \n",
"O X \n",
"on move: X\n",
" X X \n",
"X O O \n",
"O O X \n",
"on move: O\n",
"X X X \n",
"X O O \n",
"O O X \n",
"Episode 51, Total Reward: 1\n",
"Average Reward: 0.3137254901960784\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" \n",
"X O \n",
"on move: O\n",
" X \n",
" \n",
"X O \n",
"on move: X\n",
" X \n",
" O \n",
"X O \n",
"on move: O\n",
" X \n",
" O X \n",
"X O \n",
"on move: X\n",
" X O \n",
" O X \n",
"X O \n",
"on move: O\n",
" X O \n",
" O X \n",
"X X O \n",
"on move: X\n",
"O X O \n",
" O X \n",
"X X O \n",
"Episode 52, Total Reward: -1\n",
"Average Reward: 0.28846153846153844\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" O \n",
" \n",
"on move: O\n",
"X \n",
" O \n",
"X \n",
"on move: X\n",
"X \n",
"O O \n",
"X \n",
"on move: O\n",
"X \n",
"O O \n",
"X X \n",
"on move: X\n",
"X O \n",
"O O \n",
"X X \n",
"on move: O\n",
"X O \n",
"O O X \n",
"X X \n",
"on move: X\n",
"X O O \n",
"O O X \n",
"X X \n",
"on move: O\n",
"X O O \n",
"O O X \n",
"X X X \n",
"Episode 53, Total Reward: 1\n",
"Average Reward: 0.3018867924528302\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X O \n",
" \n",
" \n",
"on move: O\n",
" X O \n",
" X \n",
" \n",
"on move: X\n",
" X O \n",
" X \n",
"O \n",
"on move: O\n",
" X O \n",
" X \n",
"O X \n",
"Episode 54, Total Reward: 1\n",
"Average Reward: 0.3148148148148148\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
" X X \n",
" \n",
"on move: X\n",
"O O \n",
" X X \n",
" \n",
"on move: O\n",
"O O \n",
"X X X \n",
" \n",
"Episode 55, Total Reward: 1\n",
"Average Reward: 0.32727272727272727\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" O \n",
"X \n",
" \n",
"on move: O\n",
" O \n",
"X \n",
"X \n",
"on move: X\n",
"O O \n",
"X \n",
"X \n",
"on move: O\n",
"O O \n",
"X X \n",
"X \n",
"on move: X\n",
"O O \n",
"X O X \n",
"X \n",
"on move: O\n",
"O X O \n",
"X O X \n",
"X \n",
"on move: X\n",
"O X O \n",
"X O X \n",
"X O \n",
"on move: O\n",
"O X O \n",
"X O X \n",
"X O X \n",
"Episode 56, Total Reward: 0\n",
"Average Reward: 0.32142857142857145\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
" \n",
" O \n",
" X X \n",
"on move: X\n",
" \n",
" O \n",
"O X X \n",
"on move: O\n",
" \n",
"X O \n",
"O X X \n",
"on move: X\n",
" \n",
"X O O \n",
"O X X \n",
"on move: O\n",
" X \n",
"X O O \n",
"O X X \n",
"on move: X\n",
"O X \n",
"X O O \n",
"O X X \n",
"on move: O\n",
"O X X \n",
"X O O \n",
"O X X \n",
"Episode 57, Total Reward: 0\n",
"Average Reward: 0.3157894736842105\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" O \n",
"X \n",
"on move: O\n",
" \n",
" O \n",
"X X \n",
"on move: X\n",
" \n",
" O \n",
"X O X \n",
"on move: O\n",
" X \n",
" O \n",
"X O X \n",
"on move: X\n",
" X \n",
"O O \n",
"X O X \n",
"on move: O\n",
" X X \n",
"O O \n",
"X O X \n",
"on move: X\n",
" X X \n",
"O O O \n",
"X O X \n",
"Episode 58, Total Reward: -1\n",
"Average Reward: 0.29310344827586204\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
"O \n",
"on move: O\n",
" X X \n",
" \n",
"O \n",
"on move: X\n",
"O X X \n",
" \n",
"O \n",
"on move: O\n",
"O X X \n",
" X \n",
"O \n",
"on move: X\n",
"O X X \n",
" X \n",
"O O \n",
"on move: O\n",
"O X X \n",
" X \n",
"O O X \n",
"on move: X\n",
"O X X \n",
"O X \n",
"O O X \n",
"Episode 59, Total Reward: -1\n",
"Average Reward: 0.2711864406779661\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
" X \n",
" O \n",
" X \n",
"on move: X\n",
" O X \n",
" O \n",
" X \n",
"on move: O\n",
" O X \n",
" O \n",
" X X \n",
"on move: X\n",
" O X \n",
"O O \n",
" X X \n",
"on move: O\n",
" O X \n",
"O X O \n",
" X X \n",
"on move: X\n",
" O X \n",
"O X O \n",
"O X X \n",
"on move: O\n",
"X O X \n",
"O X O \n",
"O X X \n",
"Episode 60, Total Reward: 1\n",
"Average Reward: 0.2833333333333333\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
"O \n",
"on move: O\n",
" \n",
"X \n",
"O X \n",
"on move: X\n",
"O \n",
"X \n",
"O X \n",
"on move: O\n",
"O \n",
"X \n",
"O X X \n",
"on move: X\n",
"O O \n",
"X \n",
"O X X \n",
"on move: O\n",
"O X O \n",
"X \n",
"O X X \n",
"on move: X\n",
"O X O \n",
"X O \n",
"O X X \n",
"Episode 61, Total Reward: -1\n",
"Average Reward: 0.26229508196721313\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
" O \n",
"X \n",
" X \n",
"on move: X\n",
" O \n",
"X \n",
"O X \n",
"on move: O\n",
" O \n",
"X \n",
"O X X \n",
"on move: X\n",
" O \n",
"X O \n",
"O X X \n",
"on move: O\n",
"X O \n",
"X O \n",
"O X X \n",
"on move: X\n",
"X O \n",
"X O O \n",
"O X X \n",
"on move: O\n",
"X O X \n",
"X O O \n",
"O X X \n",
"Episode 62, Total Reward: 0\n",
"Average Reward: 0.25806451612903225\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
"O \n",
"on move: O\n",
" \n",
" X \n",
"O X \n",
"on move: X\n",
"O \n",
" X \n",
"O X \n",
"on move: O\n",
"O X \n",
" X \n",
"O X \n",
"Episode 63, Total Reward: 1\n",
"Average Reward: 0.2698412698412698\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X O \n",
" \n",
" \n",
"on move: O\n",
"X O \n",
" \n",
" X \n",
"on move: X\n",
"X O \n",
" O \n",
" X \n",
"on move: O\n",
"X O \n",
" X O \n",
" X \n",
"Episode 64, Total Reward: 1\n",
"Average Reward: 0.28125\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
"O X \n",
" \n",
"on move: O\n",
" \n",
"O X \n",
" X \n",
"on move: X\n",
" \n",
"O O X \n",
" X \n",
"on move: O\n",
" \n",
"O O X \n",
"X X \n",
"on move: X\n",
"O \n",
"O O X \n",
"X X \n",
"on move: O\n",
"O X \n",
"O O X \n",
"X X \n",
"on move: X\n",
"O X O \n",
"O O X \n",
"X X \n",
"on move: O\n",
"O X O \n",
"O O X \n",
"X X X \n",
"Episode 65, Total Reward: 1\n",
"Average Reward: 0.2923076923076923\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X O \n",
" \n",
" \n",
"on move: O\n",
"X O \n",
" X \n",
" \n",
"on move: X\n",
"X O \n",
" X \n",
" O \n",
"on move: O\n",
"X O \n",
" X \n",
"X O \n",
"on move: X\n",
"X O O \n",
" X \n",
"X O \n",
"on move: O\n",
"X O O \n",
" X \n",
"X X O \n",
"on move: X\n",
"X O O \n",
"O X \n",
"X X O \n",
"on move: O\n",
"X O O \n",
"O X X \n",
"X X O \n",
"Episode 66, Total Reward: 0\n",
"Average Reward: 0.2878787878787879\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X O \n",
" \n",
" \n",
"on move: O\n",
"X O \n",
" \n",
" X \n",
"on move: X\n",
"X O \n",
" \n",
"O X \n",
"on move: O\n",
"X O X \n",
" \n",
"O X \n",
"on move: X\n",
"X O X \n",
" \n",
"O O X \n",
"on move: O\n",
"X O X \n",
" X \n",
"O O X \n",
"Episode 67, Total Reward: 1\n",
"Average Reward: 0.29850746268656714\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
"O \n",
" \n",
"on move: O\n",
"X \n",
"O \n",
" X \n",
"on move: X\n",
"X \n",
"O O \n",
" X \n",
"on move: O\n",
"X \n",
"O O \n",
"X X \n",
"on move: X\n",
"X \n",
"O O O \n",
"X X \n",
"Episode 68, Total Reward: -1\n",
"Average Reward: 0.27941176470588236\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" \n",
"O X \n",
"on move: O\n",
" \n",
" \n",
"O X X \n",
"on move: X\n",
" O \n",
" \n",
"O X X \n",
"on move: O\n",
" O \n",
" X \n",
"O X X \n",
"on move: X\n",
" O O \n",
" X \n",
"O X X \n",
"on move: O\n",
" O O \n",
" X X \n",
"O X X \n",
"on move: X\n",
"O O O \n",
" X X \n",
"O X X \n",
"Episode 69, Total Reward: -1\n",
"Average Reward: 0.2608695652173913\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X O \n",
" \n",
" \n",
"on move: O\n",
"X O \n",
" \n",
"X \n",
"on move: X\n",
"X O O \n",
" \n",
"X \n",
"on move: O\n",
"X O O \n",
" \n",
"X X \n",
"on move: X\n",
"X O O \n",
" O \n",
"X X \n",
"on move: O\n",
"X O O \n",
"X O \n",
"X X \n",
"Episode 70, Total Reward: 1\n",
"Average Reward: 0.2714285714285714\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" O \n",
" \n",
"on move: O\n",
" X \n",
" X O \n",
" \n",
"on move: X\n",
" X \n",
" X O \n",
" O \n",
"on move: O\n",
" X \n",
" X O \n",
"X O \n",
"on move: X\n",
" X \n",
" X O \n",
"X O O \n",
"on move: O\n",
" X X \n",
" X O \n",
"X O O \n",
"Episode 71, Total Reward: 1\n",
"Average Reward: 0.28169014084507044\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
"X O \n",
" \n",
" X \n",
"on move: X\n",
"X O \n",
" O \n",
" X \n",
"on move: O\n",
"X X O \n",
" O \n",
" X \n",
"on move: X\n",
"X X O \n",
"O O \n",
" X \n",
"on move: O\n",
"X X O \n",
"O O \n",
" X X \n",
"on move: X\n",
"X X O \n",
"O O O \n",
" X X \n",
"Episode 72, Total Reward: -1\n",
"Average Reward: 0.2638888888888889\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" \n",
" O X \n",
"on move: O\n",
" X \n",
" \n",
" O X \n",
"on move: X\n",
" X \n",
" \n",
"O O X \n",
"on move: O\n",
" X \n",
" X \n",
"O O X \n",
"on move: X\n",
" X O \n",
" X \n",
"O O X \n",
"on move: O\n",
"X X O \n",
" X \n",
"O O X \n",
"on move: X\n",
"X X O \n",
"O X \n",
"O O X \n",
"on move: O\n",
"X X O \n",
"O X X \n",
"O O X \n",
"Episode 73, Total Reward: 1\n",
"Average Reward: 0.273972602739726\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
"O \n",
"on move: O\n",
" \n",
"X X \n",
"O \n",
"on move: X\n",
" \n",
"X X O \n",
"O \n",
"on move: O\n",
" \n",
"X X O \n",
"O X \n",
"on move: X\n",
" O \n",
"X X O \n",
"O X \n",
"on move: O\n",
" O \n",
"X X O \n",
"O X X \n",
"on move: X\n",
" O O \n",
"X X O \n",
"O X X \n",
"on move: O\n",
"X O O \n",
"X X O \n",
"O X X \n",
"Episode 74, Total Reward: 1\n",
"Average Reward: 0.28378378378378377\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
" O \n",
"on move: O\n",
" X \n",
"X \n",
" O \n",
"on move: X\n",
" X \n",
"X \n",
" O O \n",
"on move: O\n",
" X \n",
"X X \n",
" O O \n",
"on move: X\n",
"O X \n",
"X X \n",
" O O \n",
"on move: O\n",
"O X \n",
"X X X \n",
" O O \n",
"Episode 75, Total Reward: 1\n",
"Average Reward: 0.29333333333333333\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
" O \n",
"on move: O\n",
" X \n",
" X \n",
" O \n",
"on move: X\n",
" O X \n",
" X \n",
" O \n",
"on move: O\n",
" O X \n",
" X X \n",
" O \n",
"on move: X\n",
" O X \n",
" X X \n",
"O O \n",
"on move: O\n",
" O X \n",
"X X X \n",
"O O \n",
"Episode 76, Total Reward: 1\n",
"Average Reward: 0.3026315789473684\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
"O \n",
"on move: O\n",
" \n",
"X \n",
"O X \n",
"on move: X\n",
"O \n",
"X \n",
"O X \n",
"on move: O\n",
"O X \n",
"X \n",
"O X \n",
"on move: X\n",
"O X O \n",
"X \n",
"O X \n",
"on move: O\n",
"O X O \n",
"X X \n",
"O X \n",
"on move: X\n",
"O X O \n",
"X O X \n",
"O X \n",
"Episode 77, Total Reward: -1\n",
"Average Reward: 0.2857142857142857\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
" O \n",
"on move: O\n",
" X \n",
"X \n",
" O \n",
"on move: X\n",
" X \n",
"X \n",
"O O \n",
"on move: O\n",
" X \n",
"X X \n",
"O O \n",
"on move: X\n",
" X \n",
"X X O \n",
"O O \n",
"on move: O\n",
" X \n",
"X X O \n",
"O X O \n",
"Episode 78, Total Reward: 1\n",
"Average Reward: 0.2948717948717949\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
"O \n",
"on move: O\n",
" \n",
"X X \n",
"O \n",
"on move: X\n",
" \n",
"X X \n",
"O O \n",
"on move: O\n",
" \n",
"X X X \n",
"O O \n",
"Episode 79, Total Reward: 1\n",
"Average Reward: 0.3037974683544304\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
" \n",
" X O \n",
" X \n",
"on move: X\n",
" \n",
" X O \n",
"O X \n",
"on move: O\n",
" X \n",
" X O \n",
"O X \n",
"on move: X\n",
" X \n",
"O X O \n",
"O X \n",
"on move: O\n",
" X X \n",
"O X O \n",
"O X \n",
"Episode 80, Total Reward: 1\n",
"Average Reward: 0.3125\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" O \n",
" \n",
"X \n",
"on move: O\n",
" O \n",
" \n",
"X X \n",
"on move: X\n",
"O O \n",
" \n",
"X X \n",
"on move: O\n",
"O O \n",
" X \n",
"X X \n",
"on move: X\n",
"O O O \n",
" X \n",
"X X \n",
"Episode 81, Total Reward: -1\n",
"Average Reward: 0.2962962962962963\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
"O \n",
"X \n",
"on move: O\n",
" \n",
"O \n",
"X X \n",
"on move: X\n",
" O \n",
"O \n",
"X X \n",
"on move: O\n",
" X O \n",
"O \n",
"X X \n",
"on move: X\n",
" X O \n",
"O O \n",
"X X \n",
"on move: O\n",
"X X O \n",
"O O \n",
"X X \n",
"on move: X\n",
"X X O \n",
"O O \n",
"X O X \n",
"on move: O\n",
"X X O \n",
"O O X \n",
"X O X \n",
"Episode 82, Total Reward: 0\n",
"Average Reward: 0.2926829268292683\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" O \n",
"X \n",
" \n",
"on move: O\n",
" O \n",
"X X \n",
" \n",
"on move: X\n",
" O O \n",
"X X \n",
" \n",
"on move: O\n",
" O O \n",
"X X \n",
" X \n",
"on move: X\n",
" O O \n",
"X X \n",
" X O \n",
"on move: O\n",
" O O \n",
"X X X \n",
" X O \n",
"Episode 83, Total Reward: 1\n",
"Average Reward: 0.30120481927710846\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
" O \n",
"on move: O\n",
" \n",
" X \n",
"X O \n",
"on move: X\n",
" \n",
" X \n",
"X O O \n",
"on move: O\n",
" \n",
"X X \n",
"X O O \n",
"on move: X\n",
"O \n",
"X X \n",
"X O O \n",
"on move: O\n",
"O X \n",
"X X \n",
"X O O \n",
"on move: X\n",
"O X O \n",
"X X \n",
"X O O \n",
"on move: O\n",
"O X O \n",
"X X X \n",
"X O O \n",
"Episode 84, Total Reward: 1\n",
"Average Reward: 0.30952380952380953\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
" X \n",
" X \n",
"on move: X\n",
" O \n",
" X O \n",
" X \n",
"on move: O\n",
" O \n",
"X X O \n",
" X \n",
"on move: X\n",
" O \n",
"X X O \n",
" O X \n",
"on move: O\n",
" O \n",
"X X O \n",
"X O X \n",
"on move: X\n",
"O O \n",
"X X O \n",
"X O X \n",
"on move: O\n",
"O O X \n",
"X X O \n",
"X O X \n",
"Episode 85, Total Reward: 1\n",
"Average Reward: 0.3176470588235294\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
" O \n",
"on move: O\n",
" \n",
"X \n",
" O X \n",
"on move: X\n",
"O \n",
"X \n",
" O X \n",
"on move: O\n",
"O \n",
"X \n",
"X O X \n",
"on move: X\n",
"O \n",
"X O \n",
"X O X \n",
"on move: O\n",
"O X \n",
"X O \n",
"X O X \n",
"on move: X\n",
"O X \n",
"X O O \n",
"X O X \n",
"on move: O\n",
"O X X \n",
"X O O \n",
"X O X \n",
"Episode 86, Total Reward: 0\n",
"Average Reward: 0.313953488372093\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
" X \n",
" O \n",
" X \n",
"on move: X\n",
" X \n",
" O O \n",
" X \n",
"on move: O\n",
" X \n",
" O O \n",
"X X \n",
"on move: X\n",
"O X \n",
" O O \n",
"X X \n",
"on move: O\n",
"O X \n",
"X O O \n",
"X X \n",
"on move: X\n",
"O X \n",
"X O O \n",
"X O X \n",
"on move: O\n",
"O X X \n",
"X O O \n",
"X O X \n",
"Episode 87, Total Reward: 0\n",
"Average Reward: 0.3103448275862069\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
" X \n",
" O \n",
" X \n",
"on move: X\n",
"O X \n",
" O \n",
" X \n",
"on move: O\n",
"O X X \n",
" O \n",
" X \n",
"on move: X\n",
"O X X \n",
" O \n",
"O X \n",
"on move: O\n",
"O X X \n",
"X O \n",
"O X \n",
"on move: X\n",
"O X X \n",
"X O \n",
"O X O \n",
"Episode 88, Total Reward: -1\n",
"Average Reward: 0.29545454545454547\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X O \n",
" \n",
" \n",
"on move: O\n",
"X O \n",
"X \n",
" \n",
"on move: X\n",
"X O \n",
"X \n",
" O \n",
"on move: O\n",
"X O X \n",
"X \n",
" O \n",
"on move: X\n",
"X O X \n",
"X O \n",
" O \n",
"on move: O\n",
"X O X \n",
"X X O \n",
" O \n",
"on move: X\n",
"X O X \n",
"X X O \n",
" O O \n",
"on move: O\n",
"X O X \n",
"X X O \n",
"X O O \n",
"Episode 89, Total Reward: 1\n",
"Average Reward: 0.30337078651685395\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
"X \n",
" O \n",
" X \n",
"on move: X\n",
"X \n",
"O O \n",
" X \n",
"on move: O\n",
"X \n",
"O O \n",
"X X \n",
"on move: X\n",
"X O \n",
"O O \n",
"X X \n",
"on move: O\n",
"X O X \n",
"O O \n",
"X X \n",
"on move: X\n",
"X O X \n",
"O O O \n",
"X X \n",
"Episode 90, Total Reward: -1\n",
"Average Reward: 0.28888888888888886\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
"X \n",
" O \n",
" X \n",
"on move: X\n",
"X O \n",
" O \n",
" X \n",
"on move: O\n",
"X O \n",
"X O \n",
" X \n",
"on move: X\n",
"X O \n",
"X O \n",
" X O \n",
"on move: O\n",
"X O \n",
"X X O \n",
" X O \n",
"on move: X\n",
"X O O \n",
"X X O \n",
" X O \n",
"Episode 91, Total Reward: -1\n",
"Average Reward: 0.27472527472527475\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
"O \n",
" X \n",
"on move: O\n",
" \n",
"O X \n",
" X \n",
"on move: X\n",
" \n",
"O X O \n",
" X \n",
"on move: O\n",
" \n",
"O X O \n",
"X X \n",
"on move: X\n",
" O \n",
"O X O \n",
"X X \n",
"on move: O\n",
" O \n",
"O X O \n",
"X X X \n",
"Episode 92, Total Reward: 1\n",
"Average Reward: 0.2826086956521739\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
"O \n",
" \n",
"on move: O\n",
" X \n",
"O \n",
"X \n",
"on move: X\n",
" O X \n",
"O \n",
"X \n",
"on move: O\n",
" O X \n",
"O X \n",
"X \n",
"Episode 93, Total Reward: 1\n",
"Average Reward: 0.2903225806451613\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
"O \n",
"on move: O\n",
" X \n",
" X \n",
"O \n",
"on move: X\n",
"O X \n",
" X \n",
"O \n",
"on move: O\n",
"O X \n",
"X X \n",
"O \n",
"on move: X\n",
"O X \n",
"X X \n",
"O O \n",
"on move: O\n",
"O X X \n",
"X X \n",
"O O \n",
"on move: X\n",
"O X X \n",
"X X \n",
"O O O \n",
"Episode 94, Total Reward: -1\n",
"Average Reward: 0.2765957446808511\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X O \n",
" \n",
"on move: O\n",
" \n",
"X O \n",
" X \n",
"on move: X\n",
" O \n",
"X O \n",
" X \n",
"on move: O\n",
" O X \n",
"X O \n",
" X \n",
"on move: X\n",
" O X \n",
"X O O \n",
" X \n",
"on move: O\n",
" O X \n",
"X O O \n",
" X X \n",
"on move: X\n",
" O X \n",
"X O O \n",
"O X X \n",
"on move: O\n",
"X O X \n",
"X O O \n",
"O X X \n",
"Episode 95, Total Reward: 0\n",
"Average Reward: 0.2736842105263158\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
" O \n",
"on move: O\n",
" X \n",
" X \n",
" O \n",
"on move: X\n",
" X \n",
" X \n",
"O O \n",
"on move: O\n",
" X \n",
"X X \n",
"O O \n",
"on move: X\n",
" X O \n",
"X X \n",
"O O \n",
"on move: O\n",
" X O \n",
"X X X \n",
"O O \n",
"Episode 96, Total Reward: 1\n",
"Average Reward: 0.28125\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X O \n",
" \n",
"on move: O\n",
"X \n",
"X O \n",
" \n",
"on move: X\n",
"X O \n",
"X O \n",
" \n",
"on move: O\n",
"X O \n",
"X O \n",
"X \n",
"Episode 97, Total Reward: 1\n",
"Average Reward: 0.28865979381443296\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
"O \n",
" \n",
" X \n",
"on move: O\n",
"O \n",
" X \n",
" X \n",
"on move: X\n",
"O \n",
" X \n",
" O X \n",
"on move: O\n",
"O X \n",
" X \n",
" O X \n",
"on move: X\n",
"O X \n",
" X \n",
"O O X \n",
"on move: O\n",
"O X \n",
"X X \n",
"O O X \n",
"on move: X\n",
"O X \n",
"X O X \n",
"O O X \n",
"on move: O\n",
"O X X \n",
"X O X \n",
"O O X \n",
"Episode 98, Total Reward: 1\n",
"Average Reward: 0.29591836734693877\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
"O \n",
"on move: O\n",
" \n",
" X \n",
"O X \n",
"on move: X\n",
"O \n",
" X \n",
"O X \n",
"on move: O\n",
"O \n",
"X X \n",
"O X \n",
"on move: X\n",
"O O \n",
"X X \n",
"O X \n",
"on move: O\n",
"O X O \n",
"X X \n",
"O X \n",
"on move: X\n",
"O X O \n",
"X X O \n",
"O X \n",
"on move: O\n",
"O X O \n",
"X X O \n",
"O X X \n",
"Episode 99, Total Reward: 1\n",
"Average Reward: 0.30303030303030304\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" \n",
"X O \n",
"on move: O\n",
" \n",
" X \n",
"X O \n",
"on move: X\n",
" \n",
"O X \n",
"X O \n",
"on move: O\n",
" \n",
"O X \n",
"X X O \n",
"on move: X\n",
" \n",
"O O X \n",
"X X O \n",
"on move: O\n",
" X \n",
"O O X \n",
"X X O \n",
"on move: X\n",
" O X \n",
"O O X \n",
"X X O \n",
"on move: O\n",
"X O X \n",
"O O X \n",
"X X O \n",
"Episode 100, Total Reward: 0\n",
"Average Reward: 0.3\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" \n",
"O X \n",
"on move: O\n",
" \n",
" X \n",
"O X \n",
"on move: X\n",
" \n",
" X O \n",
"O X \n",
"on move: O\n",
" \n",
" X O \n",
"O X X \n",
"on move: X\n",
" \n",
"O X O \n",
"O X X \n",
"on move: O\n",
"X \n",
"O X O \n",
"O X X \n",
"Episode 101, Total Reward: 1\n",
"Average Reward: 0.3069306930693069\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
"O \n",
"X \n",
"on move: O\n",
" \n",
"O \n",
"X X \n",
"on move: X\n",
" O \n",
"O \n",
"X X \n",
"on move: O\n",
" O \n",
"O \n",
"X X X \n",
"Episode 102, Total Reward: 1\n",
"Average Reward: 0.3137254901960784\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
"O \n",
" \n",
"X \n",
"on move: O\n",
"O \n",
" \n",
"X X \n",
"on move: X\n",
"O O \n",
" \n",
"X X \n",
"on move: O\n",
"O O \n",
"X \n",
"X X \n",
"on move: X\n",
"O O \n",
"X O \n",
"X X \n",
"on move: O\n",
"O X O \n",
"X O \n",
"X X \n",
"on move: X\n",
"O X O \n",
"X O O \n",
"X X \n",
"on move: O\n",
"O X O \n",
"X O O \n",
"X X X \n",
"Episode 103, Total Reward: 1\n",
"Average Reward: 0.32038834951456313\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
"O \n",
"X \n",
"on move: O\n",
" \n",
"O \n",
"X X \n",
"on move: X\n",
" O \n",
"O \n",
"X X \n",
"on move: O\n",
"X O \n",
"O \n",
"X X \n",
"on move: X\n",
"X O \n",
"O O \n",
"X X \n",
"on move: O\n",
"X X O \n",
"O O \n",
"X X \n",
"on move: X\n",
"X X O \n",
"O O O \n",
"X X \n",
"Episode 104, Total Reward: -1\n",
"Average Reward: 0.3076923076923077\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
"O \n",
" \n",
"on move: O\n",
"X X \n",
"O \n",
" \n",
"on move: X\n",
"X X \n",
"O O \n",
" \n",
"on move: O\n",
"X X X \n",
"O O \n",
" \n",
"Episode 105, Total Reward: 1\n",
"Average Reward: 0.3142857142857143\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
"O X \n",
" \n",
"on move: O\n",
"X \n",
"O X \n",
" \n",
"on move: X\n",
"X O \n",
"O X \n",
" \n",
"on move: O\n",
"X O \n",
"O X \n",
"X \n",
"on move: X\n",
"X O O \n",
"O X \n",
"X \n",
"on move: O\n",
"X O O \n",
"O X \n",
"X X \n",
"on move: X\n",
"X O O \n",
"O O X \n",
"X X \n",
"on move: O\n",
"X O O \n",
"O O X \n",
"X X X \n",
"Episode 106, Total Reward: 1\n",
"Average Reward: 0.32075471698113206\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X O \n",
" \n",
" \n",
"on move: O\n",
"X O \n",
" \n",
" X \n",
"on move: X\n",
"X O \n",
" O \n",
" X \n",
"on move: O\n",
"X O \n",
" O \n",
" X X \n",
"on move: X\n",
"X O \n",
"O O \n",
" X X \n",
"on move: O\n",
"X O \n",
"O X O \n",
" X X \n",
"Episode 107, Total Reward: 1\n",
"Average Reward: 0.32710280373831774\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
" O \n",
"on move: O\n",
" \n",
"X \n",
" X O \n",
"on move: X\n",
" \n",
"X \n",
"O X O \n",
"on move: O\n",
" \n",
"X X \n",
"O X O \n",
"on move: X\n",
" O \n",
"X X \n",
"O X O \n",
"on move: O\n",
" O \n",
"X X X \n",
"O X O \n",
"Episode 108, Total Reward: 1\n",
"Average Reward: 0.3333333333333333\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" O X \n",
" \n",
" \n",
"on move: O\n",
" O X \n",
" X \n",
" \n",
"on move: X\n",
" O X \n",
" X \n",
" O \n",
"on move: O\n",
"X O X \n",
" X \n",
" O \n",
"on move: X\n",
"X O X \n",
" X \n",
"O O \n",
"on move: O\n",
"X O X \n",
"X X \n",
"O O \n",
"on move: X\n",
"X O X \n",
"X X O \n",
"O O \n",
"on move: O\n",
"X O X \n",
"X X O \n",
"O X O \n",
"Episode 109, Total Reward: 0\n",
"Average Reward: 0.3302752293577982\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X O \n",
" \n",
" \n",
"on move: O\n",
"X O \n",
" \n",
" X \n",
"on move: X\n",
"X O \n",
"O \n",
" X \n",
"on move: O\n",
"X O \n",
"O X \n",
" X \n",
"Episode 110, Total Reward: 1\n",
"Average Reward: 0.33636363636363636\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" O \n",
" \n",
"on move: O\n",
" X \n",
" O \n",
"X \n",
"on move: X\n",
" O X \n",
" O \n",
"X \n",
"on move: O\n",
" O X \n",
" O \n",
"X X \n",
"on move: X\n",
"O O X \n",
" O \n",
"X X \n",
"on move: O\n",
"O O X \n",
" X O \n",
"X X \n",
"Episode 111, Total Reward: 1\n",
"Average Reward: 0.34234234234234234\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
" X \n",
" X \n",
"on move: X\n",
" O \n",
"O X \n",
" X \n",
"on move: O\n",
" O \n",
"O X \n",
" X X \n",
"on move: X\n",
" O \n",
"O O X \n",
" X X \n",
"on move: O\n",
" O \n",
"O O X \n",
"X X X \n",
"Episode 112, Total Reward: 1\n",
"Average Reward: 0.3482142857142857\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
"O \n",
"on move: O\n",
" \n",
" X X \n",
"O \n",
"on move: X\n",
" \n",
"O X X \n",
"O \n",
"on move: O\n",
" \n",
"O X X \n",
"O X \n",
"on move: X\n",
" O \n",
"O X X \n",
"O X \n",
"on move: O\n",
" O X \n",
"O X X \n",
"O X \n",
"on move: X\n",
" O X \n",
"O X X \n",
"O X O \n",
"on move: O\n",
"X O X \n",
"O X X \n",
"O X O \n",
"Episode 113, Total Reward: 0\n",
"Average Reward: 0.34513274336283184\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
"O \n",
"on move: O\n",
" X \n",
"X \n",
"O \n",
"on move: X\n",
" X \n",
"X \n",
"O O \n",
"on move: O\n",
" X X \n",
"X \n",
"O O \n",
"on move: X\n",
" X X \n",
"X \n",
"O O O \n",
"Episode 114, Total Reward: -1\n",
"Average Reward: 0.3333333333333333\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
" O \n",
"on move: O\n",
"X X \n",
" \n",
" O \n",
"on move: X\n",
"X X \n",
" \n",
"O O \n",
"on move: O\n",
"X X \n",
" X \n",
"O O \n",
"on move: X\n",
"X X O \n",
" X \n",
"O O \n",
"on move: O\n",
"X X O \n",
"X X \n",
"O O \n",
"on move: X\n",
"X X O \n",
"X O X \n",
"O O \n",
"Episode 115, Total Reward: -1\n",
"Average Reward: 0.3217391304347826\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
" O \n",
"on move: O\n",
" X \n",
" X \n",
" O \n",
"on move: X\n",
" X \n",
" X \n",
" O O \n",
"on move: O\n",
" X \n",
" X X \n",
" O O \n",
"on move: X\n",
" X \n",
" X X \n",
"O O O \n",
"Episode 116, Total Reward: -1\n",
"Average Reward: 0.3103448275862069\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
" O \n",
"on move: O\n",
" \n",
"X X \n",
" O \n",
"on move: X\n",
"O \n",
"X X \n",
" O \n",
"on move: O\n",
"O \n",
"X X \n",
" X O \n",
"on move: X\n",
"O O \n",
"X X \n",
" X O \n",
"on move: O\n",
"O O X \n",
"X X \n",
" X O \n",
"on move: X\n",
"O O X \n",
"X X O \n",
" X O \n",
"on move: O\n",
"O O X \n",
"X X O \n",
"X X O \n",
"Episode 117, Total Reward: 1\n",
"Average Reward: 0.3162393162393162\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
"O \n",
"X \n",
" \n",
"on move: O\n",
"O \n",
"X \n",
"X \n",
"on move: X\n",
"O \n",
"X \n",
"X O \n",
"on move: O\n",
"O \n",
"X X \n",
"X O \n",
"on move: X\n",
"O O \n",
"X X \n",
"X O \n",
"on move: O\n",
"O O X \n",
"X X \n",
"X O \n",
"on move: X\n",
"O O X \n",
"X O X \n",
"X O \n",
"Episode 118, Total Reward: -1\n",
"Average Reward: 0.3050847457627119\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" \n",
"O X \n",
"on move: O\n",
" X \n",
" \n",
"O X \n",
"on move: X\n",
" X \n",
" O \n",
"O X \n",
"on move: O\n",
"X X \n",
" O \n",
"O X \n",
"on move: X\n",
"X X \n",
"O O \n",
"O X \n",
"on move: O\n",
"X X \n",
"O O \n",
"O X X \n",
"on move: X\n",
"X X O \n",
"O O \n",
"O X X \n",
"Episode 119, Total Reward: -1\n",
"Average Reward: 0.29411764705882354\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" \n",
"X O \n",
"on move: O\n",
" X \n",
" \n",
"X O \n",
"on move: X\n",
" O X \n",
" \n",
"X O \n",
"on move: O\n",
"X O X \n",
" \n",
"X O \n",
"on move: X\n",
"X O X \n",
"O \n",
"X O \n",
"on move: O\n",
"X O X \n",
"O X \n",
"X O \n",
"on move: X\n",
"X O X \n",
"O O X \n",
"X O \n",
"on move: O\n",
"X O X \n",
"O O X \n",
"X X O \n",
"Episode 120, Total Reward: 0\n",
"Average Reward: 0.2916666666666667\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
"O \n",
" X \n",
" \n",
"on move: O\n",
"O \n",
" X \n",
" X \n",
"on move: X\n",
"O \n",
" X O \n",
" X \n",
"on move: O\n",
"O X \n",
" X O \n",
" X \n",
"on move: X\n",
"O X \n",
"O X O \n",
" X \n",
"on move: O\n",
"O X \n",
"O X O \n",
" X X \n",
"Episode 121, Total Reward: 1\n",
"Average Reward: 0.2975206611570248\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" O \n",
" \n",
"on move: O\n",
" X \n",
"X O \n",
" \n",
"on move: X\n",
" X \n",
"X O \n",
" O \n",
"on move: O\n",
" X \n",
"X O \n",
"X O \n",
"on move: X\n",
" X \n",
"X O \n",
"X O O \n",
"on move: O\n",
"X X \n",
"X O \n",
"X O O \n",
"Episode 122, Total Reward: 1\n",
"Average Reward: 0.30327868852459017\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
"O \n",
" \n",
"X \n",
"on move: O\n",
"O \n",
"X \n",
"X \n",
"on move: X\n",
"O \n",
"X O \n",
"X \n",
"on move: O\n",
"O X \n",
"X O \n",
"X \n",
"on move: X\n",
"O X O \n",
"X O \n",
"X \n",
"on move: O\n",
"O X O \n",
"X O \n",
"X X \n",
"on move: X\n",
"O X O \n",
"X O O \n",
"X X \n",
"on move: O\n",
"O X O \n",
"X O O \n",
"X X X \n",
"Episode 123, Total Reward: 1\n",
"Average Reward: 0.3089430894308943\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" \n",
"X O \n",
"on move: O\n",
" X \n",
" \n",
"X O \n",
"on move: X\n",
" X \n",
" O \n",
"X O \n",
"on move: O\n",
" X \n",
" O \n",
"X O X \n",
"on move: X\n",
"O X \n",
" O \n",
"X O X \n",
"on move: O\n",
"O X \n",
" X O \n",
"X O X \n",
"Episode 124, Total Reward: 1\n",
"Average Reward: 0.31451612903225806\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X O \n",
" \n",
"on move: O\n",
" \n",
"X O \n",
" X \n",
"on move: X\n",
" \n",
"X O \n",
" X O \n",
"on move: O\n",
"X \n",
"X O \n",
" X O \n",
"on move: X\n",
"X \n",
"X O O \n",
" X O \n",
"on move: O\n",
"X \n",
"X O O \n",
"X X O \n",
"Episode 125, Total Reward: 1\n",
"Average Reward: 0.32\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" \n",
"X O \n",
"on move: O\n",
" \n",
"X \n",
"X O \n",
"on move: X\n",
" O \n",
"X \n",
"X O \n",
"on move: O\n",
" O \n",
"X X \n",
"X O \n",
"on move: X\n",
" O \n",
"X X \n",
"X O O \n",
"on move: O\n",
"X O \n",
"X X \n",
"X O O \n",
"Episode 126, Total Reward: 1\n",
"Average Reward: 0.3253968253968254\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
"X O \n",
" \n",
" X \n",
"on move: X\n",
"X O \n",
" \n",
" O X \n",
"on move: O\n",
"X O \n",
" X \n",
" O X \n",
"on move: X\n",
"X O \n",
" X \n",
"O O X \n",
"on move: O\n",
"X O X \n",
" X \n",
"O O X \n",
"Episode 127, Total Reward: 1\n",
"Average Reward: 0.33070866141732286\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
" O \n",
"on move: O\n",
" X \n",
"X \n",
" O \n",
"on move: X\n",
" X \n",
"X \n",
"O O \n",
"on move: O\n",
"X X \n",
"X \n",
"O O \n",
"on move: X\n",
"X X \n",
"X O \n",
"O O \n",
"on move: O\n",
"X X X \n",
"X O \n",
"O O \n",
"Episode 128, Total Reward: 1\n",
"Average Reward: 0.3359375\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" O \n",
" \n",
"on move: O\n",
" X \n",
" X O \n",
" \n",
"on move: X\n",
" X \n",
"O X O \n",
" \n",
"on move: O\n",
" X \n",
"O X O \n",
"X \n",
"on move: X\n",
" X \n",
"O X O \n",
"X O \n",
"on move: O\n",
"X X \n",
"O X O \n",
"X O \n",
"on move: X\n",
"X X \n",
"O X O \n",
"X O O \n",
"on move: O\n",
"X X X \n",
"O X O \n",
"X O O \n",
"Episode 129, Total Reward: 1\n",
"Average Reward: 0.34108527131782945\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
" X X \n",
" \n",
"on move: X\n",
" O O \n",
" X X \n",
" \n",
"on move: O\n",
"X O O \n",
" X X \n",
" \n",
"on move: X\n",
"X O O \n",
" X X \n",
" O \n",
"on move: O\n",
"X O O \n",
"X X X \n",
" O \n",
"Episode 130, Total Reward: 1\n",
"Average Reward: 0.34615384615384615\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" O \n",
" \n",
"on move: O\n",
" X \n",
"X O \n",
" \n",
"on move: X\n",
" X \n",
"X O \n",
" O \n",
"on move: O\n",
"X X \n",
"X O \n",
" O \n",
"on move: X\n",
"X X \n",
"X O \n",
"O O \n",
"on move: O\n",
"X X \n",
"X O X \n",
"O O \n",
"on move: X\n",
"X X \n",
"X O X \n",
"O O O \n",
"Episode 131, Total Reward: -1\n",
"Average Reward: 0.33587786259541985\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
" O \n",
"on move: O\n",
" \n",
"X \n",
"X O \n",
"on move: X\n",
" \n",
"X O \n",
"X O \n",
"on move: O\n",
" X \n",
"X O \n",
"X O \n",
"on move: X\n",
" X O \n",
"X O \n",
"X O \n",
"on move: O\n",
"X X O \n",
"X O \n",
"X O \n",
"Episode 132, Total Reward: 1\n",
"Average Reward: 0.3409090909090909\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X O \n",
" \n",
"on move: O\n",
" \n",
" X O \n",
" X \n",
"on move: X\n",
" \n",
" X O \n",
"O X \n",
"on move: O\n",
" \n",
"X X O \n",
"O X \n",
"on move: X\n",
" \n",
"X X O \n",
"O X O \n",
"on move: O\n",
" X \n",
"X X O \n",
"O X O \n",
"on move: X\n",
" O X \n",
"X X O \n",
"O X O \n",
"on move: O\n",
"X O X \n",
"X X O \n",
"O X O \n",
"Episode 133, Total Reward: 0\n",
"Average Reward: 0.3383458646616541\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
"O \n",
" \n",
" X \n",
"on move: O\n",
"O \n",
"X \n",
" X \n",
"on move: X\n",
"O \n",
"X \n",
"O X \n",
"on move: O\n",
"O X \n",
"X \n",
"O X \n",
"on move: X\n",
"O X \n",
"X O \n",
"O X \n",
"on move: O\n",
"O X X \n",
"X O \n",
"O X \n",
"on move: X\n",
"O X X \n",
"X O \n",
"O X O \n",
"Episode 134, Total Reward: -1\n",
"Average Reward: 0.3283582089552239\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" \n",
" O \n",
"on move: O\n",
"X \n",
"X \n",
" O \n",
"on move: X\n",
"X \n",
"X O \n",
" O \n",
"on move: O\n",
"X \n",
"X O \n",
" X O \n",
"on move: X\n",
"X \n",
"X O \n",
"O X O \n",
"on move: O\n",
"X \n",
"X X O \n",
"O X O \n",
"on move: X\n",
"X O \n",
"X X O \n",
"O X O \n",
"Episode 135, Total Reward: -1\n",
"Average Reward: 0.31851851851851853\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
"O X \n",
" \n",
" \n",
"on move: O\n",
"O X \n",
" X \n",
" \n",
"on move: X\n",
"O X \n",
" X \n",
"O \n",
"on move: O\n",
"O X \n",
"X X \n",
"O \n",
"on move: X\n",
"O X \n",
"X X \n",
"O O \n",
"on move: O\n",
"O X \n",
"X X \n",
"O O X \n",
"on move: X\n",
"O X O \n",
"X X \n",
"O O X \n",
"on move: O\n",
"O X O \n",
"X X X \n",
"O O X \n",
"Episode 136, Total Reward: 1\n",
"Average Reward: 0.3235294117647059\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
"O \n",
" X \n",
" \n",
"on move: O\n",
"O \n",
" X X \n",
" \n",
"on move: X\n",
"O O \n",
" X X \n",
" \n",
"on move: O\n",
"O O \n",
" X X \n",
" X \n",
"on move: X\n",
"O O O \n",
" X X \n",
" X \n",
"Episode 137, Total Reward: -1\n",
"Average Reward: 0.31386861313868614\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O X \n",
" X \n",
" \n",
"on move: X\n",
" O X \n",
" X \n",
" O \n",
"on move: O\n",
" O X \n",
" X \n",
"X O \n",
"on move: X\n",
"O O X \n",
" X \n",
"X O \n",
"on move: O\n",
"O O X \n",
"X X \n",
"X O \n",
"on move: X\n",
"O O X \n",
"X X \n",
"X O O \n",
"on move: O\n",
"O O X \n",
"X X X \n",
"X O O \n",
"Episode 138, Total Reward: 1\n",
"Average Reward: 0.3188405797101449\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" O \n",
" \n",
"X \n",
"on move: O\n",
" O \n",
" \n",
"X X \n",
"on move: X\n",
" O O \n",
" \n",
"X X \n",
"on move: O\n",
" O O \n",
"X \n",
"X X \n",
"on move: X\n",
" O O \n",
"X O \n",
"X X \n",
"on move: O\n",
" O O \n",
"X O \n",
"X X X \n",
"Episode 139, Total Reward: 1\n",
"Average Reward: 0.3237410071942446\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" \n",
" O \n",
"on move: O\n",
"X \n",
" \n",
"X O \n",
"on move: X\n",
"X \n",
" O \n",
"X O \n",
"on move: O\n",
"X X \n",
" O \n",
"X O \n",
"on move: X\n",
"X O X \n",
" O \n",
"X O \n",
"on move: O\n",
"X O X \n",
"X O \n",
"X O \n",
"Episode 140, Total Reward: 1\n",
"Average Reward: 0.32857142857142857\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
"O \n",
"on move: O\n",
"X X \n",
" \n",
"O \n",
"on move: X\n",
"X X O \n",
" \n",
"O \n",
"on move: O\n",
"X X O \n",
" X \n",
"O \n",
"on move: X\n",
"X X O \n",
" X \n",
"O O \n",
"on move: O\n",
"X X O \n",
" X \n",
"O O X \n",
"Episode 141, Total Reward: 1\n",
"Average Reward: 0.3333333333333333\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
" O \n",
" \n",
"X X \n",
"on move: X\n",
" O O \n",
" \n",
"X X \n",
"on move: O\n",
" O O \n",
" \n",
"X X X \n",
"Episode 142, Total Reward: 1\n",
"Average Reward: 0.3380281690140845\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
" O \n",
"on move: O\n",
" \n",
"X \n",
"X O \n",
"on move: X\n",
" \n",
"X O \n",
"X O \n",
"on move: O\n",
" X \n",
"X O \n",
"X O \n",
"on move: X\n",
" X O \n",
"X O \n",
"X O \n",
"Episode 143, Total Reward: -1\n",
"Average Reward: 0.32867132867132864\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" O \n",
"X \n",
"on move: O\n",
" \n",
" O \n",
"X X \n",
"on move: X\n",
" O \n",
" O \n",
"X X \n",
"on move: O\n",
" O \n",
" O \n",
"X X X \n",
"Episode 144, Total Reward: 1\n",
"Average Reward: 0.3333333333333333\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
"O \n",
" \n",
" X \n",
"on move: O\n",
"O \n",
" \n",
" X X \n",
"on move: X\n",
"O \n",
" \n",
"O X X \n",
"on move: O\n",
"O \n",
" X \n",
"O X X \n",
"on move: X\n",
"O \n",
"O X \n",
"O X X \n",
"Episode 145, Total Reward: -1\n",
"Average Reward: 0.32413793103448274\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
"O X \n",
" \n",
" \n",
"on move: O\n",
"O X \n",
" X \n",
" \n",
"on move: X\n",
"O X \n",
" X \n",
"O \n",
"on move: O\n",
"O X X \n",
" X \n",
"O \n",
"on move: X\n",
"O X X \n",
" X \n",
"O O \n",
"on move: O\n",
"O X X \n",
" X \n",
"O X O \n",
"Episode 146, Total Reward: 1\n",
"Average Reward: 0.3287671232876712\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" O \n",
" \n",
"on move: O\n",
"X \n",
" X O \n",
" \n",
"on move: X\n",
"X \n",
"O X O \n",
" \n",
"on move: O\n",
"X \n",
"O X O \n",
" X \n",
"on move: X\n",
"X O \n",
"O X O \n",
" X \n",
"on move: O\n",
"X O X \n",
"O X O \n",
" X \n",
"on move: X\n",
"X O X \n",
"O X O \n",
" X O \n",
"on move: O\n",
"X O X \n",
"O X O \n",
"X X O \n",
"Episode 147, Total Reward: 1\n",
"Average Reward: 0.3333333333333333\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" \n",
"X O \n",
"on move: O\n",
" \n",
" X \n",
"X O \n",
"on move: X\n",
" \n",
" O X \n",
"X O \n",
"on move: O\n",
" \n",
"X O X \n",
"X O \n",
"on move: X\n",
" \n",
"X O X \n",
"X O O \n",
"on move: O\n",
" X \n",
"X O X \n",
"X O O \n",
"on move: X\n",
"O X \n",
"X O X \n",
"X O O \n",
"Episode 148, Total Reward: -1\n",
"Average Reward: 0.32432432432432434\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
"O X \n",
" \n",
"on move: O\n",
" \n",
"O X X \n",
" \n",
"on move: X\n",
"O \n",
"O X X \n",
" \n",
"on move: O\n",
"O \n",
"O X X \n",
"X \n",
"on move: X\n",
"O \n",
"O X X \n",
"X O \n",
"on move: O\n",
"O X \n",
"O X X \n",
"X O \n",
"on move: X\n",
"O X O \n",
"O X X \n",
"X O \n",
"on move: O\n",
"O X O \n",
"O X X \n",
"X O X \n",
"Episode 149, Total Reward: 0\n",
"Average Reward: 0.3221476510067114\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
"X \n",
" O \n",
" X \n",
"on move: X\n",
"X \n",
" O \n",
"O X \n",
"on move: O\n",
"X X \n",
" O \n",
"O X \n",
"on move: X\n",
"X X \n",
" O O \n",
"O X \n",
"on move: O\n",
"X X \n",
"X O O \n",
"O X \n",
"on move: X\n",
"X O X \n",
"X O O \n",
"O X \n",
"on move: O\n",
"X O X \n",
"X O O \n",
"O X X \n",
"Episode 150, Total Reward: 0\n",
"Average Reward: 0.32\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
"O \n",
" X \n",
" \n",
"on move: O\n",
"O X \n",
" X \n",
" \n",
"on move: X\n",
"O X \n",
" X \n",
"O \n",
"on move: O\n",
"O X X \n",
" X \n",
"O \n",
"on move: X\n",
"O X X \n",
"O X \n",
"O \n",
"Episode 151, Total Reward: -1\n",
"Average Reward: 0.31125827814569534\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
" O \n",
" \n",
"X X \n",
"on move: X\n",
"O O \n",
" \n",
"X X \n",
"on move: O\n",
"O O \n",
"X \n",
"X X \n",
"on move: X\n",
"O O \n",
"X O \n",
"X X \n",
"on move: O\n",
"O O X \n",
"X O \n",
"X X \n",
"on move: X\n",
"O O X \n",
"X O O \n",
"X X \n",
"on move: O\n",
"O O X \n",
"X O O \n",
"X X X \n",
"Episode 152, Total Reward: 1\n",
"Average Reward: 0.3157894736842105\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
"O X \n",
" \n",
"on move: O\n",
" X \n",
"O X \n",
" \n",
"on move: X\n",
" X O \n",
"O X \n",
" \n",
"on move: O\n",
" X O \n",
"O X X \n",
" \n",
"on move: X\n",
" X O \n",
"O X X \n",
" O \n",
"on move: O\n",
" X O \n",
"O X X \n",
"X O \n",
"on move: X\n",
"O X O \n",
"O X X \n",
"X O \n",
"on move: O\n",
"O X O \n",
"O X X \n",
"X X O \n",
"Episode 153, Total Reward: 1\n",
"Average Reward: 0.3202614379084967\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
"O \n",
" \n",
" X \n",
"on move: O\n",
"O \n",
" X \n",
" X \n",
"on move: X\n",
"O O \n",
" X \n",
" X \n",
"on move: O\n",
"O O \n",
" X \n",
"X X \n",
"on move: X\n",
"O O \n",
" O X \n",
"X X \n",
"on move: O\n",
"O O \n",
"X O X \n",
"X X \n",
"on move: X\n",
"O O \n",
"X O X \n",
"X X O \n",
"Episode 154, Total Reward: -1\n",
"Average Reward: 0.3116883116883117\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
"O X \n",
" \n",
"on move: O\n",
" X \n",
"O X \n",
" \n",
"on move: X\n",
" X \n",
"O X \n",
" O \n",
"on move: O\n",
" X X \n",
"O X \n",
" O \n",
"on move: X\n",
" X X \n",
"O X \n",
"O O \n",
"on move: O\n",
"X X X \n",
"O X \n",
"O O \n",
"Episode 155, Total Reward: 1\n",
"Average Reward: 0.3161290322580645\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
"X O \n",
" \n",
" X \n",
"on move: X\n",
"X O O \n",
" \n",
" X \n",
"on move: O\n",
"X O O \n",
" X \n",
" X \n",
"on move: X\n",
"X O O \n",
" X \n",
"O X \n",
"on move: O\n",
"X O O \n",
" X X \n",
"O X \n",
"on move: X\n",
"X O O \n",
" X X \n",
"O X O \n",
"on move: O\n",
"X O O \n",
"X X X \n",
"O X O \n",
"Episode 156, Total Reward: 1\n",
"Average Reward: 0.32051282051282054\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" O \n",
" \n",
"on move: O\n",
"X \n",
" O \n",
" X \n",
"on move: X\n",
"X \n",
" O O \n",
" X \n",
"on move: O\n",
"X \n",
" O O \n",
"X X \n",
"on move: X\n",
"X O \n",
" O O \n",
"X X \n",
"on move: O\n",
"X O \n",
" O O \n",
"X X X \n",
"Episode 157, Total Reward: 1\n",
"Average Reward: 0.3248407643312102\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" O \n",
"X \n",
"on move: O\n",
" \n",
" O X \n",
"X \n",
"on move: X\n",
" O \n",
" O X \n",
"X \n",
"on move: O\n",
" O \n",
" O X \n",
"X X \n",
"on move: X\n",
"O O \n",
" O X \n",
"X X \n",
"on move: O\n",
"O O \n",
" O X \n",
"X X X \n",
"Episode 158, Total Reward: 1\n",
"Average Reward: 0.3291139240506329\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
" X \n",
"X \n",
"on move: X\n",
" O \n",
"O X \n",
"X \n",
"on move: O\n",
" O \n",
"O X \n",
"X X \n",
"on move: X\n",
" O \n",
"O X \n",
"X X O \n",
"on move: O\n",
" O \n",
"O X X \n",
"X X O \n",
"on move: X\n",
" O O \n",
"O X X \n",
"X X O \n",
"on move: O\n",
"X O O \n",
"O X X \n",
"X X O \n",
"Episode 159, Total Reward: 0\n",
"Average Reward: 0.3270440251572327\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
" O \n",
"on move: O\n",
" \n",
"X \n",
"X O \n",
"on move: X\n",
" \n",
"X \n",
"X O O \n",
"on move: O\n",
" X \n",
"X \n",
"X O O \n",
"on move: X\n",
" X \n",
"X O \n",
"X O O \n",
"on move: O\n",
" X X \n",
"X O \n",
"X O O \n",
"on move: X\n",
" X X \n",
"X O O \n",
"X O O \n",
"on move: O\n",
"X X X \n",
"X O O \n",
"X O O \n",
"Episode 160, Total Reward: 1\n",
"Average Reward: 0.33125\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
" X O \n",
" \n",
" X \n",
"on move: X\n",
" X O \n",
" \n",
" O X \n",
"on move: O\n",
"X X O \n",
" \n",
" O X \n",
"on move: X\n",
"X X O \n",
"O \n",
" O X \n",
"on move: O\n",
"X X O \n",
"O \n",
"X O X \n",
"on move: X\n",
"X X O \n",
"O O \n",
"X O X \n",
"on move: O\n",
"X X O \n",
"O X O \n",
"X O X \n",
"Episode 161, Total Reward: 1\n",
"Average Reward: 0.33540372670807456\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
"O \n",
"on move: O\n",
"X X \n",
" \n",
"O \n",
"on move: X\n",
"X X \n",
" O \n",
"O \n",
"on move: O\n",
"X X \n",
" X O \n",
"O \n",
"on move: X\n",
"X X \n",
" X O \n",
"O O \n",
"on move: O\n",
"X X \n",
"X X O \n",
"O O \n",
"on move: X\n",
"X O X \n",
"X X O \n",
"O O \n",
"on move: O\n",
"X O X \n",
"X X O \n",
"O O X \n",
"Episode 162, Total Reward: 1\n",
"Average Reward: 0.3395061728395062\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
" O \n",
"on move: O\n",
" \n",
" X X \n",
" O \n",
"on move: X\n",
" \n",
"O X X \n",
" O \n",
"on move: O\n",
" \n",
"O X X \n",
"X O \n",
"on move: X\n",
" O \n",
"O X X \n",
"X O \n",
"on move: O\n",
" O \n",
"O X X \n",
"X X O \n",
"on move: X\n",
" O O \n",
"O X X \n",
"X X O \n",
"on move: O\n",
"X O O \n",
"O X X \n",
"X X O \n",
"Episode 163, Total Reward: 0\n",
"Average Reward: 0.3374233128834356\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
" O \n",
" \n",
"X X \n",
"on move: X\n",
" O \n",
"O \n",
"X X \n",
"on move: O\n",
" O \n",
"O X \n",
"X X \n",
"on move: X\n",
" O \n",
"O O X \n",
"X X \n",
"on move: O\n",
" O \n",
"O O X \n",
"X X X \n",
"Episode 164, Total Reward: 1\n",
"Average Reward: 0.34146341463414637\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
" O \n",
"on move: O\n",
" \n",
"X X \n",
" O \n",
"on move: X\n",
"O \n",
"X X \n",
" O \n",
"on move: O\n",
"O \n",
"X X \n",
" O X \n",
"on move: X\n",
"O O \n",
"X X \n",
" O X \n",
"on move: O\n",
"O O X \n",
"X X \n",
" O X \n",
"on move: X\n",
"O O X \n",
"X X \n",
"O O X \n",
"on move: O\n",
"O O X \n",
"X X X \n",
"O O X \n",
"Episode 165, Total Reward: 1\n",
"Average Reward: 0.34545454545454546\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
"O \n",
" \n",
" X \n",
"on move: O\n",
"O X \n",
" \n",
" X \n",
"on move: X\n",
"O X \n",
" O \n",
" X \n",
"on move: O\n",
"O X X \n",
" O \n",
" X \n",
"on move: X\n",
"O X X \n",
" O O \n",
" X \n",
"on move: O\n",
"O X X \n",
"X O O \n",
" X \n",
"on move: X\n",
"O X X \n",
"X O O \n",
"O X \n",
"on move: O\n",
"O X X \n",
"X O O \n",
"O X X \n",
"Episode 166, Total Reward: 0\n",
"Average Reward: 0.3433734939759036\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" O \n",
" \n",
"on move: O\n",
" X \n",
" O \n",
" X \n",
"on move: X\n",
" X \n",
" O O \n",
" X \n",
"on move: O\n",
" X \n",
" O O \n",
"X X \n",
"on move: X\n",
" X \n",
"O O O \n",
"X X \n",
"Episode 167, Total Reward: -1\n",
"Average Reward: 0.33532934131736525\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
" X \n",
" X \n",
"on move: X\n",
" O \n",
" O X \n",
" X \n",
"on move: O\n",
" O \n",
"X O X \n",
" X \n",
"on move: X\n",
" O O \n",
"X O X \n",
" X \n",
"on move: O\n",
" O O \n",
"X O X \n",
" X X \n",
"on move: X\n",
"O O O \n",
"X O X \n",
" X X \n",
"Episode 168, Total Reward: -1\n",
"Average Reward: 0.3273809523809524\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
"O \n",
"X \n",
" \n",
"on move: O\n",
"O \n",
"X X \n",
" \n",
"on move: X\n",
"O \n",
"X X \n",
"O \n",
"on move: O\n",
"O \n",
"X X \n",
"O X \n",
"on move: X\n",
"O O \n",
"X X \n",
"O X \n",
"on move: O\n",
"O O \n",
"X X X \n",
"O X \n",
"Episode 169, Total Reward: 1\n",
"Average Reward: 0.33136094674556216\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
" O \n",
"on move: O\n",
" X \n",
" \n",
" X O \n",
"on move: X\n",
"O X \n",
" \n",
" X O \n",
"on move: O\n",
"O X \n",
" \n",
"X X O \n",
"on move: X\n",
"O O X \n",
" \n",
"X X O \n",
"on move: O\n",
"O O X \n",
" X \n",
"X X O \n",
"on move: X\n",
"O O X \n",
" O X \n",
"X X O \n",
"Episode 170, Total Reward: -1\n",
"Average Reward: 0.3235294117647059\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
" X O \n",
" \n",
" X \n",
"on move: X\n",
" X O \n",
"O \n",
" X \n",
"on move: O\n",
" X O \n",
"O \n",
"X X \n",
"on move: X\n",
" X O \n",
"O O \n",
"X X \n",
"on move: O\n",
"X X O \n",
"O O \n",
"X X \n",
"on move: X\n",
"X X O \n",
"O O O \n",
"X X \n",
"Episode 171, Total Reward: -1\n",
"Average Reward: 0.3157894736842105\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" \n",
" O \n",
"on move: O\n",
"X \n",
" X \n",
" O \n",
"on move: X\n",
"X \n",
"O X \n",
" O \n",
"on move: O\n",
"X \n",
"O X \n",
" X O \n",
"on move: X\n",
"X O \n",
"O X \n",
" X O \n",
"on move: O\n",
"X O \n",
"O X \n",
"X X O \n",
"on move: X\n",
"X O O \n",
"O X \n",
"X X O \n",
"on move: O\n",
"X O O \n",
"O X X \n",
"X X O \n",
"Episode 172, Total Reward: 0\n",
"Average Reward: 0.313953488372093\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X O \n",
" \n",
" \n",
"on move: O\n",
"X O \n",
" X \n",
" \n",
"on move: X\n",
"X O \n",
" X \n",
"O \n",
"on move: O\n",
"X O \n",
" X \n",
"O X \n",
"on move: X\n",
"X O \n",
"O X \n",
"O X \n",
"on move: O\n",
"X O \n",
"O X \n",
"O X X \n",
"on move: X\n",
"X O O \n",
"O X \n",
"O X X \n",
"on move: O\n",
"X O O \n",
"O X X \n",
"O X X \n",
"Episode 173, Total Reward: 1\n",
"Average Reward: 0.3179190751445087\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" O \n",
" \n",
"X \n",
"on move: O\n",
" O \n",
"X \n",
"X \n",
"on move: X\n",
" O \n",
"X \n",
"X O \n",
"on move: O\n",
"X O \n",
"X \n",
"X O \n",
"Episode 174, Total Reward: 1\n",
"Average Reward: 0.3218390804597701\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
"O X \n",
" \n",
" \n",
"on move: O\n",
"O X \n",
" \n",
" X \n",
"on move: X\n",
"O X \n",
" \n",
" X O \n",
"on move: O\n",
"O X \n",
" \n",
"X X O \n",
"on move: X\n",
"O X \n",
" O \n",
"X X O \n",
"on move: O\n",
"O X \n",
"X O \n",
"X X O \n",
"on move: X\n",
"O O X \n",
"X O \n",
"X X O \n",
"on move: O\n",
"O O X \n",
"X X O \n",
"X X O \n",
"Episode 175, Total Reward: 1\n",
"Average Reward: 0.32571428571428573\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
" X X \n",
" \n",
"on move: X\n",
"O O \n",
" X X \n",
" \n",
"on move: O\n",
"O O \n",
"X X X \n",
" \n",
"Episode 176, Total Reward: 1\n",
"Average Reward: 0.32954545454545453\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
"O \n",
" \n",
"on move: O\n",
"X X \n",
"O \n",
" \n",
"on move: X\n",
"X X O \n",
"O \n",
" \n",
"on move: O\n",
"X X O \n",
"O X \n",
" \n",
"on move: X\n",
"X X O \n",
"O X \n",
"O \n",
"on move: O\n",
"X X O \n",
"O X \n",
"O X \n",
"Episode 177, Total Reward: 1\n",
"Average Reward: 0.3333333333333333\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
"O \n",
" X \n",
"on move: O\n",
"X \n",
"O \n",
" X \n",
"on move: X\n",
"X O \n",
"O \n",
" X \n",
"on move: O\n",
"X O \n",
"O \n",
"X X \n",
"on move: X\n",
"X O \n",
"O O \n",
"X X \n",
"on move: O\n",
"X X O \n",
"O O \n",
"X X \n",
"on move: X\n",
"X X O \n",
"O O \n",
"X O X \n",
"on move: O\n",
"X X O \n",
"O X O \n",
"X O X \n",
"Episode 178, Total Reward: 1\n",
"Average Reward: 0.33707865168539325\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
" X \n",
" X \n",
"on move: X\n",
"O O \n",
" X \n",
" X \n",
"on move: O\n",
"O O \n",
" X \n",
" X X \n",
"on move: X\n",
"O O \n",
" X \n",
"O X X \n",
"on move: O\n",
"O O \n",
"X X \n",
"O X X \n",
"on move: X\n",
"O O \n",
"X O X \n",
"O X X \n",
"Episode 179, Total Reward: -1\n",
"Average Reward: 0.329608938547486\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
"O X \n",
" \n",
"on move: O\n",
"X \n",
"O X \n",
" \n",
"on move: X\n",
"X \n",
"O X \n",
"O \n",
"on move: O\n",
"X X \n",
"O X \n",
"O \n",
"on move: X\n",
"X X \n",
"O X O \n",
"O \n",
"on move: O\n",
"X X \n",
"O X O \n",
"O X \n",
"on move: X\n",
"X X \n",
"O X O \n",
"O X O \n",
"on move: O\n",
"X X X \n",
"O X O \n",
"O X O \n",
"Episode 180, Total Reward: 1\n",
"Average Reward: 0.3333333333333333\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
"X O \n",
" \n",
" X \n",
"on move: X\n",
"X O O \n",
" \n",
" X \n",
"on move: O\n",
"X O O \n",
"X \n",
" X \n",
"on move: X\n",
"X O O \n",
"X O \n",
" X \n",
"on move: O\n",
"X O O \n",
"X X O \n",
" X \n",
"Episode 181, Total Reward: 1\n",
"Average Reward: 0.3370165745856354\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
" X \n",
"X \n",
"on move: X\n",
" O \n",
" X \n",
"X O \n",
"on move: O\n",
" O X \n",
" X \n",
"X O \n",
"Episode 182, Total Reward: 1\n",
"Average Reward: 0.34065934065934067\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
" X X \n",
" \n",
"on move: X\n",
" O \n",
" X X \n",
"O \n",
"on move: O\n",
" O \n",
" X X \n",
"O X \n",
"on move: X\n",
" O \n",
" X X \n",
"O O X \n",
"on move: O\n",
" O \n",
"X X X \n",
"O O X \n",
"Episode 183, Total Reward: 1\n",
"Average Reward: 0.3442622950819672\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" O \n",
" \n",
"on move: O\n",
" X \n",
" O \n",
" X \n",
"on move: X\n",
" X \n",
" O \n",
" O X \n",
"on move: O\n",
" X \n",
" O \n",
"X O X \n",
"on move: X\n",
"O X \n",
" O \n",
"X O X \n",
"on move: O\n",
"O X \n",
"X O \n",
"X O X \n",
"on move: X\n",
"O O X \n",
"X O \n",
"X O X \n",
"Episode 184, Total Reward: -1\n",
"Average Reward: 0.33695652173913043\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" O \n",
"X \n",
" \n",
"on move: O\n",
" O \n",
"X \n",
" X \n",
"on move: X\n",
" O \n",
"X \n",
"O X \n",
"on move: O\n",
" O \n",
"X \n",
"O X X \n",
"on move: X\n",
" O \n",
"X O \n",
"O X X \n",
"on move: O\n",
"X O \n",
"X O \n",
"O X X \n",
"on move: X\n",
"X O O \n",
"X O \n",
"O X X \n",
"Episode 185, Total Reward: -1\n",
"Average Reward: 0.32972972972972975\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
" O \n",
"on move: O\n",
" X \n",
" X \n",
" O \n",
"on move: X\n",
" X \n",
" X \n",
"O O \n",
"on move: O\n",
" X \n",
" X \n",
"O O X \n",
"on move: X\n",
" X \n",
" X O \n",
"O O X \n",
"on move: O\n",
"X X \n",
" X O \n",
"O O X \n",
"Episode 186, Total Reward: 1\n",
"Average Reward: 0.3333333333333333\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
"O \n",
" \n",
"on move: O\n",
" X \n",
"O X \n",
" \n",
"on move: X\n",
" O X \n",
"O X \n",
" \n",
"on move: O\n",
" O X \n",
"O X \n",
" X \n",
"on move: X\n",
" O X \n",
"O X \n",
" O X \n",
"on move: O\n",
" O X \n",
"O X X \n",
" O X \n",
"Episode 187, Total Reward: 1\n",
"Average Reward: 0.33689839572192515\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
"O \n",
"on move: O\n",
" X \n",
"X \n",
"O \n",
"on move: X\n",
" X O \n",
"X \n",
"O \n",
"on move: O\n",
" X O \n",
"X \n",
"O X \n",
"on move: X\n",
" X O \n",
"X O \n",
"O X \n",
"Episode 188, Total Reward: -1\n",
"Average Reward: 0.32978723404255317\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
" O \n",
"on move: O\n",
" X \n",
" X \n",
" O \n",
"on move: X\n",
" X \n",
" X \n",
"O O \n",
"on move: O\n",
"X X \n",
" X \n",
"O O \n",
"on move: X\n",
"X X \n",
"O X \n",
"O O \n",
"on move: O\n",
"X X \n",
"O X \n",
"O X O \n",
"on move: X\n",
"X X O \n",
"O X \n",
"O X O \n",
"on move: O\n",
"X X O \n",
"O X X \n",
"O X O \n",
"Episode 189, Total Reward: 1\n",
"Average Reward: 0.3333333333333333\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" O \n",
" \n",
"on move: O\n",
"X X \n",
" O \n",
" \n",
"on move: X\n",
"X X \n",
" O \n",
"O \n",
"on move: O\n",
"X X \n",
" O \n",
"O X \n",
"on move: X\n",
"X X \n",
"O O \n",
"O X \n",
"on move: O\n",
"X X \n",
"O O \n",
"O X X \n",
"on move: X\n",
"X O X \n",
"O O \n",
"O X X \n",
"on move: O\n",
"X O X \n",
"O O X \n",
"O X X \n",
"Episode 190, Total Reward: 1\n",
"Average Reward: 0.3368421052631579\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
"O \n",
" \n",
" X \n",
"on move: O\n",
"O \n",
"X \n",
" X \n",
"on move: X\n",
"O \n",
"X O \n",
" X \n",
"on move: O\n",
"O \n",
"X X O \n",
" X \n",
"on move: X\n",
"O O \n",
"X X O \n",
" X \n",
"on move: O\n",
"O O \n",
"X X O \n",
" X X \n",
"on move: X\n",
"O O \n",
"X X O \n",
"O X X \n",
"on move: O\n",
"O X O \n",
"X X O \n",
"O X X \n",
"Episode 191, Total Reward: 1\n",
"Average Reward: 0.3403141361256545\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
" O \n",
"on move: O\n",
" \n",
" X \n",
"X O \n",
"on move: X\n",
"O \n",
" X \n",
"X O \n",
"on move: O\n",
"O \n",
" X \n",
"X X O \n",
"on move: X\n",
"O O \n",
" X \n",
"X X O \n",
"on move: O\n",
"O O \n",
"X X \n",
"X X O \n",
"on move: X\n",
"O O O \n",
"X X \n",
"X X O \n",
"Episode 192, Total Reward: -1\n",
"Average Reward: 0.3333333333333333\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
"O \n",
" \n",
" X \n",
"on move: O\n",
"O \n",
"X \n",
" X \n",
"on move: X\n",
"O \n",
"X \n",
" X O \n",
"on move: O\n",
"O X \n",
"X \n",
" X O \n",
"on move: X\n",
"O X \n",
"X O \n",
" X O \n",
"Episode 193, Total Reward: -1\n",
"Average Reward: 0.32642487046632124\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
"O \n",
"on move: O\n",
" X \n",
"X \n",
"O \n",
"on move: X\n",
" X \n",
"X O \n",
"O \n",
"on move: O\n",
" X X \n",
"X O \n",
"O \n",
"on move: X\n",
" X X \n",
"X O O \n",
"O \n",
"on move: O\n",
"X X X \n",
"X O O \n",
"O \n",
"Episode 194, Total Reward: 1\n",
"Average Reward: 0.32989690721649484\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" O \n",
"X \n",
"on move: O\n",
" X \n",
" O \n",
"X \n",
"on move: X\n",
" X \n",
"O O \n",
"X \n",
"on move: O\n",
" X X \n",
"O O \n",
"X \n",
"on move: X\n",
"O X X \n",
"O O \n",
"X \n",
"on move: O\n",
"O X X \n",
"O O X \n",
"X \n",
"on move: X\n",
"O X X \n",
"O O X \n",
"X O \n",
"Episode 195, Total Reward: -1\n",
"Average Reward: 0.3230769230769231\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
" X \n",
" O \n",
" X \n",
"on move: X\n",
" X \n",
" O \n",
" X O \n",
"on move: O\n",
" X \n",
" O \n",
"X X O \n",
"on move: X\n",
" X \n",
" O O \n",
"X X O \n",
"on move: O\n",
"X X \n",
" O O \n",
"X X O \n",
"on move: X\n",
"X X O \n",
" O O \n",
"X X O \n",
"Episode 196, Total Reward: -1\n",
"Average Reward: 0.3163265306122449\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
"O \n",
" \n",
"X \n",
"on move: O\n",
"O \n",
" \n",
"X X \n",
"on move: X\n",
"O O \n",
" \n",
"X X \n",
"on move: O\n",
"O O \n",
"X \n",
"X X \n",
"on move: X\n",
"O O O \n",
"X \n",
"X X \n",
"Episode 197, Total Reward: -1\n",
"Average Reward: 0.3096446700507614\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" O X \n",
" \n",
" \n",
"on move: O\n",
" O X \n",
" X \n",
" \n",
"on move: X\n",
" O X \n",
" X \n",
" O \n",
"on move: O\n",
" O X \n",
" X \n",
"X O \n",
"on move: X\n",
" O X \n",
" O X \n",
"X O \n",
"Episode 198, Total Reward: -1\n",
"Average Reward: 0.30303030303030304\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X O \n",
" \n",
" \n",
"on move: O\n",
"X O X \n",
" \n",
" \n",
"on move: X\n",
"X O X \n",
" \n",
" O \n",
"on move: O\n",
"X O X \n",
" \n",
" X O \n",
"on move: X\n",
"X O X \n",
"O \n",
" X O \n",
"on move: O\n",
"X O X \n",
"O X \n",
" X O \n",
"on move: X\n",
"X O X \n",
"O X \n",
"O X O \n",
"on move: O\n",
"X O X \n",
"O X X \n",
"O X O \n",
"Episode 199, Total Reward: 0\n",
"Average Reward: 0.3015075376884422\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
"O \n",
" \n",
"on move: O\n",
" X \n",
"O \n",
" X \n",
"on move: X\n",
" X \n",
"O O \n",
" X \n",
"on move: O\n",
" X \n",
"O X O \n",
" X \n",
"Episode 200, Total Reward: 1\n",
"Average Reward: 0.305\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" O \n",
" \n",
"on move: O\n",
"X X \n",
" O \n",
" \n",
"on move: X\n",
"X X \n",
" O \n",
" O \n",
"on move: O\n",
"X X \n",
" O \n",
"X O \n",
"on move: X\n",
"X O X \n",
" O \n",
"X O \n",
"on move: O\n",
"X O X \n",
" O X \n",
"X O \n",
"on move: X\n",
"X O X \n",
"O O X \n",
"X O \n",
"on move: O\n",
"X O X \n",
"O O X \n",
"X X O \n",
"Episode 201, Total Reward: 0\n",
"Average Reward: 0.3034825870646766\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" \n",
"O \n",
"on move: O\n",
"X \n",
"X \n",
"O \n",
"on move: X\n",
"X \n",
"X \n",
"O O \n",
"on move: O\n",
"X X \n",
"X \n",
"O O \n",
"on move: X\n",
"X X \n",
"X O \n",
"O O \n",
"on move: O\n",
"X X \n",
"X O \n",
"O X O \n",
"on move: X\n",
"X X O \n",
"X O \n",
"O X O \n",
"Episode 202, Total Reward: -1\n",
"Average Reward: 0.297029702970297\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" O \n",
"X \n",
"on move: O\n",
" \n",
" O \n",
"X X \n",
"on move: X\n",
" O \n",
" O \n",
"X X \n",
"on move: O\n",
" O \n",
" X O \n",
"X X \n",
"on move: X\n",
" O O \n",
" X O \n",
"X X \n",
"on move: O\n",
" O O \n",
" X O \n",
"X X X \n",
"Episode 203, Total Reward: 1\n",
"Average Reward: 0.30049261083743845\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X O \n",
" \n",
"on move: O\n",
"X \n",
"X O \n",
" \n",
"on move: X\n",
"X \n",
"X O O \n",
" \n",
"on move: O\n",
"X X \n",
"X O O \n",
" \n",
"on move: X\n",
"X X \n",
"X O O \n",
" O \n",
"on move: O\n",
"X X \n",
"X O O \n",
"X O \n",
"Episode 204, Total Reward: 1\n",
"Average Reward: 0.30392156862745096\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
"O \n",
"on move: O\n",
" X \n",
"X \n",
"O \n",
"on move: X\n",
" X \n",
"X \n",
"O O \n",
"on move: O\n",
" X \n",
"X X \n",
"O O \n",
"on move: X\n",
" O X \n",
"X X \n",
"O O \n",
"on move: O\n",
" O X \n",
"X X X \n",
"O O \n",
"Episode 205, Total Reward: 1\n",
"Average Reward: 0.3073170731707317\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X O \n",
" \n",
" \n",
"on move: O\n",
" X O \n",
" \n",
"X \n",
"on move: X\n",
" X O \n",
" O \n",
"X \n",
"on move: O\n",
" X O \n",
"X O \n",
"X \n",
"on move: X\n",
"O X O \n",
"X O \n",
"X \n",
"on move: O\n",
"O X O \n",
"X O \n",
"X X \n",
"on move: X\n",
"O X O \n",
"X O O \n",
"X X \n",
"on move: O\n",
"O X O \n",
"X O O \n",
"X X X \n",
"Episode 206, Total Reward: 1\n",
"Average Reward: 0.3106796116504854\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
"O \n",
" \n",
" X \n",
"on move: O\n",
"O \n",
" \n",
"X X \n",
"on move: X\n",
"O \n",
"O \n",
"X X \n",
"on move: O\n",
"O \n",
"O \n",
"X X X \n",
"Episode 207, Total Reward: 1\n",
"Average Reward: 0.3140096618357488\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" \n",
"X O \n",
"on move: O\n",
" \n",
"X \n",
"X O \n",
"on move: X\n",
"O \n",
"X \n",
"X O \n",
"on move: O\n",
"O X \n",
"X \n",
"X O \n",
"on move: X\n",
"O O X \n",
"X \n",
"X O \n",
"on move: O\n",
"O O X \n",
"X \n",
"X X O \n",
"on move: X\n",
"O O X \n",
"X O \n",
"X X O \n",
"Episode 208, Total Reward: -1\n",
"Average Reward: 0.3076923076923077\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X O \n",
" \n",
" \n",
"on move: O\n",
"X O \n",
" X \n",
" \n",
"on move: X\n",
"X O \n",
" X \n",
" O \n",
"on move: O\n",
"X O \n",
" X X \n",
" O \n",
"on move: X\n",
"X O \n",
"O X X \n",
" O \n",
"on move: O\n",
"X O X \n",
"O X X \n",
" O \n",
"on move: X\n",
"X O X \n",
"O X X \n",
"O O \n",
"on move: O\n",
"X O X \n",
"O X X \n",
"O O X \n",
"Episode 209, Total Reward: 1\n",
"Average Reward: 0.31100478468899523\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
"O \n",
" X \n",
"on move: O\n",
" X \n",
"O \n",
" X \n",
"on move: X\n",
" X \n",
"O O \n",
" X \n",
"on move: O\n",
" X \n",
"O O X \n",
" X \n",
"on move: X\n",
"O X \n",
"O O X \n",
" X \n",
"on move: O\n",
"O X \n",
"O O X \n",
"X X \n",
"on move: X\n",
"O X \n",
"O O X \n",
"X X O \n",
"Episode 210, Total Reward: -1\n",
"Average Reward: 0.3047619047619048\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
"O \n",
"on move: O\n",
" X \n",
" X \n",
"O \n",
"on move: X\n",
" X \n",
" X \n",
"O O \n",
"on move: O\n",
" X X \n",
" X \n",
"O O \n",
"on move: X\n",
"O X X \n",
" X \n",
"O O \n",
"on move: O\n",
"O X X \n",
"X X \n",
"O O \n",
"on move: X\n",
"O X X \n",
"X X \n",
"O O O \n",
"Episode 211, Total Reward: -1\n",
"Average Reward: 0.2985781990521327\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
"O X \n",
" \n",
" \n",
"on move: O\n",
"O X \n",
" \n",
"X \n",
"on move: X\n",
"O X \n",
" O \n",
"X \n",
"on move: O\n",
"O X \n",
" O \n",
"X X \n",
"on move: X\n",
"O X \n",
" O O \n",
"X X \n",
"on move: O\n",
"O X \n",
" O O \n",
"X X X \n",
"Episode 212, Total Reward: 1\n",
"Average Reward: 0.3018867924528302\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
" O \n",
"on move: O\n",
" X \n",
"X \n",
" O \n",
"on move: X\n",
" O X \n",
"X \n",
" O \n",
"on move: O\n",
" O X \n",
"X \n",
" O X \n",
"on move: X\n",
" O X \n",
"X O \n",
" O X \n",
"on move: O\n",
"X O X \n",
"X O \n",
" O X \n",
"on move: X\n",
"X O X \n",
"X O O \n",
" O X \n",
"Episode 213, Total Reward: -1\n",
"Average Reward: 0.29577464788732394\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
" O \n",
" \n",
" X X \n",
"on move: X\n",
" O \n",
" O \n",
" X X \n",
"on move: O\n",
"X O \n",
" O \n",
" X X \n",
"on move: X\n",
"X O \n",
" O \n",
"O X X \n",
"on move: O\n",
"X O \n",
"X O \n",
"O X X \n",
"on move: X\n",
"X O O \n",
"X O \n",
"O X X \n",
"Episode 214, Total Reward: -1\n",
"Average Reward: 0.2897196261682243\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" O \n",
" \n",
"on move: O\n",
" X \n",
" O \n",
"X \n",
"on move: X\n",
" X \n",
" O \n",
"X O \n",
"on move: O\n",
" X \n",
" O \n",
"X O X \n",
"on move: X\n",
" X \n",
"O O \n",
"X O X \n",
"on move: O\n",
" X X \n",
"O O \n",
"X O X \n",
"on move: X\n",
" X X \n",
"O O O \n",
"X O X \n",
"Episode 215, Total Reward: -1\n",
"Average Reward: 0.2837209302325581\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" O \n",
"X \n",
"on move: O\n",
"X \n",
" O \n",
"X \n",
"on move: X\n",
"X O \n",
" O \n",
"X \n",
"on move: O\n",
"X O \n",
" O \n",
"X X \n",
"on move: X\n",
"X O \n",
" O \n",
"X X O \n",
"on move: O\n",
"X O \n",
" O X \n",
"X X O \n",
"on move: X\n",
"X O O \n",
" O X \n",
"X X O \n",
"on move: O\n",
"X O O \n",
"X O X \n",
"X X O \n",
"Episode 216, Total Reward: 1\n",
"Average Reward: 0.28703703703703703\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" \n",
"X O \n",
"on move: O\n",
"X \n",
" \n",
"X O \n",
"on move: X\n",
"X \n",
"O \n",
"X O \n",
"on move: O\n",
"X \n",
"O X \n",
"X O \n",
"on move: X\n",
"X \n",
"O X \n",
"X O O \n",
"on move: O\n",
"X X \n",
"O X \n",
"X O O \n",
"on move: X\n",
"X X \n",
"O X O \n",
"X O O \n",
"on move: O\n",
"X X X \n",
"O X O \n",
"X O O \n",
"Episode 217, Total Reward: 1\n",
"Average Reward: 0.2903225806451613\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
" O \n",
"on move: O\n",
" \n",
" X X \n",
" O \n",
"on move: X\n",
" \n",
"O X X \n",
" O \n",
"on move: O\n",
" \n",
"O X X \n",
"X O \n",
"on move: X\n",
"O \n",
"O X X \n",
"X O \n",
"on move: O\n",
"O \n",
"O X X \n",
"X O X \n",
"on move: X\n",
"O O \n",
"O X X \n",
"X O X \n",
"on move: O\n",
"O X O \n",
"O X X \n",
"X O X \n",
"Episode 218, Total Reward: 0\n",
"Average Reward: 0.2889908256880734\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
" O \n",
"on move: O\n",
" X \n",
" \n",
" O X \n",
"on move: X\n",
" X \n",
"O \n",
" O X \n",
"on move: O\n",
" X \n",
"O X \n",
" O X \n",
"on move: X\n",
" X \n",
"O X \n",
"O O X \n",
"on move: O\n",
" X \n",
"O X X \n",
"O O X \n",
"on move: X\n",
"O X \n",
"O X X \n",
"O O X \n",
"Episode 219, Total Reward: -1\n",
"Average Reward: 0.2831050228310502\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
"O \n",
" \n",
"on move: O\n",
" X \n",
"O \n",
" X \n",
"on move: X\n",
" X \n",
"O \n",
"O X \n",
"on move: O\n",
"X X \n",
"O \n",
"O X \n",
"on move: X\n",
"X X \n",
"O \n",
"O X O \n",
"on move: O\n",
"X X \n",
"O X \n",
"O X O \n",
"on move: X\n",
"X O X \n",
"O X \n",
"O X O \n",
"on move: O\n",
"X O X \n",
"O X X \n",
"O X O \n",
"Episode 220, Total Reward: 0\n",
"Average Reward: 0.2818181818181818\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
"O \n",
" \n",
" X \n",
"on move: O\n",
"O \n",
" \n",
" X X \n",
"on move: X\n",
"O \n",
" O \n",
" X X \n",
"on move: O\n",
"O X \n",
" O \n",
" X X \n",
"on move: X\n",
"O X \n",
"O O \n",
" X X \n",
"on move: O\n",
"O X \n",
"O X O \n",
" X X \n",
"on move: X\n",
"O X \n",
"O X O \n",
"O X X \n",
"Episode 221, Total Reward: -1\n",
"Average Reward: 0.27601809954751133\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
"O \n",
" X \n",
"on move: O\n",
" \n",
"O \n",
"X X \n",
"on move: X\n",
" \n",
"O O \n",
"X X \n",
"on move: O\n",
"X \n",
"O O \n",
"X X \n",
"on move: X\n",
"X \n",
"O O \n",
"X X O \n",
"on move: O\n",
"X X \n",
"O O \n",
"X X O \n",
"on move: X\n",
"X X \n",
"O O O \n",
"X X O \n",
"Episode 222, Total Reward: -1\n",
"Average Reward: 0.2702702702702703\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
"O \n",
" \n",
"on move: O\n",
"X X \n",
"O \n",
" \n",
"on move: X\n",
"X X \n",
"O O \n",
" \n",
"on move: O\n",
"X X \n",
"O O \n",
" X \n",
"on move: X\n",
"X O X \n",
"O O \n",
" X \n",
"on move: O\n",
"X O X \n",
"O X O \n",
" X \n",
"on move: X\n",
"X O X \n",
"O X O \n",
" X O \n",
"on move: O\n",
"X O X \n",
"O X O \n",
"X X O \n",
"Episode 223, Total Reward: 1\n",
"Average Reward: 0.273542600896861\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
" O \n",
"on move: O\n",
" X \n",
" X \n",
" O \n",
"on move: X\n",
" X \n",
" X \n",
"O O \n",
"on move: O\n",
" X \n",
" X \n",
"O X O \n",
"on move: X\n",
" X \n",
" O X \n",
"O X O \n",
"on move: O\n",
" X \n",
"X O X \n",
"O X O \n",
"on move: X\n",
" O X \n",
"X O X \n",
"O X O \n",
"on move: O\n",
"X O X \n",
"X O X \n",
"O X O \n",
"Episode 224, Total Reward: 0\n",
"Average Reward: 0.27232142857142855\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" O \n",
"X \n",
"on move: O\n",
" \n",
" X O \n",
"X \n",
"on move: X\n",
" \n",
" X O \n",
"X O \n",
"on move: O\n",
" \n",
" X O \n",
"X O X \n",
"on move: X\n",
"O \n",
" X O \n",
"X O X \n",
"on move: O\n",
"O \n",
"X X O \n",
"X O X \n",
"on move: X\n",
"O O \n",
"X X O \n",
"X O X \n",
"on move: O\n",
"O O X \n",
"X X O \n",
"X O X \n",
"Episode 225, Total Reward: 1\n",
"Average Reward: 0.27555555555555555\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
" O \n",
"on move: O\n",
" \n",
"X X \n",
" O \n",
"on move: X\n",
" \n",
"X X \n",
"O O \n",
"on move: O\n",
" \n",
"X X X \n",
"O O \n",
"Episode 226, Total Reward: 1\n",
"Average Reward: 0.27876106194690264\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" O X \n",
" \n",
"on move: O\n",
" \n",
" O X \n",
"X \n",
"on move: X\n",
" \n",
"O O X \n",
"X \n",
"on move: O\n",
" X \n",
"O O X \n",
"X \n",
"on move: X\n",
" X \n",
"O O X \n",
"X O \n",
"on move: O\n",
"X X \n",
"O O X \n",
"X O \n",
"on move: X\n",
"X O X \n",
"O O X \n",
"X O \n",
"Episode 227, Total Reward: -1\n",
"Average Reward: 0.27312775330396477\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" O \n",
" \n",
"X \n",
"on move: O\n",
" O \n",
"X \n",
"X \n",
"on move: X\n",
" O \n",
"X \n",
"X O \n",
"on move: O\n",
" O \n",
"X X \n",
"X O \n",
"on move: X\n",
"O O \n",
"X X \n",
"X O \n",
"on move: O\n",
"O O \n",
"X X X \n",
"X O \n",
"Episode 228, Total Reward: 1\n",
"Average Reward: 0.27631578947368424\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" O \n",
" \n",
"on move: O\n",
" X \n",
" O \n",
" X \n",
"on move: X\n",
" X \n",
" O \n",
"O X \n",
"on move: O\n",
" X \n",
" O X \n",
"O X \n",
"on move: X\n",
" X \n",
"O O X \n",
"O X \n",
"on move: O\n",
" X \n",
"O O X \n",
"O X X \n",
"on move: X\n",
" X O \n",
"O O X \n",
"O X X \n",
"Episode 229, Total Reward: -1\n",
"Average Reward: 0.27074235807860264\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
"O \n",
" X \n",
" \n",
"on move: O\n",
"O \n",
" X \n",
"X \n",
"on move: X\n",
"O \n",
" X \n",
"X O \n",
"on move: O\n",
"O \n",
"X X \n",
"X O \n",
"on move: X\n",
"O O \n",
"X X \n",
"X O \n",
"on move: O\n",
"O O \n",
"X X \n",
"X X O \n",
"on move: X\n",
"O O \n",
"X X O \n",
"X X O \n",
"Episode 230, Total Reward: -1\n",
"Average Reward: 0.26521739130434785\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" O X \n",
" \n",
" \n",
"on move: O\n",
" O X \n",
" \n",
" X \n",
"on move: X\n",
" O X \n",
" O \n",
" X \n",
"on move: O\n",
" O X \n",
"X O \n",
" X \n",
"on move: X\n",
"O O X \n",
"X O \n",
" X \n",
"on move: O\n",
"O O X \n",
"X O \n",
" X X \n",
"on move: X\n",
"O O X \n",
"X O O \n",
" X X \n",
"on move: O\n",
"O O X \n",
"X O O \n",
"X X X \n",
"Episode 231, Total Reward: 1\n",
"Average Reward: 0.2683982683982684\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
" O \n",
"on move: O\n",
" X \n",
" X \n",
" O \n",
"on move: X\n",
" X \n",
" X \n",
"O O \n",
"on move: O\n",
" X \n",
" X \n",
"O O X \n",
"on move: X\n",
" X \n",
" X O \n",
"O O X \n",
"on move: O\n",
" X \n",
"X X O \n",
"O O X \n",
"on move: X\n",
" O X \n",
"X X O \n",
"O O X \n",
"on move: O\n",
"X O X \n",
"X X O \n",
"O O X \n",
"Episode 232, Total Reward: 1\n",
"Average Reward: 0.27155172413793105\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
" O \n",
"on move: O\n",
" X \n",
" \n",
"X O \n",
"on move: X\n",
" O X \n",
" \n",
"X O \n",
"on move: O\n",
" O X \n",
" X \n",
"X O \n",
"Episode 233, Total Reward: 1\n",
"Average Reward: 0.27467811158798283\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
"O \n",
" \n",
"on move: O\n",
"X X \n",
"O \n",
" \n",
"on move: X\n",
"X X \n",
"O \n",
" O \n",
"on move: O\n",
"X X \n",
"O X \n",
" O \n",
"on move: X\n",
"X X \n",
"O X \n",
"O O \n",
"on move: O\n",
"X X X \n",
"O X \n",
"O O \n",
"Episode 234, Total Reward: 1\n",
"Average Reward: 0.2777777777777778\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
"X X \n",
" \n",
"on move: X\n",
" O O \n",
"X X \n",
" \n",
"on move: O\n",
" O O \n",
"X X \n",
" X \n",
"on move: X\n",
" O O \n",
"X X \n",
"O X \n",
"on move: O\n",
" O O \n",
"X X \n",
"O X X \n",
"on move: X\n",
" O O \n",
"X X O \n",
"O X X \n",
"on move: O\n",
"X O O \n",
"X X O \n",
"O X X \n",
"Episode 235, Total Reward: 1\n",
"Average Reward: 0.28085106382978725\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
"O \n",
" \n",
" X \n",
"on move: O\n",
"O \n",
"X \n",
" X \n",
"on move: X\n",
"O \n",
"X O \n",
" X \n",
"on move: O\n",
"O \n",
"X O X \n",
" X \n",
"on move: X\n",
"O O \n",
"X O X \n",
" X \n",
"on move: O\n",
"O O X \n",
"X O X \n",
" X \n",
"on move: X\n",
"O O X \n",
"X O X \n",
" X O \n",
"Episode 236, Total Reward: -1\n",
"Average Reward: 0.2754237288135593\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
"O \n",
"X \n",
" \n",
"on move: O\n",
"O \n",
"X X \n",
" \n",
"on move: X\n",
"O O \n",
"X X \n",
" \n",
"on move: O\n",
"O X O \n",
"X X \n",
" \n",
"on move: X\n",
"O X O \n",
"X X \n",
"O \n",
"on move: O\n",
"O X O \n",
"X X \n",
"O X \n",
"on move: X\n",
"O X O \n",
"X X \n",
"O X O \n",
"on move: O\n",
"O X O \n",
"X X X \n",
"O X O \n",
"Episode 237, Total Reward: 1\n",
"Average Reward: 0.27848101265822783\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
" O \n",
"X \n",
" X \n",
"on move: X\n",
"O O \n",
"X \n",
" X \n",
"on move: O\n",
"O O \n",
"X X \n",
" X \n",
"on move: X\n",
"O O \n",
"X X \n",
"O X \n",
"on move: O\n",
"O O \n",
"X X \n",
"O X X \n",
"on move: X\n",
"O O \n",
"X O X \n",
"O X X \n",
"Episode 238, Total Reward: -1\n",
"Average Reward: 0.27310924369747897\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
" X \n",
"X \n",
"on move: X\n",
" O \n",
"O X \n",
"X \n",
"on move: O\n",
" O \n",
"O X X \n",
"X \n",
"on move: X\n",
" O O \n",
"O X X \n",
"X \n",
"on move: O\n",
"X O O \n",
"O X X \n",
"X \n",
"on move: X\n",
"X O O \n",
"O X X \n",
"X O \n",
"on move: O\n",
"X O O \n",
"O X X \n",
"X O X \n",
"Episode 239, Total Reward: 1\n",
"Average Reward: 0.27615062761506276\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
"O \n",
"on move: O\n",
" X X \n",
" \n",
"O \n",
"on move: X\n",
" X X \n",
" O \n",
"O \n",
"on move: O\n",
" X X \n",
"X O \n",
"O \n",
"on move: X\n",
"O X X \n",
"X O \n",
"O \n",
"on move: O\n",
"O X X \n",
"X O \n",
"O X \n",
"on move: X\n",
"O X X \n",
"X O \n",
"O O X \n",
"on move: O\n",
"O X X \n",
"X O X \n",
"O O X \n",
"Episode 240, Total Reward: 1\n",
"Average Reward: 0.2791666666666667\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" O \n",
"X \n",
"on move: O\n",
" X \n",
" O \n",
"X \n",
"on move: X\n",
"O X \n",
" O \n",
"X \n",
"on move: O\n",
"O X \n",
" O X \n",
"X \n",
"on move: X\n",
"O X \n",
"O O X \n",
"X \n",
"on move: O\n",
"O X \n",
"O O X \n",
"X X \n",
"on move: X\n",
"O X O \n",
"O O X \n",
"X X \n",
"on move: O\n",
"O X O \n",
"O O X \n",
"X X X \n",
"Episode 241, Total Reward: 1\n",
"Average Reward: 0.2821576763485477\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" \n",
" O X \n",
"on move: O\n",
" \n",
" \n",
"X O X \n",
"on move: X\n",
" \n",
" O \n",
"X O X \n",
"on move: O\n",
" X \n",
" O \n",
"X O X \n",
"on move: X\n",
" X \n",
" O O \n",
"X O X \n",
"on move: O\n",
" X X \n",
" O O \n",
"X O X \n",
"on move: X\n",
" X X \n",
"O O O \n",
"X O X \n",
"Episode 242, Total Reward: -1\n",
"Average Reward: 0.2768595041322314\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
"O X \n",
" \n",
"on move: O\n",
"X \n",
"O X \n",
" \n",
"on move: X\n",
"X \n",
"O O X \n",
" \n",
"on move: O\n",
"X \n",
"O O X \n",
" X \n",
"on move: X\n",
"X O \n",
"O O X \n",
" X \n",
"on move: O\n",
"X O \n",
"O O X \n",
" X X \n",
"on move: X\n",
"X O \n",
"O O X \n",
"O X X \n",
"Episode 243, Total Reward: -1\n",
"Average Reward: 0.2716049382716049\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" O \n",
" \n",
"on move: O\n",
"X \n",
" O \n",
"X \n",
"on move: X\n",
"X \n",
"O O \n",
"X \n",
"on move: O\n",
"X X \n",
"O O \n",
"X \n",
"on move: X\n",
"X X \n",
"O O \n",
"X O \n",
"on move: O\n",
"X X \n",
"O X O \n",
"X O \n",
"on move: X\n",
"X X \n",
"O X O \n",
"X O O \n",
"on move: O\n",
"X X X \n",
"O X O \n",
"X O O \n",
"Episode 244, Total Reward: 1\n",
"Average Reward: 0.27459016393442626\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
"O \n",
" \n",
" X \n",
"on move: O\n",
"O \n",
" X \n",
" X \n",
"on move: X\n",
"O \n",
" X O \n",
" X \n",
"on move: O\n",
"O X \n",
" X O \n",
" X \n",
"on move: X\n",
"O X \n",
"O X O \n",
" X \n",
"on move: O\n",
"O X X \n",
"O X O \n",
" X \n",
"on move: X\n",
"O X X \n",
"O X O \n",
"O X \n",
"Episode 245, Total Reward: -1\n",
"Average Reward: 0.2693877551020408\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
" O \n",
"on move: O\n",
" \n",
" X \n",
"X O \n",
"on move: X\n",
" \n",
" X \n",
"X O O \n",
"on move: O\n",
" X \n",
" X \n",
"X O O \n",
"on move: X\n",
" X \n",
" X O \n",
"X O O \n",
"on move: O\n",
" X X \n",
" X O \n",
"X O O \n",
"Episode 246, Total Reward: 1\n",
"Average Reward: 0.27235772357723576\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" \n",
" O \n",
"on move: O\n",
"X X \n",
" \n",
" O \n",
"on move: X\n",
"X X \n",
" O \n",
" O \n",
"on move: O\n",
"X X \n",
"X O \n",
" O \n",
"on move: X\n",
"X X \n",
"X O \n",
"O O \n",
"on move: O\n",
"X X \n",
"X O X \n",
"O O \n",
"on move: X\n",
"X X \n",
"X O X \n",
"O O O \n",
"Episode 247, Total Reward: -1\n",
"Average Reward: 0.26720647773279355\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
"O X \n",
" \n",
"on move: O\n",
" X \n",
"O X \n",
" \n",
"on move: X\n",
" X \n",
"O X \n",
"O \n",
"on move: O\n",
"X X \n",
"O X \n",
"O \n",
"on move: X\n",
"X X \n",
"O O X \n",
"O \n",
"on move: O\n",
"X X \n",
"O O X \n",
"O X \n",
"on move: X\n",
"X X \n",
"O O X \n",
"O X O \n",
"on move: O\n",
"X X X \n",
"O O X \n",
"O X O \n",
"Episode 248, Total Reward: 1\n",
"Average Reward: 0.2701612903225806\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
" X X \n",
" \n",
"on move: X\n",
"O O \n",
" X X \n",
" \n",
"on move: O\n",
"O O \n",
"X X X \n",
" \n",
"Episode 249, Total Reward: 1\n",
"Average Reward: 0.27309236947791166\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" \n",
" X O \n",
"on move: O\n",
" \n",
" \n",
"X X O \n",
"on move: X\n",
" O \n",
" \n",
"X X O \n",
"on move: O\n",
" O \n",
" X \n",
"X X O \n",
"on move: X\n",
" O \n",
" X O \n",
"X X O \n",
"Episode 250, Total Reward: -1\n",
"Average Reward: 0.268\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
"O \n",
"X \n",
"on move: O\n",
" X \n",
"O \n",
"X \n",
"on move: X\n",
" X \n",
"O O \n",
"X \n",
"on move: O\n",
"X X \n",
"O O \n",
"X \n",
"on move: X\n",
"X X \n",
"O O \n",
"X O \n",
"on move: O\n",
"X X \n",
"O O \n",
"X X O \n",
"on move: X\n",
"X X O \n",
"O O \n",
"X X O \n",
"on move: O\n",
"X X O \n",
"O O X \n",
"X X O \n",
"Episode 251, Total Reward: 0\n",
"Average Reward: 0.26693227091633465\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" \n",
"X O \n",
"on move: O\n",
" \n",
" \n",
"X X O \n",
"on move: X\n",
" O \n",
" \n",
"X X O \n",
"on move: O\n",
" O \n",
"X \n",
"X X O \n",
"on move: X\n",
" O \n",
"X O \n",
"X X O \n",
"on move: O\n",
"X O \n",
"X O \n",
"X X O \n",
"Episode 252, Total Reward: 1\n",
"Average Reward: 0.2698412698412698\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
" O \n",
"on move: O\n",
"X X \n",
" \n",
" O \n",
"on move: X\n",
"X X \n",
" \n",
"O O \n",
"on move: O\n",
"X X \n",
" \n",
"O O X \n",
"on move: X\n",
"X O X \n",
" \n",
"O O X \n",
"on move: O\n",
"X O X \n",
" X \n",
"O O X \n",
"Episode 253, Total Reward: 1\n",
"Average Reward: 0.2727272727272727\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
" O \n",
"on move: O\n",
" X X \n",
" \n",
" O \n",
"on move: X\n",
" X X \n",
"O \n",
" O \n",
"on move: O\n",
"X X X \n",
"O \n",
" O \n",
"Episode 254, Total Reward: 1\n",
"Average Reward: 0.2755905511811024\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
"X O \n",
" \n",
" X \n",
"on move: X\n",
"X O \n",
" O \n",
" X \n",
"on move: O\n",
"X O \n",
" O X \n",
" X \n",
"on move: X\n",
"X O O \n",
" O X \n",
" X \n",
"on move: O\n",
"X O O \n",
"X O X \n",
" X \n",
"on move: X\n",
"X O O \n",
"X O X \n",
"O X \n",
"Episode 255, Total Reward: -1\n",
"Average Reward: 0.27058823529411763\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" O \n",
" \n",
"on move: O\n",
" X X \n",
" O \n",
" \n",
"on move: X\n",
"O X X \n",
" O \n",
" \n",
"on move: O\n",
"O X X \n",
" O \n",
"X \n",
"on move: X\n",
"O X X \n",
" O \n",
"X O \n",
"on move: O\n",
"O X X \n",
"X O \n",
"X O \n",
"on move: X\n",
"O X X \n",
"X O \n",
"X O O \n",
"on move: O\n",
"O X X \n",
"X X O \n",
"X O O \n",
"Episode 256, Total Reward: 1\n",
"Average Reward: 0.2734375\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" O \n",
" \n",
"on move: O\n",
"X X \n",
" O \n",
" \n",
"on move: X\n",
"X X \n",
" O \n",
" O \n",
"on move: O\n",
"X X \n",
" O \n",
" X O \n",
"on move: X\n",
"X X \n",
" O O \n",
" X O \n",
"on move: O\n",
"X X \n",
"X O O \n",
" X O \n",
"on move: X\n",
"X O X \n",
"X O O \n",
" X O \n",
"on move: O\n",
"X O X \n",
"X O O \n",
"X X O \n",
"Episode 257, Total Reward: 1\n",
"Average Reward: 0.27626459143968873\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" \n",
" X O \n",
"on move: O\n",
" \n",
" X \n",
" X O \n",
"on move: X\n",
" O \n",
" X \n",
" X O \n",
"on move: O\n",
" O \n",
"X X \n",
" X O \n",
"on move: X\n",
" O \n",
"X X O \n",
" X O \n",
"on move: O\n",
"X O \n",
"X X O \n",
" X O \n",
"on move: X\n",
"X O \n",
"X X O \n",
"O X O \n",
"on move: O\n",
"X O X \n",
"X X O \n",
"O X O \n",
"Episode 258, Total Reward: 0\n",
"Average Reward: 0.2751937984496124\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" O \n",
" \n",
"on move: O\n",
"X X \n",
" O \n",
" \n",
"on move: X\n",
"X X \n",
" O \n",
" O \n",
"on move: O\n",
"X X \n",
"X O \n",
" O \n",
"on move: X\n",
"X X \n",
"X O \n",
"O O \n",
"on move: O\n",
"X X X \n",
"X O \n",
"O O \n",
"Episode 259, Total Reward: 1\n",
"Average Reward: 0.277992277992278\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
" O \n",
"on move: O\n",
" X X \n",
" \n",
" O \n",
"on move: X\n",
" X X \n",
" O \n",
" O \n",
"on move: O\n",
"X X X \n",
" O \n",
" O \n",
"Episode 260, Total Reward: 1\n",
"Average Reward: 0.28076923076923077\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
"O X \n",
" \n",
" \n",
"on move: O\n",
"O X \n",
"X \n",
" \n",
"on move: X\n",
"O O X \n",
"X \n",
" \n",
"on move: O\n",
"O O X \n",
"X \n",
"X \n",
"on move: X\n",
"O O X \n",
"X O \n",
"X \n",
"on move: O\n",
"O O X \n",
"X O \n",
"X X \n",
"on move: X\n",
"O O X \n",
"X O O \n",
"X X \n",
"on move: O\n",
"O O X \n",
"X O O \n",
"X X X \n",
"Episode 261, Total Reward: 1\n",
"Average Reward: 0.2835249042145594\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" O \n",
" \n",
"on move: O\n",
"X X \n",
" O \n",
" \n",
"on move: X\n",
"X X \n",
"O O \n",
" \n",
"on move: O\n",
"X X \n",
"O O \n",
"X \n",
"on move: X\n",
"X X \n",
"O O \n",
"X O \n",
"on move: O\n",
"X X \n",
"O O \n",
"X O X \n",
"on move: X\n",
"X O X \n",
"O O \n",
"X O X \n",
"on move: O\n",
"X O X \n",
"O X O \n",
"X O X \n",
"Episode 262, Total Reward: 1\n",
"Average Reward: 0.2862595419847328\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
" O \n",
"on move: O\n",
" \n",
"X X \n",
" O \n",
"on move: X\n",
"O \n",
"X X \n",
" O \n",
"on move: O\n",
"O X \n",
"X X \n",
" O \n",
"on move: X\n",
"O X \n",
"X X O \n",
" O \n",
"on move: O\n",
"O X \n",
"X X O \n",
"X O \n",
"on move: X\n",
"O X O \n",
"X X O \n",
"X O \n",
"on move: O\n",
"O X O \n",
"X X O \n",
"X O X \n",
"Episode 263, Total Reward: 0\n",
"Average Reward: 0.28517110266159695\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" \n",
"X O \n",
"on move: O\n",
" X \n",
" \n",
"X O \n",
"on move: X\n",
" X \n",
"O \n",
"X O \n",
"on move: O\n",
" X \n",
"O \n",
"X O X \n",
"on move: X\n",
"O X \n",
"O \n",
"X O X \n",
"on move: O\n",
"O X \n",
"O X \n",
"X O X \n",
"Episode 264, Total Reward: 1\n",
"Average Reward: 0.2878787878787879\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
"O \n",
" \n",
"on move: O\n",
"X X \n",
"O \n",
" \n",
"on move: X\n",
"X O X \n",
"O \n",
" \n",
"on move: O\n",
"X O X \n",
"O \n",
" X \n",
"on move: X\n",
"X O X \n",
"O O \n",
" X \n",
"on move: O\n",
"X O X \n",
"O O \n",
" X X \n",
"on move: X\n",
"X O X \n",
"O O \n",
"O X X \n",
"on move: O\n",
"X O X \n",
"O O X \n",
"O X X \n",
"Episode 265, Total Reward: 1\n",
"Average Reward: 0.29056603773584905\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
"O \n",
" X \n",
" \n",
"on move: O\n",
"O \n",
" X \n",
" X \n",
"on move: X\n",
"O \n",
" X O \n",
" X \n",
"on move: O\n",
"O X \n",
" X O \n",
" X \n",
"on move: X\n",
"O X O \n",
" X O \n",
" X \n",
"on move: O\n",
"O X O \n",
" X O \n",
" X X \n",
"Episode 266, Total Reward: 1\n",
"Average Reward: 0.2932330827067669\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
"O \n",
" X \n",
"on move: O\n",
" X \n",
"O \n",
" X \n",
"on move: X\n",
" X \n",
"O \n",
"O X \n",
"on move: O\n",
" X \n",
"O X \n",
"O X \n",
"on move: X\n",
"O X \n",
"O X \n",
"O X \n",
"Episode 267, Total Reward: -1\n",
"Average Reward: 0.2883895131086142\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
"O \n",
" \n",
"X \n",
"on move: O\n",
"O \n",
" \n",
"X X \n",
"on move: X\n",
"O O \n",
" \n",
"X X \n",
"on move: O\n",
"O O \n",
" X \n",
"X X \n",
"on move: X\n",
"O O O \n",
" X \n",
"X X \n",
"Episode 268, Total Reward: -1\n",
"Average Reward: 0.2835820895522388\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
" X \n",
" X \n",
"on move: X\n",
" O O \n",
" X \n",
" X \n",
"on move: O\n",
" O O \n",
" X \n",
" X X \n",
"on move: X\n",
"O O O \n",
" X \n",
" X X \n",
"Episode 269, Total Reward: -1\n",
"Average Reward: 0.2788104089219331\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" \n",
"X O \n",
"on move: O\n",
" \n",
" X \n",
"X O \n",
"on move: X\n",
" O \n",
" X \n",
"X O \n",
"on move: O\n",
" X O \n",
" X \n",
"X O \n",
"on move: X\n",
" X O \n",
" X O \n",
"X O \n",
"on move: O\n",
" X O \n",
" X O \n",
"X O X \n",
"on move: X\n",
"O X O \n",
" X O \n",
"X O X \n",
"on move: O\n",
"O X O \n",
"X X O \n",
"X O X \n",
"Episode 270, Total Reward: 0\n",
"Average Reward: 0.2777777777777778\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X O \n",
" \n",
" \n",
"on move: O\n",
" X O \n",
"X \n",
" \n",
"on move: X\n",
" X O \n",
"X O \n",
" \n",
"on move: O\n",
" X O \n",
"X O \n",
" X \n",
"on move: X\n",
" X O \n",
"X O \n",
"O X \n",
"Episode 271, Total Reward: -1\n",
"Average Reward: 0.2730627306273063\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
"O \n",
"on move: O\n",
" X \n",
"X \n",
"O \n",
"on move: X\n",
" X \n",
"X \n",
"O O \n",
"on move: O\n",
"X X \n",
"X \n",
"O O \n",
"on move: X\n",
"X X \n",
"X O \n",
"O O \n",
"on move: O\n",
"X X \n",
"X O X \n",
"O O \n",
"on move: X\n",
"X X \n",
"X O X \n",
"O O O \n",
"Episode 272, Total Reward: -1\n",
"Average Reward: 0.26838235294117646\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" \n",
"O \n",
"on move: O\n",
"X X \n",
" \n",
"O \n",
"on move: X\n",
"X X \n",
"O \n",
"O \n",
"on move: O\n",
"X X X \n",
"O \n",
"O \n",
"Episode 273, Total Reward: 1\n",
"Average Reward: 0.27106227106227104\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X O \n",
" \n",
"on move: O\n",
" X \n",
"X O \n",
" \n",
"on move: X\n",
" X \n",
"X O \n",
"O \n",
"on move: O\n",
" X X \n",
"X O \n",
"O \n",
"on move: X\n",
"O X X \n",
"X O \n",
"O \n",
"on move: O\n",
"O X X \n",
"X O \n",
"O X \n",
"on move: X\n",
"O X X \n",
"X O \n",
"O O X \n",
"on move: O\n",
"O X X \n",
"X O X \n",
"O O X \n",
"Episode 274, Total Reward: 1\n",
"Average Reward: 0.2737226277372263\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" \n",
"X O \n",
"on move: O\n",
"X \n",
" \n",
"X O \n",
"on move: X\n",
"X \n",
"O \n",
"X O \n",
"on move: O\n",
"X X \n",
"O \n",
"X O \n",
"on move: X\n",
"X X \n",
"O \n",
"X O O \n",
"on move: O\n",
"X X \n",
"O X \n",
"X O O \n",
"on move: X\n",
"X X O \n",
"O X \n",
"X O O \n",
"on move: O\n",
"X X O \n",
"O X X \n",
"X O O \n",
"Episode 275, Total Reward: 0\n",
"Average Reward: 0.2727272727272727\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
"O \n",
"on move: O\n",
"X \n",
"X \n",
"O \n",
"on move: X\n",
"X \n",
"X \n",
"O O \n",
"on move: O\n",
"X \n",
"X X \n",
"O O \n",
"on move: X\n",
"X O \n",
"X X \n",
"O O \n",
"on move: O\n",
"X O \n",
"X X X \n",
"O O \n",
"Episode 276, Total Reward: 1\n",
"Average Reward: 0.2753623188405797\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X O \n",
" \n",
"on move: O\n",
"X \n",
"X O \n",
" \n",
"on move: X\n",
"X O \n",
"X O \n",
" \n",
"on move: O\n",
"X O \n",
"X O \n",
" X \n",
"on move: X\n",
"X O O \n",
"X O \n",
" X \n",
"on move: O\n",
"X O O \n",
"X O X \n",
" X \n",
"on move: X\n",
"X O O \n",
"X O X \n",
"O X \n",
"Episode 277, Total Reward: -1\n",
"Average Reward: 0.27075812274368233\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
"X X \n",
" \n",
"on move: X\n",
" O \n",
"X X \n",
"O \n",
"on move: O\n",
" O \n",
"X X \n",
"O X \n",
"on move: X\n",
" O \n",
"X X O \n",
"O X \n",
"on move: O\n",
"X O \n",
"X X O \n",
"O X \n",
"Episode 278, Total Reward: 1\n",
"Average Reward: 0.2733812949640288\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
"O X \n",
" \n",
" \n",
"on move: O\n",
"O X \n",
" X \n",
" \n",
"on move: X\n",
"O X \n",
" X \n",
"O \n",
"on move: O\n",
"O X \n",
" X \n",
"O X \n",
"Episode 279, Total Reward: 1\n",
"Average Reward: 0.27598566308243727\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
"X X \n",
" \n",
"on move: X\n",
"O O \n",
"X X \n",
" \n",
"on move: O\n",
"O O \n",
"X X \n",
" X \n",
"on move: X\n",
"O O \n",
"X X \n",
" O X \n",
"on move: O\n",
"O O \n",
"X X X \n",
" O X \n",
"Episode 280, Total Reward: 1\n",
"Average Reward: 0.2785714285714286\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
" X \n",
" O \n",
" X \n",
"on move: X\n",
" X \n",
" O \n",
"O X \n",
"on move: O\n",
" X X \n",
" O \n",
"O X \n",
"on move: X\n",
"O X X \n",
" O \n",
"O X \n",
"on move: O\n",
"O X X \n",
" O \n",
"O X X \n",
"on move: X\n",
"O X X \n",
"O O \n",
"O X X \n",
"Episode 281, Total Reward: -1\n",
"Average Reward: 0.27402135231316727\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" \n",
" O \n",
"on move: O\n",
"X X \n",
" \n",
" O \n",
"on move: X\n",
"X X \n",
" \n",
" O O \n",
"on move: O\n",
"X X \n",
" X \n",
" O O \n",
"on move: X\n",
"X X \n",
" O X \n",
" O O \n",
"on move: O\n",
"X X \n",
"X O X \n",
" O O \n",
"on move: X\n",
"X O X \n",
"X O X \n",
" O O \n",
"Episode 282, Total Reward: -1\n",
"Average Reward: 0.2695035460992908\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X O \n",
" \n",
" \n",
"on move: O\n",
"X O \n",
" \n",
" X \n",
"on move: X\n",
"X O \n",
"O \n",
" X \n",
"on move: O\n",
"X O \n",
"O X \n",
" X \n",
"on move: X\n",
"X O \n",
"O X \n",
"O X \n",
"on move: O\n",
"X O \n",
"O X X \n",
"O X \n",
"Episode 283, Total Reward: 1\n",
"Average Reward: 0.27208480565371024\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
"X O \n",
" \n",
" X \n",
"on move: X\n",
"X O O \n",
" \n",
" X \n",
"on move: O\n",
"X O O \n",
"X \n",
" X \n",
"on move: X\n",
"X O O \n",
"X \n",
" X O \n",
"on move: O\n",
"X O O \n",
"X X \n",
" X O \n",
"on move: X\n",
"X O O \n",
"X X O \n",
" X O \n",
"Episode 284, Total Reward: -1\n",
"Average Reward: 0.2676056338028169\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
"O \n",
" \n",
" X \n",
"on move: O\n",
"O \n",
" X \n",
" X \n",
"on move: X\n",
"O \n",
" X \n",
"O X \n",
"on move: O\n",
"O \n",
"X X \n",
"O X \n",
"on move: X\n",
"O O \n",
"X X \n",
"O X \n",
"on move: O\n",
"O O \n",
"X X \n",
"O X X \n",
"on move: X\n",
"O O \n",
"X O X \n",
"O X X \n",
"on move: O\n",
"O O X \n",
"X O X \n",
"O X X \n",
"Episode 285, Total Reward: 1\n",
"Average Reward: 0.27017543859649124\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
"O \n",
" \n",
"X \n",
"on move: O\n",
"O \n",
" X \n",
"X \n",
"on move: X\n",
"O \n",
" X \n",
"X O \n",
"on move: O\n",
"O \n",
"X X \n",
"X O \n",
"on move: X\n",
"O O \n",
"X X \n",
"X O \n",
"on move: O\n",
"O O \n",
"X X \n",
"X O X \n",
"on move: X\n",
"O O \n",
"X O X \n",
"X O X \n",
"on move: O\n",
"O X O \n",
"X O X \n",
"X O X \n",
"Episode 286, Total Reward: 0\n",
"Average Reward: 0.2692307692307692\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" O \n",
"X \n",
"on move: O\n",
" \n",
" O \n",
"X X \n",
"on move: X\n",
" \n",
" O O \n",
"X X \n",
"on move: O\n",
" \n",
" O O \n",
"X X X \n",
"Episode 287, Total Reward: 1\n",
"Average Reward: 0.27177700348432055\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" \n",
"O X \n",
"on move: O\n",
" \n",
" X \n",
"O X \n",
"on move: X\n",
" \n",
" X O \n",
"O X \n",
"on move: O\n",
" \n",
"X X O \n",
"O X \n",
"on move: X\n",
" O \n",
"X X O \n",
"O X \n",
"on move: O\n",
" O \n",
"X X O \n",
"O X X \n",
"on move: X\n",
"O O \n",
"X X O \n",
"O X X \n",
"on move: O\n",
"O O X \n",
"X X O \n",
"O X X \n",
"Episode 288, Total Reward: 0\n",
"Average Reward: 0.2708333333333333\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" \n",
" O X \n",
"on move: O\n",
" X \n",
" \n",
" O X \n",
"on move: X\n",
" X \n",
"O \n",
" O X \n",
"on move: O\n",
" X \n",
"O \n",
"X O X \n",
"on move: X\n",
" X \n",
"O O \n",
"X O X \n",
"on move: O\n",
"X X \n",
"O O \n",
"X O X \n",
"on move: X\n",
"X X \n",
"O O O \n",
"X O X \n",
"Episode 289, Total Reward: -1\n",
"Average Reward: 0.2664359861591695\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
"O \n",
"on move: O\n",
" X \n",
" X \n",
"O \n",
"on move: X\n",
" X \n",
" X \n",
"O O \n",
"on move: O\n",
"X X \n",
" X \n",
"O O \n",
"on move: X\n",
"X X \n",
"O X \n",
"O O \n",
"on move: O\n",
"X X \n",
"O X X \n",
"O O \n",
"on move: X\n",
"X X O \n",
"O X X \n",
"O O \n",
"on move: O\n",
"X X O \n",
"O X X \n",
"O O X \n",
"Episode 290, Total Reward: 1\n",
"Average Reward: 0.2689655172413793\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
" O \n",
"on move: O\n",
" X \n",
" X \n",
" O \n",
"on move: X\n",
" X \n",
"O X \n",
" O \n",
"on move: O\n",
"X X \n",
"O X \n",
" O \n",
"on move: X\n",
"X O X \n",
"O X \n",
" O \n",
"on move: O\n",
"X O X \n",
"O X X \n",
" O \n",
"on move: X\n",
"X O X \n",
"O X X \n",
" O O \n",
"on move: O\n",
"X O X \n",
"O X X \n",
"X O O \n",
"Episode 291, Total Reward: 1\n",
"Average Reward: 0.27147766323024053\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" O X \n",
" \n",
" \n",
"on move: O\n",
"X O X \n",
" \n",
" \n",
"on move: X\n",
"X O X \n",
"O \n",
" \n",
"on move: O\n",
"X O X \n",
"O \n",
"X \n",
"on move: X\n",
"X O X \n",
"O \n",
"X O \n",
"on move: O\n",
"X O X \n",
"O X \n",
"X O \n",
"on move: X\n",
"X O X \n",
"O X \n",
"X O O \n",
"on move: O\n",
"X O X \n",
"O X X \n",
"X O O \n",
"Episode 292, Total Reward: 1\n",
"Average Reward: 0.273972602739726\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
"O \n",
"on move: O\n",
" \n",
" X X \n",
"O \n",
"on move: X\n",
" \n",
" X X \n",
"O O \n",
"on move: O\n",
" X \n",
" X X \n",
"O O \n",
"on move: X\n",
" O X \n",
" X X \n",
"O O \n",
"on move: O\n",
"X O X \n",
" X X \n",
"O O \n",
"on move: X\n",
"X O X \n",
" X X \n",
"O O O \n",
"Episode 293, Total Reward: -1\n",
"Average Reward: 0.2696245733788396\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
" X \n",
" O \n",
" X \n",
"on move: X\n",
" X \n",
"O O \n",
" X \n",
"on move: O\n",
" X X \n",
"O O \n",
" X \n",
"on move: X\n",
" X X \n",
"O O \n",
" O X \n",
"on move: O\n",
"X X X \n",
"O O \n",
" O X \n",
"Episode 294, Total Reward: 1\n",
"Average Reward: 0.272108843537415\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X O \n",
" \n",
"on move: O\n",
" X \n",
" X O \n",
" \n",
"on move: X\n",
" O X \n",
" X O \n",
" \n",
"on move: O\n",
" O X \n",
" X O \n",
"X \n",
"Episode 295, Total Reward: 1\n",
"Average Reward: 0.2745762711864407\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
"O \n",
" X \n",
" \n",
"on move: O\n",
"O X \n",
" X \n",
" \n",
"on move: X\n",
"O X \n",
" O X \n",
" \n",
"on move: O\n",
"O X \n",
" O X \n",
" X \n",
"on move: X\n",
"O X O \n",
" O X \n",
" X \n",
"on move: O\n",
"O X O \n",
" O X \n",
" X X \n",
"on move: X\n",
"O X O \n",
"O O X \n",
" X X \n",
"on move: O\n",
"O X O \n",
"O O X \n",
"X X X \n",
"Episode 296, Total Reward: 1\n",
"Average Reward: 0.27702702702702703\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
"O \n",
" X \n",
"on move: O\n",
" \n",
"O \n",
"X X \n",
"on move: X\n",
" O \n",
"O \n",
"X X \n",
"on move: O\n",
" O \n",
"O X \n",
"X X \n",
"on move: X\n",
" O \n",
"O X \n",
"X O X \n",
"on move: O\n",
" X O \n",
"O X \n",
"X O X \n",
"on move: X\n",
" X O \n",
"O X O \n",
"X O X \n",
"on move: O\n",
"X X O \n",
"O X O \n",
"X O X \n",
"Episode 297, Total Reward: 1\n",
"Average Reward: 0.27946127946127947\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" O \n",
" \n",
"on move: O\n",
"X \n",
" O \n",
" X \n",
"on move: X\n",
"X O \n",
" O \n",
" X \n",
"on move: O\n",
"X X O \n",
" O \n",
" X \n",
"on move: X\n",
"X X O \n",
" O \n",
" O X \n",
"on move: O\n",
"X X O \n",
" X O \n",
" O X \n",
"Episode 298, Total Reward: 1\n",
"Average Reward: 0.28187919463087246\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
" O \n",
"on move: O\n",
" \n",
" X X \n",
" O \n",
"on move: X\n",
" O \n",
" X X \n",
" O \n",
"on move: O\n",
" O \n",
"X X X \n",
" O \n",
"Episode 299, Total Reward: 1\n",
"Average Reward: 0.2842809364548495\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" O \n",
" \n",
"X \n",
"on move: O\n",
" O \n",
" X \n",
"X \n",
"on move: X\n",
"O O \n",
" X \n",
"X \n",
"on move: O\n",
"O O \n",
" X \n",
"X X \n",
"on move: X\n",
"O O \n",
" O X \n",
"X X \n",
"on move: O\n",
"O O \n",
"X O X \n",
"X X \n",
"on move: X\n",
"O O \n",
"X O X \n",
"X X O \n",
"Episode 300, Total Reward: -1\n",
"Average Reward: 0.28\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" O \n",
" \n",
"on move: O\n",
" X \n",
" O \n",
" X \n",
"on move: X\n",
"O X \n",
" O \n",
" X \n",
"on move: O\n",
"O X \n",
"X O \n",
" X \n",
"on move: X\n",
"O X O \n",
"X O \n",
" X \n",
"on move: O\n",
"O X O \n",
"X O \n",
"X X \n",
"on move: X\n",
"O X O \n",
"X O O \n",
"X X \n",
"on move: O\n",
"O X O \n",
"X O O \n",
"X X X \n",
"Episode 301, Total Reward: 1\n",
"Average Reward: 0.2823920265780731\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
"O \n",
" X \n",
"on move: O\n",
" X \n",
"O \n",
" X \n",
"on move: X\n",
"O X \n",
"O \n",
" X \n",
"on move: O\n",
"O X \n",
"O \n",
" X X \n",
"on move: X\n",
"O X \n",
"O O \n",
" X X \n",
"on move: O\n",
"O X X \n",
"O O \n",
" X X \n",
"on move: X\n",
"O X X \n",
"O O O \n",
" X X \n",
"Episode 302, Total Reward: -1\n",
"Average Reward: 0.2781456953642384\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" \n",
" X O \n",
"on move: O\n",
" \n",
" X \n",
" X O \n",
"on move: X\n",
" \n",
" X O \n",
" X O \n",
"on move: O\n",
" \n",
" X O \n",
"X X O \n",
"on move: X\n",
"O \n",
" X O \n",
"X X O \n",
"on move: O\n",
"O \n",
"X X O \n",
"X X O \n",
"on move: X\n",
"O O \n",
"X X O \n",
"X X O \n",
"on move: O\n",
"O O X \n",
"X X O \n",
"X X O \n",
"Episode 303, Total Reward: 1\n",
"Average Reward: 0.28052805280528054\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
"O X \n",
" \n",
" \n",
"on move: O\n",
"O X \n",
" X \n",
" \n",
"on move: X\n",
"O X \n",
" X \n",
" O \n",
"on move: O\n",
"O X X \n",
" X \n",
" O \n",
"on move: X\n",
"O X X \n",
"O X \n",
" O \n",
"on move: O\n",
"O X X \n",
"O X \n",
" O X \n",
"on move: X\n",
"O X X \n",
"O X O \n",
" O X \n",
"on move: O\n",
"O X X \n",
"O X O \n",
"X O X \n",
"Episode 304, Total Reward: 1\n",
"Average Reward: 0.28289473684210525\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" O \n",
" \n",
"on move: O\n",
" X \n",
" O \n",
" X \n",
"on move: X\n",
" X \n",
"O O \n",
" X \n",
"on move: O\n",
" X \n",
"O O \n",
" X X \n",
"on move: X\n",
" X \n",
"O O \n",
"O X X \n",
"on move: O\n",
" X \n",
"O O X \n",
"O X X \n",
"Episode 305, Total Reward: 1\n",
"Average Reward: 0.28524590163934427\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
"O \n",
" \n",
"on move: O\n",
"X \n",
"O X \n",
" \n",
"on move: X\n",
"X \n",
"O O X \n",
" \n",
"on move: O\n",
"X \n",
"O O X \n",
" X \n",
"on move: X\n",
"X \n",
"O O X \n",
"O X \n",
"on move: O\n",
"X X \n",
"O O X \n",
"O X \n",
"on move: X\n",
"X O X \n",
"O O X \n",
"O X \n",
"on move: O\n",
"X O X \n",
"O O X \n",
"O X X \n",
"Episode 306, Total Reward: 1\n",
"Average Reward: 0.2875816993464052\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
"O \n",
" \n",
"on move: O\n",
" X X \n",
"O \n",
" \n",
"on move: X\n",
" X X \n",
"O \n",
" O \n",
"on move: O\n",
" X X \n",
"O \n",
"X O \n",
"on move: X\n",
" X X \n",
"O O \n",
"X O \n",
"on move: O\n",
" X X \n",
"O O X \n",
"X O \n",
"on move: X\n",
" X X \n",
"O O X \n",
"X O O \n",
"on move: O\n",
"X X X \n",
"O O X \n",
"X O O \n",
"Episode 307, Total Reward: 1\n",
"Average Reward: 0.2899022801302932\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
"O \n",
" \n",
"on move: O\n",
"X X \n",
"O \n",
" \n",
"on move: X\n",
"X X \n",
"O O \n",
" \n",
"on move: O\n",
"X X \n",
"O O \n",
" X \n",
"on move: X\n",
"X X \n",
"O O \n",
" X O \n",
"on move: O\n",
"X X \n",
"O O \n",
"X X O \n",
"on move: X\n",
"X X \n",
"O O O \n",
"X X O \n",
"Episode 308, Total Reward: -1\n",
"Average Reward: 0.2857142857142857\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X O \n",
" \n",
"on move: O\n",
"X \n",
"X O \n",
" \n",
"on move: X\n",
"X O \n",
"X O \n",
" \n",
"on move: O\n",
"X O \n",
"X O \n",
" X \n",
"on move: X\n",
"X O O \n",
"X O \n",
" X \n",
"on move: O\n",
"X O O \n",
"X O \n",
"X X \n",
"Episode 309, Total Reward: 1\n",
"Average Reward: 0.28802588996763756\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
" O \n",
"X \n",
" X \n",
"on move: X\n",
" O \n",
"X O \n",
" X \n",
"on move: O\n",
" O \n",
"X X O \n",
" X \n",
"on move: X\n",
" O \n",
"X X O \n",
"O X \n",
"on move: O\n",
"X O \n",
"X X O \n",
"O X \n",
"Episode 310, Total Reward: 1\n",
"Average Reward: 0.2903225806451613\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" O \n",
" \n",
"X \n",
"on move: O\n",
" O \n",
" \n",
"X X \n",
"on move: X\n",
" O \n",
" O \n",
"X X \n",
"on move: O\n",
" O \n",
"X O \n",
"X X \n",
"on move: X\n",
" O O \n",
"X O \n",
"X X \n",
"on move: O\n",
" O O \n",
"X O \n",
"X X X \n",
"Episode 311, Total Reward: 1\n",
"Average Reward: 0.29260450160771706\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" \n",
" O \n",
"on move: O\n",
"X \n",
" \n",
"X O \n",
"on move: X\n",
"X \n",
" \n",
"X O O \n",
"on move: O\n",
"X X \n",
" \n",
"X O O \n",
"on move: X\n",
"X X \n",
" O \n",
"X O O \n",
"on move: O\n",
"X X \n",
"X O \n",
"X O O \n",
"Episode 312, Total Reward: 1\n",
"Average Reward: 0.2948717948717949\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
" O \n",
"on move: O\n",
" X \n",
" \n",
"X O \n",
"on move: X\n",
" X \n",
"O \n",
"X O \n",
"on move: O\n",
"X X \n",
"O \n",
"X O \n",
"on move: X\n",
"X O X \n",
"O \n",
"X O \n",
"on move: O\n",
"X O X \n",
"O \n",
"X X O \n",
"on move: X\n",
"X O X \n",
"O O \n",
"X X O \n",
"on move: O\n",
"X O X \n",
"O O X \n",
"X X O \n",
"Episode 313, Total Reward: 0\n",
"Average Reward: 0.2939297124600639\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" \n",
"O X \n",
"on move: O\n",
" X \n",
" \n",
"O X \n",
"on move: X\n",
"O X \n",
" \n",
"O X \n",
"on move: O\n",
"O X \n",
" \n",
"O X X \n",
"on move: X\n",
"O X O \n",
" \n",
"O X X \n",
"on move: O\n",
"O X O \n",
"X \n",
"O X X \n",
"on move: X\n",
"O X O \n",
"X O \n",
"O X X \n",
"Episode 314, Total Reward: -1\n",
"Average Reward: 0.2898089171974522\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
" \n",
" X O \n",
" X \n",
"on move: X\n",
"O \n",
" X O \n",
" X \n",
"on move: O\n",
"O \n",
"X X O \n",
" X \n",
"on move: X\n",
"O \n",
"X X O \n",
"O X \n",
"on move: O\n",
"O X \n",
"X X O \n",
"O X \n",
"on move: X\n",
"O O X \n",
"X X O \n",
"O X \n",
"on move: O\n",
"O O X \n",
"X X O \n",
"O X X \n",
"Episode 315, Total Reward: 0\n",
"Average Reward: 0.28888888888888886\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
"O \n",
" \n",
"X \n",
"on move: O\n",
"O \n",
" \n",
"X X \n",
"on move: X\n",
"O \n",
" O \n",
"X X \n",
"on move: O\n",
"O \n",
" O \n",
"X X X \n",
"Episode 316, Total Reward: 1\n",
"Average Reward: 0.2911392405063291\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" O \n",
" \n",
"on move: O\n",
" X \n",
"X O \n",
" \n",
"on move: X\n",
" X \n",
"X O \n",
" O \n",
"on move: O\n",
" X \n",
"X O \n",
"X O \n",
"on move: X\n",
"O X \n",
"X O \n",
"X O \n",
"on move: O\n",
"O X \n",
"X O \n",
"X X O \n",
"on move: X\n",
"O X O \n",
"X O \n",
"X X O \n",
"Episode 317, Total Reward: -1\n",
"Average Reward: 0.2870662460567823\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" \n",
" X O \n",
"on move: O\n",
" \n",
" X \n",
" X O \n",
"on move: X\n",
" \n",
"O X \n",
" X O \n",
"on move: O\n",
" X \n",
"O X \n",
" X O \n",
"on move: X\n",
" X O \n",
"O X \n",
" X O \n",
"on move: O\n",
" X O \n",
"O X X \n",
" X O \n",
"Episode 318, Total Reward: 1\n",
"Average Reward: 0.2893081761006289\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
" \n",
" O \n",
" X X \n",
"on move: X\n",
"O \n",
" O \n",
" X X \n",
"on move: O\n",
"O \n",
" O X \n",
" X X \n",
"on move: X\n",
"O \n",
" O X \n",
"O X X \n",
"on move: O\n",
"O X \n",
" O X \n",
"O X X \n",
"on move: X\n",
"O X \n",
"O O X \n",
"O X X \n",
"Episode 319, Total Reward: -1\n",
"Average Reward: 0.2852664576802508\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" O \n",
" \n",
"on move: O\n",
" X \n",
"X O \n",
" \n",
"on move: X\n",
" X \n",
"X O \n",
" O \n",
"on move: O\n",
" X \n",
"X O \n",
"X O \n",
"on move: X\n",
"O X \n",
"X O \n",
"X O \n",
"on move: O\n",
"O X X \n",
"X O \n",
"X O \n",
"on move: X\n",
"O X X \n",
"X O O \n",
"X O \n",
"Episode 320, Total Reward: -1\n",
"Average Reward: 0.28125\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X O \n",
" \n",
" \n",
"on move: O\n",
"X X O \n",
" \n",
" \n",
"on move: X\n",
"X X O \n",
" O \n",
" \n",
"on move: O\n",
"X X O \n",
"X O \n",
" \n",
"on move: X\n",
"X X O \n",
"X O O \n",
" \n",
"on move: O\n",
"X X O \n",
"X O O \n",
" X \n",
"on move: X\n",
"X X O \n",
"X O O \n",
"O X \n",
"Episode 321, Total Reward: -1\n",
"Average Reward: 0.2772585669781931\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
"O \n",
"on move: O\n",
" X \n",
" X \n",
"O \n",
"on move: X\n",
" X \n",
"O X \n",
"O \n",
"on move: O\n",
" X \n",
"O X \n",
"O X \n",
"on move: X\n",
" X \n",
"O X \n",
"O O X \n",
"on move: O\n",
" X \n",
"O X X \n",
"O O X \n",
"Episode 322, Total Reward: 1\n",
"Average Reward: 0.2795031055900621\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" O \n",
"X \n",
"on move: O\n",
" X \n",
" O \n",
"X \n",
"on move: X\n",
"O X \n",
" O \n",
"X \n",
"on move: O\n",
"O X \n",
"X O \n",
"X \n",
"on move: X\n",
"O X O \n",
"X O \n",
"X \n",
"on move: O\n",
"O X O \n",
"X X O \n",
"X \n",
"on move: X\n",
"O X O \n",
"X X O \n",
"X O \n",
"Episode 323, Total Reward: -1\n",
"Average Reward: 0.2755417956656347\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
"O X \n",
" \n",
" \n",
"on move: O\n",
"O X \n",
" \n",
" X \n",
"on move: X\n",
"O X \n",
" \n",
"O X \n",
"on move: O\n",
"O X X \n",
" \n",
"O X \n",
"on move: X\n",
"O X X \n",
" O \n",
"O X \n",
"on move: O\n",
"O X X \n",
" X O \n",
"O X \n",
"Episode 324, Total Reward: 1\n",
"Average Reward: 0.2777777777777778\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
" O X \n",
" \n",
" X \n",
"on move: X\n",
" O X \n",
"O \n",
" X \n",
"on move: O\n",
" O X \n",
"O \n",
" X X \n",
"on move: X\n",
" O X \n",
"O O \n",
" X X \n",
"on move: O\n",
" O X \n",
"O O X \n",
" X X \n",
"Episode 325, Total Reward: 1\n",
"Average Reward: 0.28\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X O \n",
" \n",
"on move: O\n",
" \n",
" X O \n",
" X \n",
"on move: X\n",
" \n",
" X O \n",
"O X \n",
"on move: O\n",
" X \n",
" X O \n",
"O X \n",
"on move: X\n",
" O X \n",
" X O \n",
"O X \n",
"on move: O\n",
" O X \n",
"X X O \n",
"O X \n",
"on move: X\n",
" O X \n",
"X X O \n",
"O X O \n",
"on move: O\n",
"X O X \n",
"X X O \n",
"O X O \n",
"Episode 326, Total Reward: 0\n",
"Average Reward: 0.2791411042944785\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" \n",
"X O \n",
"on move: O\n",
" \n",
" X \n",
"X O \n",
"on move: X\n",
" \n",
" X \n",
"X O O \n",
"on move: O\n",
" \n",
"X X \n",
"X O O \n",
"on move: X\n",
" O \n",
"X X \n",
"X O O \n",
"on move: O\n",
"X O \n",
"X X \n",
"X O O \n",
"Episode 327, Total Reward: 1\n",
"Average Reward: 0.28134556574923547\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
" \n",
" O \n",
" X X \n",
"on move: X\n",
" \n",
" O O \n",
" X X \n",
"on move: O\n",
" X \n",
" O O \n",
" X X \n",
"on move: X\n",
" X \n",
"O O O \n",
" X X \n",
"Episode 328, Total Reward: -1\n",
"Average Reward: 0.2774390243902439\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
" O \n",
" X \n",
" X \n",
"on move: X\n",
" O \n",
" X \n",
"O X \n",
"on move: O\n",
" O \n",
" X X \n",
"O X \n",
"on move: X\n",
" O O \n",
" X X \n",
"O X \n",
"on move: O\n",
" O O \n",
" X X \n",
"O X X \n",
"on move: X\n",
"O O O \n",
" X X \n",
"O X X \n",
"Episode 329, Total Reward: -1\n",
"Average Reward: 0.2735562310030395\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
" \n",
" X O \n",
" X \n",
"on move: X\n",
" \n",
"O X O \n",
" X \n",
"on move: O\n",
" \n",
"O X O \n",
" X X \n",
"on move: X\n",
" \n",
"O X O \n",
"O X X \n",
"on move: O\n",
"X \n",
"O X O \n",
"O X X \n",
"Episode 330, Total Reward: 1\n",
"Average Reward: 0.27575757575757576\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" O \n",
" \n",
"on move: O\n",
"X X \n",
" O \n",
" \n",
"on move: X\n",
"X X O \n",
" O \n",
" \n",
"on move: O\n",
"X X O \n",
" O X \n",
" \n",
"on move: X\n",
"X X O \n",
" O X \n",
"O \n",
"Episode 331, Total Reward: -1\n",
"Average Reward: 0.2719033232628399\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O X \n",
" X \n",
" \n",
"on move: X\n",
" O X \n",
" X \n",
"O \n",
"on move: O\n",
" O X \n",
" X X \n",
"O \n",
"on move: X\n",
"O O X \n",
" X X \n",
"O \n",
"on move: O\n",
"O O X \n",
" X X \n",
"O X \n",
"on move: X\n",
"O O X \n",
" X X \n",
"O X O \n",
"on move: O\n",
"O O X \n",
"X X X \n",
"O X O \n",
"Episode 332, Total Reward: 1\n",
"Average Reward: 0.2740963855421687\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" O \n",
" \n",
"on move: O\n",
" X X \n",
" O \n",
" \n",
"on move: X\n",
" X X \n",
" O \n",
"O \n",
"on move: O\n",
" X X \n",
" O \n",
"O X \n",
"on move: X\n",
" X X \n",
" O \n",
"O O X \n",
"on move: O\n",
"X X X \n",
" O \n",
"O O X \n",
"Episode 333, Total Reward: 1\n",
"Average Reward: 0.27627627627627627\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" O X \n",
" \n",
"on move: O\n",
"X \n",
" O X \n",
" \n",
"on move: X\n",
"X \n",
" O X \n",
" O \n",
"on move: O\n",
"X \n",
" O X \n",
" X O \n",
"on move: X\n",
"X O \n",
" O X \n",
" X O \n",
"on move: O\n",
"X O \n",
"X O X \n",
" X O \n",
"on move: X\n",
"X O \n",
"X O X \n",
"O X O \n",
"on move: O\n",
"X O X \n",
"X O X \n",
"O X O \n",
"Episode 334, Total Reward: 0\n",
"Average Reward: 0.2754491017964072\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
"X O \n",
" X \n",
" \n",
"on move: X\n",
"X O O \n",
" X \n",
" \n",
"on move: O\n",
"X O O \n",
" X \n",
"X \n",
"on move: X\n",
"X O O \n",
" O X \n",
"X \n",
"on move: O\n",
"X O O \n",
"X O X \n",
"X \n",
"Episode 335, Total Reward: 1\n",
"Average Reward: 0.27761194029850744\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
"O \n",
"on move: O\n",
" X X \n",
" \n",
"O \n",
"on move: X\n",
" X X \n",
" \n",
"O O \n",
"on move: O\n",
" X X \n",
" \n",
"O O X \n",
"on move: X\n",
" X X \n",
" O \n",
"O O X \n",
"on move: O\n",
"X X X \n",
" O \n",
"O O X \n",
"Episode 336, Total Reward: 1\n",
"Average Reward: 0.27976190476190477\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" \n",
"O \n",
"on move: O\n",
"X \n",
" X \n",
"O \n",
"on move: X\n",
"X O \n",
" X \n",
"O \n",
"on move: O\n",
"X O \n",
"X X \n",
"O \n",
"on move: X\n",
"X O \n",
"X X \n",
"O O \n",
"on move: O\n",
"X O \n",
"X X \n",
"O X O \n",
"on move: X\n",
"X O O \n",
"X X \n",
"O X O \n",
"on move: O\n",
"X O O \n",
"X X X \n",
"O X O \n",
"Episode 337, Total Reward: 1\n",
"Average Reward: 0.2818991097922849\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
"O \n",
"on move: O\n",
" X \n",
" X \n",
"O \n",
"on move: X\n",
" O X \n",
" X \n",
"O \n",
"on move: O\n",
"X O X \n",
" X \n",
"O \n",
"on move: X\n",
"X O X \n",
" X \n",
"O O \n",
"on move: O\n",
"X O X \n",
" X X \n",
"O O \n",
"on move: X\n",
"X O X \n",
" X X \n",
"O O O \n",
"Episode 338, Total Reward: -1\n",
"Average Reward: 0.2781065088757396\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
" \n",
" O \n",
"X X \n",
"on move: X\n",
" O \n",
" O \n",
"X X \n",
"on move: O\n",
" O X \n",
" O \n",
"X X \n",
"on move: X\n",
" O X \n",
" O O \n",
"X X \n",
"on move: O\n",
" O X \n",
" O O \n",
"X X X \n",
"Episode 339, Total Reward: 1\n",
"Average Reward: 0.28023598820059\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X O \n",
" \n",
"on move: O\n",
"X \n",
"X O \n",
" \n",
"on move: X\n",
"X \n",
"X O \n",
" O \n",
"on move: O\n",
"X \n",
"X O \n",
" X O \n",
"on move: X\n",
"X O \n",
"X O \n",
" X O \n",
"Episode 340, Total Reward: -1\n",
"Average Reward: 0.27647058823529413\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" \n",
"O \n",
"on move: O\n",
"X X \n",
" \n",
"O \n",
"on move: X\n",
"X X O \n",
" \n",
"O \n",
"on move: O\n",
"X X O \n",
" \n",
"O X \n",
"on move: X\n",
"X X O \n",
" O \n",
"O X \n",
"Episode 341, Total Reward: -1\n",
"Average Reward: 0.2727272727272727\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
"O X \n",
" \n",
"on move: O\n",
" \n",
"O X \n",
"X \n",
"on move: X\n",
" \n",
"O X \n",
"X O \n",
"on move: O\n",
" \n",
"O X \n",
"X X O \n",
"on move: X\n",
" O \n",
"O X \n",
"X X O \n",
"on move: O\n",
"X O \n",
"O X \n",
"X X O \n",
"on move: X\n",
"X O \n",
"O O X \n",
"X X O \n",
"on move: O\n",
"X X O \n",
"O O X \n",
"X X O \n",
"Episode 342, Total Reward: 0\n",
"Average Reward: 0.2719298245614035\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
"O \n",
" \n",
"on move: O\n",
" X X \n",
"O \n",
" \n",
"on move: X\n",
" X X \n",
"O \n",
" O \n",
"on move: O\n",
" X X \n",
"O X \n",
" O \n",
"on move: X\n",
" X X \n",
"O X \n",
" O O \n",
"on move: O\n",
" X X \n",
"O X \n",
"X O O \n",
"on move: X\n",
"O X X \n",
"O X \n",
"X O O \n",
"on move: O\n",
"O X X \n",
"O X X \n",
"X O O \n",
"Episode 343, Total Reward: 1\n",
"Average Reward: 0.27405247813411077\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
" O \n",
" X \n",
" X \n",
"on move: X\n",
" O \n",
" X \n",
"O X \n",
"on move: O\n",
" O \n",
" X X \n",
"O X \n",
"on move: X\n",
" O \n",
" X X \n",
"O X O \n",
"on move: O\n",
" O \n",
"X X X \n",
"O X O \n",
"Episode 344, Total Reward: 1\n",
"Average Reward: 0.2761627906976744\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
"O \n",
" \n",
"X \n",
"on move: O\n",
"O X \n",
" \n",
"X \n",
"on move: X\n",
"O X \n",
"O \n",
"X \n",
"on move: O\n",
"O X \n",
"O X \n",
"X \n",
"on move: X\n",
"O X \n",
"O X \n",
"X O \n",
"on move: O\n",
"O X \n",
"O X X \n",
"X O \n",
"Episode 345, Total Reward: 1\n",
"Average Reward: 0.2782608695652174\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X O \n",
" \n",
" \n",
"on move: O\n",
"X O \n",
" \n",
"X \n",
"on move: X\n",
"X O \n",
"O \n",
"X \n",
"on move: O\n",
"X O \n",
"O X \n",
"X \n",
"on move: X\n",
"X O \n",
"O X \n",
"X O \n",
"on move: O\n",
"X O \n",
"O X X \n",
"X O \n",
"on move: X\n",
"X O O \n",
"O X X \n",
"X O \n",
"on move: O\n",
"X O O \n",
"O X X \n",
"X O X \n",
"Episode 346, Total Reward: 1\n",
"Average Reward: 0.28034682080924855\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
" O \n",
"on move: O\n",
"X \n",
"X \n",
" O \n",
"on move: X\n",
"X O \n",
"X \n",
" O \n",
"on move: O\n",
"X O \n",
"X \n",
"X O \n",
"Episode 347, Total Reward: 1\n",
"Average Reward: 0.2824207492795389\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
" O \n",
"on move: O\n",
" \n",
"X \n",
"X O \n",
"on move: X\n",
" \n",
"X O \n",
"X O \n",
"on move: O\n",
"X \n",
"X O \n",
"X O \n",
"Episode 348, Total Reward: 1\n",
"Average Reward: 0.28448275862068967\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
"O \n",
"on move: O\n",
" \n",
"X X \n",
"O \n",
"on move: X\n",
" O \n",
"X X \n",
"O \n",
"on move: O\n",
" O \n",
"X X \n",
"O X \n",
"on move: X\n",
" O O \n",
"X X \n",
"O X \n",
"on move: O\n",
"X O O \n",
"X X \n",
"O X \n",
"on move: X\n",
"X O O \n",
"X X \n",
"O O X \n",
"on move: O\n",
"X O O \n",
"X X X \n",
"O O X \n",
"Episode 349, Total Reward: 1\n",
"Average Reward: 0.28653295128939826\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" O \n",
" \n",
"X \n",
"on move: O\n",
" O \n",
"X \n",
"X \n",
"on move: X\n",
" O O \n",
"X \n",
"X \n",
"on move: O\n",
"X O O \n",
"X \n",
"X \n",
"Episode 350, Total Reward: 1\n",
"Average Reward: 0.2885714285714286\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" O \n",
"X \n",
"on move: O\n",
" X \n",
" O \n",
"X \n",
"on move: X\n",
" X O \n",
" O \n",
"X \n",
"on move: O\n",
"X X O \n",
" O \n",
"X \n",
"on move: X\n",
"X X O \n",
" O \n",
"X O \n",
"on move: O\n",
"X X O \n",
"X O \n",
"X O \n",
"Episode 351, Total Reward: 1\n",
"Average Reward: 0.2905982905982906\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
" O \n",
" X \n",
" X \n",
"on move: X\n",
"O O \n",
" X \n",
" X \n",
"on move: O\n",
"O O \n",
" X X \n",
" X \n",
"on move: X\n",
"O O \n",
" X X \n",
" X O \n",
"on move: O\n",
"O O \n",
"X X X \n",
" X O \n",
"Episode 352, Total Reward: 1\n",
"Average Reward: 0.29261363636363635\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
"O \n",
"X \n",
" \n",
"on move: O\n",
"O \n",
"X \n",
" X \n",
"on move: X\n",
"O O \n",
"X \n",
" X \n",
"on move: O\n",
"O O \n",
"X X \n",
" X \n",
"on move: X\n",
"O O \n",
"X X O \n",
" X \n",
"on move: O\n",
"O X O \n",
"X X O \n",
" X \n",
"Episode 353, Total Reward: 1\n",
"Average Reward: 0.29461756373937675\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
"O \n",
" \n",
"on move: O\n",
"X X \n",
"O \n",
" \n",
"on move: X\n",
"X X \n",
"O \n",
" O \n",
"on move: O\n",
"X X X \n",
"O \n",
" O \n",
"Episode 354, Total Reward: 1\n",
"Average Reward: 0.2966101694915254\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
"O X \n",
" \n",
"on move: O\n",
" \n",
"O X X \n",
" \n",
"on move: X\n",
" \n",
"O X X \n",
"O \n",
"on move: O\n",
" X \n",
"O X X \n",
"O \n",
"on move: X\n",
"O X \n",
"O X X \n",
"O \n",
"Episode 355, Total Reward: -1\n",
"Average Reward: 0.29295774647887324\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
"O \n",
"on move: O\n",
" \n",
"X X \n",
"O \n",
"on move: X\n",
" \n",
"X X \n",
"O O \n",
"on move: O\n",
" \n",
"X X \n",
"O O X \n",
"on move: X\n",
"O \n",
"X X \n",
"O O X \n",
"on move: O\n",
"O \n",
"X X X \n",
"O O X \n",
"Episode 356, Total Reward: 1\n",
"Average Reward: 0.2949438202247191\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" \n",
"X O \n",
"on move: O\n",
" \n",
" X \n",
"X O \n",
"on move: X\n",
" O \n",
" X \n",
"X O \n",
"on move: O\n",
" O \n",
" X X \n",
"X O \n",
"on move: X\n",
"O O \n",
" X X \n",
"X O \n",
"on move: O\n",
"O O \n",
"X X X \n",
"X O \n",
"Episode 357, Total Reward: 1\n",
"Average Reward: 0.2969187675070028\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
" O \n",
"on move: O\n",
" X \n",
"X \n",
" O \n",
"on move: X\n",
" X \n",
"X O \n",
" O \n",
"on move: O\n",
" X \n",
"X O \n",
"X O \n",
"on move: X\n",
" X \n",
"X O O \n",
"X O \n",
"on move: O\n",
" X \n",
"X O O \n",
"X X O \n",
"on move: X\n",
"O X \n",
"X O O \n",
"X X O \n",
"Episode 358, Total Reward: -1\n",
"Average Reward: 0.29329608938547486\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X O \n",
" \n",
"on move: O\n",
"X \n",
"X O \n",
" \n",
"on move: X\n",
"X \n",
"X O \n",
"O \n",
"on move: O\n",
"X \n",
"X X O \n",
"O \n",
"on move: X\n",
"X O \n",
"X X O \n",
"O \n",
"on move: O\n",
"X O \n",
"X X O \n",
"O X \n",
"Episode 359, Total Reward: 1\n",
"Average Reward: 0.29526462395543174\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X O \n",
" \n",
" \n",
"on move: O\n",
"X O \n",
" X \n",
" \n",
"on move: X\n",
"X O \n",
" X \n",
" O \n",
"on move: O\n",
"X X O \n",
" X \n",
" O \n",
"on move: X\n",
"X X O \n",
"O X \n",
" O \n",
"on move: O\n",
"X X O \n",
"O X X \n",
" O \n",
"on move: X\n",
"X X O \n",
"O X X \n",
"O O \n",
"on move: O\n",
"X X O \n",
"O X X \n",
"O X O \n",
"Episode 360, Total Reward: 1\n",
"Average Reward: 0.2972222222222222\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
"O \n",
"on move: O\n",
" X \n",
" X \n",
"O \n",
"on move: X\n",
" X \n",
" O X \n",
"O \n",
"on move: O\n",
" X \n",
"X O X \n",
"O \n",
"on move: X\n",
" X \n",
"X O X \n",
"O O \n",
"on move: O\n",
" X X \n",
"X O X \n",
"O O \n",
"on move: X\n",
"O X X \n",
"X O X \n",
"O O \n",
"on move: O\n",
"O X X \n",
"X O X \n",
"O O X \n",
"Episode 361, Total Reward: 1\n",
"Average Reward: 0.29916897506925205\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
" \n",
" O \n",
"X X \n",
"on move: X\n",
" O \n",
" O \n",
"X X \n",
"on move: O\n",
" O \n",
"X O \n",
"X X \n",
"on move: X\n",
"O O \n",
"X O \n",
"X X \n",
"on move: O\n",
"O O \n",
"X O X \n",
"X X \n",
"on move: X\n",
"O O \n",
"X O X \n",
"X X O \n",
"Episode 362, Total Reward: -1\n",
"Average Reward: 0.2955801104972376\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
"O \n",
"on move: O\n",
"X X \n",
" \n",
"O \n",
"on move: X\n",
"X X \n",
" \n",
"O O \n",
"on move: O\n",
"X X X \n",
" \n",
"O O \n",
"Episode 363, Total Reward: 1\n",
"Average Reward: 0.2975206611570248\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" O \n",
"X \n",
" \n",
"on move: O\n",
" O \n",
"X \n",
" X \n",
"on move: X\n",
" O \n",
"X O \n",
" X \n",
"on move: O\n",
" O \n",
"X O \n",
"X X \n",
"on move: X\n",
" O \n",
"X O \n",
"X X O \n",
"on move: O\n",
"X O \n",
"X O \n",
"X X O \n",
"Episode 364, Total Reward: 1\n",
"Average Reward: 0.29945054945054944\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" O \n",
" \n",
"on move: O\n",
" X X \n",
" O \n",
" \n",
"on move: X\n",
" X X \n",
" O O \n",
" \n",
"on move: O\n",
" X X \n",
" O O \n",
" X \n",
"on move: X\n",
" X X \n",
" O O \n",
" O X \n",
"on move: O\n",
" X X \n",
"X O O \n",
" O X \n",
"on move: X\n",
"O X X \n",
"X O O \n",
" O X \n",
"on move: O\n",
"O X X \n",
"X O O \n",
"X O X \n",
"Episode 365, Total Reward: 0\n",
"Average Reward: 0.29863013698630136\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" \n",
" X O \n",
"on move: O\n",
" \n",
" X \n",
" X O \n",
"on move: X\n",
" \n",
" X \n",
"O X O \n",
"on move: O\n",
" X \n",
" X \n",
"O X O \n",
"on move: X\n",
" O X \n",
" X \n",
"O X O \n",
"on move: O\n",
" O X \n",
"X X \n",
"O X O \n",
"on move: X\n",
"O O X \n",
"X X \n",
"O X O \n",
"on move: O\n",
"O O X \n",
"X X X \n",
"O X O \n",
"Episode 366, Total Reward: 1\n",
"Average Reward: 0.3005464480874317\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" O \n",
"X \n",
"on move: O\n",
" \n",
" O \n",
"X X \n",
"on move: X\n",
" \n",
" O O \n",
"X X \n",
"on move: O\n",
" X \n",
" O O \n",
"X X \n",
"on move: X\n",
" O X \n",
" O O \n",
"X X \n",
"on move: O\n",
" O X \n",
" O O \n",
"X X X \n",
"Episode 367, Total Reward: 1\n",
"Average Reward: 0.3024523160762943\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
"O X \n",
" \n",
" \n",
"on move: O\n",
"O X \n",
" \n",
" X \n",
"on move: X\n",
"O X \n",
" O \n",
" X \n",
"on move: O\n",
"O X \n",
" O X \n",
" X \n",
"on move: X\n",
"O X \n",
"O O X \n",
" X \n",
"on move: O\n",
"O X \n",
"O O X \n",
" X X \n",
"on move: X\n",
"O X \n",
"O O X \n",
"O X X \n",
"Episode 368, Total Reward: -1\n",
"Average Reward: 0.29891304347826086\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
"O \n",
" \n",
"on move: O\n",
"X \n",
"O \n",
"X \n",
"on move: X\n",
"X \n",
"O \n",
"X O \n",
"on move: O\n",
"X \n",
"O \n",
"X O X \n",
"on move: X\n",
"X \n",
"O O \n",
"X O X \n",
"on move: O\n",
"X X \n",
"O O \n",
"X O X \n",
"on move: X\n",
"X X O \n",
"O O \n",
"X O X \n",
"on move: O\n",
"X X O \n",
"O X O \n",
"X O X \n",
"Episode 369, Total Reward: 1\n",
"Average Reward: 0.3008130081300813\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
"O \n",
" X \n",
"on move: O\n",
" \n",
"O \n",
" X X \n",
"on move: X\n",
" \n",
"O \n",
"O X X \n",
"on move: O\n",
"X \n",
"O \n",
"O X X \n",
"on move: X\n",
"X O \n",
"O \n",
"O X X \n",
"on move: O\n",
"X X O \n",
"O \n",
"O X X \n",
"on move: X\n",
"X X O \n",
"O O \n",
"O X X \n",
"Episode 370, Total Reward: -1\n",
"Average Reward: 0.2972972972972973\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
" X \n",
" X \n",
"on move: X\n",
" O \n",
"O X \n",
" X \n",
"on move: O\n",
" X O \n",
"O X \n",
" X \n",
"on move: X\n",
"O X O \n",
"O X \n",
" X \n",
"on move: O\n",
"O X O \n",
"O X \n",
"X X \n",
"on move: X\n",
"O X O \n",
"O X O \n",
"X X \n",
"on move: O\n",
"O X O \n",
"O X O \n",
"X X X \n",
"Episode 371, Total Reward: 1\n",
"Average Reward: 0.2991913746630728\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" X O \n",
" X \n",
" \n",
"on move: X\n",
" X O \n",
" O X \n",
" \n",
"on move: O\n",
" X O \n",
"X O X \n",
" \n",
"on move: X\n",
"O X O \n",
"X O X \n",
" \n",
"on move: O\n",
"O X O \n",
"X O X \n",
"X \n",
"on move: X\n",
"O X O \n",
"X O X \n",
"X O \n",
"Episode 372, Total Reward: -1\n",
"Average Reward: 0.2956989247311828\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
"O X \n",
" \n",
"on move: O\n",
" \n",
"O X \n",
" X \n",
"on move: X\n",
" \n",
"O X \n",
"O X \n",
"on move: O\n",
" \n",
"O X \n",
"O X X \n",
"on move: X\n",
" O \n",
"O X \n",
"O X X \n",
"on move: O\n",
"X O \n",
"O X \n",
"O X X \n",
"on move: X\n",
"X O O \n",
"O X \n",
"O X X \n",
"on move: O\n",
"X O O \n",
"O X X \n",
"O X X \n",
"Episode 373, Total Reward: 1\n",
"Average Reward: 0.2975871313672922\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" O X \n",
" \n",
"on move: O\n",
" \n",
" O X \n",
" X \n",
"on move: X\n",
" O \n",
" O X \n",
" X \n",
"on move: O\n",
" O X \n",
" O X \n",
" X \n",
"on move: X\n",
" O X \n",
" O X \n",
" X O \n",
"on move: O\n",
" O X \n",
"X O X \n",
" X O \n",
"on move: X\n",
"O O X \n",
"X O X \n",
" X O \n",
"Episode 374, Total Reward: -1\n",
"Average Reward: 0.29411764705882354\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" O \n",
" \n",
"on move: O\n",
"X X \n",
" O \n",
" \n",
"on move: X\n",
"X X \n",
" O O \n",
" \n",
"on move: O\n",
"X X \n",
" O O \n",
" X \n",
"on move: X\n",
"X X O \n",
" O O \n",
" X \n",
"on move: O\n",
"X X O \n",
" O O \n",
" X X \n",
"on move: X\n",
"X X O \n",
"O O O \n",
" X X \n",
"Episode 375, Total Reward: -1\n",
"Average Reward: 0.2906666666666667\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
" O \n",
"on move: O\n",
"X X \n",
" \n",
" O \n",
"on move: X\n",
"X X \n",
"O \n",
" O \n",
"on move: O\n",
"X X \n",
"O X \n",
" O \n",
"on move: X\n",
"X X \n",
"O X \n",
"O O \n",
"on move: O\n",
"X X \n",
"O X \n",
"O O X \n",
"Episode 376, Total Reward: 1\n",
"Average Reward: 0.2925531914893617\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
" O \n",
"on move: O\n",
" X \n",
"X \n",
" O \n",
"on move: X\n",
" X \n",
"X O \n",
" O \n",
"on move: O\n",
" X \n",
"X O \n",
" O X \n",
"on move: X\n",
"O X \n",
"X O \n",
" O X \n",
"on move: O\n",
"O X \n",
"X O \n",
"X O X \n",
"on move: X\n",
"O X \n",
"X O O \n",
"X O X \n",
"on move: O\n",
"O X X \n",
"X O O \n",
"X O X \n",
"Episode 377, Total Reward: 0\n",
"Average Reward: 0.2917771883289125\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" \n",
"O \n",
"on move: O\n",
"X \n",
" X \n",
"O \n",
"on move: X\n",
"X O \n",
" X \n",
"O \n",
"on move: O\n",
"X O \n",
"X X \n",
"O \n",
"on move: X\n",
"X O \n",
"X X O \n",
"O \n",
"on move: O\n",
"X O \n",
"X X O \n",
"O X \n",
"on move: X\n",
"X O O \n",
"X X O \n",
"O X \n",
"on move: O\n",
"X O O \n",
"X X O \n",
"O X X \n",
"Episode 378, Total Reward: 1\n",
"Average Reward: 0.29365079365079366\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" O \n",
" \n",
"X \n",
"on move: O\n",
" O \n",
" \n",
"X X \n",
"on move: X\n",
" O \n",
" O \n",
"X X \n",
"on move: O\n",
"X O \n",
" O \n",
"X X \n",
"on move: X\n",
"X O O \n",
" O \n",
"X X \n",
"on move: O\n",
"X O O \n",
"X O \n",
"X X \n",
"Episode 379, Total Reward: 1\n",
"Average Reward: 0.2955145118733509\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" O \n",
" \n",
"on move: O\n",
" X \n",
" X O \n",
" \n",
"on move: X\n",
"O X \n",
" X O \n",
" \n",
"on move: O\n",
"O X \n",
" X O \n",
"X \n",
"Episode 380, Total Reward: 1\n",
"Average Reward: 0.29736842105263156\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" O \n",
" \n",
"on move: O\n",
"X \n",
" O X \n",
" \n",
"on move: X\n",
"X \n",
"O O X \n",
" \n",
"on move: O\n",
"X \n",
"O O X \n",
" X \n",
"on move: X\n",
"X O \n",
"O O X \n",
" X \n",
"on move: O\n",
"X O \n",
"O O X \n",
"X X \n",
"on move: X\n",
"X O \n",
"O O X \n",
"X X O \n",
"on move: O\n",
"X O X \n",
"O O X \n",
"X X O \n",
"Episode 381, Total Reward: 0\n",
"Average Reward: 0.29658792650918636\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
"O \n",
"on move: O\n",
" X \n",
"X \n",
"O \n",
"on move: X\n",
" X \n",
"X O \n",
"O \n",
"on move: O\n",
" X \n",
"X O \n",
"O X \n",
"on move: X\n",
" X \n",
"X O O \n",
"O X \n",
"on move: O\n",
" X \n",
"X O O \n",
"O X X \n",
"on move: X\n",
"O X \n",
"X O O \n",
"O X X \n",
"on move: O\n",
"O X X \n",
"X O O \n",
"O X X \n",
"Episode 382, Total Reward: 0\n",
"Average Reward: 0.29581151832460734\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
"O \n",
" X \n",
" \n",
"on move: O\n",
"O \n",
"X X \n",
" \n",
"on move: X\n",
"O O \n",
"X X \n",
" \n",
"on move: O\n",
"O O \n",
"X X \n",
"X \n",
"on move: X\n",
"O O \n",
"X X \n",
"X O \n",
"on move: O\n",
"O O \n",
"X X \n",
"X O X \n",
"on move: X\n",
"O O O \n",
"X X \n",
"X O X \n",
"Episode 383, Total Reward: -1\n",
"Average Reward: 0.2924281984334204\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
"O \n",
"X \n",
"on move: O\n",
" \n",
"O X \n",
"X \n",
"on move: X\n",
" \n",
"O X O \n",
"X \n",
"on move: O\n",
" X \n",
"O X O \n",
"X \n",
"Episode 384, Total Reward: 1\n",
"Average Reward: 0.2942708333333333\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
"O \n",
"on move: O\n",
" X \n",
" \n",
"O X \n",
"on move: X\n",
" X \n",
" O \n",
"O X \n",
"on move: O\n",
" X X \n",
" O \n",
"O X \n",
"on move: X\n",
" X X \n",
" O \n",
"O O X \n",
"on move: O\n",
" X X \n",
" X O \n",
"O O X \n",
"on move: X\n",
" X X \n",
"O X O \n",
"O O X \n",
"on move: O\n",
"X X X \n",
"O X O \n",
"O O X \n",
"Episode 385, Total Reward: 1\n",
"Average Reward: 0.2961038961038961\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
"O \n",
" \n",
" X \n",
"on move: O\n",
"O \n",
" \n",
" X X \n",
"on move: X\n",
"O \n",
" \n",
"O X X \n",
"on move: O\n",
"O X \n",
" \n",
"O X X \n",
"on move: X\n",
"O X \n",
" O \n",
"O X X \n",
"on move: O\n",
"O X \n",
"X O \n",
"O X X \n",
"on move: X\n",
"O O X \n",
"X O \n",
"O X X \n",
"on move: O\n",
"O O X \n",
"X O X \n",
"O X X \n",
"Episode 386, Total Reward: 1\n",
"Average Reward: 0.2979274611398964\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" \n",
" X O \n",
"on move: O\n",
" X \n",
" \n",
" X O \n",
"on move: X\n",
" X \n",
" O \n",
" X O \n",
"on move: O\n",
"X X \n",
" O \n",
" X O \n",
"on move: X\n",
"X X \n",
"O O \n",
" X O \n",
"on move: O\n",
"X X \n",
"O O X \n",
" X O \n",
"on move: X\n",
"X O X \n",
"O O X \n",
" X O \n",
"on move: O\n",
"X O X \n",
"O O X \n",
"X X O \n",
"Episode 387, Total Reward: 0\n",
"Average Reward: 0.2971576227390181\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
" O \n",
"on move: O\n",
"X \n",
" X \n",
" O \n",
"on move: X\n",
"X \n",
"O X \n",
" O \n",
"on move: O\n",
"X \n",
"O X \n",
"X O \n",
"on move: X\n",
"X O \n",
"O X \n",
"X O \n",
"on move: O\n",
"X X O \n",
"O X \n",
"X O \n",
"on move: X\n",
"X X O \n",
"O X \n",
"X O O \n",
"on move: O\n",
"X X O \n",
"O X X \n",
"X O O \n",
"Episode 388, Total Reward: 0\n",
"Average Reward: 0.2963917525773196\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
"O \n",
" X \n",
" \n",
"on move: O\n",
"O \n",
" X \n",
" X \n",
"on move: X\n",
"O \n",
"O X \n",
" X \n",
"on move: O\n",
"O \n",
"O X X \n",
" X \n",
"on move: X\n",
"O \n",
"O X X \n",
" O X \n",
"on move: O\n",
"O \n",
"O X X \n",
"X O X \n",
"on move: X\n",
"O O \n",
"O X X \n",
"X O X \n",
"on move: O\n",
"O O X \n",
"O X X \n",
"X O X \n",
"Episode 389, Total Reward: 1\n",
"Average Reward: 0.2982005141388175\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
"X X \n",
" \n",
"on move: X\n",
" O \n",
"X X \n",
" O \n",
"on move: O\n",
" O \n",
"X X \n",
"X O \n",
"on move: X\n",
" O \n",
"X X \n",
"X O O \n",
"on move: O\n",
"X O \n",
"X X \n",
"X O O \n",
"Episode 390, Total Reward: 1\n",
"Average Reward: 0.3\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
"X X \n",
" \n",
"on move: X\n",
" O O \n",
"X X \n",
" \n",
"on move: O\n",
" O O \n",
"X X \n",
" X \n",
"on move: X\n",
" O O \n",
"X X \n",
" X O \n",
"on move: O\n",
"X O O \n",
"X X \n",
" X O \n",
"on move: X\n",
"X O O \n",
"X X O \n",
" X O \n",
"Episode 391, Total Reward: -1\n",
"Average Reward: 0.2966751918158568\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X O \n",
" \n",
"on move: O\n",
" \n",
"X O \n",
" X \n",
"on move: X\n",
"O \n",
"X O \n",
" X \n",
"on move: O\n",
"O X \n",
"X O \n",
" X \n",
"on move: X\n",
"O X \n",
"X O O \n",
" X \n",
"on move: O\n",
"O X \n",
"X O O \n",
"X X \n",
"on move: X\n",
"O X O \n",
"X O O \n",
"X X \n",
"on move: O\n",
"O X O \n",
"X O O \n",
"X X X \n",
"Episode 392, Total Reward: 1\n",
"Average Reward: 0.29846938775510207\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
" O \n",
" \n",
"X X \n",
"on move: X\n",
" O \n",
" O \n",
"X X \n",
"on move: O\n",
" O \n",
" X O \n",
"X X \n",
"on move: X\n",
" O \n",
"O X O \n",
"X X \n",
"on move: O\n",
"X O \n",
"O X O \n",
"X X \n",
"on move: X\n",
"X O \n",
"O X O \n",
"X X O \n",
"Episode 393, Total Reward: -1\n",
"Average Reward: 0.2951653944020356\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
" O \n",
"on move: O\n",
"X X \n",
" \n",
" O \n",
"on move: X\n",
"X X \n",
"O \n",
" O \n",
"on move: O\n",
"X X \n",
"O \n",
"X O \n",
"on move: X\n",
"X O X \n",
"O \n",
"X O \n",
"on move: O\n",
"X O X \n",
"O \n",
"X X O \n",
"on move: X\n",
"X O X \n",
"O O \n",
"X X O \n",
"on move: O\n",
"X O X \n",
"O X O \n",
"X X O \n",
"Episode 394, Total Reward: 1\n",
"Average Reward: 0.2969543147208122\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
"O X \n",
" \n",
"on move: O\n",
" \n",
"O X \n",
" X \n",
"on move: X\n",
" O \n",
"O X \n",
" X \n",
"on move: O\n",
" X O \n",
"O X \n",
" X \n",
"on move: X\n",
" X O \n",
"O X \n",
" O X \n",
"on move: O\n",
" X O \n",
"O X \n",
"X O X \n",
"on move: X\n",
"O X O \n",
"O X \n",
"X O X \n",
"on move: O\n",
"O X O \n",
"O X X \n",
"X O X \n",
"Episode 395, Total Reward: 0\n",
"Average Reward: 0.29620253164556964\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" O \n",
"X \n",
" \n",
"on move: O\n",
" O \n",
"X X \n",
" \n",
"on move: X\n",
" O \n",
"X X O \n",
" \n",
"on move: O\n",
" O \n",
"X X O \n",
" X \n",
"on move: X\n",
" O \n",
"X X O \n",
" O X \n",
"on move: O\n",
"X O \n",
"X X O \n",
" O X \n",
"Episode 396, Total Reward: 1\n",
"Average Reward: 0.29797979797979796\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" O X \n",
" \n",
"on move: O\n",
" \n",
" O X \n",
"X \n",
"on move: X\n",
"O \n",
" O X \n",
"X \n",
"on move: O\n",
"O \n",
" O X \n",
"X X \n",
"on move: X\n",
"O O \n",
" O X \n",
"X X \n",
"on move: O\n",
"O O \n",
" O X \n",
"X X X \n",
"Episode 397, Total Reward: 1\n",
"Average Reward: 0.29974811083123426\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" O \n",
"X \n",
"on move: O\n",
" X \n",
" O \n",
"X \n",
"on move: X\n",
" X \n",
"O O \n",
"X \n",
"on move: O\n",
" X X \n",
"O O \n",
"X \n",
"on move: X\n",
" X X \n",
"O O \n",
"X O \n",
"on move: O\n",
"X X X \n",
"O O \n",
"X O \n",
"Episode 398, Total Reward: 1\n",
"Average Reward: 0.3015075376884422\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
" O \n",
" X \n",
" X \n",
"on move: X\n",
" O \n",
"O X \n",
" X \n",
"on move: O\n",
" O \n",
"O X \n",
"X X \n",
"on move: X\n",
"O O \n",
"O X \n",
"X X \n",
"on move: O\n",
"O O \n",
"O X \n",
"X X X \n",
"Episode 399, Total Reward: 1\n",
"Average Reward: 0.3032581453634085\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X O \n",
" \n",
"on move: O\n",
" \n",
"X O \n",
" X \n",
"on move: X\n",
" O \n",
"X O \n",
" X \n",
"on move: O\n",
" O \n",
"X O X \n",
" X \n",
"on move: X\n",
" O \n",
"X O X \n",
" X O \n",
"on move: O\n",
"X O \n",
"X O X \n",
" X O \n",
"on move: X\n",
"X O O \n",
"X O X \n",
" X O \n",
"on move: O\n",
"X O O \n",
"X O X \n",
"X X O \n",
"Episode 400, Total Reward: 1\n",
"Average Reward: 0.305\n"
]
}
],
"source": [
"# Play num_episodes games of tic-tac-toe in which the same random agent picks\n",
"# the move on every turn, printing each episode's total reward and the running\n",
"# average over all episodes played so far.\n",
"env = TicTacToeEnv()\n",
"agent = RandomTicTacToeAgent(symbol=1)\n",
"\n",
"num_episodes = 400\n",
"collected_rewards = []  # total reward of every finished episode, in play order\n",
"\n",
"for episode in range(1, num_episodes + 1):\n",
"    env.reset()\n",
"    total_reward = 0\n",
"\n",
"    # The board has 9 squares, so an episode never needs more than 9 moves.\n",
"    for _ in range(9):\n",
"        moves = env.move_generator()\n",
"        if not moves:\n",
"            # No move is offered for the current position: end the episode.\n",
"            break\n",
"\n",
"        # With a single candidate there is nothing to choose, so skip the agent.\n",
"        move = moves[0] if len(moves) == 1 else agent.get_action(moves)\n",
"\n",
"        # step() is unpacked as a 4-tuple; only reward and done are needed here.\n",
"        _, reward, done, _ = env.step(move)\n",
"        total_reward += reward\n",
"\n",
"        env.render()\n",
"\n",
"        if done:\n",
"            break\n",
"\n",
"    collected_rewards.append(total_reward)\n",
"\n",
"    print(f\"Episode {episode}, Total Reward: {total_reward}\")\n",
"    average_reward = sum(collected_rewards) / len(collected_rewards)\n",
"    print(f\"Average Reward: {average_reward}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}