From 8601ef4a95d8172465100fa07416ccb46d27bba4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=A2=D0=B0=D0=B1=D0=B5=D0=B5=D0=B2=20=D0=90=D0=BB=D0=B5?= =?UTF-8?q?=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80?= Date: Wed, 11 Dec 2024 21:11:06 +0400 Subject: [PATCH] lab 6 is done --- lab_6/lab6.ipynb | 3451 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 3451 insertions(+) create mode 100644 lab_6/lab6.ipynb diff --git a/lab_6/lab6.ipynb b/lab_6/lab6.ipynb new file mode 100644 index 0000000..3e21d9b --- /dev/null +++ b/lab_6/lab6.ipynb @@ -0,0 +1,3451 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Перевод среды на gymnasium" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "import gymnasium as gym\n", + "from gymnasium import spaces\n", + "\n", + "class TicTacToeEnv(gym.Env):\n", + " metadata = {'render.modes': ['human']}\n", + " \n", + " symbols = ['O', ' ', 'X']\n", + "\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.action_space = spaces.Discrete(9)\n", + " self.observation_space = spaces.Discrete(9 * 3 * 2)\n", + " self.reset()\n", + "\n", + " def step(self, action):\n", + " finished = False\n", + " score = 0\n", + "\n", + " player, cell = action # player - игрок (1 или -1), cell - номер клетки\n", + "\n", + " board = self.state['board']\n", + " current_cell = board[cell]\n", + " current_player = self.state['current_turn']\n", + " if current_cell != 0: # Клетка занята\n", + " print(f\"Некорректный ход: Клетка {cell} уже занята.\")\n", + " finished = True\n", + " score = -1 * current_player\n", + " elif player != current_player: # Ход сделан не тем игроком\n", + " print(f\"Некорректный ход: игрок {player} не на очереди.\")\n", + " finished = True\n", + " score = -1 * current_player\n", + " else:\n", + " board[cell] = player\n", + " self.state['current_turn'] = -player\n", + "\n", + " for row in range(3):\n", + " # Проверяем строки и столбцы\n", + " if (board[row * 3] == player and board[row * 3 + 1] == player and board[row * 3 + 2] == player) or \\\n", + " (board[row] == player and board[row + 3] == player and board[row + 6] == player):\n", + " score = player\n", + " finished = True\n", + " break\n", + "\n", + " # Проверяем диагонали\n", + " if (board[0] == player and board[4] == player and board[8] == player) or \\\n", + " (board[2] == player and board[4] == player and board[6] == player):\n", + " score = player\n", + " finished = True\n", + " \n", + " return self.state, score, finished, {}\n", + "\n", + " def reset(self):\n", + " self.state = {\n", + " 'board': [0] * 9, # Поле 3x3\n", + " 'current_turn': 1 # Начинает первый игрок\n", + " }\n", + " return self.state, {}\n", + "\n", + " def render(self, close=False):\n", + " if close:\n", + " return\n", + " print(\"Current turn:\", self.symbols[self.state['current_turn'] + 1])\n", + " for idx in range(9):\n", + " print(self.symbols[self.state['board'][idx] + 1], end=\" \")\n", + " if (idx % 3) == 2:\n", + " print()\n", + "\n", + " def available_moves(self):\n", + " moves = []\n", + " for idx in range(9):\n", + " if self.state['board'][idx] == 0:\n", + " player = self.state['current_turn']\n", + " moves.append([player, idx])\n", + " return moves" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Агент\n", + "Агент - система, которая взаимодействует с окружающей средой, чтобы достичь определенной цели. Задача агента: выработка стратегии, которая максимизирует награду в долгосрочной перспективе.\n", + "Роль агента: агент принимает решение, основываясь на текущем состоянии среды и получает обратную свзяь от среды.\n", + "Функционал агента: принятие решения - использование алгоритмов или стратегий для дальнейшего принятия решения; обучение - обновление знаний или стратегий основываясь на полученный опыт; интерактивность - адаптация к изменениям в среде." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "import random\n", + "\n", + "# Агент, взаимодействующий со средой для выбора стратегий на основе доступных ходов\n", + "class GameAgent:\n", + " def __init__(self, token):\n", + " self.token = token # Символ игрока (1 - X, -1 - O)\n", + " \n", + " def select_move(self, moves):\n", + " return random.choice(moves) # Выбор случайного хода из доступных" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Основной цикл обучения" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Current turn: O\n", + " \n", + " \n", + "X \n", + "Current turn: X\n", + " \n", + " \n", + "X O \n", + "Current turn: O\n", + " \n", + "X \n", + "X O \n", + "Current turn: X\n", + " O \n", + "X \n", + "X O \n", + "Current turn: O\n", + " O \n", + "X X \n", + "X O \n", + "Current turn: X\n", + " O \n", + "X X \n", + "X O O \n", + "Current turn: O\n", + "X O \n", + "X X \n", + "X O O \n", + "Эпизод 1, Итоговая награда: 1\n", + "Средняя награда: 1.00\n", + "Current turn: O\n", + " \n", + " \n", + "X \n", + "Current turn: X\n", + " \n", + " \n", + "X O \n", + "Current turn: O\n", + " \n", + " \n", + "X O X \n", + "Current turn: X\n", + " O \n", + " \n", + "X O X \n", + "Current turn: O\n", + "X O \n", + " \n", + "X O X \n", + "Current turn: X\n", + "X O \n", + "O \n", + "X O X \n", + "Current turn: O\n", + "X O \n", + "O X \n", + "X O X \n", + "Current turn: X\n", + "X O O \n", + "O X \n", + "X O X \n", + "Current turn: O\n", + "X O O \n", + "O X X \n", + "X O X \n", + "Эпизод 2, Итоговая награда: 1\n", + "Средняя награда: 1.00\n", + "Current turn: O\n", + " X \n", + " \n", + " \n", + "Current turn: X\n", + " X \n", + " \n", + " O \n", + "Current turn: O\n", + " X \n", + " \n", + "X O \n", + "Current turn: X\n", + " X \n", + " O \n", + "X O \n", + "Current turn: O\n", + " X \n", + " X O \n", + "X O \n", + "Эпизод 3, Итоговая награда: 1\n", + "Средняя награда: 1.00\n", + "Current turn: O\n", + " \n", + " X \n", + " \n", + "Current turn: X\n", + " \n", + "O X \n", + " \n", + "Current turn: O\n", + " \n", + "O X X \n", + " \n", + "Current turn: X\n", + " \n", + "O X X \n", + "O \n", + "Current turn: O\n", + " X \n", + "O X X \n", + "O \n", + "Current turn: X\n", + "O X \n", + "O X X \n", + "O \n", + "Эпизод 4, Итоговая награда: -1\n", + "Средняя награда: 0.50\n", + "Current turn: O\n", + " X \n", + " \n", + " \n", + "Current turn: X\n", + " X \n", + " O \n", + " \n", + "Current turn: O\n", + " X \n", + " O \n", + "X \n", + "Current turn: X\n", + " X \n", + " O \n", + "X O \n", + "Current turn: O\n", + " X \n", + " O X \n", + "X O \n", + "Current turn: X\n", + " X \n", + "O O X \n", + "X O \n", + "Current turn: O\n", + " X X \n", + "O O X \n", + "X O \n", + "Current turn: X\n", + " X X \n", + "O O X \n", + "X O O \n", + "Current turn: O\n", + "X X X \n", + "O O X \n", + "X O O \n", + "Эпизод 5, Итоговая награда: 1\n", + "Средняя награда: 0.60\n", + "Current turn: O\n", + " \n", + " X \n", + " \n", + "Current turn: X\n", + " \n", + " X \n", + "O \n", + "Current turn: O\n", + " \n", + "X X \n", + "O \n", + "Current turn: X\n", + " O \n", + "X X \n", + "O \n", + "Current turn: O\n", + " O X \n", + "X X \n", + "O \n", + "Current turn: X\n", + "O O X \n", + "X X \n", + "O \n", + "Current turn: O\n", + "O O X \n", + "X X \n", + "O X \n", + "Current turn: X\n", + "O O X \n", + "X O X \n", + "O X \n", + "Current turn: O\n", + "O O X \n", + "X O X \n", + "O X X \n", + "Эпизод 6, Итоговая награда: 1\n", + "Средняя награда: 0.67\n", + "Current turn: O\n", + "X \n", + " \n", + " \n", + "Current turn: X\n", + "X \n", + " O \n", + " \n", + "Current turn: O\n", + "X \n", + "X O \n", + " \n", + "Current turn: X\n", + "X O \n", + "X O \n", + " \n", + "Current turn: O\n", + "X O \n", + "X O \n", + "X \n", + "Эпизод 7, Итоговая награда: 1\n", + "Средняя награда: 0.71\n", + "Current turn: O\n", + "X \n", + " \n", + " \n", + "Current turn: X\n", + "X \n", + "O \n", + " \n", + "Current turn: O\n", + "X \n", + "O X \n", + " \n", + "Current turn: X\n", + "X O \n", + "O X \n", + " \n", + "Current turn: O\n", + "X O X \n", + "O X \n", + " \n", + "Current turn: X\n", + "X O X \n", + "O X \n", + " O \n", + "Current turn: O\n", + "X O X \n", + "O X X \n", + " O \n", + "Current turn: X\n", + "X O X \n", + "O X X \n", + "O O \n", + "Current turn: O\n", + "X O X \n", + "O X X \n", + "O O X \n", + "Эпизод 8, Итоговая награда: 1\n", + "Средняя награда: 0.75\n", + "Current turn: O\n", + " \n", + " \n", + " X \n", + "Current turn: X\n", + " O \n", + " \n", + " X \n", + "Current turn: O\n", + "X O \n", + " \n", + " X \n", + "Current turn: X\n", + "X O \n", + " O \n", + " X \n", + "Current turn: O\n", + "X O X \n", + " O \n", + " X \n", + "Current turn: X\n", + "X O X \n", + "O O \n", + " X \n", + "Current turn: O\n", + "X O X \n", + "O O \n", + " X X \n", + "Current turn: X\n", + "X O X \n", + "O O O \n", + " X X \n", + "Эпизод 9, Итоговая награда: -1\n", + "Средняя награда: 0.56\n", + "Current turn: O\n", + " X \n", + " \n", + " \n", + "Current turn: X\n", + " X \n", + " \n", + " O \n", + "Current turn: O\n", + " X X \n", + " \n", + " O \n", + "Current turn: X\n", + " X X \n", + " \n", + "O O \n", + "Current turn: O\n", + " X X \n", + " X \n", + "O O \n", + "Current turn: X\n", + "O X X \n", + " X \n", + "O O \n", + "Current turn: O\n", + "O X X \n", + " X X \n", + "O O \n", + "Current turn: X\n", + "O X X \n", + "O X X \n", + "O O \n", + "Эпизод 10, Итоговая награда: -1\n", + "Средняя награда: 0.40\n", + "Current turn: O\n", + " \n", + " X \n", + " \n", + "Current turn: X\n", + " O \n", + " X \n", + " \n", + "Current turn: O\n", + "X O \n", + " X \n", + " \n", + "Current turn: X\n", + "X O \n", + " X \n", + " O \n", + "Current turn: O\n", + "X O \n", + "X X \n", + " O \n", + "Current turn: X\n", + "X O \n", + "X X \n", + " O O \n", + "Current turn: O\n", + "X O \n", + "X X X \n", + " O O \n", + "Эпизод 11, Итоговая награда: 1\n", + "Средняя награда: 0.45\n", + "Current turn: O\n", + " \n", + " \n", + " X \n", + "Current turn: X\n", + " \n", + " \n", + "O X \n", + "Current turn: O\n", + " \n", + " X \n", + "O X \n", + "Current turn: X\n", + " \n", + " X \n", + "O O X \n", + "Current turn: O\n", + "X \n", + " X \n", + "O O X \n", + "Current turn: X\n", + "X O \n", + " X \n", + "O O X \n", + "Current turn: O\n", + "X O \n", + "X X \n", + "O O X \n", + "Current turn: X\n", + "X O O \n", + "X X \n", + "O O X \n", + "Current turn: O\n", + "X O O \n", + "X X X \n", + "O O X \n", + "Эпизод 12, Итоговая награда: 1\n", + "Средняя награда: 0.50\n", + "Current turn: O\n", + " \n", + " \n", + " X \n", + "Current turn: X\n", + " \n", + "O \n", + " X \n", + "Current turn: O\n", + " X \n", + "O \n", + " X \n", + "Current turn: X\n", + "O X \n", + "O \n", + " X \n", + "Current turn: O\n", + "O X \n", + "O X \n", + " X \n", + "Current turn: X\n", + "O X \n", + "O X \n", + " O X \n", + "Current turn: O\n", + "O X X \n", + "O X \n", + " O X \n", + "Current turn: X\n", + "O X X \n", + "O X \n", + "O O X \n", + "Эпизод 13, Итоговая награда: -1\n", + "Средняя награда: 0.38\n", + "Current turn: O\n", + " \n", + " \n", + "X \n", + "Current turn: X\n", + " \n", + " O \n", + "X \n", + "Current turn: O\n", + " \n", + "X O \n", + "X \n", + "Current turn: X\n", + "O \n", + "X O \n", + "X \n", + "Current turn: O\n", + "O \n", + "X X O \n", + "X \n", + "Current turn: X\n", + "O \n", + "X X O \n", + "X O \n", + "Current turn: O\n", + "O X \n", + "X X O \n", + "X O \n", + "Current turn: X\n", + "O X O \n", + "X X O \n", + "X O \n", + "Эпизод 14, Итоговая награда: -1\n", + "Средняя награда: 0.29\n", + "Current turn: O\n", + " \n", + " \n", + " X \n", + "Current turn: X\n", + "O \n", + " \n", + " X \n", + "Current turn: O\n", + "O \n", + " X \n", + " X \n", + "Current turn: X\n", + "O \n", + " X \n", + "O X \n", + "Current turn: O\n", + "O \n", + "X X \n", + "O X \n", + "Current turn: X\n", + "O \n", + "X X \n", + "O X O \n", + "Current turn: O\n", + "O X \n", + "X X \n", + "O X O \n", + "Current turn: X\n", + "O X O \n", + "X X \n", + "O X O \n", + "Current turn: O\n", + "O X O \n", + "X X X \n", + "O X O \n", + "Эпизод 15, Итоговая награда: 1\n", + "Средняя награда: 0.33\n", + "Current turn: O\n", + "X \n", + " \n", + " \n", + "Current turn: X\n", + "X \n", + " O \n", + " \n", + "Current turn: O\n", + "X \n", + " O \n", + " X \n", + "Current turn: X\n", + "X \n", + " O \n", + "O X \n", + "Current turn: O\n", + "X \n", + "X O \n", + "O X \n", + "Current turn: X\n", + "X O \n", + "X O \n", + "O X \n", + "Current turn: O\n", + "X O \n", + "X O \n", + "O X X \n", + "Current turn: X\n", + "X O O \n", + "X O \n", + "O X X \n", + "Current turn: O\n", + "X O O \n", + "X X O \n", + "O X X \n", + "Эпизод 16, Итоговая награда: 1\n", + "Средняя награда: 0.38\n", + "Current turn: O\n", + " \n", + "X \n", + " \n", + "Current turn: X\n", + " \n", + "X O \n", + " \n", + "Current turn: O\n", + " \n", + "X X O \n", + " \n", + "Current turn: X\n", + " \n", + "X X O \n", + " O \n", + "Current turn: O\n", + " \n", + "X X O \n", + "X O \n", + "Current turn: X\n", + "O \n", + "X X O \n", + "X O \n", + "Current turn: O\n", + "O \n", + "X X O \n", + "X X O \n", + "Current turn: X\n", + "O O \n", + "X X O \n", + "X X O \n", + "Эпизод 17, Итоговая награда: -1\n", + "Средняя награда: 0.29\n", + "Current turn: O\n", + " \n", + " \n", + " X \n", + "Current turn: X\n", + " \n", + " \n", + "O X \n", + "Current turn: O\n", + " \n", + " \n", + "O X X \n", + "Current turn: X\n", + " O \n", + " \n", + "O X X \n", + "Current turn: O\n", + " X O \n", + " \n", + "O X X \n", + "Current turn: X\n", + " X O \n", + "O \n", + "O X X \n", + "Current turn: O\n", + " X O \n", + "O X \n", + "O X X \n", + "Current turn: X\n", + " X O \n", + "O O X \n", + "O X X \n", + "Эпизод 18, Итоговая награда: -1\n", + "Средняя награда: 0.22\n", + "Current turn: O\n", + " X \n", + " \n", + " \n", + "Current turn: X\n", + " X \n", + " O \n", + " \n", + "Current turn: O\n", + " X \n", + " X O \n", + " \n", + "Current turn: X\n", + " X \n", + " X O \n", + " O \n", + "Current turn: O\n", + " X X \n", + " X O \n", + " O \n", + "Current turn: X\n", + " X X \n", + " X O \n", + "O O \n", + "Current turn: O\n", + " X X \n", + "X X O \n", + "O O \n", + "Current turn: X\n", + " X X \n", + "X X O \n", + "O O O \n", + "Эпизод 19, Итоговая награда: -1\n", + "Средняя награда: 0.16\n", + "Current turn: O\n", + " X \n", + " \n", + " \n", + "Current turn: X\n", + "O X \n", + " \n", + " \n", + "Current turn: O\n", + "O X \n", + " X \n", + " \n", + "Current turn: X\n", + "O X \n", + " X O \n", + " \n", + "Current turn: O\n", + "O X \n", + " X O \n", + " X \n", + "Current turn: X\n", + "O X \n", + " X O \n", + " O X \n", + "Current turn: O\n", + "O X X \n", + " X O \n", + " O X \n", + "Current turn: X\n", + "O X X \n", + "O X O \n", + " O X \n", + "Current turn: O\n", + "O X X \n", + "O X O \n", + "X O X \n", + "Эпизод 20, Итоговая награда: 1\n", + "Средняя награда: 0.20\n", + "Current turn: O\n", + " X \n", + " \n", + " \n", + "Current turn: X\n", + " X \n", + " \n", + " O \n", + "Current turn: O\n", + " X \n", + " X \n", + " O \n", + "Current turn: X\n", + " X \n", + " X \n", + " O O \n", + "Current turn: O\n", + " X \n", + " X X \n", + " O O \n", + "Current turn: X\n", + " X O \n", + " X X \n", + " O O \n", + "Current turn: O\n", + " X O \n", + " X X \n", + "X O O \n", + "Current turn: X\n", + " X O \n", + "O X X \n", + "X O O \n", + "Current turn: O\n", + "X X O \n", + "O X X \n", + "X O O \n", + "Эпизод 21, Итоговая награда: 0\n", + "Средняя награда: 0.19\n", + "Current turn: O\n", + " \n", + " X \n", + " \n", + "Current turn: X\n", + " \n", + " X \n", + " O \n", + "Current turn: O\n", + " X \n", + " X \n", + " O \n", + "Current turn: X\n", + " X \n", + "O X \n", + " O \n", + "Current turn: O\n", + "X X \n", + "O X \n", + " O \n", + "Current turn: X\n", + "X X \n", + "O X \n", + "O O \n", + "Current turn: O\n", + "X X X \n", + "O X \n", + "O O \n", + "Эпизод 22, Итоговая награда: 1\n", + "Средняя награда: 0.23\n", + "Current turn: O\n", + " \n", + " \n", + " X \n", + "Current turn: X\n", + " \n", + " O \n", + " X \n", + "Current turn: O\n", + " X \n", + " O \n", + " X \n", + "Current turn: X\n", + "O X \n", + " O \n", + " X \n", + "Current turn: O\n", + "O X \n", + "X O \n", + " X \n", + "Current turn: X\n", + "O X \n", + "X O \n", + " X O \n", + "Current turn: O\n", + "O X \n", + "X X O \n", + " X O \n", + "Эпизод 23, Итоговая награда: 1\n", + "Средняя награда: 0.26\n", + "Current turn: O\n", + "X \n", + " \n", + " \n", + "Current turn: X\n", + "X \n", + " \n", + " O \n", + "Current turn: O\n", + "X X \n", + " \n", + " O \n", + "Current turn: X\n", + "X X \n", + " O \n", + " O \n", + "Current turn: O\n", + "X X \n", + "X O \n", + " O \n", + "Current turn: X\n", + "X X \n", + "X O O \n", + " O \n", + "Current turn: O\n", + "X X X \n", + "X O O \n", + " O \n", + "Эпизод 24, Итоговая награда: 1\n", + "Средняя награда: 0.29\n", + "Current turn: O\n", + " X \n", + " \n", + " \n", + "Current turn: X\n", + " X \n", + "O \n", + " \n", + "Current turn: O\n", + " X \n", + "O X \n", + " \n", + "Current turn: X\n", + "O X \n", + "O X \n", + " \n", + "Current turn: O\n", + "O X \n", + "O X \n", + "X \n", + "Эпизод 25, Итоговая награда: 1\n", + "Средняя награда: 0.32\n", + "Current turn: O\n", + " \n", + "X \n", + " \n", + "Current turn: X\n", + " \n", + "X \n", + "O \n", + "Current turn: O\n", + " \n", + "X \n", + "O X \n", + "Current turn: X\n", + " O \n", + "X \n", + "O X \n", + "Current turn: O\n", + " O \n", + "X X \n", + "O X \n", + "Current turn: X\n", + " O \n", + "X X O \n", + "O X \n", + "Current turn: O\n", + "X O \n", + "X X O \n", + "O X \n", + "Current turn: X\n", + "X O \n", + "X X O \n", + "O X O \n", + "Current turn: O\n", + "X O X \n", + "X X O \n", + "O X O \n", + "Эпизод 26, Итоговая награда: 0\n", + "Средняя награда: 0.31\n", + "Current turn: O\n", + " X \n", + " \n", + " \n", + "Current turn: X\n", + " X \n", + " \n", + " O \n", + "Current turn: O\n", + " X \n", + " \n", + "X O \n", + "Current turn: X\n", + "O X \n", + " \n", + "X O \n", + "Current turn: O\n", + "O X \n", + " X \n", + "X O \n", + "Current turn: X\n", + "O O X \n", + " X \n", + "X O \n", + "Current turn: O\n", + "O O X \n", + "X X \n", + "X O \n", + "Current turn: X\n", + "O O X \n", + "X X \n", + "X O O \n", + "Current turn: O\n", + "O O X \n", + "X X X \n", + "X O O \n", + "Эпизод 27, Итоговая награда: 1\n", + "Средняя награда: 0.33\n", + "Current turn: O\n", + " \n", + "X \n", + " \n", + "Current turn: X\n", + " \n", + "X O \n", + " \n", + "Current turn: O\n", + "X \n", + "X O \n", + " \n", + "Current turn: X\n", + "X \n", + "X O O \n", + " \n", + "Current turn: O\n", + "X \n", + "X O O \n", + " X \n", + "Current turn: X\n", + "X O \n", + "X O O \n", + " X \n", + "Current turn: O\n", + "X O \n", + "X O O \n", + "X X \n", + "Эпизод 28, Итоговая награда: 1\n", + "Средняя награда: 0.36\n", + "Current turn: O\n", + " X \n", + " \n", + " \n", + "Current turn: X\n", + "O X \n", + " \n", + " \n", + "Current turn: O\n", + "O X X \n", + " \n", + " \n", + "Current turn: X\n", + "O X X \n", + " \n", + "O \n", + "Current turn: O\n", + "O X X \n", + " X \n", + "O \n", + "Current turn: X\n", + "O X X \n", + " X \n", + "O O \n", + "Current turn: O\n", + "O X X \n", + " X X \n", + "O O \n", + "Current turn: X\n", + "O X X \n", + " X X \n", + "O O O \n", + "Эпизод 29, Итоговая награда: -1\n", + "Средняя награда: 0.31\n", + "Current turn: O\n", + " \n", + " \n", + " X \n", + "Current turn: X\n", + " \n", + " \n", + "O X \n", + "Current turn: O\n", + " \n", + " \n", + "O X X \n", + "Current turn: X\n", + " \n", + "O \n", + "O X X \n", + "Current turn: O\n", + " \n", + "O X \n", + "O X X \n", + "Current turn: X\n", + " \n", + "O X O \n", + "O X X \n", + "Current turn: O\n", + " X \n", + "O X O \n", + "O X X \n", + "Эпизод 30, Итоговая награда: 1\n", + "Средняя награда: 0.33\n", + "Current turn: O\n", + " \n", + " X \n", + " \n", + "Current turn: X\n", + " \n", + " X \n", + " O \n", + "Current turn: O\n", + " \n", + " X \n", + " O X \n", + "Current turn: X\n", + " O \n", + " X \n", + " O X \n", + "Current turn: O\n", + " O \n", + " X \n", + "X O X \n", + "Current turn: X\n", + " O O \n", + " X \n", + "X O X \n", + "Current turn: O\n", + " O O \n", + "X X \n", + "X O X \n", + "Current turn: X\n", + " O O \n", + "X X O \n", + "X O X \n", + "Current turn: O\n", + "X O O \n", + "X X O \n", + "X O X \n", + "Эпизод 31, Итоговая награда: 1\n", + "Средняя награда: 0.35\n", + "Current turn: O\n", + " \n", + "X \n", + " \n", + "Current turn: X\n", + " \n", + "X \n", + "O \n", + "Current turn: O\n", + " \n", + "X X \n", + "O \n", + "Current turn: X\n", + " O \n", + "X X \n", + "O \n", + "Current turn: O\n", + " O \n", + "X X X \n", + "O \n", + "Эпизод 32, Итоговая награда: 1\n", + "Средняя награда: 0.38\n", + "Current turn: O\n", + " \n", + " \n", + "X \n", + "Current turn: X\n", + "O \n", + " \n", + "X \n", + "Current turn: O\n", + "O \n", + " X \n", + "X \n", + "Current turn: X\n", + "O \n", + " X O \n", + "X \n", + "Current turn: O\n", + "O \n", + "X X O \n", + "X \n", + "Current turn: X\n", + "O \n", + "X X O \n", + "X O \n", + "Current turn: O\n", + "O X \n", + "X X O \n", + "X O \n", + "Current turn: X\n", + "O X \n", + "X X O \n", + "X O O \n", + "Current turn: O\n", + "O X X \n", + "X X O \n", + "X O O \n", + "Эпизод 33, Итоговая награда: 1\n", + "Средняя награда: 0.39\n", + "Current turn: O\n", + " \n", + " \n", + " X \n", + "Current turn: X\n", + " \n", + " \n", + "O X \n", + "Current turn: O\n", + " \n", + " \n", + "O X X \n", + "Current turn: X\n", + " \n", + "O \n", + "O X X \n", + "Current turn: O\n", + " \n", + "O X \n", + "O X X \n", + "Current turn: X\n", + "O \n", + "O X \n", + "O X X \n", + "Эпизод 34, Итоговая награда: -1\n", + "Средняя награда: 0.35\n", + "Current turn: O\n", + " X \n", + " \n", + " \n", + "Current turn: X\n", + " X \n", + "O \n", + " \n", + "Current turn: O\n", + "X X \n", + "O \n", + " \n", + "Current turn: X\n", + "X X \n", + "O \n", + " O \n", + "Current turn: O\n", + "X X \n", + "O \n", + " O X \n", + "Current turn: X\n", + "X X O \n", + "O \n", + " O X \n", + "Current turn: O\n", + "X X O \n", + "O \n", + "X O X \n", + "Current turn: X\n", + "X X O \n", + "O O \n", + "X O X \n", + "Current turn: O\n", + "X X O \n", + "O O X \n", + "X O X \n", + "Эпизод 35, Итоговая награда: 0\n", + "Средняя награда: 0.34\n", + "Current turn: O\n", + "X \n", + " \n", + " \n", + "Current turn: X\n", + "X \n", + " \n", + " O \n", + "Current turn: O\n", + "X \n", + "X \n", + " O \n", + "Current turn: X\n", + "X \n", + "X \n", + "O O \n", + "Current turn: O\n", + "X \n", + "X \n", + "O O X \n", + "Current turn: X\n", + "X \n", + "X O \n", + "O O X \n", + "Current turn: O\n", + "X X \n", + "X O \n", + "O O X \n", + "Current turn: X\n", + "X X \n", + "X O O \n", + "O O X \n", + "Current turn: O\n", + "X X X \n", + "X O O \n", + "O O X \n", + "Эпизод 36, Итоговая награда: 1\n", + "Средняя награда: 0.36\n", + "Current turn: O\n", + " X \n", + " \n", + " \n", + "Current turn: X\n", + " X \n", + " \n", + "O \n", + "Current turn: O\n", + " X \n", + " X \n", + "O \n", + "Current turn: X\n", + " X \n", + " X \n", + "O O \n", + "Current turn: O\n", + "X X \n", + " X \n", + "O O \n", + "Current turn: X\n", + "X X \n", + "O X \n", + "O O \n", + "Current turn: O\n", + "X X X \n", + "O X \n", + "O O \n", + "Эпизод 37, Итоговая награда: 1\n", + "Средняя награда: 0.38\n", + "Current turn: O\n", + " X \n", + " \n", + " \n", + "Current turn: X\n", + " X \n", + " O \n", + " \n", + "Current turn: O\n", + " X \n", + " O X \n", + " \n", + "Current turn: X\n", + " X \n", + " O X \n", + " O \n", + "Current turn: O\n", + " X \n", + " O X \n", + "X O \n", + "Current turn: X\n", + " X \n", + "O O X \n", + "X O \n", + "Current turn: O\n", + "X X \n", + "O O X \n", + "X O \n", + "Current turn: X\n", + "X X \n", + "O O X \n", + "X O O \n", + "Current turn: O\n", + "X X X \n", + "O O X \n", + "X O O \n", + "Эпизод 38, Итоговая награда: 1\n", + "Средняя награда: 0.39\n", + "Current turn: O\n", + " X \n", + " \n", + " \n", + "Current turn: X\n", + " X \n", + " O \n", + " \n", + "Current turn: O\n", + " X X \n", + " O \n", + " \n", + "Current turn: X\n", + " X X \n", + " O \n", + " O \n", + "Current turn: O\n", + " X X \n", + " O \n", + "X O \n", + "Current turn: X\n", + " X X \n", + " O \n", + "X O O \n", + "Current turn: O\n", + " X X \n", + " X O \n", + "X O O \n", + "Эпизод 39, Итоговая награда: 1\n", + "Средняя награда: 0.41\n", + "Current turn: O\n", + " \n", + " \n", + "X \n", + "Current turn: X\n", + " \n", + " O \n", + "X \n", + "Current turn: O\n", + " \n", + " X O \n", + "X \n", + "Current turn: X\n", + " O \n", + " X O \n", + "X \n", + "Current turn: O\n", + "X O \n", + " X O \n", + "X \n", + "Current turn: X\n", + "X O \n", + "O X O \n", + "X \n", + "Current turn: O\n", + "X O \n", + "O X O \n", + "X X \n", + "Current turn: X\n", + "X O \n", + "O X O \n", + "X X O \n", + "Current turn: O\n", + "X O X \n", + "O X O \n", + "X X O \n", + "Эпизод 40, Итоговая награда: 1\n", + "Средняя награда: 0.42\n", + "Current turn: O\n", + " X \n", + " \n", + " \n", + "Current turn: X\n", + " X \n", + " O \n", + " \n", + "Current turn: O\n", + " X \n", + " O \n", + "X \n", + "Current turn: X\n", + " X \n", + " O \n", + "X O \n", + "Current turn: O\n", + " X \n", + " X O \n", + "X O \n", + "Эпизод 41, Итоговая награда: 1\n", + "Средняя награда: 0.44\n", + "Current turn: O\n", + " \n", + " \n", + "X \n", + "Current turn: X\n", + "O \n", + " \n", + "X \n", + "Current turn: O\n", + "O X \n", + " \n", + "X \n", + "Current turn: X\n", + "O X \n", + " O \n", + "X \n", + "Current turn: O\n", + "O X \n", + " O \n", + "X X \n", + "Current turn: X\n", + "O X \n", + " O O \n", + "X X \n", + "Current turn: O\n", + "O X \n", + "X O O \n", + "X X \n", + "Current turn: X\n", + "O X O \n", + "X O O \n", + "X X \n", + "Current turn: O\n", + "O X O \n", + "X O O \n", + "X X X \n", + "Эпизод 42, Итоговая награда: 1\n", + "Средняя награда: 0.45\n", + "Current turn: O\n", + " \n", + " X \n", + " \n", + "Current turn: X\n", + " \n", + " X O \n", + " \n", + "Current turn: O\n", + "X \n", + " X O \n", + " \n", + "Current turn: X\n", + "X \n", + "O X O \n", + " \n", + "Current turn: O\n", + "X \n", + "O X O \n", + "X \n", + "Current turn: X\n", + "X O \n", + "O X O \n", + "X \n", + "Current turn: O\n", + "X X O \n", + "O X O \n", + "X \n", + "Current turn: X\n", + "X X O \n", + "O X O \n", + "X O \n", + "Current turn: O\n", + "X X O \n", + "O X O \n", + "X O X \n", + "Эпизод 43, Итоговая награда: 1\n", + "Средняя награда: 0.47\n", + "Current turn: O\n", + " X \n", + " \n", + " \n", + "Current turn: X\n", + " X \n", + " \n", + " O \n", + "Current turn: O\n", + " X \n", + " \n", + " O X \n", + "Current turn: X\n", + " X O \n", + " \n", + " O X \n", + "Current turn: O\n", + " X O \n", + " X \n", + " O X \n", + "Current turn: X\n", + " X O \n", + "O X \n", + " O X \n", + "Current turn: O\n", + "X X O \n", + "O X \n", + " O X \n", + "Эпизод 44, Итоговая награда: 1\n", + "Средняя награда: 0.48\n", + "Current turn: O\n", + " \n", + "X \n", + " \n", + "Current turn: X\n", + " \n", + "X \n", + "O \n", + "Current turn: O\n", + " \n", + "X \n", + "O X \n", + "Current turn: X\n", + " O \n", + "X \n", + "O X \n", + "Current turn: O\n", + "X O \n", + "X \n", + "O X \n", + "Current turn: X\n", + "X O \n", + "X O \n", + "O X \n", + "Эпизод 45, Итоговая награда: -1\n", + "Средняя награда: 0.44\n", + "Current turn: O\n", + " \n", + " X \n", + " \n", + "Current turn: X\n", + " \n", + " X \n", + " O \n", + "Current turn: O\n", + " \n", + " X \n", + " O X \n", + "Current turn: X\n", + " \n", + " O X \n", + " O X \n", + "Current turn: O\n", + " \n", + "X O X \n", + " O X \n", + "Current turn: X\n", + " O \n", + "X O X \n", + " O X \n", + "Эпизод 46, Итоговая награда: -1\n", + "Средняя награда: 0.41\n", + "Current turn: O\n", + " \n", + " \n", + " X \n", + "Current turn: X\n", + " \n", + " \n", + " X O \n", + "Current turn: O\n", + " \n", + "X \n", + " X O \n", + "Current turn: X\n", + " O \n", + "X \n", + " X O \n", + "Current turn: O\n", + " O \n", + "X \n", + "X X O \n", + "Current turn: X\n", + " O \n", + "X O \n", + "X X O \n", + "Current turn: O\n", + " O \n", + "X O X \n", + "X X O \n", + "Current turn: X\n", + "O O \n", + "X O X \n", + "X X O \n", + "Эпизод 47, Итоговая награда: -1\n", + "Средняя награда: 0.38\n", + "Current turn: O\n", + " \n", + " \n", + "X \n", + "Current turn: X\n", + " O \n", + " \n", + "X \n", + "Current turn: O\n", + " O \n", + " \n", + "X X \n", + "Current turn: X\n", + " O O \n", + " \n", + "X X \n", + "Current turn: O\n", + "X O O \n", + " \n", + "X X \n", + "Current turn: X\n", + "X O O \n", + " O \n", + "X X \n", + "Current turn: O\n", + "X O O \n", + " O \n", + "X X X \n", + "Эпизод 48, Итоговая награда: 1\n", + "Средняя награда: 0.40\n", + "Current turn: O\n", + "X \n", + " \n", + " \n", + "Current turn: X\n", + "X O \n", + " \n", + " \n", + "Current turn: O\n", + "X O \n", + " \n", + " X \n", + "Current turn: X\n", + "X O \n", + "O \n", + " X \n", + "Current turn: O\n", + "X O \n", + "O \n", + "X X \n", + "Current turn: X\n", + "X O \n", + "O O \n", + "X X \n", + "Current turn: O\n", + "X O \n", + "O O \n", + "X X X \n", + "Эпизод 49, Итоговая награда: 1\n", + "Средняя награда: 0.41\n", + "Current turn: O\n", + " \n", + "X \n", + " \n", + "Current turn: X\n", + " O \n", + "X \n", + " \n", + "Current turn: O\n", + " O \n", + "X X \n", + " \n", + "Current turn: X\n", + " O \n", + "X X \n", + " O \n", + "Current turn: O\n", + " X O \n", + "X X \n", + " O \n", + "Current turn: X\n", + " X O \n", + "X X \n", + " O O \n", + "Current turn: O\n", + " X O \n", + "X X \n", + "X O O \n", + "Current turn: X\n", + " X O \n", + "X O X \n", + "X O O \n", + "Current turn: O\n", + "X X O \n", + "X O X \n", + "X O O \n", + "Эпизод 50, Итоговая награда: 1\n", + "Средняя награда: 0.42\n", + "Current turn: O\n", + " X \n", + " \n", + " \n", + "Current turn: X\n", + " X \n", + " O \n", + " \n", + "Current turn: O\n", + " X \n", + "X O \n", + " \n", + "Current turn: X\n", + " X \n", + "X O \n", + "O \n", + "Current turn: O\n", + " X \n", + "X O X \n", + "O \n", + "Current turn: X\n", + "O X \n", + "X O X \n", + "O \n", + "Current turn: O\n", + "O X \n", + "X O X \n", + "O X \n", + "Current turn: X\n", + "O X O \n", + "X O X \n", + "O X \n", + "Эпизод 51, Итоговая награда: -1\n", + "Средняя награда: 0.39\n", + "Current turn: O\n", + " \n", + "X \n", + " \n", + "Current turn: X\n", + " \n", + "X O \n", + " \n", + "Current turn: O\n", + " \n", + "X X O \n", + " \n", + "Current turn: X\n", + " O \n", + "X X O \n", + " \n", + "Current turn: O\n", + " X O \n", + "X X O \n", + " \n", + "Current turn: X\n", + "O X O \n", + "X X O \n", + " \n", + "Current turn: O\n", + "O X O \n", + "X X O \n", + "X \n", + "Current turn: X\n", + "O X O \n", + "X X O \n", + "X O \n", + "Current turn: O\n", + "O X O \n", + "X X O \n", + "X O X \n", + "Эпизод 52, Итоговая награда: 0\n", + "Средняя награда: 0.38\n", + "Current turn: O\n", + " \n", + " X \n", + " \n", + "Current turn: X\n", + " \n", + " X \n", + " O \n", + "Current turn: O\n", + " \n", + "X X \n", + " O \n", + "Current turn: X\n", + " \n", + "X X \n", + "O O \n", + "Current turn: O\n", + " X \n", + "X X \n", + "O O \n", + "Current turn: X\n", + "O X \n", + "X X \n", + "O O \n", + "Current turn: O\n", + "O X \n", + "X X X \n", + "O O \n", + "Эпизод 53, Итоговая награда: 1\n", + "Средняя награда: 0.40\n", + "Current turn: O\n", + " X \n", + " \n", + " \n", + "Current turn: X\n", + " X \n", + " O \n", + " \n", + "Current turn: O\n", + " X \n", + " X O \n", + " \n", + "Current turn: X\n", + " O X \n", + " X O \n", + " \n", + "Current turn: O\n", + " O X \n", + "X X O \n", + " \n", + "Current turn: X\n", + "O O X \n", + "X X O \n", + " \n", + "Current turn: O\n", + "O O X \n", + "X X O \n", + "X \n", + "Эпизод 54, Итоговая награда: 1\n", + "Средняя награда: 0.41\n", + "Current turn: O\n", + " \n", + "X \n", + " \n", + "Current turn: X\n", + " \n", + "X O \n", + " \n", + "Current turn: O\n", + " \n", + "X O \n", + " X \n", + "Current turn: X\n", + " O \n", + "X O \n", + " X \n", + "Current turn: O\n", + " X O \n", + "X O \n", + " X \n", + "Current turn: X\n", + " X O \n", + "X O \n", + " O X \n", + "Current turn: O\n", + " X O \n", + "X O X \n", + " O X \n", + "Current turn: X\n", + " X O \n", + "X O X \n", + "O O X \n", + "Эпизод 55, Итоговая награда: -1\n", + "Средняя награда: 0.38\n", + "Current turn: O\n", + " X \n", + " \n", + " \n", + "Current turn: X\n", + " X \n", + " \n", + "O \n", + "Current turn: O\n", + " X \n", + " \n", + "O X \n", + "Current turn: X\n", + " X \n", + "O \n", + "O X \n", + "Current turn: O\n", + " X \n", + "O \n", + "O X X \n", + "Current turn: X\n", + " X \n", + "O O \n", + "O X X \n", + "Current turn: O\n", + " X \n", + "O O X \n", + "O X X \n", + "Эпизод 56, Итоговая награда: 1\n", + "Средняя награда: 0.39\n", + "Current turn: O\n", + " \n", + " X \n", + " \n", + "Current turn: X\n", + " \n", + " X O \n", + " \n", + "Current turn: O\n", + " \n", + " X O \n", + " X \n", + "Current turn: X\n", + "O \n", + " X O \n", + " X \n", + "Current turn: O\n", + "O \n", + "X X O \n", + " X \n", + "Current turn: X\n", + "O \n", + "X X O \n", + "O X \n", + "Current turn: O\n", + "O \n", + "X X O \n", + "O X X \n", + "Current turn: X\n", + "O O \n", + "X X O \n", + "O X X \n", + "Current turn: O\n", + "O O X \n", + "X X O \n", + "O X X \n", + "Эпизод 57, Итоговая награда: 0\n", + "Средняя награда: 0.39\n", + "Current turn: O\n", + " \n", + " \n", + "X \n", + "Current turn: X\n", + " O \n", + " \n", + "X \n", + "Current turn: O\n", + " O \n", + " X \n", + "X \n", + "Current turn: X\n", + " O \n", + " X O \n", + "X \n", + "Current turn: O\n", + " O X \n", + " X O \n", + "X \n", + "Эпизод 58, Итоговая награда: 1\n", + "Средняя награда: 0.40\n", + "Current turn: O\n", + " \n", + "X \n", + " \n", + "Current turn: X\n", + " \n", + "X O \n", + " \n", + "Current turn: O\n", + " X \n", + "X O \n", + " \n", + "Current turn: X\n", + "O X \n", + "X O \n", + " \n", + "Current turn: O\n", + "O X \n", + "X O \n", + " X \n", + "Current turn: X\n", + "O X \n", + "X O \n", + " X O \n", + "Current turn: O\n", + "O X \n", + "X O \n", + "X X O \n", + "Current turn: X\n", + "O X \n", + "X O O \n", + "X X O \n", + "Эпизод 59, Итоговая награда: -1\n", + "Средняя награда: 0.37\n", + "Current turn: O\n", + " \n", + " X \n", + " \n", + "Current turn: X\n", + " \n", + " O X \n", + " \n", + "Current turn: O\n", + " X \n", + " O X \n", + " \n", + "Current turn: X\n", + " X \n", + " O X \n", + "O \n", + "Current turn: O\n", + " X X \n", + " O X \n", + "O \n", + "Current turn: X\n", + "O X X \n", + " O X \n", + "O \n", + "Current turn: O\n", + "O X X \n", + " O X \n", + "O X \n", + "Эпизод 60, Итоговая награда: 1\n", + "Средняя награда: 0.38\n", + "Current turn: O\n", + " \n", + "X \n", + " \n", + "Current turn: X\n", + "O \n", + "X \n", + " \n", + "Current turn: O\n", + "O \n", + "X \n", + " X \n", + "Current turn: X\n", + "O \n", + "X \n", + "O X \n", + "Current turn: O\n", + "O \n", + "X X \n", + "O X \n", + "Current turn: X\n", + "O \n", + "X X \n", + "O X O \n", + "Current turn: O\n", + "O \n", + "X X X \n", + "O X O \n", + "Эпизод 61, Итоговая награда: 1\n", + "Средняя награда: 0.39\n", + "Current turn: O\n", + " \n", + "X \n", + " \n", + "Current turn: X\n", + "O \n", + "X \n", + " \n", + "Current turn: O\n", + "O X \n", + "X \n", + " \n", + "Current turn: X\n", + "O X \n", + "X O \n", + " \n", + "Current turn: O\n", + "O X \n", + "X X O \n", + " \n", + "Current turn: X\n", + "O X O \n", + "X X O \n", + " \n", + "Current turn: O\n", + "O X O \n", + "X X O \n", + "X \n", + "Current turn: X\n", + "O X O \n", + "X X O \n", + "X O \n", + "Эпизод 62, Итоговая награда: -1\n", + "Средняя награда: 0.37\n", + "Current turn: O\n", + " X \n", + " \n", + " \n", + "Current turn: X\n", + " X \n", + " O \n", + " \n", + "Current turn: O\n", + " X \n", + "X O \n", + " \n", + "Current turn: X\n", + " X \n", + "X O \n", + " O \n", + "Current turn: O\n", + " X X \n", + "X O \n", + " O \n", + "Current turn: X\n", + " X X \n", + "X O \n", + "O O \n", + "Current turn: O\n", + "X X X \n", + "X O \n", + "O O \n", + "Эпизод 63, Итоговая награда: 1\n", + "Средняя награда: 0.38\n", + "Current turn: O\n", + " \n", + "X \n", + " \n", + "Current turn: X\n", + " \n", + "X O \n", + " \n", + "Current turn: O\n", + " X \n", + "X O \n", + " \n", + "Current turn: X\n", + " X \n", + "X O \n", + "O \n", + "Current turn: O\n", + "X X \n", + "X O \n", + "O \n", + "Current turn: X\n", + "X X \n", + "X O O \n", + "O \n", + "Current turn: O\n", + "X X \n", + "X O O \n", + "O X \n", + "Current turn: X\n", + "X O X \n", + "X O O \n", + "O X \n", + "Current turn: O\n", + "X O X \n", + "X O O \n", + "O X X \n", + "Эпизод 64, Итоговая награда: 0\n", + "Средняя награда: 0.38\n", + "Current turn: O\n", + " \n", + " \n", + "X \n", + "Current turn: X\n", + " \n", + " \n", + "X O \n", + "Current turn: O\n", + "X \n", + " \n", + "X O \n", + "Current turn: X\n", + "X \n", + " O \n", + "X O \n", + "Current turn: O\n", + "X X \n", + " O \n", + "X O \n", + "Current turn: X\n", + "X X \n", + " O O \n", + "X O \n", + "Current turn: O\n", + "X X \n", + " O O \n", + "X O X \n", + "Current turn: X\n", + "X O X \n", + " O O \n", + "X O X \n", + "Эпизод 65, Итоговая награда: -1\n", + "Средняя награда: 0.35\n", + "Current turn: O\n", + " \n", + " \n", + " X \n", + "Current turn: X\n", + "O \n", + " \n", + " X \n", + "Current turn: O\n", + "O \n", + " X \n", + " X \n", + "Current turn: X\n", + "O \n", + " O X \n", + " X \n", + "Current turn: O\n", + "O X \n", + " O X \n", + " X \n", + "Эпизод 66, Итоговая награда: 1\n", + "Средняя награда: 0.36\n", + "Current turn: O\n", + " \n", + " \n", + " X \n", + "Current turn: X\n", + " \n", + " \n", + " O X \n", + "Current turn: O\n", + " \n", + "X \n", + " O X \n", + "Current turn: X\n", + " O \n", + "X \n", + " O X \n", + "Current turn: O\n", + " O \n", + "X \n", + "X O X \n", + "Current turn: X\n", + " O \n", + "X O \n", + "X O X \n", + "Эпизод 67, Итоговая награда: -1\n", + "Средняя награда: 0.34\n", + "Current turn: O\n", + " \n", + " X \n", + " \n", + "Current turn: X\n", + " O \n", + " X \n", + " \n", + "Current turn: O\n", + " O \n", + " X \n", + " X \n", + "Current turn: X\n", + "O O \n", + " X \n", + " X \n", + "Current turn: O\n", + "O O \n", + " X \n", + " X X \n", + "Current turn: X\n", + "O O \n", + "O X \n", + " X X \n", + "Current turn: O\n", + "O O X \n", + "O X \n", + " X X \n", + "Current turn: X\n", + "O O X \n", + "O X \n", + "O X X \n", + "Эпизод 68, Итоговая награда: -1\n", + "Средняя награда: 0.32\n", + "Current turn: O\n", + " X \n", + " \n", + " \n", + "Current turn: X\n", + " O X \n", + " \n", + " \n", + "Current turn: O\n", + " O X \n", + " X \n", + " \n", + "Current turn: X\n", + " O X \n", + " X \n", + " O \n", + "Current turn: O\n", + " O X \n", + " X \n", + "X O \n", + "Эпизод 69, Итоговая награда: 1\n", + "Средняя награда: 0.33\n", + "Current turn: O\n", + " \n", + "X \n", + " \n", + "Current turn: X\n", + " \n", + "X O \n", + " \n", + "Current turn: O\n", + " X \n", + "X O \n", + " \n", + "Current turn: X\n", + "O X \n", + "X O \n", + " \n", + "Current turn: O\n", + "O X \n", + "X O \n", + " X \n", + "Current turn: X\n", + "O X \n", + "X O O \n", + " X \n", + "Current turn: O\n", + "O X X \n", + "X O O \n", + " X \n", + "Current turn: X\n", + "O X X \n", + "X O O \n", + "O X \n", + "Current turn: O\n", + "O X X \n", + "X O O \n", + "O X X \n", + "Эпизод 70, Итоговая награда: 0\n", + "Средняя награда: 0.33\n", + "Current turn: O\n", + " \n", + " \n", + " X \n", + "Current turn: X\n", + " \n", + " O \n", + " X \n", + "Current turn: O\n", + "X \n", + " O \n", + " X \n", + "Current turn: X\n", + "X \n", + " O \n", + " O X \n", + "Current turn: O\n", + "X \n", + "X O \n", + " O X \n", + "Current turn: X\n", + "X \n", + "X O \n", + "O O X \n", + "Current turn: O\n", + "X X \n", + "X O \n", + "O O X \n", + "Current turn: X\n", + "X O X \n", + "X O \n", + "O O X \n", + "Эпизод 71, Итоговая награда: -1\n", + "Средняя награда: 0.31\n", + "Current turn: O\n", + " X \n", + " \n", + " \n", + "Current turn: X\n", + " X \n", + " O \n", + " \n", + "Current turn: O\n", + " X \n", + " O \n", + " X \n", + "Current turn: X\n", + " X \n", + " O \n", + "O X \n", + "Current turn: O\n", + " X \n", + " O \n", + "O X X \n", + "Current turn: X\n", + " X \n", + "O O \n", + "O X X \n", + "Current turn: O\n", + "X X \n", + "O O \n", + "O X X \n", + "Current turn: X\n", + "X X \n", + "O O O \n", + "O X X \n", + "Эпизод 72, Итоговая награда: -1\n", + "Средняя награда: 0.29\n", + "Current turn: O\n", + " \n", + " X \n", + " \n", + "Current turn: X\n", + " O \n", + " X \n", + " \n", + "Current turn: O\n", + "X O \n", + " X \n", + " \n", + "Current turn: X\n", + "X O \n", + " X \n", + "O \n", + "Current turn: O\n", + "X O \n", + " X \n", + "O X \n", + "Эпизод 73, Итоговая награда: 1\n", + "Средняя награда: 0.30\n", + "Current turn: O\n", + " \n", + " \n", + "X \n", + "Current turn: X\n", + " \n", + "O \n", + "X \n", + "Current turn: O\n", + " \n", + "O X \n", + "X \n", + "Current turn: X\n", + "O \n", + "O X \n", + "X \n", + "Current turn: O\n", + "O \n", + "O X \n", + "X X \n", + "Current turn: X\n", + "O \n", + "O X \n", + "X X O \n", + "Current turn: O\n", + "O X \n", + "O X \n", + "X X O \n", + "Эпизод 74, Итоговая награда: 1\n", + "Средняя награда: 0.31\n", + "Current turn: O\n", + " \n", + " \n", + "X \n", + "Current turn: X\n", + " \n", + " O \n", + "X \n", + "Current turn: O\n", + " \n", + " O \n", + "X X \n", + "Current turn: X\n", + "O \n", + " O \n", + "X X \n", + "Current turn: O\n", + "O X \n", + " O \n", + "X X \n", + "Current turn: X\n", + "O X \n", + " O \n", + "X X O \n", + "Current turn: O\n", + "O X X \n", + " O \n", + "X X O \n", + "Current turn: X\n", + "O X X \n", + "O O \n", + "X X O \n", + "Current turn: O\n", + "O X X \n", + "O X O \n", + "X X O \n", + "Эпизод 75, Итоговая награда: 1\n", + "Средняя награда: 0.32\n", + "Current turn: O\n", + " X \n", + " \n", + " \n", + "Current turn: X\n", + " X \n", + " \n", + "O \n", + "Current turn: O\n", + " X \n", + " X \n", + "O \n", + "Current turn: X\n", + " O X \n", + " X \n", + "O \n", + "Current turn: O\n", + " O X \n", + "X X \n", + "O \n", + "Current turn: X\n", + " O X \n", + "X O X \n", + "O \n", + "Current turn: O\n", + " O X \n", + "X O X \n", + "O X \n", + "Эпизод 76, Итоговая награда: 1\n", + "Средняя награда: 0.33\n", + "Current turn: O\n", + " X \n", + " \n", + " \n", + "Current turn: X\n", + " X \n", + " O \n", + " \n", + "Current turn: O\n", + " X X \n", + " O \n", + " \n", + "Current turn: X\n", + " X X \n", + " O \n", + "O \n", + "Current turn: O\n", + " X X \n", + "X O \n", + "O \n", + "Current turn: X\n", + " X X \n", + "X O \n", + "O O \n", + "Current turn: O\n", + " X X \n", + "X O \n", + "O X O \n", + "Current turn: X\n", + "O X X \n", + "X O \n", + "O X O \n", + "Эпизод 77, Итоговая награда: -1\n", + "Средняя награда: 0.31\n", + "Current turn: O\n", + "X \n", + " \n", + " \n", + "Current turn: X\n", + "X \n", + " \n", + " O \n", + "Current turn: O\n", + "X \n", + " \n", + "X O \n", + "Current turn: X\n", + "X O \n", + " \n", + "X O \n", + "Current turn: O\n", + "X O \n", + " X \n", + "X O \n", + "Current turn: X\n", + "X O \n", + "O X \n", + "X O \n", + "Current turn: O\n", + "X O \n", + "O X \n", + "X X O \n", + "Current turn: X\n", + "X O \n", + "O X O \n", + "X X O \n", + "Эпизод 78, Итоговая награда: -1\n", + "Средняя награда: 0.29\n", + "Current turn: O\n", + "X \n", + " \n", + " \n", + "Current turn: X\n", + "X \n", + " \n", + " O \n", + "Current turn: O\n", + "X \n", + " X \n", + " O \n", + "Current turn: X\n", + "X \n", + " X \n", + "O O \n", + "Current turn: O\n", + "X \n", + "X X \n", + "O O \n", + "Current turn: X\n", + "X O \n", + "X X \n", + "O O \n", + "Current turn: O\n", + "X O \n", + "X X X \n", + "O O \n", + "Эпизод 79, Итоговая награда: 1\n", + "Средняя награда: 0.30\n", + "Current turn: O\n", + " \n", + " \n", + " X \n", + "Current turn: X\n", + " \n", + " O \n", + " X \n", + "Current turn: O\n", + " \n", + " O \n", + "X X \n", + "Current turn: X\n", + " O \n", + " O \n", + "X X \n", + "Current turn: O\n", + " O \n", + " O X \n", + "X X \n", + "Current turn: X\n", + "O O \n", + " O X \n", + "X X \n", + "Current turn: O\n", + "O O \n", + "X O X \n", + "X X \n", + "Current turn: X\n", + "O O O \n", + "X O X \n", + "X X \n", + "Эпизод 80, Итоговая награда: -1\n", + "Средняя награда: 0.29\n", + "Current turn: O\n", + " \n", + " X \n", + " \n", + "Current turn: X\n", + " O \n", + " X \n", + " \n", + "Current turn: O\n", + " O \n", + " X \n", + " X \n", + "Current turn: X\n", + " O \n", + " X O \n", + " X \n", + "Current turn: O\n", + " O X \n", + " X O \n", + " X \n", + "Current turn: X\n", + "O O X \n", + " X O \n", + " X \n", + "Current turn: O\n", + "O O X \n", + "X X O \n", + " X \n", + "Current turn: X\n", + "O O X \n", + "X X O \n", + " O X \n", + "Current turn: O\n", + "O O X \n", + "X X O \n", + "X O X \n", + "Эпизод 81, Итоговая награда: 1\n", + "Средняя награда: 0.30\n", + "Current turn: O\n", + " \n", + " X \n", + " \n", + "Current turn: X\n", + " O \n", + " X \n", + " \n", + "Current turn: O\n", + " O \n", + " X X \n", + " \n", + "Current turn: X\n", + " O \n", + " X X \n", + "O \n", + "Current turn: O\n", + "X O \n", + " X X \n", + "O \n", + "Current turn: X\n", + "X O \n", + " X X \n", + "O O \n", + "Current turn: O\n", + "X O X \n", + " X X \n", + "O O \n", + "Current turn: X\n", + "X O X \n", + "O X X \n", + "O O \n", + "Current turn: O\n", + "X O X \n", + "O X X \n", + "O O X \n", + "Эпизод 82, Итоговая награда: 1\n", + "Средняя награда: 0.30\n", + "Current turn: O\n", + " \n", + " X \n", + " \n", + "Current turn: X\n", + " \n", + " O X \n", + " \n", + "Current turn: O\n", + "X \n", + " O X \n", + " \n", + "Current turn: X\n", + "X \n", + " O X \n", + " O \n", + "Current turn: O\n", + "X \n", + "X O X \n", + " O \n", + "Current turn: X\n", + "X O \n", + "X O X \n", + " O \n", + "Эпизод 83, Итоговая награда: -1\n", + "Средняя награда: 0.29\n", + "Current turn: O\n", + " \n", + " \n", + "X \n", + "Current turn: X\n", + " \n", + " \n", + "X O \n", + "Current turn: O\n", + " \n", + " X \n", + "X O \n", + "Current turn: X\n", + " \n", + "O X \n", + "X O \n", + "Current turn: O\n", + "X \n", + "O X \n", + "X O \n", + "Current turn: X\n", + "X \n", + "O X O \n", + "X O \n", + "Current turn: O\n", + "X X \n", + "O X O \n", + "X O \n", + "Эпизод 84, Итоговая награда: 1\n", + "Средняя награда: 0.30\n", + "Current turn: O\n", + " \n", + "X \n", + " \n", + "Current turn: X\n", + " \n", + "X O \n", + " \n", + "Current turn: O\n", + "X \n", + "X O \n", + " \n", + "Current turn: X\n", + "X \n", + "X O \n", + " O \n", + "Current turn: O\n", + "X X \n", + "X O \n", + " O \n", + "Current turn: X\n", + "X X \n", + "X O \n", + "O O \n", + "Current turn: O\n", + "X X \n", + "X O \n", + "O X O \n", + "Current turn: X\n", + "X X O \n", + "X O \n", + "O X O \n", + "Эпизод 85, Итоговая награда: -1\n", + "Средняя награда: 0.28\n", + "Current turn: O\n", + " X \n", + " \n", + " \n", + "Current turn: X\n", + "O X \n", + " \n", + " \n", + "Current turn: O\n", + "O X \n", + " \n", + " X \n", + "Current turn: X\n", + "O X O \n", + " \n", + " X \n", + "Current turn: O\n", + "O X O \n", + "X \n", + " X \n", + "Current turn: X\n", + "O X O \n", + "X O \n", + " X \n", + "Current turn: O\n", + "O X O \n", + "X O X \n", + " X \n", + "Current turn: X\n", + "O X O \n", + "X O X \n", + "O X \n", + "Эпизод 86, Итоговая награда: -1\n", + "Средняя награда: 0.27\n", + "Current turn: O\n", + "X \n", + " \n", + " \n", + "Current turn: X\n", + "X \n", + " \n", + " O \n", + "Current turn: O\n", + "X \n", + " \n", + " X O \n", + "Current turn: X\n", + "X \n", + " O \n", + " X O \n", + "Current turn: O\n", + "X X \n", + " O \n", + " X O \n", + "Current turn: X\n", + "X X \n", + " O \n", + "O X O \n", + "Current turn: O\n", + "X X X \n", + " O \n", + "O X O \n", + "Эпизод 87, Итоговая награда: 1\n", + "Средняя награда: 0.28\n", + "Current turn: O\n", + " \n", + " X \n", + " \n", + "Current turn: X\n", + " \n", + "O X \n", + " \n", + "Current turn: O\n", + " \n", + "O X X \n", + " \n", + "Current turn: X\n", + " O \n", + "O X X \n", + " \n", + "Current turn: O\n", + "X O \n", + "O X X \n", + " \n", + "Current turn: X\n", + "X O \n", + "O X X \n", + " O \n", + "Current turn: O\n", + "X O \n", + "O X X \n", + "X O \n", + "Current turn: X\n", + "X O O \n", + "O X X \n", + "X O \n", + "Current turn: O\n", + "X O O \n", + "O X X \n", + "X O X \n", + "Эпизод 88, Итоговая награда: 1\n", + "Средняя награда: 0.28\n", + "Current turn: O\n", + " X \n", + " \n", + " \n", + "Current turn: X\n", + " X \n", + " O \n", + " \n", + "Current turn: O\n", + " X \n", + " O \n", + " X \n", + "Current turn: X\n", + " X \n", + " O \n", + " O X \n", + "Current turn: O\n", + " X \n", + "X O \n", + " O X \n", + "Current turn: X\n", + " X \n", + "X O O \n", + " O X \n", + "Current turn: O\n", + " X \n", + "X O O \n", + "X O X \n", + "Current turn: X\n", + " X O \n", + "X O O \n", + "X O X \n", + "Current turn: O\n", + "X X O \n", + "X O O \n", + "X O X \n", + "Эпизод 89, Итоговая награда: 1\n", + "Средняя награда: 0.29\n", + "Current turn: O\n", + " \n", + " X \n", + " \n", + "Current turn: X\n", + " \n", + " X \n", + "O \n", + "Current turn: O\n", + " \n", + " X \n", + "O X \n", + "Current turn: X\n", + " \n", + " O X \n", + "O X \n", + "Current turn: O\n", + " X \n", + " O X \n", + "O X \n", + "Эпизод 90, Итоговая награда: 1\n", + "Средняя награда: 0.30\n", + "Current turn: O\n", + " \n", + " X \n", + " \n", + "Current turn: X\n", + " \n", + "O X \n", + " \n", + "Current turn: O\n", + " \n", + "O X X \n", + " \n", + "Current turn: X\n", + "O \n", + "O X X \n", + " \n", + "Current turn: O\n", + "O \n", + "O X X \n", + " X \n", + "Current turn: X\n", + "O O \n", + "O X X \n", + " X \n", + "Current turn: O\n", + "O O X \n", + "O X X \n", + " X \n", + "Эпизод 91, Итоговая награда: 1\n", + "Средняя награда: 0.31\n", + "Current turn: O\n", + "X \n", + " \n", + " \n", + "Current turn: X\n", + "X \n", + "O \n", + " \n", + "Current turn: O\n", + "X X \n", + "O \n", + " \n", + "Current turn: X\n", + "X X \n", + "O O \n", + " \n", + "Current turn: O\n", + "X X \n", + "O O X \n", + " \n", + "Current turn: X\n", + "X X \n", + "O O X \n", + "O \n", + "Current turn: O\n", + "X X X \n", + "O O X \n", + "O \n", + "Эпизод 92, Итоговая награда: 1\n", + "Средняя награда: 0.32\n", + "Current turn: O\n", + " \n", + " X \n", + " \n", + "Current turn: X\n", + " O \n", + " X \n", + " \n", + "Current turn: O\n", + " O \n", + " X \n", + " X \n", + "Current turn: X\n", + " O \n", + " X \n", + "O X \n", + "Current turn: O\n", + "X O \n", + " X \n", + "O X \n", + "Current turn: X\n", + "X O \n", + " O X \n", + "O X \n", + "Current turn: O\n", + "X O X \n", + " O X \n", + "O X \n", + "Эпизод 93, Итоговая награда: 1\n", + "Средняя награда: 0.32\n", + "Current turn: O\n", + " \n", + "X \n", + " \n", + "Current turn: X\n", + " O \n", + "X \n", + " \n", + "Current turn: O\n", + " O \n", + "X \n", + " X \n", + "Current turn: X\n", + " O \n", + "X O \n", + " X \n", + "Current turn: O\n", + "X O \n", + "X O \n", + " X \n", + "Current turn: X\n", + "X O \n", + "X O \n", + "O X \n", + "Эпизод 94, Итоговая награда: -1\n", + "Средняя награда: 0.31\n", + "Current turn: O\n", + " X \n", + " \n", + " \n", + "Current turn: X\n", + " X \n", + " O \n", + " \n", + "Current turn: O\n", + " X X \n", + " O \n", + " \n", + "Current turn: X\n", + " X X \n", + " O \n", + " O \n", + "Current turn: O\n", + "X X X \n", + " O \n", + " O \n", + "Эпизод 95, Итоговая награда: 1\n", + "Средняя награда: 0.32\n", + "Current turn: O\n", + "X \n", + " \n", + " \n", + "Current turn: X\n", + "X O \n", + " \n", + " \n", + "Current turn: O\n", + "X O \n", + " \n", + " X \n", + "Current turn: X\n", + "X O \n", + " O \n", + " X \n", + "Current turn: O\n", + "X O \n", + " O X \n", + " X \n", + "Current turn: X\n", + "X O \n", + " O X \n", + " X O \n", + "Current turn: O\n", + "X O \n", + "X O X \n", + " X O \n", + "Current turn: X\n", + "X O O \n", + "X O X \n", + " X O \n", + "Current turn: O\n", + "X O O \n", + "X O X \n", + "X X O \n", + "Эпизод 96, Итоговая награда: 1\n", + "Средняя награда: 0.32\n", + "Current turn: O\n", + " \n", + " \n", + " X \n", + "Current turn: X\n", + " \n", + " O \n", + " X \n", + "Current turn: O\n", + " \n", + "X O \n", + " X \n", + "Current turn: X\n", + "O \n", + "X O \n", + " X \n", + "Current turn: O\n", + "O \n", + "X O X \n", + " X \n", + "Current turn: X\n", + "O O \n", + "X O X \n", + " X \n", + "Current turn: O\n", + "O O \n", + "X O X \n", + "X X \n", + "Current turn: X\n", + "O O O \n", + "X O X \n", + "X X \n", + "Эпизод 97, Итоговая награда: -1\n", + "Средняя награда: 0.31\n", + "Current turn: O\n", + " \n", + " X \n", + " \n", + "Current turn: X\n", + " \n", + " X \n", + "O \n", + "Current turn: O\n", + " \n", + "X X \n", + "O \n", + "Current turn: X\n", + " O \n", + "X X \n", + "O \n", + "Current turn: O\n", + " O X \n", + "X X \n", + "O \n", + "Current turn: X\n", + " O X \n", + "X X \n", + "O O \n", + "Current turn: O\n", + "X O X \n", + "X X \n", + "O O \n", + "Current turn: X\n", + "X O X \n", + "X X O \n", + "O O \n", + "Current turn: O\n", + "X O X \n", + "X X O \n", + "O O X \n", + "Эпизод 98, Итоговая награда: 1\n", + "Средняя награда: 0.32\n", + "Current turn: O\n", + " X \n", + " \n", + " \n", + "Current turn: X\n", + " X \n", + " \n", + " O \n", + "Current turn: O\n", + " X \n", + " \n", + "X O \n", + "Current turn: X\n", + " X \n", + " O \n", + "X O \n", + "Current turn: O\n", + "X X \n", + " O \n", + "X O \n", + "Current turn: X\n", + "X X \n", + " O O \n", + "X O \n", + "Current turn: O\n", + "X X \n", + "X O O \n", + "X O \n", + "Эпизод 99, Итоговая награда: 1\n", + "Средняя награда: 0.32\n", + "Current turn: O\n", + " \n", + " X \n", + " \n", + "Current turn: X\n", + " \n", + " X \n", + " O \n", + "Current turn: O\n", + " X \n", + " X \n", + " O \n", + "Current turn: X\n", + "O X \n", + " X \n", + " O \n", + "Current turn: O\n", + "O X \n", + " X \n", + "X O \n", + "Current turn: X\n", + "O X \n", + " X \n", + "X O O \n", + "Current turn: O\n", + "O X \n", + " X X \n", + "X O O \n", + "Current turn: X\n", + "O X \n", + "O X X \n", + "X O O \n", + "Current turn: O\n", + "O X X \n", + "O X X \n", + "X O O \n", + "Эпизод 100, Итоговая награда: 1\n", + "Средняя награда: 0.33\n" + ] + } + ], + "source": [ + "# Основной цикл обучения агента\n", + "\n", + "# Создаём игровую среду\n", + "game_env = TicTacToeEnv()\n", + "\n", + "# Создаём агента, играющего крестиками\n", + "player_agent = GameAgent(token=1)\n", + "\n", + "total_episodes = 100 # Количество эпизодов (игр) для обучения\n", + "reward_history = [] # Для хранения результатов эпизодов\n", + "\n", + "# Переменная для отслеживания символа текущего игрока\n", + "initial_turn = 1\n", + "\n", + "for episode in range(total_episodes):\n", + " # Сбрасываем состояние игры перед началом нового эпизода\n", + " game_state, _ = game_env.reset()\n", + "\n", + " # Общая награда за эпизод\n", + " episode_reward = 0\n", + "\n", + " # Флаг завершения игры\n", + " game_finished = False\n", + " current_turn = initial_turn\n", + "\n", + " # Игровой цикл (до 9 ходов для поля 3x3)\n", + " for move_count in range(9): \n", + " moves = game_env.available_moves() # Получаем доступные ходы\n", + "\n", + " # Если ходов нет, игра завершается\n", + " if not moves:\n", + " break\n", + "\n", + " # Агент делает выбор\n", + " chosen_move = player_agent.select_move(moves) if len(moves) > 1 else moves[0]\n", + "\n", + " # Выполняем ход и обновляем состояние игры\n", + " next_state, reward, game_finished, _ = game_env.step(chosen_move)\n", + " episode_reward += reward\n", + " game_state = next_state\n", + "\n", + " # Отображаем текущее состояние\n", + " game_env.render()\n", + "\n", + " # Если игра завершена, выходим\n", + " if game_finished:\n", + " break\n", + "\n", + " current_turn = -current_turn # Смена игрока\n", + "\n", + " reward_history.append(episode_reward)\n", + "\n", + " # Выводим статистику\n", + " print(f\"Эпизод {episode + 1}, Итоговая награда: {episode_reward}\")\n", + " avg_reward = sum(reward_history) / len(reward_history)\n", + " print(f\"Средняя награда: {avg_reward:.2f}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} -- 2.25.1