{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Перевод среды на gymnasium" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "import gymnasium as gym\n", "from gymnasium import spaces\n", "\n", "class TicTacToeEnv(gym.Env):\n", " metadata = {'render.modes': ['human']}\n", " \n", " symbols = ['O', ' ', 'X']\n", "\n", " def __init__(self):\n", " super().__init__()\n", " self.action_space = spaces.Discrete(9)\n", " self.observation_space = spaces.Discrete(9 * 3 * 2)\n", " self.reset()\n", "\n", " def step(self, action):\n", " finished = False\n", " score = 0\n", "\n", " player, cell = action # player - игрок (1 или -1), cell - номер клетки\n", "\n", " board = self.state['board']\n", " current_cell = board[cell]\n", " current_player = self.state['current_turn']\n", " if current_cell != 0: # Клетка занята\n", " print(f\"Некорректный ход: Клетка {cell} уже занята.\")\n", " finished = True\n", " score = -1 * current_player\n", " elif player != current_player: # Ход сделан не тем игроком\n", " print(f\"Некорректный ход: игрок {player} не на очереди.\")\n", " finished = True\n", " score = -1 * current_player\n", " else:\n", " board[cell] = player\n", " self.state['current_turn'] = -player\n", "\n", " for row in range(3):\n", " # Проверяем строки и столбцы\n", " if (board[row * 3] == player and board[row * 3 + 1] == player and board[row * 3 + 2] == player) or \\\n", " (board[row] == player and board[row + 3] == player and board[row + 6] == player):\n", " score = player\n", " finished = True\n", " break\n", "\n", " # Проверяем диагонали\n", " if (board[0] == player and board[4] == player and board[8] == player) or \\\n", " (board[2] == player and board[4] == player and board[6] == player):\n", " score = player\n", " finished = True\n", " \n", " return self.state, score, finished, {}\n", "\n", " def reset(self):\n", " self.state = {\n", " 'board': [0] * 9, # Поле 3x3\n", " 'current_turn': 1 # Начинает первый игрок\n", " }\n", " return self.state, {}\n", "\n", " def render(self, close=False):\n", " if close:\n", " return\n", " print(\"Current turn:\", self.symbols[self.state['current_turn'] + 1])\n", " for idx in range(9):\n", " print(self.symbols[self.state['board'][idx] + 1], end=\" \")\n", " if (idx % 3) == 2:\n", " print()\n", "\n", " def available_moves(self):\n", " moves = []\n", " for idx in range(9):\n", " if self.state['board'][idx] == 0:\n", " player = self.state['current_turn']\n", " moves.append([player, idx])\n", " return moves" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Агент\n", "Агент - система, которая взаимодействует с окружающей средой, чтобы достичь определенной цели. Задача агента: выработка стратегии, которая максимизирует награду в долгосрочной перспективе.\n", "Роль агента: агент принимает решение, основываясь на текущем состоянии среды и получает обратную свзяь от среды.\n", "Функционал агента: принятие решения - использование алгоритмов или стратегий для дальнейшего принятия решения; обучение - обновление знаний или стратегий основываясь на полученный опыт; интерактивность - адаптация к изменениям в среде." ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "import random\n", "\n", "# Агент, взаимодействующий со средой для выбора стратегий на основе доступных ходов\n", "class GameAgent:\n", " def __init__(self, token):\n", " self.token = token # Символ игрока (1 - X, -1 - O)\n", " \n", " def select_move(self, moves):\n", " return random.choice(moves) # Выбор случайного хода из доступных" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Основной цикл обучения" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Current turn: O\n", " \n", " \n", "X \n", "Current turn: X\n", " \n", " \n", "X O \n", "Current turn: O\n", " \n", "X \n", "X O \n", "Current turn: X\n", " O \n", "X \n", "X O \n", "Current turn: O\n", " O \n", "X X \n", "X O \n", "Current turn: X\n", " O \n", "X X \n", "X O O \n", "Current turn: O\n", "X O \n", "X X \n", "X O O \n", "Эпизод 1, Итоговая награда: 1\n", "Средняя награда: 1.00\n", "Current turn: O\n", " \n", " \n", "X \n", "Current turn: X\n", " \n", " \n", "X O \n", "Current turn: O\n", " \n", " \n", "X O X \n", "Current turn: X\n", " O \n", " \n", "X O X \n", "Current turn: O\n", "X O \n", " \n", "X O X \n", "Current turn: X\n", "X O \n", "O \n", "X O X \n", "Current turn: O\n", "X O \n", "O X \n", "X O X \n", "Current turn: X\n", "X O O \n", "O X \n", "X O X \n", "Current turn: O\n", "X O O \n", "O X X \n", "X O X \n", "Эпизод 2, Итоговая награда: 1\n", "Средняя награда: 1.00\n", "Current turn: O\n", " X \n", " \n", " \n", "Current turn: X\n", " X \n", " \n", " O \n", "Current turn: O\n", " X \n", " \n", "X O \n", "Current turn: X\n", " X \n", " O \n", "X O \n", "Current turn: O\n", " X \n", " X O \n", "X O \n", "Эпизод 3, Итоговая награда: 1\n", "Средняя награда: 1.00\n", "Current turn: O\n", " \n", " X \n", " \n", "Current turn: X\n", " \n", "O X \n", " \n", "Current turn: O\n", " \n", "O X X \n", " \n", "Current turn: X\n", " \n", "O X X \n", "O \n", "Current turn: O\n", " X \n", "O X X \n", "O \n", "Current turn: X\n", "O X \n", "O X X \n", "O \n", "Эпизод 4, Итоговая награда: -1\n", "Средняя награда: 0.50\n", "Current turn: O\n", " X \n", " \n", " \n", "Current turn: X\n", " X \n", " O \n", " \n", "Current turn: O\n", " X \n", " O \n", "X \n", "Current turn: X\n", " X \n", " O \n", "X O \n", "Current turn: O\n", " X \n", " O X \n", "X O \n", "Current turn: X\n", " X \n", "O O X \n", "X O \n", "Current turn: O\n", " X X \n", "O O X \n", "X O \n", "Current turn: X\n", " X X \n", "O O X \n", "X O O \n", "Current turn: O\n", "X X X \n", "O O X \n", "X O O \n", "Эпизод 5, Итоговая награда: 1\n", "Средняя награда: 0.60\n", "Current turn: O\n", " \n", " X \n", " \n", "Current turn: X\n", " \n", " X \n", "O \n", "Current turn: O\n", " \n", "X X \n", "O \n", "Current turn: X\n", " O \n", "X X \n", "O \n", "Current turn: O\n", " O X \n", "X X \n", "O \n", "Current turn: X\n", "O O X \n", "X X \n", "O \n", "Current turn: O\n", "O O X \n", "X X \n", "O X \n", "Current turn: X\n", "O O X \n", "X O X \n", "O X \n", "Current turn: O\n", "O O X \n", "X O X \n", "O X X \n", "Эпизод 6, Итоговая награда: 1\n", "Средняя награда: 0.67\n", "Current turn: O\n", "X \n", " \n", " \n", "Current turn: X\n", "X \n", " O \n", " \n", "Current turn: O\n", "X \n", "X O \n", " \n", "Current turn: X\n", "X O \n", "X O \n", " \n", "Current turn: O\n", "X O \n", "X O \n", "X \n", "Эпизод 7, Итоговая награда: 1\n", "Средняя награда: 0.71\n", "Current turn: O\n", "X \n", " \n", " \n", "Current turn: X\n", "X \n", "O \n", " \n", "Current turn: O\n", "X \n", "O X \n", " \n", "Current turn: X\n", "X O \n", "O X \n", " \n", "Current turn: O\n", "X O X \n", "O X \n", " \n", "Current turn: X\n", "X O X \n", "O X \n", " O \n", "Current turn: O\n", "X O X \n", "O X X \n", " O \n", "Current turn: X\n", "X O X \n", "O X X \n", "O O \n", "Current turn: O\n", "X O X \n", "O X X \n", "O O X \n", "Эпизод 8, Итоговая награда: 1\n", "Средняя награда: 0.75\n", "Current turn: O\n", " \n", " \n", " X \n", "Current turn: X\n", " O \n", " \n", " X \n", "Current turn: O\n", "X O \n", " \n", " X \n", "Current turn: X\n", "X O \n", " O \n", " X \n", "Current turn: O\n", "X O X \n", " O \n", " X \n", "Current turn: X\n", "X O X \n", "O O \n", " X \n", "Current turn: O\n", "X O X \n", "O O \n", " X X \n", "Current turn: X\n", "X O X \n", "O O O \n", " X X \n", "Эпизод 9, Итоговая награда: -1\n", "Средняя награда: 0.56\n", "Current turn: O\n", " X \n", " \n", " \n", "Current turn: X\n", " X \n", " \n", " O \n", "Current turn: O\n", " X X \n", " \n", " O \n", "Current turn: X\n", " X X \n", " \n", "O O \n", "Current turn: O\n", " X X \n", " X \n", "O O \n", "Current turn: X\n", "O X X \n", " X \n", "O O \n", "Current turn: O\n", "O X X \n", " X X \n", "O O \n", "Current turn: X\n", "O X X \n", "O X X \n", "O O \n", "Эпизод 10, Итоговая награда: -1\n", "Средняя награда: 0.40\n", "Current turn: O\n", " \n", " X \n", " \n", "Current turn: X\n", " O \n", " X \n", " \n", "Current turn: O\n", "X O \n", " X \n", " \n", "Current turn: X\n", "X O \n", " X \n", " O \n", "Current turn: O\n", "X O \n", "X X \n", " O \n", "Current turn: X\n", "X O \n", "X X \n", " O O \n", "Current turn: O\n", "X O \n", "X X X \n", " O O \n", "Эпизод 11, Итоговая награда: 1\n", "Средняя награда: 0.45\n", "Current turn: O\n", " \n", " \n", " X \n", "Current turn: X\n", " \n", " \n", "O X \n", "Current turn: O\n", " \n", " X \n", "O X \n", "Current turn: X\n", " \n", " X \n", "O O X \n", "Current turn: O\n", "X \n", " X \n", "O O X \n", "Current turn: X\n", "X O \n", " X \n", "O O X \n", "Current turn: O\n", "X O \n", "X X \n", "O O X \n", "Current turn: X\n", "X O O \n", "X X \n", "O O X \n", "Current turn: O\n", "X O O \n", "X X X \n", "O O X \n", "Эпизод 12, Итоговая награда: 1\n", "Средняя награда: 0.50\n", "Current turn: O\n", " \n", " \n", " X \n", "Current turn: X\n", " \n", "O \n", " X \n", "Current turn: O\n", " X \n", "O \n", " X \n", "Current turn: X\n", "O X \n", "O \n", " X \n", "Current turn: O\n", "O X \n", "O X \n", " X \n", "Current turn: X\n", "O X \n", "O X \n", " O X \n", "Current turn: O\n", "O X X \n", "O X \n", " O X \n", "Current turn: X\n", "O X X \n", "O X \n", "O O X \n", "Эпизод 13, Итоговая награда: -1\n", "Средняя награда: 0.38\n", "Current turn: O\n", " \n", " \n", "X \n", "Current turn: X\n", " \n", " O \n", "X \n", "Current turn: O\n", " \n", "X O \n", "X \n", "Current turn: X\n", "O \n", "X O \n", "X \n", "Current turn: O\n", "O \n", "X X O \n", "X \n", "Current turn: X\n", "O \n", "X X O \n", "X O \n", "Current turn: O\n", "O X \n", "X X O \n", "X O \n", "Current turn: X\n", "O X O \n", "X X O \n", "X O \n", "Эпизод 14, Итоговая награда: -1\n", "Средняя награда: 0.29\n", "Current turn: O\n", " \n", " \n", " X \n", "Current turn: X\n", "O \n", " \n", " X \n", "Current turn: O\n", "O \n", " X \n", " X \n", "Current turn: X\n", "O \n", " X \n", "O X \n", "Current turn: O\n", "O \n", "X X \n", "O X \n", "Current turn: X\n", "O \n", "X X \n", "O X O \n", "Current turn: O\n", "O X \n", "X X \n", "O X O \n", "Current turn: X\n", "O X O \n", "X X \n", "O X O \n", "Current turn: O\n", "O X O \n", "X X X \n", "O X O \n", "Эпизод 15, Итоговая награда: 1\n", "Средняя награда: 0.33\n", "Current turn: O\n", "X \n", " \n", " \n", "Current turn: X\n", "X \n", " O \n", " \n", "Current turn: O\n", "X \n", " O \n", " X \n", "Current turn: X\n", "X \n", " O \n", "O X \n", "Current turn: O\n", "X \n", "X O \n", "O X \n", "Current turn: X\n", "X O \n", "X O \n", "O X \n", "Current turn: O\n", "X O \n", "X O \n", "O X X \n", "Current turn: X\n", "X O O \n", "X O \n", "O X X \n", "Current turn: O\n", "X O O \n", "X X O \n", "O X X \n", "Эпизод 16, Итоговая награда: 1\n", "Средняя награда: 0.38\n", "Current turn: O\n", " \n", "X \n", " \n", "Current turn: X\n", " \n", "X O \n", " \n", "Current turn: O\n", " \n", "X X O \n", " \n", "Current turn: X\n", " \n", "X X O \n", " O \n", "Current turn: O\n", " \n", "X X O \n", "X O \n", "Current turn: X\n", "O \n", "X X O \n", "X O \n", "Current turn: O\n", "O \n", "X X O \n", "X X O \n", "Current turn: X\n", "O O \n", "X X O \n", "X X O \n", "Эпизод 17, Итоговая награда: -1\n", "Средняя награда: 0.29\n", "Current turn: O\n", " \n", " \n", " X \n", "Current turn: X\n", " \n", " \n", "O X \n", "Current turn: O\n", " \n", " \n", "O X X \n", "Current turn: X\n", " O \n", " \n", "O X X \n", "Current turn: O\n", " X O \n", " \n", "O X X \n", "Current turn: X\n", " X O \n", "O \n", "O X X \n", "Current turn: O\n", " X O \n", "O X \n", "O X X \n", "Current turn: X\n", " X O \n", "O O X \n", "O X X \n", "Эпизод 18, Итоговая награда: -1\n", "Средняя награда: 0.22\n", "Current turn: O\n", " X \n", " \n", " \n", "Current turn: X\n", " X \n", " O \n", " \n", "Current turn: O\n", " X \n", " X O \n", " \n", "Current turn: X\n", " X \n", " X O \n", " O \n", "Current turn: O\n", " X X \n", " X O \n", " O \n", "Current turn: X\n", " X X \n", " X O \n", "O O \n", "Current turn: O\n", " X X \n", "X X O \n", "O O \n", "Current turn: X\n", " X X \n", "X X O \n", "O O O \n", "Эпизод 19, Итоговая награда: -1\n", "Средняя награда: 0.16\n", "Current turn: O\n", " X \n", " \n", " \n", "Current turn: X\n", "O X \n", " \n", " \n", "Current turn: O\n", "O X \n", " X \n", " \n", "Current turn: X\n", "O X \n", " X O \n", " \n", "Current turn: O\n", "O X \n", " X O \n", " X \n", "Current turn: X\n", "O X \n", " X O \n", " O X \n", "Current turn: O\n", "O X X \n", " X O \n", " O X \n", "Current turn: X\n", "O X X \n", "O X O \n", " O X \n", "Current turn: O\n", "O X X \n", "O X O \n", "X O X \n", "Эпизод 20, Итоговая награда: 1\n", "Средняя награда: 0.20\n", "Current turn: O\n", " X \n", " \n", " \n", "Current turn: X\n", " X \n", " \n", " O \n", "Current turn: O\n", " X \n", " X \n", " O \n", "Current turn: X\n", " X \n", " X \n", " O O \n", "Current turn: O\n", " X \n", " X X \n", " O O \n", "Current turn: X\n", " X O \n", " X X \n", " O O \n", "Current turn: O\n", " X O \n", " X X \n", "X O O \n", "Current turn: X\n", " X O \n", "O X X \n", "X O O \n", "Current turn: O\n", "X X O \n", "O X X \n", "X O O \n", "Эпизод 21, Итоговая награда: 0\n", "Средняя награда: 0.19\n", "Current turn: O\n", " \n", " X \n", " \n", "Current turn: X\n", " \n", " X \n", " O \n", "Current turn: O\n", " X \n", " X \n", " O \n", "Current turn: X\n", " X \n", "O X \n", " O \n", "Current turn: O\n", "X X \n", "O X \n", " O \n", "Current turn: X\n", "X X \n", "O X \n", "O O \n", "Current turn: O\n", "X X X \n", "O X \n", "O O \n", "Эпизод 22, Итоговая награда: 1\n", "Средняя награда: 0.23\n", "Current turn: O\n", " \n", " \n", " X \n", "Current turn: X\n", " \n", " O \n", " X \n", "Current turn: O\n", " X \n", " O \n", " X \n", "Current turn: X\n", "O X \n", " O \n", " X \n", "Current turn: O\n", "O X \n", "X O \n", " X \n", "Current turn: X\n", "O X \n", "X O \n", " X O \n", "Current turn: O\n", "O X \n", "X X O \n", " X O \n", "Эпизод 23, Итоговая награда: 1\n", "Средняя награда: 0.26\n", "Current turn: O\n", "X \n", " \n", " \n", "Current turn: X\n", "X \n", " \n", " O \n", "Current turn: O\n", "X X \n", " \n", " O \n", "Current turn: X\n", "X X \n", " O \n", " O \n", "Current turn: O\n", "X X \n", "X O \n", " O \n", "Current turn: X\n", "X X \n", "X O O \n", " O \n", "Current turn: O\n", "X X X \n", "X O O \n", " O \n", "Эпизод 24, Итоговая награда: 1\n", "Средняя награда: 0.29\n", "Current turn: O\n", " X \n", " \n", " \n", "Current turn: X\n", " X \n", "O \n", " \n", "Current turn: O\n", " X \n", "O X \n", " \n", "Current turn: X\n", "O X \n", "O X \n", " \n", "Current turn: O\n", "O X \n", "O X \n", "X \n", "Эпизод 25, Итоговая награда: 1\n", "Средняя награда: 0.32\n", "Current turn: O\n", " \n", "X \n", " \n", "Current turn: X\n", " \n", "X \n", "O \n", "Current turn: O\n", " \n", "X \n", "O X \n", "Current turn: X\n", " O \n", "X \n", "O X \n", "Current turn: O\n", " O \n", "X X \n", "O X \n", "Current turn: X\n", " O \n", "X X O \n", "O X \n", "Current turn: O\n", "X O \n", "X X O \n", "O X \n", "Current turn: X\n", "X O \n", "X X O \n", "O X O \n", "Current turn: O\n", "X O X \n", "X X O \n", "O X O \n", "Эпизод 26, Итоговая награда: 0\n", "Средняя награда: 0.31\n", "Current turn: O\n", " X \n", " \n", " \n", "Current turn: X\n", " X \n", " \n", " O \n", "Current turn: O\n", " X \n", " \n", "X O \n", "Current turn: X\n", "O X \n", " \n", "X O \n", "Current turn: O\n", "O X \n", " X \n", "X O \n", "Current turn: X\n", "O O X \n", " X \n", "X O \n", "Current turn: O\n", "O O X \n", "X X \n", "X O \n", "Current turn: X\n", "O O X \n", "X X \n", "X O O \n", "Current turn: O\n", "O O X \n", "X X X \n", "X O O \n", "Эпизод 27, Итоговая награда: 1\n", "Средняя награда: 0.33\n", "Current turn: O\n", " \n", "X \n", " \n", "Current turn: X\n", " \n", "X O \n", " \n", "Current turn: O\n", "X \n", "X O \n", " \n", "Current turn: X\n", "X \n", "X O O \n", " \n", "Current turn: O\n", "X \n", "X O O \n", " X \n", "Current turn: X\n", "X O \n", "X O O \n", " X \n", "Current turn: O\n", "X O \n", "X O O \n", "X X \n", "Эпизод 28, Итоговая награда: 1\n", "Средняя награда: 0.36\n", "Current turn: O\n", " X \n", " \n", " \n", "Current turn: X\n", "O X \n", " \n", " \n", "Current turn: O\n", "O X X \n", " \n", " \n", "Current turn: X\n", "O X X \n", " \n", "O \n", "Current turn: O\n", "O X X \n", " X \n", "O \n", "Current turn: X\n", "O X X \n", " X \n", "O O \n", "Current turn: O\n", "O X X \n", " X X \n", "O O \n", "Current turn: X\n", "O X X \n", " X X \n", "O O O \n", "Эпизод 29, Итоговая награда: -1\n", "Средняя награда: 0.31\n", "Current turn: O\n", " \n", " \n", " X \n", "Current turn: X\n", " \n", " \n", "O X \n", "Current turn: O\n", " \n", " \n", "O X X \n", "Current turn: X\n", " \n", "O \n", "O X X \n", "Current turn: O\n", " \n", "O X \n", "O X X \n", "Current turn: X\n", " \n", "O X O \n", "O X X \n", "Current turn: O\n", " X \n", "O X O \n", "O X X \n", "Эпизод 30, Итоговая награда: 1\n", "Средняя награда: 0.33\n", "Current turn: O\n", " \n", " X \n", " \n", "Current turn: X\n", " \n", " X \n", " O \n", "Current turn: O\n", " \n", " X \n", " O X \n", "Current turn: X\n", " O \n", " X \n", " O X \n", "Current turn: O\n", " O \n", " X \n", "X O X \n", "Current turn: X\n", " O O \n", " X \n", "X O X \n", "Current turn: O\n", " O O \n", "X X \n", "X O X \n", "Current turn: X\n", " O O \n", "X X O \n", "X O X \n", "Current turn: O\n", "X O O \n", "X X O \n", "X O X \n", "Эпизод 31, Итоговая награда: 1\n", "Средняя награда: 0.35\n", "Current turn: O\n", " \n", "X \n", " \n", "Current turn: X\n", " \n", "X \n", "O \n", "Current turn: O\n", " \n", "X X \n", "O \n", "Current turn: X\n", " O \n", "X X \n", "O \n", "Current turn: O\n", " O \n", "X X X \n", "O \n", "Эпизод 32, Итоговая награда: 1\n", "Средняя награда: 0.38\n", "Current turn: O\n", " \n", " \n", "X \n", "Current turn: X\n", "O \n", " \n", "X \n", "Current turn: O\n", "O \n", " X \n", "X \n", "Current turn: X\n", "O \n", " X O \n", "X \n", "Current turn: O\n", "O \n", "X X O \n", "X \n", "Current turn: X\n", "O \n", "X X O \n", "X O \n", "Current turn: O\n", "O X \n", "X X O \n", "X O \n", "Current turn: X\n", "O X \n", "X X O \n", "X O O \n", "Current turn: O\n", "O X X \n", "X X O \n", "X O O \n", "Эпизод 33, Итоговая награда: 1\n", "Средняя награда: 0.39\n", "Current turn: O\n", " \n", " \n", " X \n", "Current turn: X\n", " \n", " \n", "O X \n", "Current turn: O\n", " \n", " \n", "O X X \n", "Current turn: X\n", " \n", "O \n", "O X X \n", "Current turn: O\n", " \n", "O X \n", "O X X \n", "Current turn: X\n", "O \n", "O X \n", "O X X \n", "Эпизод 34, Итоговая награда: -1\n", "Средняя награда: 0.35\n", "Current turn: O\n", " X \n", " \n", " \n", "Current turn: X\n", " X \n", "O \n", " \n", "Current turn: O\n", "X X \n", "O \n", " \n", "Current turn: X\n", "X X \n", "O \n", " O \n", "Current turn: O\n", "X X \n", "O \n", " O X \n", "Current turn: X\n", "X X O \n", "O \n", " O X \n", "Current turn: O\n", "X X O \n", "O \n", "X O X \n", "Current turn: X\n", "X X O \n", "O O \n", "X O X \n", "Current turn: O\n", "X X O \n", "O O X \n", "X O X \n", "Эпизод 35, Итоговая награда: 0\n", "Средняя награда: 0.34\n", "Current turn: O\n", "X \n", " \n", " \n", "Current turn: X\n", "X \n", " \n", " O \n", "Current turn: O\n", "X \n", "X \n", " O \n", "Current turn: X\n", "X \n", "X \n", "O O \n", "Current turn: O\n", "X \n", "X \n", "O O X \n", "Current turn: X\n", "X \n", "X O \n", "O O X \n", "Current turn: O\n", "X X \n", "X O \n", "O O X \n", "Current turn: X\n", "X X \n", "X O O \n", "O O X \n", "Current turn: O\n", "X X X \n", "X O O \n", "O O X \n", "Эпизод 36, Итоговая награда: 1\n", "Средняя награда: 0.36\n", "Current turn: O\n", " X \n", " \n", " \n", "Current turn: X\n", " X \n", " \n", "O \n", "Current turn: O\n", " X \n", " X \n", "O \n", "Current turn: X\n", " X \n", " X \n", "O O \n", "Current turn: O\n", "X X \n", " X \n", "O O \n", "Current turn: X\n", "X X \n", "O X \n", "O O \n", "Current turn: O\n", "X X X \n", "O X \n", "O O \n", "Эпизод 37, Итоговая награда: 1\n", "Средняя награда: 0.38\n", "Current turn: O\n", " X \n", " \n", " \n", "Current turn: X\n", " X \n", " O \n", " \n", "Current turn: O\n", " X \n", " O X \n", " \n", "Current turn: X\n", " X \n", " O X \n", " O \n", "Current turn: O\n", " X \n", " O X \n", "X O \n", "Current turn: X\n", " X \n", "O O X \n", "X O \n", "Current turn: O\n", "X X \n", "O O X \n", "X O \n", "Current turn: X\n", "X X \n", "O O X \n", "X O O \n", "Current turn: O\n", "X X X \n", "O O X \n", "X O O \n", "Эпизод 38, Итоговая награда: 1\n", "Средняя награда: 0.39\n", "Current turn: O\n", " X \n", " \n", " \n", "Current turn: X\n", " X \n", " O \n", " \n", "Current turn: O\n", " X X \n", " O \n", " \n", "Current turn: X\n", " X X \n", " O \n", " O \n", "Current turn: O\n", " X X \n", " O \n", "X O \n", "Current turn: X\n", " X X \n", " O \n", "X O O \n", "Current turn: O\n", " X X \n", " X O \n", "X O O \n", "Эпизод 39, Итоговая награда: 1\n", "Средняя награда: 0.41\n", "Current turn: O\n", " \n", " \n", "X \n", "Current turn: X\n", " \n", " O \n", "X \n", "Current turn: O\n", " \n", " X O \n", "X \n", "Current turn: X\n", " O \n", " X O \n", "X \n", "Current turn: O\n", "X O \n", " X O \n", "X \n", "Current turn: X\n", "X O \n", "O X O \n", "X \n", "Current turn: O\n", "X O \n", "O X O \n", "X X \n", "Current turn: X\n", "X O \n", "O X O \n", "X X O \n", "Current turn: O\n", "X O X \n", "O X O \n", "X X O \n", "Эпизод 40, Итоговая награда: 1\n", "Средняя награда: 0.42\n", "Current turn: O\n", " X \n", " \n", " \n", "Current turn: X\n", " X \n", " O \n", " \n", "Current turn: O\n", " X \n", " O \n", "X \n", "Current turn: X\n", " X \n", " O \n", "X O \n", "Current turn: O\n", " X \n", " X O \n", "X O \n", "Эпизод 41, Итоговая награда: 1\n", "Средняя награда: 0.44\n", "Current turn: O\n", " \n", " \n", "X \n", "Current turn: X\n", "O \n", " \n", "X \n", "Current turn: O\n", "O X \n", " \n", "X \n", "Current turn: X\n", "O X \n", " O \n", "X \n", "Current turn: O\n", "O X \n", " O \n", "X X \n", "Current turn: X\n", "O X \n", " O O \n", "X X \n", "Current turn: O\n", "O X \n", "X O O \n", "X X \n", "Current turn: X\n", "O X O \n", "X O O \n", "X X \n", "Current turn: O\n", "O X O \n", "X O O \n", "X X X \n", "Эпизод 42, Итоговая награда: 1\n", "Средняя награда: 0.45\n", "Current turn: O\n", " \n", " X \n", " \n", "Current turn: X\n", " \n", " X O \n", " \n", "Current turn: O\n", "X \n", " X O \n", " \n", "Current turn: X\n", "X \n", "O X O \n", " \n", "Current turn: O\n", "X \n", "O X O \n", "X \n", "Current turn: X\n", "X O \n", "O X O \n", "X \n", "Current turn: O\n", "X X O \n", "O X O \n", "X \n", "Current turn: X\n", "X X O \n", "O X O \n", "X O \n", "Current turn: O\n", "X X O \n", "O X O \n", "X O X \n", "Эпизод 43, Итоговая награда: 1\n", "Средняя награда: 0.47\n", "Current turn: O\n", " X \n", " \n", " \n", "Current turn: X\n", " X \n", " \n", " O \n", "Current turn: O\n", " X \n", " \n", " O X \n", "Current turn: X\n", " X O \n", " \n", " O X \n", "Current turn: O\n", " X O \n", " X \n", " O X \n", "Current turn: X\n", " X O \n", "O X \n", " O X \n", "Current turn: O\n", "X X O \n", "O X \n", " O X \n", "Эпизод 44, Итоговая награда: 1\n", "Средняя награда: 0.48\n", "Current turn: O\n", " \n", "X \n", " \n", "Current turn: X\n", " \n", "X \n", "O \n", "Current turn: O\n", " \n", "X \n", "O X \n", "Current turn: X\n", " O \n", "X \n", "O X \n", "Current turn: O\n", "X O \n", "X \n", "O X \n", "Current turn: X\n", "X O \n", "X O \n", "O X \n", "Эпизод 45, Итоговая награда: -1\n", "Средняя награда: 0.44\n", "Current turn: O\n", " \n", " X \n", " \n", "Current turn: X\n", " \n", " X \n", " O \n", "Current turn: O\n", " \n", " X \n", " O X \n", "Current turn: X\n", " \n", " O X \n", " O X \n", "Current turn: O\n", " \n", "X O X \n", " O X \n", "Current turn: X\n", " O \n", "X O X \n", " O X \n", "Эпизод 46, Итоговая награда: -1\n", "Средняя награда: 0.41\n", "Current turn: O\n", " \n", " \n", " X \n", "Current turn: X\n", " \n", " \n", " X O \n", "Current turn: O\n", " \n", "X \n", " X O \n", "Current turn: X\n", " O \n", "X \n", " X O \n", "Current turn: O\n", " O \n", "X \n", "X X O \n", "Current turn: X\n", " O \n", "X O \n", "X X O \n", "Current turn: O\n", " O \n", "X O X \n", "X X O \n", "Current turn: X\n", "O O \n", "X O X \n", "X X O \n", "Эпизод 47, Итоговая награда: -1\n", "Средняя награда: 0.38\n", "Current turn: O\n", " \n", " \n", "X \n", "Current turn: X\n", " O \n", " \n", "X \n", "Current turn: O\n", " O \n", " \n", "X X \n", "Current turn: X\n", " O O \n", " \n", "X X \n", "Current turn: O\n", "X O O \n", " \n", "X X \n", "Current turn: X\n", "X O O \n", " O \n", "X X \n", "Current turn: O\n", "X O O \n", " O \n", "X X X \n", "Эпизод 48, Итоговая награда: 1\n", "Средняя награда: 0.40\n", "Current turn: O\n", "X \n", " \n", " \n", "Current turn: X\n", "X O \n", " \n", " \n", "Current turn: O\n", "X O \n", " \n", " X \n", "Current turn: X\n", "X O \n", "O \n", " X \n", "Current turn: O\n", "X O \n", "O \n", "X X \n", "Current turn: X\n", "X O \n", "O O \n", "X X \n", "Current turn: O\n", "X O \n", "O O \n", "X X X \n", "Эпизод 49, Итоговая награда: 1\n", "Средняя награда: 0.41\n", "Current turn: O\n", " \n", "X \n", " \n", "Current turn: X\n", " O \n", "X \n", " \n", "Current turn: O\n", " O \n", "X X \n", " \n", "Current turn: X\n", " O \n", "X X \n", " O \n", "Current turn: O\n", " X O \n", "X X \n", " O \n", "Current turn: X\n", " X O \n", "X X \n", " O O \n", "Current turn: O\n", " X O \n", "X X \n", "X O O \n", "Current turn: X\n", " X O \n", "X O X \n", "X O O \n", "Current turn: O\n", "X X O \n", "X O X \n", "X O O \n", "Эпизод 50, Итоговая награда: 1\n", "Средняя награда: 0.42\n", "Current turn: O\n", " X \n", " \n", " \n", "Current turn: X\n", " X \n", " O \n", " \n", "Current turn: O\n", " X \n", "X O \n", " \n", "Current turn: X\n", " X \n", "X O \n", "O \n", "Current turn: O\n", " X \n", "X O X \n", "O \n", "Current turn: X\n", "O X \n", "X O X \n", "O \n", "Current turn: O\n", "O X \n", "X O X \n", "O X \n", "Current turn: X\n", "O X O \n", "X O X \n", "O X \n", "Эпизод 51, Итоговая награда: -1\n", "Средняя награда: 0.39\n", "Current turn: O\n", " \n", "X \n", " \n", "Current turn: X\n", " \n", "X O \n", " \n", "Current turn: O\n", " \n", "X X O \n", " \n", "Current turn: X\n", " O \n", "X X O \n", " \n", "Current turn: O\n", " X O \n", "X X O \n", " \n", "Current turn: X\n", "O X O \n", "X X O \n", " \n", "Current turn: O\n", "O X O \n", "X X O \n", "X \n", "Current turn: X\n", "O X O \n", "X X O \n", "X O \n", "Current turn: O\n", "O X O \n", "X X O \n", "X O X \n", "Эпизод 52, Итоговая награда: 0\n", "Средняя награда: 0.38\n", "Current turn: O\n", " \n", " X \n", " \n", "Current turn: X\n", " \n", " X \n", " O \n", "Current turn: O\n", " \n", "X X \n", " O \n", "Current turn: X\n", " \n", "X X \n", "O O \n", "Current turn: O\n", " X \n", "X X \n", "O O \n", "Current turn: X\n", "O X \n", "X X \n", "O O \n", "Current turn: O\n", "O X \n", "X X X \n", "O O \n", "Эпизод 53, Итоговая награда: 1\n", "Средняя награда: 0.40\n", "Current turn: O\n", " X \n", " \n", " \n", "Current turn: X\n", " X \n", " O \n", " \n", "Current turn: O\n", " X \n", " X O \n", " \n", "Current turn: X\n", " O X \n", " X O \n", " \n", "Current turn: O\n", " O X \n", "X X O \n", " \n", "Current turn: X\n", "O O X \n", "X X O \n", " \n", "Current turn: O\n", "O O X \n", "X X O \n", "X \n", "Эпизод 54, Итоговая награда: 1\n", "Средняя награда: 0.41\n", "Current turn: O\n", " \n", "X \n", " \n", "Current turn: X\n", " \n", "X O \n", " \n", "Current turn: O\n", " \n", "X O \n", " X \n", "Current turn: X\n", " O \n", "X O \n", " X \n", "Current turn: O\n", " X O \n", "X O \n", " X \n", "Current turn: X\n", " X O \n", "X O \n", " O X \n", "Current turn: O\n", " X O \n", "X O X \n", " O X \n", "Current turn: X\n", " X O \n", "X O X \n", "O O X \n", "Эпизод 55, Итоговая награда: -1\n", "Средняя награда: 0.38\n", "Current turn: O\n", " X \n", " \n", " \n", "Current turn: X\n", " X \n", " \n", "O \n", "Current turn: O\n", " X \n", " \n", "O X \n", "Current turn: X\n", " X \n", "O \n", "O X \n", "Current turn: O\n", " X \n", "O \n", "O X X \n", "Current turn: X\n", " X \n", "O O \n", "O X X \n", "Current turn: O\n", " X \n", "O O X \n", "O X X \n", "Эпизод 56, Итоговая награда: 1\n", "Средняя награда: 0.39\n", "Current turn: O\n", " \n", " X \n", " \n", "Current turn: X\n", " \n", " X O \n", " \n", "Current turn: O\n", " \n", " X O \n", " X \n", "Current turn: X\n", "O \n", " X O \n", " X \n", "Current turn: O\n", "O \n", "X X O \n", " X \n", "Current turn: X\n", "O \n", "X X O \n", "O X \n", "Current turn: O\n", "O \n", "X X O \n", "O X X \n", "Current turn: X\n", "O O \n", "X X O \n", "O X X \n", "Current turn: O\n", "O O X \n", "X X O \n", "O X X \n", "Эпизод 57, Итоговая награда: 0\n", "Средняя награда: 0.39\n", "Current turn: O\n", " \n", " \n", "X \n", "Current turn: X\n", " O \n", " \n", "X \n", "Current turn: O\n", " O \n", " X \n", "X \n", "Current turn: X\n", " O \n", " X O \n", "X \n", "Current turn: O\n", " O X \n", " X O \n", "X \n", "Эпизод 58, Итоговая награда: 1\n", "Средняя награда: 0.40\n", "Current turn: O\n", " \n", "X \n", " \n", "Current turn: X\n", " \n", "X O \n", " \n", "Current turn: O\n", " X \n", "X O \n", " \n", "Current turn: X\n", "O X \n", "X O \n", " \n", "Current turn: O\n", "O X \n", "X O \n", " X \n", "Current turn: X\n", "O X \n", "X O \n", " X O \n", "Current turn: O\n", "O X \n", "X O \n", "X X O \n", "Current turn: X\n", "O X \n", "X O O \n", "X X O \n", "Эпизод 59, Итоговая награда: -1\n", "Средняя награда: 0.37\n", "Current turn: O\n", " \n", " X \n", " \n", "Current turn: X\n", " \n", " O X \n", " \n", "Current turn: O\n", " X \n", " O X \n", " \n", "Current turn: X\n", " X \n", " O X \n", "O \n", "Current turn: O\n", " X X \n", " O X \n", "O \n", "Current turn: X\n", "O X X \n", " O X \n", "O \n", "Current turn: O\n", "O X X \n", " O X \n", "O X \n", "Эпизод 60, Итоговая награда: 1\n", "Средняя награда: 0.38\n", "Current turn: O\n", " \n", "X \n", " \n", "Current turn: X\n", "O \n", "X \n", " \n", "Current turn: O\n", "O \n", "X \n", " X \n", "Current turn: X\n", "O \n", "X \n", "O X \n", "Current turn: O\n", "O \n", "X X \n", "O X \n", "Current turn: X\n", "O \n", "X X \n", "O X O \n", "Current turn: O\n", "O \n", "X X X \n", "O X O \n", "Эпизод 61, Итоговая награда: 1\n", "Средняя награда: 0.39\n", "Current turn: O\n", " \n", "X \n", " \n", "Current turn: X\n", "O \n", "X \n", " \n", "Current turn: O\n", "O X \n", "X \n", " \n", "Current turn: X\n", "O X \n", "X O \n", " \n", "Current turn: O\n", "O X \n", "X X O \n", " \n", "Current turn: X\n", "O X O \n", "X X O \n", " \n", "Current turn: O\n", "O X O \n", "X X O \n", "X \n", "Current turn: X\n", "O X O \n", "X X O \n", "X O \n", "Эпизод 62, Итоговая награда: -1\n", "Средняя награда: 0.37\n", "Current turn: O\n", " X \n", " \n", " \n", "Current turn: X\n", " X \n", " O \n", " \n", "Current turn: O\n", " X \n", "X O \n", " \n", "Current turn: X\n", " X \n", "X O \n", " O \n", "Current turn: O\n", " X X \n", "X O \n", " O \n", "Current turn: X\n", " X X \n", "X O \n", "O O \n", "Current turn: O\n", "X X X \n", "X O \n", "O O \n", "Эпизод 63, Итоговая награда: 1\n", "Средняя награда: 0.38\n", "Current turn: O\n", " \n", "X \n", " \n", "Current turn: X\n", " \n", "X O \n", " \n", "Current turn: O\n", " X \n", "X O \n", " \n", "Current turn: X\n", " X \n", "X O \n", "O \n", "Current turn: O\n", "X X \n", "X O \n", "O \n", "Current turn: X\n", "X X \n", "X O O \n", "O \n", "Current turn: O\n", "X X \n", "X O O \n", "O X \n", "Current turn: X\n", "X O X \n", "X O O \n", "O X \n", "Current turn: O\n", "X O X \n", "X O O \n", "O X X \n", "Эпизод 64, Итоговая награда: 0\n", "Средняя награда: 0.38\n", "Current turn: O\n", " \n", " \n", "X \n", "Current turn: X\n", " \n", " \n", "X O \n", "Current turn: O\n", "X \n", " \n", "X O \n", "Current turn: X\n", "X \n", " O \n", "X O \n", "Current turn: O\n", "X X \n", " O \n", "X O \n", "Current turn: X\n", "X X \n", " O O \n", "X O \n", "Current turn: O\n", "X X \n", " O O \n", "X O X \n", "Current turn: X\n", "X O X \n", " O O \n", "X O X \n", "Эпизод 65, Итоговая награда: -1\n", "Средняя награда: 0.35\n", "Current turn: O\n", " \n", " \n", " X \n", "Current turn: X\n", "O \n", " \n", " X \n", "Current turn: O\n", "O \n", " X \n", " X \n", "Current turn: X\n", "O \n", " O X \n", " X \n", "Current turn: O\n", "O X \n", " O X \n", " X \n", "Эпизод 66, Итоговая награда: 1\n", "Средняя награда: 0.36\n", "Current turn: O\n", " \n", " \n", " X \n", "Current turn: X\n", " \n", " \n", " O X \n", "Current turn: O\n", " \n", "X \n", " O X \n", "Current turn: X\n", " O \n", "X \n", " O X \n", "Current turn: O\n", " O \n", "X \n", "X O X \n", "Current turn: X\n", " O \n", "X O \n", "X O X \n", "Эпизод 67, Итоговая награда: -1\n", "Средняя награда: 0.34\n", "Current turn: O\n", " \n", " X \n", " \n", "Current turn: X\n", " O \n", " X \n", " \n", "Current turn: O\n", " O \n", " X \n", " X \n", "Current turn: X\n", "O O \n", " X \n", " X \n", "Current turn: O\n", "O O \n", " X \n", " X X \n", "Current turn: X\n", "O O \n", "O X \n", " X X \n", "Current turn: O\n", "O O X \n", "O X \n", " X X \n", "Current turn: X\n", "O O X \n", "O X \n", "O X X \n", "Эпизод 68, Итоговая награда: -1\n", "Средняя награда: 0.32\n", "Current turn: O\n", " X \n", " \n", " \n", "Current turn: X\n", " O X \n", " \n", " \n", "Current turn: O\n", " O X \n", " X \n", " \n", "Current turn: X\n", " O X \n", " X \n", " O \n", "Current turn: O\n", " O X \n", " X \n", "X O \n", "Эпизод 69, Итоговая награда: 1\n", "Средняя награда: 0.33\n", "Current turn: O\n", " \n", "X \n", " \n", "Current turn: X\n", " \n", "X O \n", " \n", "Current turn: O\n", " X \n", "X O \n", " \n", "Current turn: X\n", "O X \n", "X O \n", " \n", "Current turn: O\n", "O X \n", "X O \n", " X \n", "Current turn: X\n", "O X \n", "X O O \n", " X \n", "Current turn: O\n", "O X X \n", "X O O \n", " X \n", "Current turn: X\n", "O X X \n", "X O O \n", "O X \n", "Current turn: O\n", "O X X \n", "X O O \n", "O X X \n", "Эпизод 70, Итоговая награда: 0\n", "Средняя награда: 0.33\n", "Current turn: O\n", " \n", " \n", " X \n", "Current turn: X\n", " \n", " O \n", " X \n", "Current turn: O\n", "X \n", " O \n", " X \n", "Current turn: X\n", "X \n", " O \n", " O X \n", "Current turn: O\n", "X \n", "X O \n", " O X \n", "Current turn: X\n", "X \n", "X O \n", "O O X \n", "Current turn: O\n", "X X \n", "X O \n", "O O X \n", "Current turn: X\n", "X O X \n", "X O \n", "O O X \n", "Эпизод 71, Итоговая награда: -1\n", "Средняя награда: 0.31\n", "Current turn: O\n", " X \n", " \n", " \n", "Current turn: X\n", " X \n", " O \n", " \n", "Current turn: O\n", " X \n", " O \n", " X \n", "Current turn: X\n", " X \n", " O \n", "O X \n", "Current turn: O\n", " X \n", " O \n", "O X X \n", "Current turn: X\n", " X \n", "O O \n", "O X X \n", "Current turn: O\n", "X X \n", "O O \n", "O X X \n", "Current turn: X\n", "X X \n", "O O O \n", "O X X \n", "Эпизод 72, Итоговая награда: -1\n", "Средняя награда: 0.29\n", "Current turn: O\n", " \n", " X \n", " \n", "Current turn: X\n", " O \n", " X \n", " \n", "Current turn: O\n", "X O \n", " X \n", " \n", "Current turn: X\n", "X O \n", " X \n", "O \n", "Current turn: O\n", "X O \n", " X \n", "O X \n", "Эпизод 73, Итоговая награда: 1\n", "Средняя награда: 0.30\n", "Current turn: O\n", " \n", " \n", "X \n", "Current turn: X\n", " \n", "O \n", "X \n", "Current turn: O\n", " \n", "O X \n", "X \n", "Current turn: X\n", "O \n", "O X \n", "X \n", "Current turn: O\n", "O \n", "O X \n", "X X \n", "Current turn: X\n", "O \n", "O X \n", "X X O \n", "Current turn: O\n", "O X \n", "O X \n", "X X O \n", "Эпизод 74, Итоговая награда: 1\n", "Средняя награда: 0.31\n", "Current turn: O\n", " \n", " \n", "X \n", "Current turn: X\n", " \n", " O \n", "X \n", "Current turn: O\n", " \n", " O \n", "X X \n", "Current turn: X\n", "O \n", " O \n", "X X \n", "Current turn: O\n", "O X \n", " O \n", "X X \n", "Current turn: X\n", "O X \n", " O \n", "X X O \n", "Current turn: O\n", "O X X \n", " O \n", "X X O \n", "Current turn: X\n", "O X X \n", "O O \n", "X X O \n", "Current turn: O\n", "O X X \n", "O X O \n", "X X O \n", "Эпизод 75, Итоговая награда: 1\n", "Средняя награда: 0.32\n", "Current turn: O\n", " X \n", " \n", " \n", "Current turn: X\n", " X \n", " \n", "O \n", "Current turn: O\n", " X \n", " X \n", "O \n", "Current turn: X\n", " O X \n", " X \n", "O \n", "Current turn: O\n", " O X \n", "X X \n", "O \n", "Current turn: X\n", " O X \n", "X O X \n", "O \n", "Current turn: O\n", " O X \n", "X O X \n", "O X \n", "Эпизод 76, Итоговая награда: 1\n", "Средняя награда: 0.33\n", "Current turn: O\n", " X \n", " \n", " \n", "Current turn: X\n", " X \n", " O \n", " \n", "Current turn: O\n", " X X \n", " O \n", " \n", "Current turn: X\n", " X X \n", " O \n", "O \n", "Current turn: O\n", " X X \n", "X O \n", "O \n", "Current turn: X\n", " X X \n", "X O \n", "O O \n", "Current turn: O\n", " X X \n", "X O \n", "O X O \n", "Current turn: X\n", "O X X \n", "X O \n", "O X O \n", "Эпизод 77, Итоговая награда: -1\n", "Средняя награда: 0.31\n", "Current turn: O\n", "X \n", " \n", " \n", "Current turn: X\n", "X \n", " \n", " O \n", "Current turn: O\n", "X \n", " \n", "X O \n", "Current turn: X\n", "X O \n", " \n", "X O \n", "Current turn: O\n", "X O \n", " X \n", "X O \n", "Current turn: X\n", "X O \n", "O X \n", "X O \n", "Current turn: O\n", "X O \n", "O X \n", "X X O \n", "Current turn: X\n", "X O \n", "O X O \n", "X X O \n", "Эпизод 78, Итоговая награда: -1\n", "Средняя награда: 0.29\n", "Current turn: O\n", "X \n", " \n", " \n", "Current turn: X\n", "X \n", " \n", " O \n", "Current turn: O\n", "X \n", " X \n", " O \n", "Current turn: X\n", "X \n", " X \n", "O O \n", "Current turn: O\n", "X \n", "X X \n", "O O \n", "Current turn: X\n", "X O \n", "X X \n", "O O \n", "Current turn: O\n", "X O \n", "X X X \n", "O O \n", "Эпизод 79, Итоговая награда: 1\n", "Средняя награда: 0.30\n", "Current turn: O\n", " \n", " \n", " X \n", "Current turn: X\n", " \n", " O \n", " X \n", "Current turn: O\n", " \n", " O \n", "X X \n", "Current turn: X\n", " O \n", " O \n", "X X \n", "Current turn: O\n", " O \n", " O X \n", "X X \n", "Current turn: X\n", "O O \n", " O X \n", "X X \n", "Current turn: O\n", "O O \n", "X O X \n", "X X \n", "Current turn: X\n", "O O O \n", "X O X \n", "X X \n", "Эпизод 80, Итоговая награда: -1\n", "Средняя награда: 0.29\n", "Current turn: O\n", " \n", " X \n", " \n", "Current turn: X\n", " O \n", " X \n", " \n", "Current turn: O\n", " O \n", " X \n", " X \n", "Current turn: X\n", " O \n", " X O \n", " X \n", "Current turn: O\n", " O X \n", " X O \n", " X \n", "Current turn: X\n", "O O X \n", " X O \n", " X \n", "Current turn: O\n", "O O X \n", "X X O \n", " X \n", "Current turn: X\n", "O O X \n", "X X O \n", " O X \n", "Current turn: O\n", "O O X \n", "X X O \n", "X O X \n", "Эпизод 81, Итоговая награда: 1\n", "Средняя награда: 0.30\n", "Current turn: O\n", " \n", " X \n", " \n", "Current turn: X\n", " O \n", " X \n", " \n", "Current turn: O\n", " O \n", " X X \n", " \n", "Current turn: X\n", " O \n", " X X \n", "O \n", "Current turn: O\n", "X O \n", " X X \n", "O \n", "Current turn: X\n", "X O \n", " X X \n", "O O \n", "Current turn: O\n", "X O X \n", " X X \n", "O O \n", "Current turn: X\n", "X O X \n", "O X X \n", "O O \n", "Current turn: O\n", "X O X \n", "O X X \n", "O O X \n", "Эпизод 82, Итоговая награда: 1\n", "Средняя награда: 0.30\n", "Current turn: O\n", " \n", " X \n", " \n", "Current turn: X\n", " \n", " O X \n", " \n", "Current turn: O\n", "X \n", " O X \n", " \n", "Current turn: X\n", "X \n", " O X \n", " O \n", "Current turn: O\n", "X \n", "X O X \n", " O \n", "Current turn: X\n", "X O \n", "X O X \n", " O \n", "Эпизод 83, Итоговая награда: -1\n", "Средняя награда: 0.29\n", "Current turn: O\n", " \n", " \n", "X \n", "Current turn: X\n", " \n", " \n", "X O \n", "Current turn: O\n", " \n", " X \n", "X O \n", "Current turn: X\n", " \n", "O X \n", "X O \n", "Current turn: O\n", "X \n", "O X \n", "X O \n", "Current turn: X\n", "X \n", "O X O \n", "X O \n", "Current turn: O\n", "X X \n", "O X O \n", "X O \n", "Эпизод 84, Итоговая награда: 1\n", "Средняя награда: 0.30\n", "Current turn: O\n", " \n", "X \n", " \n", "Current turn: X\n", " \n", "X O \n", " \n", "Current turn: O\n", "X \n", "X O \n", " \n", "Current turn: X\n", "X \n", "X O \n", " O \n", "Current turn: O\n", "X X \n", "X O \n", " O \n", "Current turn: X\n", "X X \n", "X O \n", "O O \n", "Current turn: O\n", "X X \n", "X O \n", "O X O \n", "Current turn: X\n", "X X O \n", "X O \n", "O X O \n", "Эпизод 85, Итоговая награда: -1\n", "Средняя награда: 0.28\n", "Current turn: O\n", " X \n", " \n", " \n", "Current turn: X\n", "O X \n", " \n", " \n", "Current turn: O\n", "O X \n", " \n", " X \n", "Current turn: X\n", "O X O \n", " \n", " X \n", "Current turn: O\n", "O X O \n", "X \n", " X \n", "Current turn: X\n", "O X O \n", "X O \n", " X \n", "Current turn: O\n", "O X O \n", "X O X \n", " X \n", "Current turn: X\n", "O X O \n", "X O X \n", "O X \n", "Эпизод 86, Итоговая награда: -1\n", "Средняя награда: 0.27\n", "Current turn: O\n", "X \n", " \n", " \n", "Current turn: X\n", "X \n", " \n", " O \n", "Current turn: O\n", "X \n", " \n", " X O \n", "Current turn: X\n", "X \n", " O \n", " X O \n", "Current turn: O\n", "X X \n", " O \n", " X O \n", "Current turn: X\n", "X X \n", " O \n", "O X O \n", "Current turn: O\n", "X X X \n", " O \n", "O X O \n", "Эпизод 87, Итоговая награда: 1\n", "Средняя награда: 0.28\n", "Current turn: O\n", " \n", " X \n", " \n", "Current turn: X\n", " \n", "O X \n", " \n", "Current turn: O\n", " \n", "O X X \n", " \n", "Current turn: X\n", " O \n", "O X X \n", " \n", "Current turn: O\n", "X O \n", "O X X \n", " \n", "Current turn: X\n", "X O \n", "O X X \n", " O \n", "Current turn: O\n", "X O \n", "O X X \n", "X O \n", "Current turn: X\n", "X O O \n", "O X X \n", "X O \n", "Current turn: O\n", "X O O \n", "O X X \n", "X O X \n", "Эпизод 88, Итоговая награда: 1\n", "Средняя награда: 0.28\n", "Current turn: O\n", " X \n", " \n", " \n", "Current turn: X\n", " X \n", " O \n", " \n", "Current turn: O\n", " X \n", " O \n", " X \n", "Current turn: X\n", " X \n", " O \n", " O X \n", "Current turn: O\n", " X \n", "X O \n", " O X \n", "Current turn: X\n", " X \n", "X O O \n", " O X \n", "Current turn: O\n", " X \n", "X O O \n", "X O X \n", "Current turn: X\n", " X O \n", "X O O \n", "X O X \n", "Current turn: O\n", "X X O \n", "X O O \n", "X O X \n", "Эпизод 89, Итоговая награда: 1\n", "Средняя награда: 0.29\n", "Current turn: O\n", " \n", " X \n", " \n", "Current turn: X\n", " \n", " X \n", "O \n", "Current turn: O\n", " \n", " X \n", "O X \n", "Current turn: X\n", " \n", " O X \n", "O X \n", "Current turn: O\n", " X \n", " O X \n", "O X \n", "Эпизод 90, Итоговая награда: 1\n", "Средняя награда: 0.30\n", "Current turn: O\n", " \n", " X \n", " \n", "Current turn: X\n", " \n", "O X \n", " \n", "Current turn: O\n", " \n", "O X X \n", " \n", "Current turn: X\n", "O \n", "O X X \n", " \n", "Current turn: O\n", "O \n", "O X X \n", " X \n", "Current turn: X\n", "O O \n", "O X X \n", " X \n", "Current turn: O\n", "O O X \n", "O X X \n", " X \n", "Эпизод 91, Итоговая награда: 1\n", "Средняя награда: 0.31\n", "Current turn: O\n", "X \n", " \n", " \n", "Current turn: X\n", "X \n", "O \n", " \n", "Current turn: O\n", "X X \n", "O \n", " \n", "Current turn: X\n", "X X \n", "O O \n", " \n", "Current turn: O\n", "X X \n", "O O X \n", " \n", "Current turn: X\n", "X X \n", "O O X \n", "O \n", "Current turn: O\n", "X X X \n", "O O X \n", "O \n", "Эпизод 92, Итоговая награда: 1\n", "Средняя награда: 0.32\n", "Current turn: O\n", " \n", " X \n", " \n", "Current turn: X\n", " O \n", " X \n", " \n", "Current turn: O\n", " O \n", " X \n", " X \n", "Current turn: X\n", " O \n", " X \n", "O X \n", "Current turn: O\n", "X O \n", " X \n", "O X \n", "Current turn: X\n", "X O \n", " O X \n", "O X \n", "Current turn: O\n", "X O X \n", " O X \n", "O X \n", "Эпизод 93, Итоговая награда: 1\n", "Средняя награда: 0.32\n", "Current turn: O\n", " \n", "X \n", " \n", "Current turn: X\n", " O \n", "X \n", " \n", "Current turn: O\n", " O \n", "X \n", " X \n", "Current turn: X\n", " O \n", "X O \n", " X \n", "Current turn: O\n", "X O \n", "X O \n", " X \n", "Current turn: X\n", "X O \n", "X O \n", "O X \n", "Эпизод 94, Итоговая награда: -1\n", "Средняя награда: 0.31\n", "Current turn: O\n", " X \n", " \n", " \n", "Current turn: X\n", " X \n", " O \n", " \n", "Current turn: O\n", " X X \n", " O \n", " \n", "Current turn: X\n", " X X \n", " O \n", " O \n", "Current turn: O\n", "X X X \n", " O \n", " O \n", "Эпизод 95, Итоговая награда: 1\n", "Средняя награда: 0.32\n", "Current turn: O\n", "X \n", " \n", " \n", "Current turn: X\n", "X O \n", " \n", " \n", "Current turn: O\n", "X O \n", " \n", " X \n", "Current turn: X\n", "X O \n", " O \n", " X \n", "Current turn: O\n", "X O \n", " O X \n", " X \n", "Current turn: X\n", "X O \n", " O X \n", " X O \n", "Current turn: O\n", "X O \n", "X O X \n", " X O \n", "Current turn: X\n", "X O O \n", "X O X \n", " X O \n", "Current turn: O\n", "X O O \n", "X O X \n", "X X O \n", "Эпизод 96, Итоговая награда: 1\n", "Средняя награда: 0.32\n", "Current turn: O\n", " \n", " \n", " X \n", "Current turn: X\n", " \n", " O \n", " X \n", "Current turn: O\n", " \n", "X O \n", " X \n", "Current turn: X\n", "O \n", "X O \n", " X \n", "Current turn: O\n", "O \n", "X O X \n", " X \n", "Current turn: X\n", "O O \n", "X O X \n", " X \n", "Current turn: O\n", "O O \n", "X O X \n", "X X \n", "Current turn: X\n", "O O O \n", "X O X \n", "X X \n", "Эпизод 97, Итоговая награда: -1\n", "Средняя награда: 0.31\n", "Current turn: O\n", " \n", " X \n", " \n", "Current turn: X\n", " \n", " X \n", "O \n", "Current turn: O\n", " \n", "X X \n", "O \n", "Current turn: X\n", " O \n", "X X \n", "O \n", "Current turn: O\n", " O X \n", "X X \n", "O \n", "Current turn: X\n", " O X \n", "X X \n", "O O \n", "Current turn: O\n", "X O X \n", "X X \n", "O O \n", "Current turn: X\n", "X O X \n", "X X O \n", "O O \n", "Current turn: O\n", "X O X \n", "X X O \n", "O O X \n", "Эпизод 98, Итоговая награда: 1\n", "Средняя награда: 0.32\n", "Current turn: O\n", " X \n", " \n", " \n", "Current turn: X\n", " X \n", " \n", " O \n", "Current turn: O\n", " X \n", " \n", "X O \n", "Current turn: X\n", " X \n", " O \n", "X O \n", "Current turn: O\n", "X X \n", " O \n", "X O \n", "Current turn: X\n", "X X \n", " O O \n", "X O \n", "Current turn: O\n", "X X \n", "X O O \n", "X O \n", "Эпизод 99, Итоговая награда: 1\n", "Средняя награда: 0.32\n", "Current turn: O\n", " \n", " X \n", " \n", "Current turn: X\n", " \n", " X \n", " O \n", "Current turn: O\n", " X \n", " X \n", " O \n", "Current turn: X\n", "O X \n", " X \n", " O \n", "Current turn: O\n", "O X \n", " X \n", "X O \n", "Current turn: X\n", "O X \n", " X \n", "X O O \n", "Current turn: O\n", "O X \n", " X X \n", "X O O \n", "Current turn: X\n", "O X \n", "O X X \n", "X O O \n", "Current turn: O\n", "O X X \n", "O X X \n", "X O O \n", "Эпизод 100, Итоговая награда: 1\n", "Средняя награда: 0.33\n" ] } ], "source": [ "# Основной цикл обучения агента\n", "\n", "# Создаём игровую среду\n", "game_env = TicTacToeEnv()\n", "\n", "# Создаём агента, играющего крестиками\n", "player_agent = GameAgent(token=1)\n", "\n", "total_episodes = 100 # Количество эпизодов (игр) для обучения\n", "reward_history = [] # Для хранения результатов эпизодов\n", "\n", "# Переменная для отслеживания символа текущего игрока\n", "initial_turn = 1\n", "\n", "for episode in range(total_episodes):\n", " # Сбрасываем состояние игры перед началом нового эпизода\n", " game_state, _ = game_env.reset()\n", "\n", " # Общая награда за эпизод\n", " episode_reward = 0\n", "\n", " # Флаг завершения игры\n", " game_finished = False\n", " current_turn = initial_turn\n", "\n", " # Игровой цикл (до 9 ходов для поля 3x3)\n", " for move_count in range(9): \n", " moves = game_env.available_moves() # Получаем доступные ходы\n", "\n", " # Если ходов нет, игра завершается\n", " if not moves:\n", " break\n", "\n", " # Агент делает выбор\n", " chosen_move = player_agent.select_move(moves) if len(moves) > 1 else moves[0]\n", "\n", " # Выполняем ход и обновляем состояние игры\n", " next_state, reward, game_finished, _ = game_env.step(chosen_move)\n", " episode_reward += reward\n", " game_state = next_state\n", "\n", " # Отображаем текущее состояние\n", " game_env.render()\n", "\n", " # Если игра завершена, выходим\n", " if game_finished:\n", " break\n", "\n", " current_turn = -current_turn # Смена игрока\n", "\n", " reward_history.append(episode_reward)\n", "\n", " # Выводим статистику\n", " print(f\"Эпизод {episode + 1}, Итоговая награда: {episode_reward}\")\n", " avg_reward = sum(reward_history) / len(reward_history)\n", " print(f\"Средняя награда: {avg_reward:.2f}\")" ] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.5" } }, "nbformat": 4, "nbformat_minor": 2 }