3452 lines
79 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Перевод среды на gymnasium"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"import gymnasium as gym\n",
"from gymnasium import spaces\n",
"\n",
"class TicTacToeEnv(gym.Env):\n",
" metadata = {'render.modes': ['human']}\n",
" \n",
" symbols = ['O', ' ', 'X']\n",
"\n",
" def __init__(self):\n",
" super().__init__()\n",
" self.action_space = spaces.Discrete(9)\n",
" self.observation_space = spaces.Discrete(9 * 3 * 2)\n",
" self.reset()\n",
"\n",
" def step(self, action):\n",
" finished = False\n",
" score = 0\n",
"\n",
" player, cell = action # player - игрок (1 или -1), cell - номер клетки\n",
"\n",
" board = self.state['board']\n",
" current_cell = board[cell]\n",
" current_player = self.state['current_turn']\n",
" if current_cell != 0: # Клетка занята\n",
" print(f\"Некорректный ход: Клетка {cell} уже занята.\")\n",
" finished = True\n",
" score = -1 * current_player\n",
" elif player != current_player: # Ход сделан не тем игроком\n",
" print(f\"Некорректный ход: игрок {player} не на очереди.\")\n",
" finished = True\n",
" score = -1 * current_player\n",
" else:\n",
" board[cell] = player\n",
" self.state['current_turn'] = -player\n",
"\n",
" for row in range(3):\n",
" # Проверяем строки и столбцы\n",
" if (board[row * 3] == player and board[row * 3 + 1] == player and board[row * 3 + 2] == player) or \\\n",
" (board[row] == player and board[row + 3] == player and board[row + 6] == player):\n",
" score = player\n",
" finished = True\n",
" break\n",
"\n",
" # Проверяем диагонали\n",
" if (board[0] == player and board[4] == player and board[8] == player) or \\\n",
" (board[2] == player and board[4] == player and board[6] == player):\n",
" score = player\n",
" finished = True\n",
" \n",
" return self.state, score, finished, {}\n",
"\n",
" def reset(self):\n",
" self.state = {\n",
" 'board': [0] * 9, # Поле 3x3\n",
" 'current_turn': 1 # Начинает первый игрок\n",
" }\n",
" return self.state, {}\n",
"\n",
" def render(self, close=False):\n",
" if close:\n",
" return\n",
" print(\"Current turn:\", self.symbols[self.state['current_turn'] + 1])\n",
" for idx in range(9):\n",
" print(self.symbols[self.state['board'][idx] + 1], end=\" \")\n",
" if (idx % 3) == 2:\n",
" print()\n",
"\n",
" def available_moves(self):\n",
" moves = []\n",
" for idx in range(9):\n",
" if self.state['board'][idx] == 0:\n",
" player = self.state['current_turn']\n",
" moves.append([player, idx])\n",
" return moves"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Агент\n",
"Агент - система, которая взаимодействует с окружающей средой, чтобы достичь определенной цели. Задача агента: выработка стратегии, которая максимизирует награду в долгосрочной перспективе.\n",
"Роль агента: агент принимает решение, основываясь на текущем состоянии среды и получает обратную свзяь от среды.\n",
"Функционал агента: принятие решения - использование алгоритмов или стратегий для дальнейшего принятия решения; обучение - обновление знаний или стратегий основываясь на полученный опыт; интерактивность - адаптация к изменениям в среде."
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"import random\n",
"\n",
"# Агент, взаимодействующий со средой для выбора стратегий на основе доступных ходов\n",
"class GameAgent:\n",
" def __init__(self, token):\n",
" self.token = token # Символ игрока (1 - X, -1 - O)\n",
" \n",
" def select_move(self, moves):\n",
" return random.choice(moves) # Выбор случайного хода из доступных"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Основной цикл обучения"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Current turn: O\n",
" \n",
" \n",
"X \n",
"Current turn: X\n",
" \n",
" \n",
"X O \n",
"Current turn: O\n",
" \n",
"X \n",
"X O \n",
"Current turn: X\n",
" O \n",
"X \n",
"X O \n",
"Current turn: O\n",
" O \n",
"X X \n",
"X O \n",
"Current turn: X\n",
" O \n",
"X X \n",
"X O O \n",
"Current turn: O\n",
"X O \n",
"X X \n",
"X O O \n",
"Эпизод 1, Итоговая награда: 1\n",
"Средняя награда: 1.00\n",
"Current turn: O\n",
" \n",
" \n",
"X \n",
"Current turn: X\n",
" \n",
" \n",
"X O \n",
"Current turn: O\n",
" \n",
" \n",
"X O X \n",
"Current turn: X\n",
" O \n",
" \n",
"X O X \n",
"Current turn: O\n",
"X O \n",
" \n",
"X O X \n",
"Current turn: X\n",
"X O \n",
"O \n",
"X O X \n",
"Current turn: O\n",
"X O \n",
"O X \n",
"X O X \n",
"Current turn: X\n",
"X O O \n",
"O X \n",
"X O X \n",
"Current turn: O\n",
"X O O \n",
"O X X \n",
"X O X \n",
"Эпизод 2, Итоговая награда: 1\n",
"Средняя награда: 1.00\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" \n",
" O \n",
"Current turn: O\n",
" X \n",
" \n",
"X O \n",
"Current turn: X\n",
" X \n",
" O \n",
"X O \n",
"Current turn: O\n",
" X \n",
" X O \n",
"X O \n",
"Эпизод 3, Итоговая награда: 1\n",
"Средняя награда: 1.00\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" \n",
"O X \n",
" \n",
"Current turn: O\n",
" \n",
"O X X \n",
" \n",
"Current turn: X\n",
" \n",
"O X X \n",
"O \n",
"Current turn: O\n",
" X \n",
"O X X \n",
"O \n",
"Current turn: X\n",
"O X \n",
"O X X \n",
"O \n",
"Эпизод 4, Итоговая награда: -1\n",
"Средняя награда: 0.50\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" O \n",
" \n",
"Current turn: O\n",
" X \n",
" O \n",
"X \n",
"Current turn: X\n",
" X \n",
" O \n",
"X O \n",
"Current turn: O\n",
" X \n",
" O X \n",
"X O \n",
"Current turn: X\n",
" X \n",
"O O X \n",
"X O \n",
"Current turn: O\n",
" X X \n",
"O O X \n",
"X O \n",
"Current turn: X\n",
" X X \n",
"O O X \n",
"X O O \n",
"Current turn: O\n",
"X X X \n",
"O O X \n",
"X O O \n",
"Эпизод 5, Итоговая награда: 1\n",
"Средняя награда: 0.60\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" \n",
" X \n",
"O \n",
"Current turn: O\n",
" \n",
"X X \n",
"O \n",
"Current turn: X\n",
" O \n",
"X X \n",
"O \n",
"Current turn: O\n",
" O X \n",
"X X \n",
"O \n",
"Current turn: X\n",
"O O X \n",
"X X \n",
"O \n",
"Current turn: O\n",
"O O X \n",
"X X \n",
"O X \n",
"Current turn: X\n",
"O O X \n",
"X O X \n",
"O X \n",
"Current turn: O\n",
"O O X \n",
"X O X \n",
"O X X \n",
"Эпизод 6, Итоговая награда: 1\n",
"Средняя награда: 0.67\n",
"Current turn: O\n",
"X \n",
" \n",
" \n",
"Current turn: X\n",
"X \n",
" O \n",
" \n",
"Current turn: O\n",
"X \n",
"X O \n",
" \n",
"Current turn: X\n",
"X O \n",
"X O \n",
" \n",
"Current turn: O\n",
"X O \n",
"X O \n",
"X \n",
"Эпизод 7, Итоговая награда: 1\n",
"Средняя награда: 0.71\n",
"Current turn: O\n",
"X \n",
" \n",
" \n",
"Current turn: X\n",
"X \n",
"O \n",
" \n",
"Current turn: O\n",
"X \n",
"O X \n",
" \n",
"Current turn: X\n",
"X O \n",
"O X \n",
" \n",
"Current turn: O\n",
"X O X \n",
"O X \n",
" \n",
"Current turn: X\n",
"X O X \n",
"O X \n",
" O \n",
"Current turn: O\n",
"X O X \n",
"O X X \n",
" O \n",
"Current turn: X\n",
"X O X \n",
"O X X \n",
"O O \n",
"Current turn: O\n",
"X O X \n",
"O X X \n",
"O O X \n",
"Эпизод 8, Итоговая награда: 1\n",
"Средняя награда: 0.75\n",
"Current turn: O\n",
" \n",
" \n",
" X \n",
"Current turn: X\n",
" O \n",
" \n",
" X \n",
"Current turn: O\n",
"X O \n",
" \n",
" X \n",
"Current turn: X\n",
"X O \n",
" O \n",
" X \n",
"Current turn: O\n",
"X O X \n",
" O \n",
" X \n",
"Current turn: X\n",
"X O X \n",
"O O \n",
" X \n",
"Current turn: O\n",
"X O X \n",
"O O \n",
" X X \n",
"Current turn: X\n",
"X O X \n",
"O O O \n",
" X X \n",
"Эпизод 9, Итоговая награда: -1\n",
"Средняя награда: 0.56\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" \n",
" O \n",
"Current turn: O\n",
" X X \n",
" \n",
" O \n",
"Current turn: X\n",
" X X \n",
" \n",
"O O \n",
"Current turn: O\n",
" X X \n",
" X \n",
"O O \n",
"Current turn: X\n",
"O X X \n",
" X \n",
"O O \n",
"Current turn: O\n",
"O X X \n",
" X X \n",
"O O \n",
"Current turn: X\n",
"O X X \n",
"O X X \n",
"O O \n",
"Эпизод 10, Итоговая награда: -1\n",
"Средняя награда: 0.40\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" O \n",
" X \n",
" \n",
"Current turn: O\n",
"X O \n",
" X \n",
" \n",
"Current turn: X\n",
"X O \n",
" X \n",
" O \n",
"Current turn: O\n",
"X O \n",
"X X \n",
" O \n",
"Current turn: X\n",
"X O \n",
"X X \n",
" O O \n",
"Current turn: O\n",
"X O \n",
"X X X \n",
" O O \n",
"Эпизод 11, Итоговая награда: 1\n",
"Средняя награда: 0.45\n",
"Current turn: O\n",
" \n",
" \n",
" X \n",
"Current turn: X\n",
" \n",
" \n",
"O X \n",
"Current turn: O\n",
" \n",
" X \n",
"O X \n",
"Current turn: X\n",
" \n",
" X \n",
"O O X \n",
"Current turn: O\n",
"X \n",
" X \n",
"O O X \n",
"Current turn: X\n",
"X O \n",
" X \n",
"O O X \n",
"Current turn: O\n",
"X O \n",
"X X \n",
"O O X \n",
"Current turn: X\n",
"X O O \n",
"X X \n",
"O O X \n",
"Current turn: O\n",
"X O O \n",
"X X X \n",
"O O X \n",
"Эпизод 12, Итоговая награда: 1\n",
"Средняя награда: 0.50\n",
"Current turn: O\n",
" \n",
" \n",
" X \n",
"Current turn: X\n",
" \n",
"O \n",
" X \n",
"Current turn: O\n",
" X \n",
"O \n",
" X \n",
"Current turn: X\n",
"O X \n",
"O \n",
" X \n",
"Current turn: O\n",
"O X \n",
"O X \n",
" X \n",
"Current turn: X\n",
"O X \n",
"O X \n",
" O X \n",
"Current turn: O\n",
"O X X \n",
"O X \n",
" O X \n",
"Current turn: X\n",
"O X X \n",
"O X \n",
"O O X \n",
"Эпизод 13, Итоговая награда: -1\n",
"Средняя награда: 0.38\n",
"Current turn: O\n",
" \n",
" \n",
"X \n",
"Current turn: X\n",
" \n",
" O \n",
"X \n",
"Current turn: O\n",
" \n",
"X O \n",
"X \n",
"Current turn: X\n",
"O \n",
"X O \n",
"X \n",
"Current turn: O\n",
"O \n",
"X X O \n",
"X \n",
"Current turn: X\n",
"O \n",
"X X O \n",
"X O \n",
"Current turn: O\n",
"O X \n",
"X X O \n",
"X O \n",
"Current turn: X\n",
"O X O \n",
"X X O \n",
"X O \n",
"Эпизод 14, Итоговая награда: -1\n",
"Средняя награда: 0.29\n",
"Current turn: O\n",
" \n",
" \n",
" X \n",
"Current turn: X\n",
"O \n",
" \n",
" X \n",
"Current turn: O\n",
"O \n",
" X \n",
" X \n",
"Current turn: X\n",
"O \n",
" X \n",
"O X \n",
"Current turn: O\n",
"O \n",
"X X \n",
"O X \n",
"Current turn: X\n",
"O \n",
"X X \n",
"O X O \n",
"Current turn: O\n",
"O X \n",
"X X \n",
"O X O \n",
"Current turn: X\n",
"O X O \n",
"X X \n",
"O X O \n",
"Current turn: O\n",
"O X O \n",
"X X X \n",
"O X O \n",
"Эпизод 15, Итоговая награда: 1\n",
"Средняя награда: 0.33\n",
"Current turn: O\n",
"X \n",
" \n",
" \n",
"Current turn: X\n",
"X \n",
" O \n",
" \n",
"Current turn: O\n",
"X \n",
" O \n",
" X \n",
"Current turn: X\n",
"X \n",
" O \n",
"O X \n",
"Current turn: O\n",
"X \n",
"X O \n",
"O X \n",
"Current turn: X\n",
"X O \n",
"X O \n",
"O X \n",
"Current turn: O\n",
"X O \n",
"X O \n",
"O X X \n",
"Current turn: X\n",
"X O O \n",
"X O \n",
"O X X \n",
"Current turn: O\n",
"X O O \n",
"X X O \n",
"O X X \n",
"Эпизод 16, Итоговая награда: 1\n",
"Средняя награда: 0.38\n",
"Current turn: O\n",
" \n",
"X \n",
" \n",
"Current turn: X\n",
" \n",
"X O \n",
" \n",
"Current turn: O\n",
" \n",
"X X O \n",
" \n",
"Current turn: X\n",
" \n",
"X X O \n",
" O \n",
"Current turn: O\n",
" \n",
"X X O \n",
"X O \n",
"Current turn: X\n",
"O \n",
"X X O \n",
"X O \n",
"Current turn: O\n",
"O \n",
"X X O \n",
"X X O \n",
"Current turn: X\n",
"O O \n",
"X X O \n",
"X X O \n",
"Эпизод 17, Итоговая награда: -1\n",
"Средняя награда: 0.29\n",
"Current turn: O\n",
" \n",
" \n",
" X \n",
"Current turn: X\n",
" \n",
" \n",
"O X \n",
"Current turn: O\n",
" \n",
" \n",
"O X X \n",
"Current turn: X\n",
" O \n",
" \n",
"O X X \n",
"Current turn: O\n",
" X O \n",
" \n",
"O X X \n",
"Current turn: X\n",
" X O \n",
"O \n",
"O X X \n",
"Current turn: O\n",
" X O \n",
"O X \n",
"O X X \n",
"Current turn: X\n",
" X O \n",
"O O X \n",
"O X X \n",
"Эпизод 18, Итоговая награда: -1\n",
"Средняя награда: 0.22\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" O \n",
" \n",
"Current turn: O\n",
" X \n",
" X O \n",
" \n",
"Current turn: X\n",
" X \n",
" X O \n",
" O \n",
"Current turn: O\n",
" X X \n",
" X O \n",
" O \n",
"Current turn: X\n",
" X X \n",
" X O \n",
"O O \n",
"Current turn: O\n",
" X X \n",
"X X O \n",
"O O \n",
"Current turn: X\n",
" X X \n",
"X X O \n",
"O O O \n",
"Эпизод 19, Итоговая награда: -1\n",
"Средняя награда: 0.16\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
"O X \n",
" \n",
" \n",
"Current turn: O\n",
"O X \n",
" X \n",
" \n",
"Current turn: X\n",
"O X \n",
" X O \n",
" \n",
"Current turn: O\n",
"O X \n",
" X O \n",
" X \n",
"Current turn: X\n",
"O X \n",
" X O \n",
" O X \n",
"Current turn: O\n",
"O X X \n",
" X O \n",
" O X \n",
"Current turn: X\n",
"O X X \n",
"O X O \n",
" O X \n",
"Current turn: O\n",
"O X X \n",
"O X O \n",
"X O X \n",
"Эпизод 20, Итоговая награда: 1\n",
"Средняя награда: 0.20\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" \n",
" O \n",
"Current turn: O\n",
" X \n",
" X \n",
" O \n",
"Current turn: X\n",
" X \n",
" X \n",
" O O \n",
"Current turn: O\n",
" X \n",
" X X \n",
" O O \n",
"Current turn: X\n",
" X O \n",
" X X \n",
" O O \n",
"Current turn: O\n",
" X O \n",
" X X \n",
"X O O \n",
"Current turn: X\n",
" X O \n",
"O X X \n",
"X O O \n",
"Current turn: O\n",
"X X O \n",
"O X X \n",
"X O O \n",
"Эпизод 21, Итоговая награда: 0\n",
"Средняя награда: 0.19\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" \n",
" X \n",
" O \n",
"Current turn: O\n",
" X \n",
" X \n",
" O \n",
"Current turn: X\n",
" X \n",
"O X \n",
" O \n",
"Current turn: O\n",
"X X \n",
"O X \n",
" O \n",
"Current turn: X\n",
"X X \n",
"O X \n",
"O O \n",
"Current turn: O\n",
"X X X \n",
"O X \n",
"O O \n",
"Эпизод 22, Итоговая награда: 1\n",
"Средняя награда: 0.23\n",
"Current turn: O\n",
" \n",
" \n",
" X \n",
"Current turn: X\n",
" \n",
" O \n",
" X \n",
"Current turn: O\n",
" X \n",
" O \n",
" X \n",
"Current turn: X\n",
"O X \n",
" O \n",
" X \n",
"Current turn: O\n",
"O X \n",
"X O \n",
" X \n",
"Current turn: X\n",
"O X \n",
"X O \n",
" X O \n",
"Current turn: O\n",
"O X \n",
"X X O \n",
" X O \n",
"Эпизод 23, Итоговая награда: 1\n",
"Средняя награда: 0.26\n",
"Current turn: O\n",
"X \n",
" \n",
" \n",
"Current turn: X\n",
"X \n",
" \n",
" O \n",
"Current turn: O\n",
"X X \n",
" \n",
" O \n",
"Current turn: X\n",
"X X \n",
" O \n",
" O \n",
"Current turn: O\n",
"X X \n",
"X O \n",
" O \n",
"Current turn: X\n",
"X X \n",
"X O O \n",
" O \n",
"Current turn: O\n",
"X X X \n",
"X O O \n",
" O \n",
"Эпизод 24, Итоговая награда: 1\n",
"Средняя награда: 0.29\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
"O \n",
" \n",
"Current turn: O\n",
" X \n",
"O X \n",
" \n",
"Current turn: X\n",
"O X \n",
"O X \n",
" \n",
"Current turn: O\n",
"O X \n",
"O X \n",
"X \n",
"Эпизод 25, Итоговая награда: 1\n",
"Средняя награда: 0.32\n",
"Current turn: O\n",
" \n",
"X \n",
" \n",
"Current turn: X\n",
" \n",
"X \n",
"O \n",
"Current turn: O\n",
" \n",
"X \n",
"O X \n",
"Current turn: X\n",
" O \n",
"X \n",
"O X \n",
"Current turn: O\n",
" O \n",
"X X \n",
"O X \n",
"Current turn: X\n",
" O \n",
"X X O \n",
"O X \n",
"Current turn: O\n",
"X O \n",
"X X O \n",
"O X \n",
"Current turn: X\n",
"X O \n",
"X X O \n",
"O X O \n",
"Current turn: O\n",
"X O X \n",
"X X O \n",
"O X O \n",
"Эпизод 26, Итоговая награда: 0\n",
"Средняя награда: 0.31\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" \n",
" O \n",
"Current turn: O\n",
" X \n",
" \n",
"X O \n",
"Current turn: X\n",
"O X \n",
" \n",
"X O \n",
"Current turn: O\n",
"O X \n",
" X \n",
"X O \n",
"Current turn: X\n",
"O O X \n",
" X \n",
"X O \n",
"Current turn: O\n",
"O O X \n",
"X X \n",
"X O \n",
"Current turn: X\n",
"O O X \n",
"X X \n",
"X O O \n",
"Current turn: O\n",
"O O X \n",
"X X X \n",
"X O O \n",
"Эпизод 27, Итоговая награда: 1\n",
"Средняя награда: 0.33\n",
"Current turn: O\n",
" \n",
"X \n",
" \n",
"Current turn: X\n",
" \n",
"X O \n",
" \n",
"Current turn: O\n",
"X \n",
"X O \n",
" \n",
"Current turn: X\n",
"X \n",
"X O O \n",
" \n",
"Current turn: O\n",
"X \n",
"X O O \n",
" X \n",
"Current turn: X\n",
"X O \n",
"X O O \n",
" X \n",
"Current turn: O\n",
"X O \n",
"X O O \n",
"X X \n",
"Эпизод 28, Итоговая награда: 1\n",
"Средняя награда: 0.36\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
"O X \n",
" \n",
" \n",
"Current turn: O\n",
"O X X \n",
" \n",
" \n",
"Current turn: X\n",
"O X X \n",
" \n",
"O \n",
"Current turn: O\n",
"O X X \n",
" X \n",
"O \n",
"Current turn: X\n",
"O X X \n",
" X \n",
"O O \n",
"Current turn: O\n",
"O X X \n",
" X X \n",
"O O \n",
"Current turn: X\n",
"O X X \n",
" X X \n",
"O O O \n",
"Эпизод 29, Итоговая награда: -1\n",
"Средняя награда: 0.31\n",
"Current turn: O\n",
" \n",
" \n",
" X \n",
"Current turn: X\n",
" \n",
" \n",
"O X \n",
"Current turn: O\n",
" \n",
" \n",
"O X X \n",
"Current turn: X\n",
" \n",
"O \n",
"O X X \n",
"Current turn: O\n",
" \n",
"O X \n",
"O X X \n",
"Current turn: X\n",
" \n",
"O X O \n",
"O X X \n",
"Current turn: O\n",
" X \n",
"O X O \n",
"O X X \n",
"Эпизод 30, Итоговая награда: 1\n",
"Средняя награда: 0.33\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" \n",
" X \n",
" O \n",
"Current turn: O\n",
" \n",
" X \n",
" O X \n",
"Current turn: X\n",
" O \n",
" X \n",
" O X \n",
"Current turn: O\n",
" O \n",
" X \n",
"X O X \n",
"Current turn: X\n",
" O O \n",
" X \n",
"X O X \n",
"Current turn: O\n",
" O O \n",
"X X \n",
"X O X \n",
"Current turn: X\n",
" O O \n",
"X X O \n",
"X O X \n",
"Current turn: O\n",
"X O O \n",
"X X O \n",
"X O X \n",
"Эпизод 31, Итоговая награда: 1\n",
"Средняя награда: 0.35\n",
"Current turn: O\n",
" \n",
"X \n",
" \n",
"Current turn: X\n",
" \n",
"X \n",
"O \n",
"Current turn: O\n",
" \n",
"X X \n",
"O \n",
"Current turn: X\n",
" O \n",
"X X \n",
"O \n",
"Current turn: O\n",
" O \n",
"X X X \n",
"O \n",
"Эпизод 32, Итоговая награда: 1\n",
"Средняя награда: 0.38\n",
"Current turn: O\n",
" \n",
" \n",
"X \n",
"Current turn: X\n",
"O \n",
" \n",
"X \n",
"Current turn: O\n",
"O \n",
" X \n",
"X \n",
"Current turn: X\n",
"O \n",
" X O \n",
"X \n",
"Current turn: O\n",
"O \n",
"X X O \n",
"X \n",
"Current turn: X\n",
"O \n",
"X X O \n",
"X O \n",
"Current turn: O\n",
"O X \n",
"X X O \n",
"X O \n",
"Current turn: X\n",
"O X \n",
"X X O \n",
"X O O \n",
"Current turn: O\n",
"O X X \n",
"X X O \n",
"X O O \n",
"Эпизод 33, Итоговая награда: 1\n",
"Средняя награда: 0.39\n",
"Current turn: O\n",
" \n",
" \n",
" X \n",
"Current turn: X\n",
" \n",
" \n",
"O X \n",
"Current turn: O\n",
" \n",
" \n",
"O X X \n",
"Current turn: X\n",
" \n",
"O \n",
"O X X \n",
"Current turn: O\n",
" \n",
"O X \n",
"O X X \n",
"Current turn: X\n",
"O \n",
"O X \n",
"O X X \n",
"Эпизод 34, Итоговая награда: -1\n",
"Средняя награда: 0.35\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
"O \n",
" \n",
"Current turn: O\n",
"X X \n",
"O \n",
" \n",
"Current turn: X\n",
"X X \n",
"O \n",
" O \n",
"Current turn: O\n",
"X X \n",
"O \n",
" O X \n",
"Current turn: X\n",
"X X O \n",
"O \n",
" O X \n",
"Current turn: O\n",
"X X O \n",
"O \n",
"X O X \n",
"Current turn: X\n",
"X X O \n",
"O O \n",
"X O X \n",
"Current turn: O\n",
"X X O \n",
"O O X \n",
"X O X \n",
"Эпизод 35, Итоговая награда: 0\n",
"Средняя награда: 0.34\n",
"Current turn: O\n",
"X \n",
" \n",
" \n",
"Current turn: X\n",
"X \n",
" \n",
" O \n",
"Current turn: O\n",
"X \n",
"X \n",
" O \n",
"Current turn: X\n",
"X \n",
"X \n",
"O O \n",
"Current turn: O\n",
"X \n",
"X \n",
"O O X \n",
"Current turn: X\n",
"X \n",
"X O \n",
"O O X \n",
"Current turn: O\n",
"X X \n",
"X O \n",
"O O X \n",
"Current turn: X\n",
"X X \n",
"X O O \n",
"O O X \n",
"Current turn: O\n",
"X X X \n",
"X O O \n",
"O O X \n",
"Эпизод 36, Итоговая награда: 1\n",
"Средняя награда: 0.36\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" \n",
"O \n",
"Current turn: O\n",
" X \n",
" X \n",
"O \n",
"Current turn: X\n",
" X \n",
" X \n",
"O O \n",
"Current turn: O\n",
"X X \n",
" X \n",
"O O \n",
"Current turn: X\n",
"X X \n",
"O X \n",
"O O \n",
"Current turn: O\n",
"X X X \n",
"O X \n",
"O O \n",
"Эпизод 37, Итоговая награда: 1\n",
"Средняя награда: 0.38\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" O \n",
" \n",
"Current turn: O\n",
" X \n",
" O X \n",
" \n",
"Current turn: X\n",
" X \n",
" O X \n",
" O \n",
"Current turn: O\n",
" X \n",
" O X \n",
"X O \n",
"Current turn: X\n",
" X \n",
"O O X \n",
"X O \n",
"Current turn: O\n",
"X X \n",
"O O X \n",
"X O \n",
"Current turn: X\n",
"X X \n",
"O O X \n",
"X O O \n",
"Current turn: O\n",
"X X X \n",
"O O X \n",
"X O O \n",
"Эпизод 38, Итоговая награда: 1\n",
"Средняя награда: 0.39\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" O \n",
" \n",
"Current turn: O\n",
" X X \n",
" O \n",
" \n",
"Current turn: X\n",
" X X \n",
" O \n",
" O \n",
"Current turn: O\n",
" X X \n",
" O \n",
"X O \n",
"Current turn: X\n",
" X X \n",
" O \n",
"X O O \n",
"Current turn: O\n",
" X X \n",
" X O \n",
"X O O \n",
"Эпизод 39, Итоговая награда: 1\n",
"Средняя награда: 0.41\n",
"Current turn: O\n",
" \n",
" \n",
"X \n",
"Current turn: X\n",
" \n",
" O \n",
"X \n",
"Current turn: O\n",
" \n",
" X O \n",
"X \n",
"Current turn: X\n",
" O \n",
" X O \n",
"X \n",
"Current turn: O\n",
"X O \n",
" X O \n",
"X \n",
"Current turn: X\n",
"X O \n",
"O X O \n",
"X \n",
"Current turn: O\n",
"X O \n",
"O X O \n",
"X X \n",
"Current turn: X\n",
"X O \n",
"O X O \n",
"X X O \n",
"Current turn: O\n",
"X O X \n",
"O X O \n",
"X X O \n",
"Эпизод 40, Итоговая награда: 1\n",
"Средняя награда: 0.42\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" O \n",
" \n",
"Current turn: O\n",
" X \n",
" O \n",
"X \n",
"Current turn: X\n",
" X \n",
" O \n",
"X O \n",
"Current turn: O\n",
" X \n",
" X O \n",
"X O \n",
"Эпизод 41, Итоговая награда: 1\n",
"Средняя награда: 0.44\n",
"Current turn: O\n",
" \n",
" \n",
"X \n",
"Current turn: X\n",
"O \n",
" \n",
"X \n",
"Current turn: O\n",
"O X \n",
" \n",
"X \n",
"Current turn: X\n",
"O X \n",
" O \n",
"X \n",
"Current turn: O\n",
"O X \n",
" O \n",
"X X \n",
"Current turn: X\n",
"O X \n",
" O O \n",
"X X \n",
"Current turn: O\n",
"O X \n",
"X O O \n",
"X X \n",
"Current turn: X\n",
"O X O \n",
"X O O \n",
"X X \n",
"Current turn: O\n",
"O X O \n",
"X O O \n",
"X X X \n",
"Эпизод 42, Итоговая награда: 1\n",
"Средняя награда: 0.45\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" \n",
" X O \n",
" \n",
"Current turn: O\n",
"X \n",
" X O \n",
" \n",
"Current turn: X\n",
"X \n",
"O X O \n",
" \n",
"Current turn: O\n",
"X \n",
"O X O \n",
"X \n",
"Current turn: X\n",
"X O \n",
"O X O \n",
"X \n",
"Current turn: O\n",
"X X O \n",
"O X O \n",
"X \n",
"Current turn: X\n",
"X X O \n",
"O X O \n",
"X O \n",
"Current turn: O\n",
"X X O \n",
"O X O \n",
"X O X \n",
"Эпизод 43, Итоговая награда: 1\n",
"Средняя награда: 0.47\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" \n",
" O \n",
"Current turn: O\n",
" X \n",
" \n",
" O X \n",
"Current turn: X\n",
" X O \n",
" \n",
" O X \n",
"Current turn: O\n",
" X O \n",
" X \n",
" O X \n",
"Current turn: X\n",
" X O \n",
"O X \n",
" O X \n",
"Current turn: O\n",
"X X O \n",
"O X \n",
" O X \n",
"Эпизод 44, Итоговая награда: 1\n",
"Средняя награда: 0.48\n",
"Current turn: O\n",
" \n",
"X \n",
" \n",
"Current turn: X\n",
" \n",
"X \n",
"O \n",
"Current turn: O\n",
" \n",
"X \n",
"O X \n",
"Current turn: X\n",
" O \n",
"X \n",
"O X \n",
"Current turn: O\n",
"X O \n",
"X \n",
"O X \n",
"Current turn: X\n",
"X O \n",
"X O \n",
"O X \n",
"Эпизод 45, Итоговая награда: -1\n",
"Средняя награда: 0.44\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" \n",
" X \n",
" O \n",
"Current turn: O\n",
" \n",
" X \n",
" O X \n",
"Current turn: X\n",
" \n",
" O X \n",
" O X \n",
"Current turn: O\n",
" \n",
"X O X \n",
" O X \n",
"Current turn: X\n",
" O \n",
"X O X \n",
" O X \n",
"Эпизод 46, Итоговая награда: -1\n",
"Средняя награда: 0.41\n",
"Current turn: O\n",
" \n",
" \n",
" X \n",
"Current turn: X\n",
" \n",
" \n",
" X O \n",
"Current turn: O\n",
" \n",
"X \n",
" X O \n",
"Current turn: X\n",
" O \n",
"X \n",
" X O \n",
"Current turn: O\n",
" O \n",
"X \n",
"X X O \n",
"Current turn: X\n",
" O \n",
"X O \n",
"X X O \n",
"Current turn: O\n",
" O \n",
"X O X \n",
"X X O \n",
"Current turn: X\n",
"O O \n",
"X O X \n",
"X X O \n",
"Эпизод 47, Итоговая награда: -1\n",
"Средняя награда: 0.38\n",
"Current turn: O\n",
" \n",
" \n",
"X \n",
"Current turn: X\n",
" O \n",
" \n",
"X \n",
"Current turn: O\n",
" O \n",
" \n",
"X X \n",
"Current turn: X\n",
" O O \n",
" \n",
"X X \n",
"Current turn: O\n",
"X O O \n",
" \n",
"X X \n",
"Current turn: X\n",
"X O O \n",
" O \n",
"X X \n",
"Current turn: O\n",
"X O O \n",
" O \n",
"X X X \n",
"Эпизод 48, Итоговая награда: 1\n",
"Средняя награда: 0.40\n",
"Current turn: O\n",
"X \n",
" \n",
" \n",
"Current turn: X\n",
"X O \n",
" \n",
" \n",
"Current turn: O\n",
"X O \n",
" \n",
" X \n",
"Current turn: X\n",
"X O \n",
"O \n",
" X \n",
"Current turn: O\n",
"X O \n",
"O \n",
"X X \n",
"Current turn: X\n",
"X O \n",
"O O \n",
"X X \n",
"Current turn: O\n",
"X O \n",
"O O \n",
"X X X \n",
"Эпизод 49, Итоговая награда: 1\n",
"Средняя награда: 0.41\n",
"Current turn: O\n",
" \n",
"X \n",
" \n",
"Current turn: X\n",
" O \n",
"X \n",
" \n",
"Current turn: O\n",
" O \n",
"X X \n",
" \n",
"Current turn: X\n",
" O \n",
"X X \n",
" O \n",
"Current turn: O\n",
" X O \n",
"X X \n",
" O \n",
"Current turn: X\n",
" X O \n",
"X X \n",
" O O \n",
"Current turn: O\n",
" X O \n",
"X X \n",
"X O O \n",
"Current turn: X\n",
" X O \n",
"X O X \n",
"X O O \n",
"Current turn: O\n",
"X X O \n",
"X O X \n",
"X O O \n",
"Эпизод 50, Итоговая награда: 1\n",
"Средняя награда: 0.42\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" O \n",
" \n",
"Current turn: O\n",
" X \n",
"X O \n",
" \n",
"Current turn: X\n",
" X \n",
"X O \n",
"O \n",
"Current turn: O\n",
" X \n",
"X O X \n",
"O \n",
"Current turn: X\n",
"O X \n",
"X O X \n",
"O \n",
"Current turn: O\n",
"O X \n",
"X O X \n",
"O X \n",
"Current turn: X\n",
"O X O \n",
"X O X \n",
"O X \n",
"Эпизод 51, Итоговая награда: -1\n",
"Средняя награда: 0.39\n",
"Current turn: O\n",
" \n",
"X \n",
" \n",
"Current turn: X\n",
" \n",
"X O \n",
" \n",
"Current turn: O\n",
" \n",
"X X O \n",
" \n",
"Current turn: X\n",
" O \n",
"X X O \n",
" \n",
"Current turn: O\n",
" X O \n",
"X X O \n",
" \n",
"Current turn: X\n",
"O X O \n",
"X X O \n",
" \n",
"Current turn: O\n",
"O X O \n",
"X X O \n",
"X \n",
"Current turn: X\n",
"O X O \n",
"X X O \n",
"X O \n",
"Current turn: O\n",
"O X O \n",
"X X O \n",
"X O X \n",
"Эпизод 52, Итоговая награда: 0\n",
"Средняя награда: 0.38\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" \n",
" X \n",
" O \n",
"Current turn: O\n",
" \n",
"X X \n",
" O \n",
"Current turn: X\n",
" \n",
"X X \n",
"O O \n",
"Current turn: O\n",
" X \n",
"X X \n",
"O O \n",
"Current turn: X\n",
"O X \n",
"X X \n",
"O O \n",
"Current turn: O\n",
"O X \n",
"X X X \n",
"O O \n",
"Эпизод 53, Итоговая награда: 1\n",
"Средняя награда: 0.40\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" O \n",
" \n",
"Current turn: O\n",
" X \n",
" X O \n",
" \n",
"Current turn: X\n",
" O X \n",
" X O \n",
" \n",
"Current turn: O\n",
" O X \n",
"X X O \n",
" \n",
"Current turn: X\n",
"O O X \n",
"X X O \n",
" \n",
"Current turn: O\n",
"O O X \n",
"X X O \n",
"X \n",
"Эпизод 54, Итоговая награда: 1\n",
"Средняя награда: 0.41\n",
"Current turn: O\n",
" \n",
"X \n",
" \n",
"Current turn: X\n",
" \n",
"X O \n",
" \n",
"Current turn: O\n",
" \n",
"X O \n",
" X \n",
"Current turn: X\n",
" O \n",
"X O \n",
" X \n",
"Current turn: O\n",
" X O \n",
"X O \n",
" X \n",
"Current turn: X\n",
" X O \n",
"X O \n",
" O X \n",
"Current turn: O\n",
" X O \n",
"X O X \n",
" O X \n",
"Current turn: X\n",
" X O \n",
"X O X \n",
"O O X \n",
"Эпизод 55, Итоговая награда: -1\n",
"Средняя награда: 0.38\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" \n",
"O \n",
"Current turn: O\n",
" X \n",
" \n",
"O X \n",
"Current turn: X\n",
" X \n",
"O \n",
"O X \n",
"Current turn: O\n",
" X \n",
"O \n",
"O X X \n",
"Current turn: X\n",
" X \n",
"O O \n",
"O X X \n",
"Current turn: O\n",
" X \n",
"O O X \n",
"O X X \n",
"Эпизод 56, Итоговая награда: 1\n",
"Средняя награда: 0.39\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" \n",
" X O \n",
" \n",
"Current turn: O\n",
" \n",
" X O \n",
" X \n",
"Current turn: X\n",
"O \n",
" X O \n",
" X \n",
"Current turn: O\n",
"O \n",
"X X O \n",
" X \n",
"Current turn: X\n",
"O \n",
"X X O \n",
"O X \n",
"Current turn: O\n",
"O \n",
"X X O \n",
"O X X \n",
"Current turn: X\n",
"O O \n",
"X X O \n",
"O X X \n",
"Current turn: O\n",
"O O X \n",
"X X O \n",
"O X X \n",
"Эпизод 57, Итоговая награда: 0\n",
"Средняя награда: 0.39\n",
"Current turn: O\n",
" \n",
" \n",
"X \n",
"Current turn: X\n",
" O \n",
" \n",
"X \n",
"Current turn: O\n",
" O \n",
" X \n",
"X \n",
"Current turn: X\n",
" O \n",
" X O \n",
"X \n",
"Current turn: O\n",
" O X \n",
" X O \n",
"X \n",
"Эпизод 58, Итоговая награда: 1\n",
"Средняя награда: 0.40\n",
"Current turn: O\n",
" \n",
"X \n",
" \n",
"Current turn: X\n",
" \n",
"X O \n",
" \n",
"Current turn: O\n",
" X \n",
"X O \n",
" \n",
"Current turn: X\n",
"O X \n",
"X O \n",
" \n",
"Current turn: O\n",
"O X \n",
"X O \n",
" X \n",
"Current turn: X\n",
"O X \n",
"X O \n",
" X O \n",
"Current turn: O\n",
"O X \n",
"X O \n",
"X X O \n",
"Current turn: X\n",
"O X \n",
"X O O \n",
"X X O \n",
"Эпизод 59, Итоговая награда: -1\n",
"Средняя награда: 0.37\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" \n",
" O X \n",
" \n",
"Current turn: O\n",
" X \n",
" O X \n",
" \n",
"Current turn: X\n",
" X \n",
" O X \n",
"O \n",
"Current turn: O\n",
" X X \n",
" O X \n",
"O \n",
"Current turn: X\n",
"O X X \n",
" O X \n",
"O \n",
"Current turn: O\n",
"O X X \n",
" O X \n",
"O X \n",
"Эпизод 60, Итоговая награда: 1\n",
"Средняя награда: 0.38\n",
"Current turn: O\n",
" \n",
"X \n",
" \n",
"Current turn: X\n",
"O \n",
"X \n",
" \n",
"Current turn: O\n",
"O \n",
"X \n",
" X \n",
"Current turn: X\n",
"O \n",
"X \n",
"O X \n",
"Current turn: O\n",
"O \n",
"X X \n",
"O X \n",
"Current turn: X\n",
"O \n",
"X X \n",
"O X O \n",
"Current turn: O\n",
"O \n",
"X X X \n",
"O X O \n",
"Эпизод 61, Итоговая награда: 1\n",
"Средняя награда: 0.39\n",
"Current turn: O\n",
" \n",
"X \n",
" \n",
"Current turn: X\n",
"O \n",
"X \n",
" \n",
"Current turn: O\n",
"O X \n",
"X \n",
" \n",
"Current turn: X\n",
"O X \n",
"X O \n",
" \n",
"Current turn: O\n",
"O X \n",
"X X O \n",
" \n",
"Current turn: X\n",
"O X O \n",
"X X O \n",
" \n",
"Current turn: O\n",
"O X O \n",
"X X O \n",
"X \n",
"Current turn: X\n",
"O X O \n",
"X X O \n",
"X O \n",
"Эпизод 62, Итоговая награда: -1\n",
"Средняя награда: 0.37\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" O \n",
" \n",
"Current turn: O\n",
" X \n",
"X O \n",
" \n",
"Current turn: X\n",
" X \n",
"X O \n",
" O \n",
"Current turn: O\n",
" X X \n",
"X O \n",
" O \n",
"Current turn: X\n",
" X X \n",
"X O \n",
"O O \n",
"Current turn: O\n",
"X X X \n",
"X O \n",
"O O \n",
"Эпизод 63, Итоговая награда: 1\n",
"Средняя награда: 0.38\n",
"Current turn: O\n",
" \n",
"X \n",
" \n",
"Current turn: X\n",
" \n",
"X O \n",
" \n",
"Current turn: O\n",
" X \n",
"X O \n",
" \n",
"Current turn: X\n",
" X \n",
"X O \n",
"O \n",
"Current turn: O\n",
"X X \n",
"X O \n",
"O \n",
"Current turn: X\n",
"X X \n",
"X O O \n",
"O \n",
"Current turn: O\n",
"X X \n",
"X O O \n",
"O X \n",
"Current turn: X\n",
"X O X \n",
"X O O \n",
"O X \n",
"Current turn: O\n",
"X O X \n",
"X O O \n",
"O X X \n",
"Эпизод 64, Итоговая награда: 0\n",
"Средняя награда: 0.38\n",
"Current turn: O\n",
" \n",
" \n",
"X \n",
"Current turn: X\n",
" \n",
" \n",
"X O \n",
"Current turn: O\n",
"X \n",
" \n",
"X O \n",
"Current turn: X\n",
"X \n",
" O \n",
"X O \n",
"Current turn: O\n",
"X X \n",
" O \n",
"X O \n",
"Current turn: X\n",
"X X \n",
" O O \n",
"X O \n",
"Current turn: O\n",
"X X \n",
" O O \n",
"X O X \n",
"Current turn: X\n",
"X O X \n",
" O O \n",
"X O X \n",
"Эпизод 65, Итоговая награда: -1\n",
"Средняя награда: 0.35\n",
"Current turn: O\n",
" \n",
" \n",
" X \n",
"Current turn: X\n",
"O \n",
" \n",
" X \n",
"Current turn: O\n",
"O \n",
" X \n",
" X \n",
"Current turn: X\n",
"O \n",
" O X \n",
" X \n",
"Current turn: O\n",
"O X \n",
" O X \n",
" X \n",
"Эпизод 66, Итоговая награда: 1\n",
"Средняя награда: 0.36\n",
"Current turn: O\n",
" \n",
" \n",
" X \n",
"Current turn: X\n",
" \n",
" \n",
" O X \n",
"Current turn: O\n",
" \n",
"X \n",
" O X \n",
"Current turn: X\n",
" O \n",
"X \n",
" O X \n",
"Current turn: O\n",
" O \n",
"X \n",
"X O X \n",
"Current turn: X\n",
" O \n",
"X O \n",
"X O X \n",
"Эпизод 67, Итоговая награда: -1\n",
"Средняя награда: 0.34\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" O \n",
" X \n",
" \n",
"Current turn: O\n",
" O \n",
" X \n",
" X \n",
"Current turn: X\n",
"O O \n",
" X \n",
" X \n",
"Current turn: O\n",
"O O \n",
" X \n",
" X X \n",
"Current turn: X\n",
"O O \n",
"O X \n",
" X X \n",
"Current turn: O\n",
"O O X \n",
"O X \n",
" X X \n",
"Current turn: X\n",
"O O X \n",
"O X \n",
"O X X \n",
"Эпизод 68, Итоговая награда: -1\n",
"Средняя награда: 0.32\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" O X \n",
" \n",
" \n",
"Current turn: O\n",
" O X \n",
" X \n",
" \n",
"Current turn: X\n",
" O X \n",
" X \n",
" O \n",
"Current turn: O\n",
" O X \n",
" X \n",
"X O \n",
"Эпизод 69, Итоговая награда: 1\n",
"Средняя награда: 0.33\n",
"Current turn: O\n",
" \n",
"X \n",
" \n",
"Current turn: X\n",
" \n",
"X O \n",
" \n",
"Current turn: O\n",
" X \n",
"X O \n",
" \n",
"Current turn: X\n",
"O X \n",
"X O \n",
" \n",
"Current turn: O\n",
"O X \n",
"X O \n",
" X \n",
"Current turn: X\n",
"O X \n",
"X O O \n",
" X \n",
"Current turn: O\n",
"O X X \n",
"X O O \n",
" X \n",
"Current turn: X\n",
"O X X \n",
"X O O \n",
"O X \n",
"Current turn: O\n",
"O X X \n",
"X O O \n",
"O X X \n",
"Эпизод 70, Итоговая награда: 0\n",
"Средняя награда: 0.33\n",
"Current turn: O\n",
" \n",
" \n",
" X \n",
"Current turn: X\n",
" \n",
" O \n",
" X \n",
"Current turn: O\n",
"X \n",
" O \n",
" X \n",
"Current turn: X\n",
"X \n",
" O \n",
" O X \n",
"Current turn: O\n",
"X \n",
"X O \n",
" O X \n",
"Current turn: X\n",
"X \n",
"X O \n",
"O O X \n",
"Current turn: O\n",
"X X \n",
"X O \n",
"O O X \n",
"Current turn: X\n",
"X O X \n",
"X O \n",
"O O X \n",
"Эпизод 71, Итоговая награда: -1\n",
"Средняя награда: 0.31\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" O \n",
" \n",
"Current turn: O\n",
" X \n",
" O \n",
" X \n",
"Current turn: X\n",
" X \n",
" O \n",
"O X \n",
"Current turn: O\n",
" X \n",
" O \n",
"O X X \n",
"Current turn: X\n",
" X \n",
"O O \n",
"O X X \n",
"Current turn: O\n",
"X X \n",
"O O \n",
"O X X \n",
"Current turn: X\n",
"X X \n",
"O O O \n",
"O X X \n",
"Эпизод 72, Итоговая награда: -1\n",
"Средняя награда: 0.29\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" O \n",
" X \n",
" \n",
"Current turn: O\n",
"X O \n",
" X \n",
" \n",
"Current turn: X\n",
"X O \n",
" X \n",
"O \n",
"Current turn: O\n",
"X O \n",
" X \n",
"O X \n",
"Эпизод 73, Итоговая награда: 1\n",
"Средняя награда: 0.30\n",
"Current turn: O\n",
" \n",
" \n",
"X \n",
"Current turn: X\n",
" \n",
"O \n",
"X \n",
"Current turn: O\n",
" \n",
"O X \n",
"X \n",
"Current turn: X\n",
"O \n",
"O X \n",
"X \n",
"Current turn: O\n",
"O \n",
"O X \n",
"X X \n",
"Current turn: X\n",
"O \n",
"O X \n",
"X X O \n",
"Current turn: O\n",
"O X \n",
"O X \n",
"X X O \n",
"Эпизод 74, Итоговая награда: 1\n",
"Средняя награда: 0.31\n",
"Current turn: O\n",
" \n",
" \n",
"X \n",
"Current turn: X\n",
" \n",
" O \n",
"X \n",
"Current turn: O\n",
" \n",
" O \n",
"X X \n",
"Current turn: X\n",
"O \n",
" O \n",
"X X \n",
"Current turn: O\n",
"O X \n",
" O \n",
"X X \n",
"Current turn: X\n",
"O X \n",
" O \n",
"X X O \n",
"Current turn: O\n",
"O X X \n",
" O \n",
"X X O \n",
"Current turn: X\n",
"O X X \n",
"O O \n",
"X X O \n",
"Current turn: O\n",
"O X X \n",
"O X O \n",
"X X O \n",
"Эпизод 75, Итоговая награда: 1\n",
"Средняя награда: 0.32\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" \n",
"O \n",
"Current turn: O\n",
" X \n",
" X \n",
"O \n",
"Current turn: X\n",
" O X \n",
" X \n",
"O \n",
"Current turn: O\n",
" O X \n",
"X X \n",
"O \n",
"Current turn: X\n",
" O X \n",
"X O X \n",
"O \n",
"Current turn: O\n",
" O X \n",
"X O X \n",
"O X \n",
"Эпизод 76, Итоговая награда: 1\n",
"Средняя награда: 0.33\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" O \n",
" \n",
"Current turn: O\n",
" X X \n",
" O \n",
" \n",
"Current turn: X\n",
" X X \n",
" O \n",
"O \n",
"Current turn: O\n",
" X X \n",
"X O \n",
"O \n",
"Current turn: X\n",
" X X \n",
"X O \n",
"O O \n",
"Current turn: O\n",
" X X \n",
"X O \n",
"O X O \n",
"Current turn: X\n",
"O X X \n",
"X O \n",
"O X O \n",
"Эпизод 77, Итоговая награда: -1\n",
"Средняя награда: 0.31\n",
"Current turn: O\n",
"X \n",
" \n",
" \n",
"Current turn: X\n",
"X \n",
" \n",
" O \n",
"Current turn: O\n",
"X \n",
" \n",
"X O \n",
"Current turn: X\n",
"X O \n",
" \n",
"X O \n",
"Current turn: O\n",
"X O \n",
" X \n",
"X O \n",
"Current turn: X\n",
"X O \n",
"O X \n",
"X O \n",
"Current turn: O\n",
"X O \n",
"O X \n",
"X X O \n",
"Current turn: X\n",
"X O \n",
"O X O \n",
"X X O \n",
"Эпизод 78, Итоговая награда: -1\n",
"Средняя награда: 0.29\n",
"Current turn: O\n",
"X \n",
" \n",
" \n",
"Current turn: X\n",
"X \n",
" \n",
" O \n",
"Current turn: O\n",
"X \n",
" X \n",
" O \n",
"Current turn: X\n",
"X \n",
" X \n",
"O O \n",
"Current turn: O\n",
"X \n",
"X X \n",
"O O \n",
"Current turn: X\n",
"X O \n",
"X X \n",
"O O \n",
"Current turn: O\n",
"X O \n",
"X X X \n",
"O O \n",
"Эпизод 79, Итоговая награда: 1\n",
"Средняя награда: 0.30\n",
"Current turn: O\n",
" \n",
" \n",
" X \n",
"Current turn: X\n",
" \n",
" O \n",
" X \n",
"Current turn: O\n",
" \n",
" O \n",
"X X \n",
"Current turn: X\n",
" O \n",
" O \n",
"X X \n",
"Current turn: O\n",
" O \n",
" O X \n",
"X X \n",
"Current turn: X\n",
"O O \n",
" O X \n",
"X X \n",
"Current turn: O\n",
"O O \n",
"X O X \n",
"X X \n",
"Current turn: X\n",
"O O O \n",
"X O X \n",
"X X \n",
"Эпизод 80, Итоговая награда: -1\n",
"Средняя награда: 0.29\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" O \n",
" X \n",
" \n",
"Current turn: O\n",
" O \n",
" X \n",
" X \n",
"Current turn: X\n",
" O \n",
" X O \n",
" X \n",
"Current turn: O\n",
" O X \n",
" X O \n",
" X \n",
"Current turn: X\n",
"O O X \n",
" X O \n",
" X \n",
"Current turn: O\n",
"O O X \n",
"X X O \n",
" X \n",
"Current turn: X\n",
"O O X \n",
"X X O \n",
" O X \n",
"Current turn: O\n",
"O O X \n",
"X X O \n",
"X O X \n",
"Эпизод 81, Итоговая награда: 1\n",
"Средняя награда: 0.30\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" O \n",
" X \n",
" \n",
"Current turn: O\n",
" O \n",
" X X \n",
" \n",
"Current turn: X\n",
" O \n",
" X X \n",
"O \n",
"Current turn: O\n",
"X O \n",
" X X \n",
"O \n",
"Current turn: X\n",
"X O \n",
" X X \n",
"O O \n",
"Current turn: O\n",
"X O X \n",
" X X \n",
"O O \n",
"Current turn: X\n",
"X O X \n",
"O X X \n",
"O O \n",
"Current turn: O\n",
"X O X \n",
"O X X \n",
"O O X \n",
"Эпизод 82, Итоговая награда: 1\n",
"Средняя награда: 0.30\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" \n",
" O X \n",
" \n",
"Current turn: O\n",
"X \n",
" O X \n",
" \n",
"Current turn: X\n",
"X \n",
" O X \n",
" O \n",
"Current turn: O\n",
"X \n",
"X O X \n",
" O \n",
"Current turn: X\n",
"X O \n",
"X O X \n",
" O \n",
"Эпизод 83, Итоговая награда: -1\n",
"Средняя награда: 0.29\n",
"Current turn: O\n",
" \n",
" \n",
"X \n",
"Current turn: X\n",
" \n",
" \n",
"X O \n",
"Current turn: O\n",
" \n",
" X \n",
"X O \n",
"Current turn: X\n",
" \n",
"O X \n",
"X O \n",
"Current turn: O\n",
"X \n",
"O X \n",
"X O \n",
"Current turn: X\n",
"X \n",
"O X O \n",
"X O \n",
"Current turn: O\n",
"X X \n",
"O X O \n",
"X O \n",
"Эпизод 84, Итоговая награда: 1\n",
"Средняя награда: 0.30\n",
"Current turn: O\n",
" \n",
"X \n",
" \n",
"Current turn: X\n",
" \n",
"X O \n",
" \n",
"Current turn: O\n",
"X \n",
"X O \n",
" \n",
"Current turn: X\n",
"X \n",
"X O \n",
" O \n",
"Current turn: O\n",
"X X \n",
"X O \n",
" O \n",
"Current turn: X\n",
"X X \n",
"X O \n",
"O O \n",
"Current turn: O\n",
"X X \n",
"X O \n",
"O X O \n",
"Current turn: X\n",
"X X O \n",
"X O \n",
"O X O \n",
"Эпизод 85, Итоговая награда: -1\n",
"Средняя награда: 0.28\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
"O X \n",
" \n",
" \n",
"Current turn: O\n",
"O X \n",
" \n",
" X \n",
"Current turn: X\n",
"O X O \n",
" \n",
" X \n",
"Current turn: O\n",
"O X O \n",
"X \n",
" X \n",
"Current turn: X\n",
"O X O \n",
"X O \n",
" X \n",
"Current turn: O\n",
"O X O \n",
"X O X \n",
" X \n",
"Current turn: X\n",
"O X O \n",
"X O X \n",
"O X \n",
"Эпизод 86, Итоговая награда: -1\n",
"Средняя награда: 0.27\n",
"Current turn: O\n",
"X \n",
" \n",
" \n",
"Current turn: X\n",
"X \n",
" \n",
" O \n",
"Current turn: O\n",
"X \n",
" \n",
" X O \n",
"Current turn: X\n",
"X \n",
" O \n",
" X O \n",
"Current turn: O\n",
"X X \n",
" O \n",
" X O \n",
"Current turn: X\n",
"X X \n",
" O \n",
"O X O \n",
"Current turn: O\n",
"X X X \n",
" O \n",
"O X O \n",
"Эпизод 87, Итоговая награда: 1\n",
"Средняя награда: 0.28\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" \n",
"O X \n",
" \n",
"Current turn: O\n",
" \n",
"O X X \n",
" \n",
"Current turn: X\n",
" O \n",
"O X X \n",
" \n",
"Current turn: O\n",
"X O \n",
"O X X \n",
" \n",
"Current turn: X\n",
"X O \n",
"O X X \n",
" O \n",
"Current turn: O\n",
"X O \n",
"O X X \n",
"X O \n",
"Current turn: X\n",
"X O O \n",
"O X X \n",
"X O \n",
"Current turn: O\n",
"X O O \n",
"O X X \n",
"X O X \n",
"Эпизод 88, Итоговая награда: 1\n",
"Средняя награда: 0.28\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" O \n",
" \n",
"Current turn: O\n",
" X \n",
" O \n",
" X \n",
"Current turn: X\n",
" X \n",
" O \n",
" O X \n",
"Current turn: O\n",
" X \n",
"X O \n",
" O X \n",
"Current turn: X\n",
" X \n",
"X O O \n",
" O X \n",
"Current turn: O\n",
" X \n",
"X O O \n",
"X O X \n",
"Current turn: X\n",
" X O \n",
"X O O \n",
"X O X \n",
"Current turn: O\n",
"X X O \n",
"X O O \n",
"X O X \n",
"Эпизод 89, Итоговая награда: 1\n",
"Средняя награда: 0.29\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" \n",
" X \n",
"O \n",
"Current turn: O\n",
" \n",
" X \n",
"O X \n",
"Current turn: X\n",
" \n",
" O X \n",
"O X \n",
"Current turn: O\n",
" X \n",
" O X \n",
"O X \n",
"Эпизод 90, Итоговая награда: 1\n",
"Средняя награда: 0.30\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" \n",
"O X \n",
" \n",
"Current turn: O\n",
" \n",
"O X X \n",
" \n",
"Current turn: X\n",
"O \n",
"O X X \n",
" \n",
"Current turn: O\n",
"O \n",
"O X X \n",
" X \n",
"Current turn: X\n",
"O O \n",
"O X X \n",
" X \n",
"Current turn: O\n",
"O O X \n",
"O X X \n",
" X \n",
"Эпизод 91, Итоговая награда: 1\n",
"Средняя награда: 0.31\n",
"Current turn: O\n",
"X \n",
" \n",
" \n",
"Current turn: X\n",
"X \n",
"O \n",
" \n",
"Current turn: O\n",
"X X \n",
"O \n",
" \n",
"Current turn: X\n",
"X X \n",
"O O \n",
" \n",
"Current turn: O\n",
"X X \n",
"O O X \n",
" \n",
"Current turn: X\n",
"X X \n",
"O O X \n",
"O \n",
"Current turn: O\n",
"X X X \n",
"O O X \n",
"O \n",
"Эпизод 92, Итоговая награда: 1\n",
"Средняя награда: 0.32\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" O \n",
" X \n",
" \n",
"Current turn: O\n",
" O \n",
" X \n",
" X \n",
"Current turn: X\n",
" O \n",
" X \n",
"O X \n",
"Current turn: O\n",
"X O \n",
" X \n",
"O X \n",
"Current turn: X\n",
"X O \n",
" O X \n",
"O X \n",
"Current turn: O\n",
"X O X \n",
" O X \n",
"O X \n",
"Эпизод 93, Итоговая награда: 1\n",
"Средняя награда: 0.32\n",
"Current turn: O\n",
" \n",
"X \n",
" \n",
"Current turn: X\n",
" O \n",
"X \n",
" \n",
"Current turn: O\n",
" O \n",
"X \n",
" X \n",
"Current turn: X\n",
" O \n",
"X O \n",
" X \n",
"Current turn: O\n",
"X O \n",
"X O \n",
" X \n",
"Current turn: X\n",
"X O \n",
"X O \n",
"O X \n",
"Эпизод 94, Итоговая награда: -1\n",
"Средняя награда: 0.31\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" O \n",
" \n",
"Current turn: O\n",
" X X \n",
" O \n",
" \n",
"Current turn: X\n",
" X X \n",
" O \n",
" O \n",
"Current turn: O\n",
"X X X \n",
" O \n",
" O \n",
"Эпизод 95, Итоговая награда: 1\n",
"Средняя награда: 0.32\n",
"Current turn: O\n",
"X \n",
" \n",
" \n",
"Current turn: X\n",
"X O \n",
" \n",
" \n",
"Current turn: O\n",
"X O \n",
" \n",
" X \n",
"Current turn: X\n",
"X O \n",
" O \n",
" X \n",
"Current turn: O\n",
"X O \n",
" O X \n",
" X \n",
"Current turn: X\n",
"X O \n",
" O X \n",
" X O \n",
"Current turn: O\n",
"X O \n",
"X O X \n",
" X O \n",
"Current turn: X\n",
"X O O \n",
"X O X \n",
" X O \n",
"Current turn: O\n",
"X O O \n",
"X O X \n",
"X X O \n",
"Эпизод 96, Итоговая награда: 1\n",
"Средняя награда: 0.32\n",
"Current turn: O\n",
" \n",
" \n",
" X \n",
"Current turn: X\n",
" \n",
" O \n",
" X \n",
"Current turn: O\n",
" \n",
"X O \n",
" X \n",
"Current turn: X\n",
"O \n",
"X O \n",
" X \n",
"Current turn: O\n",
"O \n",
"X O X \n",
" X \n",
"Current turn: X\n",
"O O \n",
"X O X \n",
" X \n",
"Current turn: O\n",
"O O \n",
"X O X \n",
"X X \n",
"Current turn: X\n",
"O O O \n",
"X O X \n",
"X X \n",
"Эпизод 97, Итоговая награда: -1\n",
"Средняя награда: 0.31\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" \n",
" X \n",
"O \n",
"Current turn: O\n",
" \n",
"X X \n",
"O \n",
"Current turn: X\n",
" O \n",
"X X \n",
"O \n",
"Current turn: O\n",
" O X \n",
"X X \n",
"O \n",
"Current turn: X\n",
" O X \n",
"X X \n",
"O O \n",
"Current turn: O\n",
"X O X \n",
"X X \n",
"O O \n",
"Current turn: X\n",
"X O X \n",
"X X O \n",
"O O \n",
"Current turn: O\n",
"X O X \n",
"X X O \n",
"O O X \n",
"Эпизод 98, Итоговая награда: 1\n",
"Средняя награда: 0.32\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" \n",
" O \n",
"Current turn: O\n",
" X \n",
" \n",
"X O \n",
"Current turn: X\n",
" X \n",
" O \n",
"X O \n",
"Current turn: O\n",
"X X \n",
" O \n",
"X O \n",
"Current turn: X\n",
"X X \n",
" O O \n",
"X O \n",
"Current turn: O\n",
"X X \n",
"X O O \n",
"X O \n",
"Эпизод 99, Итоговая награда: 1\n",
"Средняя награда: 0.32\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" \n",
" X \n",
" O \n",
"Current turn: O\n",
" X \n",
" X \n",
" O \n",
"Current turn: X\n",
"O X \n",
" X \n",
" O \n",
"Current turn: O\n",
"O X \n",
" X \n",
"X O \n",
"Current turn: X\n",
"O X \n",
" X \n",
"X O O \n",
"Current turn: O\n",
"O X \n",
" X X \n",
"X O O \n",
"Current turn: X\n",
"O X \n",
"O X X \n",
"X O O \n",
"Current turn: O\n",
"O X X \n",
"O X X \n",
"X O O \n",
"Эпизод 100, Итоговая награда: 1\n",
"Средняя награда: 0.33\n"
]
}
],
"source": [
"# Основной цикл обучения агента\n",
"\n",
"# Создаём игровую среду\n",
"game_env = TicTacToeEnv()\n",
"\n",
"# Создаём агента, играющего крестиками\n",
"player_agent = GameAgent(token=1)\n",
"\n",
"total_episodes = 100 # Количество эпизодов (игр) для обучения\n",
"reward_history = [] # Для хранения результатов эпизодов\n",
"\n",
"# Переменная для отслеживания символа текущего игрока\n",
"initial_turn = 1\n",
"\n",
"for episode in range(total_episodes):\n",
" # Сбрасываем состояние игры перед началом нового эпизода\n",
" game_state, _ = game_env.reset()\n",
"\n",
" # Общая награда за эпизод\n",
" episode_reward = 0\n",
"\n",
" # Флаг завершения игры\n",
" game_finished = False\n",
" current_turn = initial_turn\n",
"\n",
" # Игровой цикл (до 9 ходов для поля 3x3)\n",
" for move_count in range(9): \n",
" moves = game_env.available_moves() # Получаем доступные ходы\n",
"\n",
" # Если ходов нет, игра завершается\n",
" if not moves:\n",
" break\n",
"\n",
" # Агент делает выбор\n",
" chosen_move = player_agent.select_move(moves) if len(moves) > 1 else moves[0]\n",
"\n",
" # Выполняем ход и обновляем состояние игры\n",
" next_state, reward, game_finished, _ = game_env.step(chosen_move)\n",
" episode_reward += reward\n",
" game_state = next_state\n",
"\n",
" # Отображаем текущее состояние\n",
" game_env.render()\n",
"\n",
" # Если игра завершена, выходим\n",
" if game_finished:\n",
" break\n",
"\n",
" current_turn = -current_turn # Смена игрока\n",
"\n",
" reward_history.append(episode_reward)\n",
"\n",
" # Выводим статистику\n",
" print(f\"Эпизод {episode + 1}, Итоговая награда: {episode_reward}\")\n",
" avg_reward = sum(reward_history) / len(reward_history)\n",
" print(f\"Средняя награда: {avg_reward:.2f}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}