3452 lines
79 KiB
Plaintext
Raw Permalink Normal View History

2024-12-11 21:11:06 +04:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Перевод среды на gymnasium"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"import gymnasium as gym\n",
"from gymnasium import spaces\n",
"\n",
"\n",
"class TicTacToeEnv(gym.Env):\n",
"    \"\"\"Two-player tic-tac-toe on a flat, row-major board of 9 cells.\n",
"\n",
"    Players are encoded as 1 ('X', moves first) and -1 ('O'); 0 marks an empty cell.\n",
"    The state is a dict with the keys 'board' (list of 9 ints) and 'current_turn'.\n",
"\n",
"    Note: step() keeps this notebook's 4-tuple return (state, score, finished, info)\n",
"    instead of gymnasium's 5-tuple so that existing callers keep working.\n",
"    \"\"\"\n",
"\n",
"    # gymnasium looks up the supported render modes under the 'render_modes' key.\n",
"    metadata = {'render_modes': ['human']}\n",
"\n",
"    # Indexed by cell value + 1: -1 -> 'O', 0 -> ' ', 1 -> 'X'.\n",
"    symbols = ['O', ' ', 'X']\n",
"\n",
"    # Index triples that make a winning line: rows, columns, then both diagonals.\n",
"    _WIN_LINES = (\n",
"        (0, 1, 2), (3, 4, 5), (6, 7, 8),\n",
"        (0, 3, 6), (1, 4, 7), (2, 5, 8),\n",
"        (0, 4, 8), (2, 4, 6),\n",
"    )\n",
"\n",
"    def __init__(self):\n",
"        super().__init__()\n",
"        self.action_space = spaces.Discrete(9)\n",
"        # NOTE(review): reset()/step() return a dict, not an int from this space - confirm intent.\n",
"        self.observation_space = spaces.Discrete(9 * 3 * 2)\n",
"        self.reset()\n",
"\n",
"    def step(self, action):\n",
"        \"\"\"Apply one move and report the outcome.\n",
"\n",
"        Args:\n",
"            action: pair (player, cell); player is 1 or -1, cell is a board index 0..8.\n",
"\n",
"        Returns:\n",
"            Tuple (state, score, finished, info). score is the winner's id (1 or -1)\n",
"            when the move completes a line and 0 otherwise, including draws. An illegal\n",
"            move (cell off the board, cell occupied, or wrong player) ends the episode\n",
"            with score equal to minus the id of the player whose turn it is.\n",
"        \"\"\"\n",
"        finished = False\n",
"        score = 0\n",
"\n",
"        player, cell = action\n",
"\n",
"        board = self.state['board']\n",
"        current_player = self.state['current_turn']\n",
"        if not 0 <= cell < len(board):\n",
"            # Reject explicitly: a negative index would otherwise wrap around to another cell.\n",
"            print(f\"Некорректный ход: клетки {cell} нет на поле.\")\n",
"            finished = True\n",
"            score = -1 * current_player\n",
"        elif board[cell] != 0:  # cell is already taken\n",
"            print(f\"Некорректный ход: Клетка {cell} уже занята.\")\n",
"            finished = True\n",
"            score = -1 * current_player\n",
"        elif player != current_player:  # move made out of turn\n",
"            print(f\"Некорректный ход: игрок {player} не на очереди.\")\n",
"            finished = True\n",
"            score = -1 * current_player\n",
"        else:\n",
"            board[cell] = player\n",
"            self.state['current_turn'] = -player\n",
"\n",
"            if any(all(board[i] == player for i in line) for line in self._WIN_LINES):\n",
"                score = player\n",
"                finished = True\n",
"            elif 0 not in board:\n",
"                # Board is full and nobody won: the game ends in a draw (score stays 0).\n",
"                finished = True\n",
"\n",
"        return self.state, score, finished, {}\n",
"\n",
"    def reset(self, *, seed=None, options=None):\n",
"        \"\"\"Start a new game with an empty board and player 1 to move.\n",
"\n",
"        Args:\n",
"            seed: optional seed forwarded to gymnasium's Env.reset.\n",
"            options: accepted for gymnasium API compatibility; unused.\n",
"\n",
"        Returns:\n",
"            Tuple (state, info) with an empty info dict.\n",
"        \"\"\"\n",
"        super().reset(seed=seed)\n",
"        self.state = {\n",
"            'board': [0] * 9,  # 3x3 board stored row by row\n",
"            'current_turn': 1  # player 1 moves first\n",
"        }\n",
"        return self.state, {}\n",
"\n",
"    def render(self, close=False):\n",
"        \"\"\"Print whose turn it is, followed by the board as three text rows.\"\"\"\n",
"        if close:\n",
"            return\n",
"        print(\"Current turn:\", self.symbols[self.state['current_turn'] + 1])\n",
"        for idx, value in enumerate(self.state['board']):\n",
"            print(self.symbols[value + 1], end=\" \")\n",
"            if idx % 3 == 2:\n",
"                print()\n",
"\n",
"    def available_moves(self):\n",
"        \"\"\"Return a [player, cell] action for every empty cell, for the player to move.\"\"\"\n",
"        player = self.state['current_turn']\n",
"        return [[player, idx] for idx, value in enumerate(self.state['board']) if value == 0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Агент\n",
"Агент - система, которая взаимодействует с окружающей средой, чтобы достичь определенной цели. Задача агента: выработка стратегии, которая максимизирует награду в долгосрочной перспективе.\n",
"Роль агента: агент принимает решение, основываясь на текущем состоянии среды, и получает обратную связь от среды.\n",
"Функционал агента: принятие решения - использование алгоритмов или стратегий для дальнейшего принятия решения; обучение - обновление знаний или стратегий на основе полученного опыта; интерактивность - адаптация к изменениям в среде."
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"import random\n",
"\n",
"\n",
"class GameAgent:\n",
"    \"\"\"Baseline agent that answers the environment with a random move.\"\"\"\n",
"\n",
"    def __init__(self, token):\n",
"        # Player marker: 1 plays 'X', -1 plays 'O'.\n",
"        self.token = token\n",
"\n",
"    def select_move(self, moves):\n",
"        \"\"\"Return one element of `moves`, picked at random.\"\"\"\n",
"        picked = random.choice(moves)\n",
"        return picked"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Основной цикл обучения"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Current turn: O\n",
" \n",
" \n",
"X \n",
"Current turn: X\n",
" \n",
" \n",
"X O \n",
"Current turn: O\n",
" \n",
"X \n",
"X O \n",
"Current turn: X\n",
" O \n",
"X \n",
"X O \n",
"Current turn: O\n",
" O \n",
"X X \n",
"X O \n",
"Current turn: X\n",
" O \n",
"X X \n",
"X O O \n",
"Current turn: O\n",
"X O \n",
"X X \n",
"X O O \n",
"Эпизод 1, Итоговая награда: 1\n",
"Средняя награда: 1.00\n",
"Current turn: O\n",
" \n",
" \n",
"X \n",
"Current turn: X\n",
" \n",
" \n",
"X O \n",
"Current turn: O\n",
" \n",
" \n",
"X O X \n",
"Current turn: X\n",
" O \n",
" \n",
"X O X \n",
"Current turn: O\n",
"X O \n",
" \n",
"X O X \n",
"Current turn: X\n",
"X O \n",
"O \n",
"X O X \n",
"Current turn: O\n",
"X O \n",
"O X \n",
"X O X \n",
"Current turn: X\n",
"X O O \n",
"O X \n",
"X O X \n",
"Current turn: O\n",
"X O O \n",
"O X X \n",
"X O X \n",
"Эпизод 2, Итоговая награда: 1\n",
"Средняя награда: 1.00\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" \n",
" O \n",
"Current turn: O\n",
" X \n",
" \n",
"X O \n",
"Current turn: X\n",
" X \n",
" O \n",
"X O \n",
"Current turn: O\n",
" X \n",
" X O \n",
"X O \n",
"Эпизод 3, Итоговая награда: 1\n",
"Средняя награда: 1.00\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" \n",
"O X \n",
" \n",
"Current turn: O\n",
" \n",
"O X X \n",
" \n",
"Current turn: X\n",
" \n",
"O X X \n",
"O \n",
"Current turn: O\n",
" X \n",
"O X X \n",
"O \n",
"Current turn: X\n",
"O X \n",
"O X X \n",
"O \n",
"Эпизод 4, Итоговая награда: -1\n",
"Средняя награда: 0.50\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" O \n",
" \n",
"Current turn: O\n",
" X \n",
" O \n",
"X \n",
"Current turn: X\n",
" X \n",
" O \n",
"X O \n",
"Current turn: O\n",
" X \n",
" O X \n",
"X O \n",
"Current turn: X\n",
" X \n",
"O O X \n",
"X O \n",
"Current turn: O\n",
" X X \n",
"O O X \n",
"X O \n",
"Current turn: X\n",
" X X \n",
"O O X \n",
"X O O \n",
"Current turn: O\n",
"X X X \n",
"O O X \n",
"X O O \n",
"Эпизод 5, Итоговая награда: 1\n",
"Средняя награда: 0.60\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" \n",
" X \n",
"O \n",
"Current turn: O\n",
" \n",
"X X \n",
"O \n",
"Current turn: X\n",
" O \n",
"X X \n",
"O \n",
"Current turn: O\n",
" O X \n",
"X X \n",
"O \n",
"Current turn: X\n",
"O O X \n",
"X X \n",
"O \n",
"Current turn: O\n",
"O O X \n",
"X X \n",
"O X \n",
"Current turn: X\n",
"O O X \n",
"X O X \n",
"O X \n",
"Current turn: O\n",
"O O X \n",
"X O X \n",
"O X X \n",
"Эпизод 6, Итоговая награда: 1\n",
"Средняя награда: 0.67\n",
"Current turn: O\n",
"X \n",
" \n",
" \n",
"Current turn: X\n",
"X \n",
" O \n",
" \n",
"Current turn: O\n",
"X \n",
"X O \n",
" \n",
"Current turn: X\n",
"X O \n",
"X O \n",
" \n",
"Current turn: O\n",
"X O \n",
"X O \n",
"X \n",
"Эпизод 7, Итоговая награда: 1\n",
"Средняя награда: 0.71\n",
"Current turn: O\n",
"X \n",
" \n",
" \n",
"Current turn: X\n",
"X \n",
"O \n",
" \n",
"Current turn: O\n",
"X \n",
"O X \n",
" \n",
"Current turn: X\n",
"X O \n",
"O X \n",
" \n",
"Current turn: O\n",
"X O X \n",
"O X \n",
" \n",
"Current turn: X\n",
"X O X \n",
"O X \n",
" O \n",
"Current turn: O\n",
"X O X \n",
"O X X \n",
" O \n",
"Current turn: X\n",
"X O X \n",
"O X X \n",
"O O \n",
"Current turn: O\n",
"X O X \n",
"O X X \n",
"O O X \n",
"Эпизод 8, Итоговая награда: 1\n",
"Средняя награда: 0.75\n",
"Current turn: O\n",
" \n",
" \n",
" X \n",
"Current turn: X\n",
" O \n",
" \n",
" X \n",
"Current turn: O\n",
"X O \n",
" \n",
" X \n",
"Current turn: X\n",
"X O \n",
" O \n",
" X \n",
"Current turn: O\n",
"X O X \n",
" O \n",
" X \n",
"Current turn: X\n",
"X O X \n",
"O O \n",
" X \n",
"Current turn: O\n",
"X O X \n",
"O O \n",
" X X \n",
"Current turn: X\n",
"X O X \n",
"O O O \n",
" X X \n",
"Эпизод 9, Итоговая награда: -1\n",
"Средняя награда: 0.56\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" \n",
" O \n",
"Current turn: O\n",
" X X \n",
" \n",
" O \n",
"Current turn: X\n",
" X X \n",
" \n",
"O O \n",
"Current turn: O\n",
" X X \n",
" X \n",
"O O \n",
"Current turn: X\n",
"O X X \n",
" X \n",
"O O \n",
"Current turn: O\n",
"O X X \n",
" X X \n",
"O O \n",
"Current turn: X\n",
"O X X \n",
"O X X \n",
"O O \n",
"Эпизод 10, Итоговая награда: -1\n",
"Средняя награда: 0.40\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" O \n",
" X \n",
" \n",
"Current turn: O\n",
"X O \n",
" X \n",
" \n",
"Current turn: X\n",
"X O \n",
" X \n",
" O \n",
"Current turn: O\n",
"X O \n",
"X X \n",
" O \n",
"Current turn: X\n",
"X O \n",
"X X \n",
" O O \n",
"Current turn: O\n",
"X O \n",
"X X X \n",
" O O \n",
"Эпизод 11, Итоговая награда: 1\n",
"Средняя награда: 0.45\n",
"Current turn: O\n",
" \n",
" \n",
" X \n",
"Current turn: X\n",
" \n",
" \n",
"O X \n",
"Current turn: O\n",
" \n",
" X \n",
"O X \n",
"Current turn: X\n",
" \n",
" X \n",
"O O X \n",
"Current turn: O\n",
"X \n",
" X \n",
"O O X \n",
"Current turn: X\n",
"X O \n",
" X \n",
"O O X \n",
"Current turn: O\n",
"X O \n",
"X X \n",
"O O X \n",
"Current turn: X\n",
"X O O \n",
"X X \n",
"O O X \n",
"Current turn: O\n",
"X O O \n",
"X X X \n",
"O O X \n",
"Эпизод 12, Итоговая награда: 1\n",
"Средняя награда: 0.50\n",
"Current turn: O\n",
" \n",
" \n",
" X \n",
"Current turn: X\n",
" \n",
"O \n",
" X \n",
"Current turn: O\n",
" X \n",
"O \n",
" X \n",
"Current turn: X\n",
"O X \n",
"O \n",
" X \n",
"Current turn: O\n",
"O X \n",
"O X \n",
" X \n",
"Current turn: X\n",
"O X \n",
"O X \n",
" O X \n",
"Current turn: O\n",
"O X X \n",
"O X \n",
" O X \n",
"Current turn: X\n",
"O X X \n",
"O X \n",
"O O X \n",
"Эпизод 13, Итоговая награда: -1\n",
"Средняя награда: 0.38\n",
"Current turn: O\n",
" \n",
" \n",
"X \n",
"Current turn: X\n",
" \n",
" O \n",
"X \n",
"Current turn: O\n",
" \n",
"X O \n",
"X \n",
"Current turn: X\n",
"O \n",
"X O \n",
"X \n",
"Current turn: O\n",
"O \n",
"X X O \n",
"X \n",
"Current turn: X\n",
"O \n",
"X X O \n",
"X O \n",
"Current turn: O\n",
"O X \n",
"X X O \n",
"X O \n",
"Current turn: X\n",
"O X O \n",
"X X O \n",
"X O \n",
"Эпизод 14, Итоговая награда: -1\n",
"Средняя награда: 0.29\n",
"Current turn: O\n",
" \n",
" \n",
" X \n",
"Current turn: X\n",
"O \n",
" \n",
" X \n",
"Current turn: O\n",
"O \n",
" X \n",
" X \n",
"Current turn: X\n",
"O \n",
" X \n",
"O X \n",
"Current turn: O\n",
"O \n",
"X X \n",
"O X \n",
"Current turn: X\n",
"O \n",
"X X \n",
"O X O \n",
"Current turn: O\n",
"O X \n",
"X X \n",
"O X O \n",
"Current turn: X\n",
"O X O \n",
"X X \n",
"O X O \n",
"Current turn: O\n",
"O X O \n",
"X X X \n",
"O X O \n",
"Эпизод 15, Итоговая награда: 1\n",
"Средняя награда: 0.33\n",
"Current turn: O\n",
"X \n",
" \n",
" \n",
"Current turn: X\n",
"X \n",
" O \n",
" \n",
"Current turn: O\n",
"X \n",
" O \n",
" X \n",
"Current turn: X\n",
"X \n",
" O \n",
"O X \n",
"Current turn: O\n",
"X \n",
"X O \n",
"O X \n",
"Current turn: X\n",
"X O \n",
"X O \n",
"O X \n",
"Current turn: O\n",
"X O \n",
"X O \n",
"O X X \n",
"Current turn: X\n",
"X O O \n",
"X O \n",
"O X X \n",
"Current turn: O\n",
"X O O \n",
"X X O \n",
"O X X \n",
"Эпизод 16, Итоговая награда: 1\n",
"Средняя награда: 0.38\n",
"Current turn: O\n",
" \n",
"X \n",
" \n",
"Current turn: X\n",
" \n",
"X O \n",
" \n",
"Current turn: O\n",
" \n",
"X X O \n",
" \n",
"Current turn: X\n",
" \n",
"X X O \n",
" O \n",
"Current turn: O\n",
" \n",
"X X O \n",
"X O \n",
"Current turn: X\n",
"O \n",
"X X O \n",
"X O \n",
"Current turn: O\n",
"O \n",
"X X O \n",
"X X O \n",
"Current turn: X\n",
"O O \n",
"X X O \n",
"X X O \n",
"Эпизод 17, Итоговая награда: -1\n",
"Средняя награда: 0.29\n",
"Current turn: O\n",
" \n",
" \n",
" X \n",
"Current turn: X\n",
" \n",
" \n",
"O X \n",
"Current turn: O\n",
" \n",
" \n",
"O X X \n",
"Current turn: X\n",
" O \n",
" \n",
"O X X \n",
"Current turn: O\n",
" X O \n",
" \n",
"O X X \n",
"Current turn: X\n",
" X O \n",
"O \n",
"O X X \n",
"Current turn: O\n",
" X O \n",
"O X \n",
"O X X \n",
"Current turn: X\n",
" X O \n",
"O O X \n",
"O X X \n",
"Эпизод 18, Итоговая награда: -1\n",
"Средняя награда: 0.22\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" O \n",
" \n",
"Current turn: O\n",
" X \n",
" X O \n",
" \n",
"Current turn: X\n",
" X \n",
" X O \n",
" O \n",
"Current turn: O\n",
" X X \n",
" X O \n",
" O \n",
"Current turn: X\n",
" X X \n",
" X O \n",
"O O \n",
"Current turn: O\n",
" X X \n",
"X X O \n",
"O O \n",
"Current turn: X\n",
" X X \n",
"X X O \n",
"O O O \n",
"Эпизод 19, Итоговая награда: -1\n",
"Средняя награда: 0.16\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
"O X \n",
" \n",
" \n",
"Current turn: O\n",
"O X \n",
" X \n",
" \n",
"Current turn: X\n",
"O X \n",
" X O \n",
" \n",
"Current turn: O\n",
"O X \n",
" X O \n",
" X \n",
"Current turn: X\n",
"O X \n",
" X O \n",
" O X \n",
"Current turn: O\n",
"O X X \n",
" X O \n",
" O X \n",
"Current turn: X\n",
"O X X \n",
"O X O \n",
" O X \n",
"Current turn: O\n",
"O X X \n",
"O X O \n",
"X O X \n",
"Эпизод 20, Итоговая награда: 1\n",
"Средняя награда: 0.20\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" \n",
" O \n",
"Current turn: O\n",
" X \n",
" X \n",
" O \n",
"Current turn: X\n",
" X \n",
" X \n",
" O O \n",
"Current turn: O\n",
" X \n",
" X X \n",
" O O \n",
"Current turn: X\n",
" X O \n",
" X X \n",
" O O \n",
"Current turn: O\n",
" X O \n",
" X X \n",
"X O O \n",
"Current turn: X\n",
" X O \n",
"O X X \n",
"X O O \n",
"Current turn: O\n",
"X X O \n",
"O X X \n",
"X O O \n",
"Эпизод 21, Итоговая награда: 0\n",
"Средняя награда: 0.19\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" \n",
" X \n",
" O \n",
"Current turn: O\n",
" X \n",
" X \n",
" O \n",
"Current turn: X\n",
" X \n",
"O X \n",
" O \n",
"Current turn: O\n",
"X X \n",
"O X \n",
" O \n",
"Current turn: X\n",
"X X \n",
"O X \n",
"O O \n",
"Current turn: O\n",
"X X X \n",
"O X \n",
"O O \n",
"Эпизод 22, Итоговая награда: 1\n",
"Средняя награда: 0.23\n",
"Current turn: O\n",
" \n",
" \n",
" X \n",
"Current turn: X\n",
" \n",
" O \n",
" X \n",
"Current turn: O\n",
" X \n",
" O \n",
" X \n",
"Current turn: X\n",
"O X \n",
" O \n",
" X \n",
"Current turn: O\n",
"O X \n",
"X O \n",
" X \n",
"Current turn: X\n",
"O X \n",
"X O \n",
" X O \n",
"Current turn: O\n",
"O X \n",
"X X O \n",
" X O \n",
"Эпизод 23, Итоговая награда: 1\n",
"Средняя награда: 0.26\n",
"Current turn: O\n",
"X \n",
" \n",
" \n",
"Current turn: X\n",
"X \n",
" \n",
" O \n",
"Current turn: O\n",
"X X \n",
" \n",
" O \n",
"Current turn: X\n",
"X X \n",
" O \n",
" O \n",
"Current turn: O\n",
"X X \n",
"X O \n",
" O \n",
"Current turn: X\n",
"X X \n",
"X O O \n",
" O \n",
"Current turn: O\n",
"X X X \n",
"X O O \n",
" O \n",
"Эпизод 24, Итоговая награда: 1\n",
"Средняя награда: 0.29\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
"O \n",
" \n",
"Current turn: O\n",
" X \n",
"O X \n",
" \n",
"Current turn: X\n",
"O X \n",
"O X \n",
" \n",
"Current turn: O\n",
"O X \n",
"O X \n",
"X \n",
"Эпизод 25, Итоговая награда: 1\n",
"Средняя награда: 0.32\n",
"Current turn: O\n",
" \n",
"X \n",
" \n",
"Current turn: X\n",
" \n",
"X \n",
"O \n",
"Current turn: O\n",
" \n",
"X \n",
"O X \n",
"Current turn: X\n",
" O \n",
"X \n",
"O X \n",
"Current turn: O\n",
" O \n",
"X X \n",
"O X \n",
"Current turn: X\n",
" O \n",
"X X O \n",
"O X \n",
"Current turn: O\n",
"X O \n",
"X X O \n",
"O X \n",
"Current turn: X\n",
"X O \n",
"X X O \n",
"O X O \n",
"Current turn: O\n",
"X O X \n",
"X X O \n",
"O X O \n",
"Эпизод 26, Итоговая награда: 0\n",
"Средняя награда: 0.31\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" \n",
" O \n",
"Current turn: O\n",
" X \n",
" \n",
"X O \n",
"Current turn: X\n",
"O X \n",
" \n",
"X O \n",
"Current turn: O\n",
"O X \n",
" X \n",
"X O \n",
"Current turn: X\n",
"O O X \n",
" X \n",
"X O \n",
"Current turn: O\n",
"O O X \n",
"X X \n",
"X O \n",
"Current turn: X\n",
"O O X \n",
"X X \n",
"X O O \n",
"Current turn: O\n",
"O O X \n",
"X X X \n",
"X O O \n",
"Эпизод 27, Итоговая награда: 1\n",
"Средняя награда: 0.33\n",
"Current turn: O\n",
" \n",
"X \n",
" \n",
"Current turn: X\n",
" \n",
"X O \n",
" \n",
"Current turn: O\n",
"X \n",
"X O \n",
" \n",
"Current turn: X\n",
"X \n",
"X O O \n",
" \n",
"Current turn: O\n",
"X \n",
"X O O \n",
" X \n",
"Current turn: X\n",
"X O \n",
"X O O \n",
" X \n",
"Current turn: O\n",
"X O \n",
"X O O \n",
"X X \n",
"Эпизод 28, Итоговая награда: 1\n",
"Средняя награда: 0.36\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
"O X \n",
" \n",
" \n",
"Current turn: O\n",
"O X X \n",
" \n",
" \n",
"Current turn: X\n",
"O X X \n",
" \n",
"O \n",
"Current turn: O\n",
"O X X \n",
" X \n",
"O \n",
"Current turn: X\n",
"O X X \n",
" X \n",
"O O \n",
"Current turn: O\n",
"O X X \n",
" X X \n",
"O O \n",
"Current turn: X\n",
"O X X \n",
" X X \n",
"O O O \n",
"Эпизод 29, Итоговая награда: -1\n",
"Средняя награда: 0.31\n",
"Current turn: O\n",
" \n",
" \n",
" X \n",
"Current turn: X\n",
" \n",
" \n",
"O X \n",
"Current turn: O\n",
" \n",
" \n",
"O X X \n",
"Current turn: X\n",
" \n",
"O \n",
"O X X \n",
"Current turn: O\n",
" \n",
"O X \n",
"O X X \n",
"Current turn: X\n",
" \n",
"O X O \n",
"O X X \n",
"Current turn: O\n",
" X \n",
"O X O \n",
"O X X \n",
"Эпизод 30, Итоговая награда: 1\n",
"Средняя награда: 0.33\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" \n",
" X \n",
" O \n",
"Current turn: O\n",
" \n",
" X \n",
" O X \n",
"Current turn: X\n",
" O \n",
" X \n",
" O X \n",
"Current turn: O\n",
" O \n",
" X \n",
"X O X \n",
"Current turn: X\n",
" O O \n",
" X \n",
"X O X \n",
"Current turn: O\n",
" O O \n",
"X X \n",
"X O X \n",
"Current turn: X\n",
" O O \n",
"X X O \n",
"X O X \n",
"Current turn: O\n",
"X O O \n",
"X X O \n",
"X O X \n",
"Эпизод 31, Итоговая награда: 1\n",
"Средняя награда: 0.35\n",
"Current turn: O\n",
" \n",
"X \n",
" \n",
"Current turn: X\n",
" \n",
"X \n",
"O \n",
"Current turn: O\n",
" \n",
"X X \n",
"O \n",
"Current turn: X\n",
" O \n",
"X X \n",
"O \n",
"Current turn: O\n",
" O \n",
"X X X \n",
"O \n",
"Эпизод 32, Итоговая награда: 1\n",
"Средняя награда: 0.38\n",
"Current turn: O\n",
" \n",
" \n",
"X \n",
"Current turn: X\n",
"O \n",
" \n",
"X \n",
"Current turn: O\n",
"O \n",
" X \n",
"X \n",
"Current turn: X\n",
"O \n",
" X O \n",
"X \n",
"Current turn: O\n",
"O \n",
"X X O \n",
"X \n",
"Current turn: X\n",
"O \n",
"X X O \n",
"X O \n",
"Current turn: O\n",
"O X \n",
"X X O \n",
"X O \n",
"Current turn: X\n",
"O X \n",
"X X O \n",
"X O O \n",
"Current turn: O\n",
"O X X \n",
"X X O \n",
"X O O \n",
"Эпизод 33, Итоговая награда: 1\n",
"Средняя награда: 0.39\n",
"Current turn: O\n",
" \n",
" \n",
" X \n",
"Current turn: X\n",
" \n",
" \n",
"O X \n",
"Current turn: O\n",
" \n",
" \n",
"O X X \n",
"Current turn: X\n",
" \n",
"O \n",
"O X X \n",
"Current turn: O\n",
" \n",
"O X \n",
"O X X \n",
"Current turn: X\n",
"O \n",
"O X \n",
"O X X \n",
"Эпизод 34, Итоговая награда: -1\n",
"Средняя награда: 0.35\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
"O \n",
" \n",
"Current turn: O\n",
"X X \n",
"O \n",
" \n",
"Current turn: X\n",
"X X \n",
"O \n",
" O \n",
"Current turn: O\n",
"X X \n",
"O \n",
" O X \n",
"Current turn: X\n",
"X X O \n",
"O \n",
" O X \n",
"Current turn: O\n",
"X X O \n",
"O \n",
"X O X \n",
"Current turn: X\n",
"X X O \n",
"O O \n",
"X O X \n",
"Current turn: O\n",
"X X O \n",
"O O X \n",
"X O X \n",
"Эпизод 35, Итоговая награда: 0\n",
"Средняя награда: 0.34\n",
"Current turn: O\n",
"X \n",
" \n",
" \n",
"Current turn: X\n",
"X \n",
" \n",
" O \n",
"Current turn: O\n",
"X \n",
"X \n",
" O \n",
"Current turn: X\n",
"X \n",
"X \n",
"O O \n",
"Current turn: O\n",
"X \n",
"X \n",
"O O X \n",
"Current turn: X\n",
"X \n",
"X O \n",
"O O X \n",
"Current turn: O\n",
"X X \n",
"X O \n",
"O O X \n",
"Current turn: X\n",
"X X \n",
"X O O \n",
"O O X \n",
"Current turn: O\n",
"X X X \n",
"X O O \n",
"O O X \n",
"Эпизод 36, Итоговая награда: 1\n",
"Средняя награда: 0.36\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" \n",
"O \n",
"Current turn: O\n",
" X \n",
" X \n",
"O \n",
"Current turn: X\n",
" X \n",
" X \n",
"O O \n",
"Current turn: O\n",
"X X \n",
" X \n",
"O O \n",
"Current turn: X\n",
"X X \n",
"O X \n",
"O O \n",
"Current turn: O\n",
"X X X \n",
"O X \n",
"O O \n",
"Эпизод 37, Итоговая награда: 1\n",
"Средняя награда: 0.38\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" O \n",
" \n",
"Current turn: O\n",
" X \n",
" O X \n",
" \n",
"Current turn: X\n",
" X \n",
" O X \n",
" O \n",
"Current turn: O\n",
" X \n",
" O X \n",
"X O \n",
"Current turn: X\n",
" X \n",
"O O X \n",
"X O \n",
"Current turn: O\n",
"X X \n",
"O O X \n",
"X O \n",
"Current turn: X\n",
"X X \n",
"O O X \n",
"X O O \n",
"Current turn: O\n",
"X X X \n",
"O O X \n",
"X O O \n",
"Эпизод 38, Итоговая награда: 1\n",
"Средняя награда: 0.39\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" O \n",
" \n",
"Current turn: O\n",
" X X \n",
" O \n",
" \n",
"Current turn: X\n",
" X X \n",
" O \n",
" O \n",
"Current turn: O\n",
" X X \n",
" O \n",
"X O \n",
"Current turn: X\n",
" X X \n",
" O \n",
"X O O \n",
"Current turn: O\n",
" X X \n",
" X O \n",
"X O O \n",
"Эпизод 39, Итоговая награда: 1\n",
"Средняя награда: 0.41\n",
"Current turn: O\n",
" \n",
" \n",
"X \n",
"Current turn: X\n",
" \n",
" O \n",
"X \n",
"Current turn: O\n",
" \n",
" X O \n",
"X \n",
"Current turn: X\n",
" O \n",
" X O \n",
"X \n",
"Current turn: O\n",
"X O \n",
" X O \n",
"X \n",
"Current turn: X\n",
"X O \n",
"O X O \n",
"X \n",
"Current turn: O\n",
"X O \n",
"O X O \n",
"X X \n",
"Current turn: X\n",
"X O \n",
"O X O \n",
"X X O \n",
"Current turn: O\n",
"X O X \n",
"O X O \n",
"X X O \n",
"Эпизод 40, Итоговая награда: 1\n",
"Средняя награда: 0.42\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" O \n",
" \n",
"Current turn: O\n",
" X \n",
" O \n",
"X \n",
"Current turn: X\n",
" X \n",
" O \n",
"X O \n",
"Current turn: O\n",
" X \n",
" X O \n",
"X O \n",
"Эпизод 41, Итоговая награда: 1\n",
"Средняя награда: 0.44\n",
"Current turn: O\n",
" \n",
" \n",
"X \n",
"Current turn: X\n",
"O \n",
" \n",
"X \n",
"Current turn: O\n",
"O X \n",
" \n",
"X \n",
"Current turn: X\n",
"O X \n",
" O \n",
"X \n",
"Current turn: O\n",
"O X \n",
" O \n",
"X X \n",
"Current turn: X\n",
"O X \n",
" O O \n",
"X X \n",
"Current turn: O\n",
"O X \n",
"X O O \n",
"X X \n",
"Current turn: X\n",
"O X O \n",
"X O O \n",
"X X \n",
"Current turn: O\n",
"O X O \n",
"X O O \n",
"X X X \n",
"Эпизод 42, Итоговая награда: 1\n",
"Средняя награда: 0.45\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" \n",
" X O \n",
" \n",
"Current turn: O\n",
"X \n",
" X O \n",
" \n",
"Current turn: X\n",
"X \n",
"O X O \n",
" \n",
"Current turn: O\n",
"X \n",
"O X O \n",
"X \n",
"Current turn: X\n",
"X O \n",
"O X O \n",
"X \n",
"Current turn: O\n",
"X X O \n",
"O X O \n",
"X \n",
"Current turn: X\n",
"X X O \n",
"O X O \n",
"X O \n",
"Current turn: O\n",
"X X O \n",
"O X O \n",
"X O X \n",
"Эпизод 43, Итоговая награда: 1\n",
"Средняя награда: 0.47\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" \n",
" O \n",
"Current turn: O\n",
" X \n",
" \n",
" O X \n",
"Current turn: X\n",
" X O \n",
" \n",
" O X \n",
"Current turn: O\n",
" X O \n",
" X \n",
" O X \n",
"Current turn: X\n",
" X O \n",
"O X \n",
" O X \n",
"Current turn: O\n",
"X X O \n",
"O X \n",
" O X \n",
"Эпизод 44, Итоговая награда: 1\n",
"Средняя награда: 0.48\n",
"Current turn: O\n",
" \n",
"X \n",
" \n",
"Current turn: X\n",
" \n",
"X \n",
"O \n",
"Current turn: O\n",
" \n",
"X \n",
"O X \n",
"Current turn: X\n",
" O \n",
"X \n",
"O X \n",
"Current turn: O\n",
"X O \n",
"X \n",
"O X \n",
"Current turn: X\n",
"X O \n",
"X O \n",
"O X \n",
"Эпизод 45, Итоговая награда: -1\n",
"Средняя награда: 0.44\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" \n",
" X \n",
" O \n",
"Current turn: O\n",
" \n",
" X \n",
" O X \n",
"Current turn: X\n",
" \n",
" O X \n",
" O X \n",
"Current turn: O\n",
" \n",
"X O X \n",
" O X \n",
"Current turn: X\n",
" O \n",
"X O X \n",
" O X \n",
"Эпизод 46, Итоговая награда: -1\n",
"Средняя награда: 0.41\n",
"Current turn: O\n",
" \n",
" \n",
" X \n",
"Current turn: X\n",
" \n",
" \n",
" X O \n",
"Current turn: O\n",
" \n",
"X \n",
" X O \n",
"Current turn: X\n",
" O \n",
"X \n",
" X O \n",
"Current turn: O\n",
" O \n",
"X \n",
"X X O \n",
"Current turn: X\n",
" O \n",
"X O \n",
"X X O \n",
"Current turn: O\n",
" O \n",
"X O X \n",
"X X O \n",
"Current turn: X\n",
"O O \n",
"X O X \n",
"X X O \n",
"Эпизод 47, Итоговая награда: -1\n",
"Средняя награда: 0.38\n",
"Current turn: O\n",
" \n",
" \n",
"X \n",
"Current turn: X\n",
" O \n",
" \n",
"X \n",
"Current turn: O\n",
" O \n",
" \n",
"X X \n",
"Current turn: X\n",
" O O \n",
" \n",
"X X \n",
"Current turn: O\n",
"X O O \n",
" \n",
"X X \n",
"Current turn: X\n",
"X O O \n",
" O \n",
"X X \n",
"Current turn: O\n",
"X O O \n",
" O \n",
"X X X \n",
"Эпизод 48, Итоговая награда: 1\n",
"Средняя награда: 0.40\n",
"Current turn: O\n",
"X \n",
" \n",
" \n",
"Current turn: X\n",
"X O \n",
" \n",
" \n",
"Current turn: O\n",
"X O \n",
" \n",
" X \n",
"Current turn: X\n",
"X O \n",
"O \n",
" X \n",
"Current turn: O\n",
"X O \n",
"O \n",
"X X \n",
"Current turn: X\n",
"X O \n",
"O O \n",
"X X \n",
"Current turn: O\n",
"X O \n",
"O O \n",
"X X X \n",
"Эпизод 49, Итоговая награда: 1\n",
"Средняя награда: 0.41\n",
"Current turn: O\n",
" \n",
"X \n",
" \n",
"Current turn: X\n",
" O \n",
"X \n",
" \n",
"Current turn: O\n",
" O \n",
"X X \n",
" \n",
"Current turn: X\n",
" O \n",
"X X \n",
" O \n",
"Current turn: O\n",
" X O \n",
"X X \n",
" O \n",
"Current turn: X\n",
" X O \n",
"X X \n",
" O O \n",
"Current turn: O\n",
" X O \n",
"X X \n",
"X O O \n",
"Current turn: X\n",
" X O \n",
"X O X \n",
"X O O \n",
"Current turn: O\n",
"X X O \n",
"X O X \n",
"X O O \n",
"Эпизод 50, Итоговая награда: 1\n",
"Средняя награда: 0.42\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" O \n",
" \n",
"Current turn: O\n",
" X \n",
"X O \n",
" \n",
"Current turn: X\n",
" X \n",
"X O \n",
"O \n",
"Current turn: O\n",
" X \n",
"X O X \n",
"O \n",
"Current turn: X\n",
"O X \n",
"X O X \n",
"O \n",
"Current turn: O\n",
"O X \n",
"X O X \n",
"O X \n",
"Current turn: X\n",
"O X O \n",
"X O X \n",
"O X \n",
"Эпизод 51, Итоговая награда: -1\n",
"Средняя награда: 0.39\n",
"Current turn: O\n",
" \n",
"X \n",
" \n",
"Current turn: X\n",
" \n",
"X O \n",
" \n",
"Current turn: O\n",
" \n",
"X X O \n",
" \n",
"Current turn: X\n",
" O \n",
"X X O \n",
" \n",
"Current turn: O\n",
" X O \n",
"X X O \n",
" \n",
"Current turn: X\n",
"O X O \n",
"X X O \n",
" \n",
"Current turn: O\n",
"O X O \n",
"X X O \n",
"X \n",
"Current turn: X\n",
"O X O \n",
"X X O \n",
"X O \n",
"Current turn: O\n",
"O X O \n",
"X X O \n",
"X O X \n",
"Эпизод 52, Итоговая награда: 0\n",
"Средняя награда: 0.38\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" \n",
" X \n",
" O \n",
"Current turn: O\n",
" \n",
"X X \n",
" O \n",
"Current turn: X\n",
" \n",
"X X \n",
"O O \n",
"Current turn: O\n",
" X \n",
"X X \n",
"O O \n",
"Current turn: X\n",
"O X \n",
"X X \n",
"O O \n",
"Current turn: O\n",
"O X \n",
"X X X \n",
"O O \n",
"Эпизод 53, Итоговая награда: 1\n",
"Средняя награда: 0.40\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" O \n",
" \n",
"Current turn: O\n",
" X \n",
" X O \n",
" \n",
"Current turn: X\n",
" O X \n",
" X O \n",
" \n",
"Current turn: O\n",
" O X \n",
"X X O \n",
" \n",
"Current turn: X\n",
"O O X \n",
"X X O \n",
" \n",
"Current turn: O\n",
"O O X \n",
"X X O \n",
"X \n",
"Эпизод 54, Итоговая награда: 1\n",
"Средняя награда: 0.41\n",
"Current turn: O\n",
" \n",
"X \n",
" \n",
"Current turn: X\n",
" \n",
"X O \n",
" \n",
"Current turn: O\n",
" \n",
"X O \n",
" X \n",
"Current turn: X\n",
" O \n",
"X O \n",
" X \n",
"Current turn: O\n",
" X O \n",
"X O \n",
" X \n",
"Current turn: X\n",
" X O \n",
"X O \n",
" O X \n",
"Current turn: O\n",
" X O \n",
"X O X \n",
" O X \n",
"Current turn: X\n",
" X O \n",
"X O X \n",
"O O X \n",
"Эпизод 55, Итоговая награда: -1\n",
"Средняя награда: 0.38\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" \n",
"O \n",
"Current turn: O\n",
" X \n",
" \n",
"O X \n",
"Current turn: X\n",
" X \n",
"O \n",
"O X \n",
"Current turn: O\n",
" X \n",
"O \n",
"O X X \n",
"Current turn: X\n",
" X \n",
"O O \n",
"O X X \n",
"Current turn: O\n",
" X \n",
"O O X \n",
"O X X \n",
"Эпизод 56, Итоговая награда: 1\n",
"Средняя награда: 0.39\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" \n",
" X O \n",
" \n",
"Current turn: O\n",
" \n",
" X O \n",
" X \n",
"Current turn: X\n",
"O \n",
" X O \n",
" X \n",
"Current turn: O\n",
"O \n",
"X X O \n",
" X \n",
"Current turn: X\n",
"O \n",
"X X O \n",
"O X \n",
"Current turn: O\n",
"O \n",
"X X O \n",
"O X X \n",
"Current turn: X\n",
"O O \n",
"X X O \n",
"O X X \n",
"Current turn: O\n",
"O O X \n",
"X X O \n",
"O X X \n",
"Эпизод 57, Итоговая награда: 0\n",
"Средняя награда: 0.39\n",
"Current turn: O\n",
" \n",
" \n",
"X \n",
"Current turn: X\n",
" O \n",
" \n",
"X \n",
"Current turn: O\n",
" O \n",
" X \n",
"X \n",
"Current turn: X\n",
" O \n",
" X O \n",
"X \n",
"Current turn: O\n",
" O X \n",
" X O \n",
"X \n",
"Эпизод 58, Итоговая награда: 1\n",
"Средняя награда: 0.40\n",
"Current turn: O\n",
" \n",
"X \n",
" \n",
"Current turn: X\n",
" \n",
"X O \n",
" \n",
"Current turn: O\n",
" X \n",
"X O \n",
" \n",
"Current turn: X\n",
"O X \n",
"X O \n",
" \n",
"Current turn: O\n",
"O X \n",
"X O \n",
" X \n",
"Current turn: X\n",
"O X \n",
"X O \n",
" X O \n",
"Current turn: O\n",
"O X \n",
"X O \n",
"X X O \n",
"Current turn: X\n",
"O X \n",
"X O O \n",
"X X O \n",
"Эпизод 59, Итоговая награда: -1\n",
"Средняя награда: 0.37\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" \n",
" O X \n",
" \n",
"Current turn: O\n",
" X \n",
" O X \n",
" \n",
"Current turn: X\n",
" X \n",
" O X \n",
"O \n",
"Current turn: O\n",
" X X \n",
" O X \n",
"O \n",
"Current turn: X\n",
"O X X \n",
" O X \n",
"O \n",
"Current turn: O\n",
"O X X \n",
" O X \n",
"O X \n",
"Эпизод 60, Итоговая награда: 1\n",
"Средняя награда: 0.38\n",
"Current turn: O\n",
" \n",
"X \n",
" \n",
"Current turn: X\n",
"O \n",
"X \n",
" \n",
"Current turn: O\n",
"O \n",
"X \n",
" X \n",
"Current turn: X\n",
"O \n",
"X \n",
"O X \n",
"Current turn: O\n",
"O \n",
"X X \n",
"O X \n",
"Current turn: X\n",
"O \n",
"X X \n",
"O X O \n",
"Current turn: O\n",
"O \n",
"X X X \n",
"O X O \n",
"Эпизод 61, Итоговая награда: 1\n",
"Средняя награда: 0.39\n",
"Current turn: O\n",
" \n",
"X \n",
" \n",
"Current turn: X\n",
"O \n",
"X \n",
" \n",
"Current turn: O\n",
"O X \n",
"X \n",
" \n",
"Current turn: X\n",
"O X \n",
"X O \n",
" \n",
"Current turn: O\n",
"O X \n",
"X X O \n",
" \n",
"Current turn: X\n",
"O X O \n",
"X X O \n",
" \n",
"Current turn: O\n",
"O X O \n",
"X X O \n",
"X \n",
"Current turn: X\n",
"O X O \n",
"X X O \n",
"X O \n",
"Эпизод 62, Итоговая награда: -1\n",
"Средняя награда: 0.37\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" O \n",
" \n",
"Current turn: O\n",
" X \n",
"X O \n",
" \n",
"Current turn: X\n",
" X \n",
"X O \n",
" O \n",
"Current turn: O\n",
" X X \n",
"X O \n",
" O \n",
"Current turn: X\n",
" X X \n",
"X O \n",
"O O \n",
"Current turn: O\n",
"X X X \n",
"X O \n",
"O O \n",
"Эпизод 63, Итоговая награда: 1\n",
"Средняя награда: 0.38\n",
"Current turn: O\n",
" \n",
"X \n",
" \n",
"Current turn: X\n",
" \n",
"X O \n",
" \n",
"Current turn: O\n",
" X \n",
"X O \n",
" \n",
"Current turn: X\n",
" X \n",
"X O \n",
"O \n",
"Current turn: O\n",
"X X \n",
"X O \n",
"O \n",
"Current turn: X\n",
"X X \n",
"X O O \n",
"O \n",
"Current turn: O\n",
"X X \n",
"X O O \n",
"O X \n",
"Current turn: X\n",
"X O X \n",
"X O O \n",
"O X \n",
"Current turn: O\n",
"X O X \n",
"X O O \n",
"O X X \n",
"Эпизод 64, Итоговая награда: 0\n",
"Средняя награда: 0.38\n",
"Current turn: O\n",
" \n",
" \n",
"X \n",
"Current turn: X\n",
" \n",
" \n",
"X O \n",
"Current turn: O\n",
"X \n",
" \n",
"X O \n",
"Current turn: X\n",
"X \n",
" O \n",
"X O \n",
"Current turn: O\n",
"X X \n",
" O \n",
"X O \n",
"Current turn: X\n",
"X X \n",
" O O \n",
"X O \n",
"Current turn: O\n",
"X X \n",
" O O \n",
"X O X \n",
"Current turn: X\n",
"X O X \n",
" O O \n",
"X O X \n",
"Эпизод 65, Итоговая награда: -1\n",
"Средняя награда: 0.35\n",
"Current turn: O\n",
" \n",
" \n",
" X \n",
"Current turn: X\n",
"O \n",
" \n",
" X \n",
"Current turn: O\n",
"O \n",
" X \n",
" X \n",
"Current turn: X\n",
"O \n",
" O X \n",
" X \n",
"Current turn: O\n",
"O X \n",
" O X \n",
" X \n",
"Эпизод 66, Итоговая награда: 1\n",
"Средняя награда: 0.36\n",
"Current turn: O\n",
" \n",
" \n",
" X \n",
"Current turn: X\n",
" \n",
" \n",
" O X \n",
"Current turn: O\n",
" \n",
"X \n",
" O X \n",
"Current turn: X\n",
" O \n",
"X \n",
" O X \n",
"Current turn: O\n",
" O \n",
"X \n",
"X O X \n",
"Current turn: X\n",
" O \n",
"X O \n",
"X O X \n",
"Эпизод 67, Итоговая награда: -1\n",
"Средняя награда: 0.34\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" O \n",
" X \n",
" \n",
"Current turn: O\n",
" O \n",
" X \n",
" X \n",
"Current turn: X\n",
"O O \n",
" X \n",
" X \n",
"Current turn: O\n",
"O O \n",
" X \n",
" X X \n",
"Current turn: X\n",
"O O \n",
"O X \n",
" X X \n",
"Current turn: O\n",
"O O X \n",
"O X \n",
" X X \n",
"Current turn: X\n",
"O O X \n",
"O X \n",
"O X X \n",
"Эпизод 68, Итоговая награда: -1\n",
"Средняя награда: 0.32\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" O X \n",
" \n",
" \n",
"Current turn: O\n",
" O X \n",
" X \n",
" \n",
"Current turn: X\n",
" O X \n",
" X \n",
" O \n",
"Current turn: O\n",
" O X \n",
" X \n",
"X O \n",
"Эпизод 69, Итоговая награда: 1\n",
"Средняя награда: 0.33\n",
"Current turn: O\n",
" \n",
"X \n",
" \n",
"Current turn: X\n",
" \n",
"X O \n",
" \n",
"Current turn: O\n",
" X \n",
"X O \n",
" \n",
"Current turn: X\n",
"O X \n",
"X O \n",
" \n",
"Current turn: O\n",
"O X \n",
"X O \n",
" X \n",
"Current turn: X\n",
"O X \n",
"X O O \n",
" X \n",
"Current turn: O\n",
"O X X \n",
"X O O \n",
" X \n",
"Current turn: X\n",
"O X X \n",
"X O O \n",
"O X \n",
"Current turn: O\n",
"O X X \n",
"X O O \n",
"O X X \n",
"Эпизод 70, Итоговая награда: 0\n",
"Средняя награда: 0.33\n",
"Current turn: O\n",
" \n",
" \n",
" X \n",
"Current turn: X\n",
" \n",
" O \n",
" X \n",
"Current turn: O\n",
"X \n",
" O \n",
" X \n",
"Current turn: X\n",
"X \n",
" O \n",
" O X \n",
"Current turn: O\n",
"X \n",
"X O \n",
" O X \n",
"Current turn: X\n",
"X \n",
"X O \n",
"O O X \n",
"Current turn: O\n",
"X X \n",
"X O \n",
"O O X \n",
"Current turn: X\n",
"X O X \n",
"X O \n",
"O O X \n",
"Эпизод 71, Итоговая награда: -1\n",
"Средняя награда: 0.31\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" O \n",
" \n",
"Current turn: O\n",
" X \n",
" O \n",
" X \n",
"Current turn: X\n",
" X \n",
" O \n",
"O X \n",
"Current turn: O\n",
" X \n",
" O \n",
"O X X \n",
"Current turn: X\n",
" X \n",
"O O \n",
"O X X \n",
"Current turn: O\n",
"X X \n",
"O O \n",
"O X X \n",
"Current turn: X\n",
"X X \n",
"O O O \n",
"O X X \n",
"Эпизод 72, Итоговая награда: -1\n",
"Средняя награда: 0.29\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" O \n",
" X \n",
" \n",
"Current turn: O\n",
"X O \n",
" X \n",
" \n",
"Current turn: X\n",
"X O \n",
" X \n",
"O \n",
"Current turn: O\n",
"X O \n",
" X \n",
"O X \n",
"Эпизод 73, Итоговая награда: 1\n",
"Средняя награда: 0.30\n",
"Current turn: O\n",
" \n",
" \n",
"X \n",
"Current turn: X\n",
" \n",
"O \n",
"X \n",
"Current turn: O\n",
" \n",
"O X \n",
"X \n",
"Current turn: X\n",
"O \n",
"O X \n",
"X \n",
"Current turn: O\n",
"O \n",
"O X \n",
"X X \n",
"Current turn: X\n",
"O \n",
"O X \n",
"X X O \n",
"Current turn: O\n",
"O X \n",
"O X \n",
"X X O \n",
"Эпизод 74, Итоговая награда: 1\n",
"Средняя награда: 0.31\n",
"Current turn: O\n",
" \n",
" \n",
"X \n",
"Current turn: X\n",
" \n",
" O \n",
"X \n",
"Current turn: O\n",
" \n",
" O \n",
"X X \n",
"Current turn: X\n",
"O \n",
" O \n",
"X X \n",
"Current turn: O\n",
"O X \n",
" O \n",
"X X \n",
"Current turn: X\n",
"O X \n",
" O \n",
"X X O \n",
"Current turn: O\n",
"O X X \n",
" O \n",
"X X O \n",
"Current turn: X\n",
"O X X \n",
"O O \n",
"X X O \n",
"Current turn: O\n",
"O X X \n",
"O X O \n",
"X X O \n",
"Эпизод 75, Итоговая награда: 1\n",
"Средняя награда: 0.32\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" \n",
"O \n",
"Current turn: O\n",
" X \n",
" X \n",
"O \n",
"Current turn: X\n",
" O X \n",
" X \n",
"O \n",
"Current turn: O\n",
" O X \n",
"X X \n",
"O \n",
"Current turn: X\n",
" O X \n",
"X O X \n",
"O \n",
"Current turn: O\n",
" O X \n",
"X O X \n",
"O X \n",
"Эпизод 76, Итоговая награда: 1\n",
"Средняя награда: 0.33\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" O \n",
" \n",
"Current turn: O\n",
" X X \n",
" O \n",
" \n",
"Current turn: X\n",
" X X \n",
" O \n",
"O \n",
"Current turn: O\n",
" X X \n",
"X O \n",
"O \n",
"Current turn: X\n",
" X X \n",
"X O \n",
"O O \n",
"Current turn: O\n",
" X X \n",
"X O \n",
"O X O \n",
"Current turn: X\n",
"O X X \n",
"X O \n",
"O X O \n",
"Эпизод 77, Итоговая награда: -1\n",
"Средняя награда: 0.31\n",
"Current turn: O\n",
"X \n",
" \n",
" \n",
"Current turn: X\n",
"X \n",
" \n",
" O \n",
"Current turn: O\n",
"X \n",
" \n",
"X O \n",
"Current turn: X\n",
"X O \n",
" \n",
"X O \n",
"Current turn: O\n",
"X O \n",
" X \n",
"X O \n",
"Current turn: X\n",
"X O \n",
"O X \n",
"X O \n",
"Current turn: O\n",
"X O \n",
"O X \n",
"X X O \n",
"Current turn: X\n",
"X O \n",
"O X O \n",
"X X O \n",
"Эпизод 78, Итоговая награда: -1\n",
"Средняя награда: 0.29\n",
"Current turn: O\n",
"X \n",
" \n",
" \n",
"Current turn: X\n",
"X \n",
" \n",
" O \n",
"Current turn: O\n",
"X \n",
" X \n",
" O \n",
"Current turn: X\n",
"X \n",
" X \n",
"O O \n",
"Current turn: O\n",
"X \n",
"X X \n",
"O O \n",
"Current turn: X\n",
"X O \n",
"X X \n",
"O O \n",
"Current turn: O\n",
"X O \n",
"X X X \n",
"O O \n",
"Эпизод 79, Итоговая награда: 1\n",
"Средняя награда: 0.30\n",
"Current turn: O\n",
" \n",
" \n",
" X \n",
"Current turn: X\n",
" \n",
" O \n",
" X \n",
"Current turn: O\n",
" \n",
" O \n",
"X X \n",
"Current turn: X\n",
" O \n",
" O \n",
"X X \n",
"Current turn: O\n",
" O \n",
" O X \n",
"X X \n",
"Current turn: X\n",
"O O \n",
" O X \n",
"X X \n",
"Current turn: O\n",
"O O \n",
"X O X \n",
"X X \n",
"Current turn: X\n",
"O O O \n",
"X O X \n",
"X X \n",
"Эпизод 80, Итоговая награда: -1\n",
"Средняя награда: 0.29\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" O \n",
" X \n",
" \n",
"Current turn: O\n",
" O \n",
" X \n",
" X \n",
"Current turn: X\n",
" O \n",
" X O \n",
" X \n",
"Current turn: O\n",
" O X \n",
" X O \n",
" X \n",
"Current turn: X\n",
"O O X \n",
" X O \n",
" X \n",
"Current turn: O\n",
"O O X \n",
"X X O \n",
" X \n",
"Current turn: X\n",
"O O X \n",
"X X O \n",
" O X \n",
"Current turn: O\n",
"O O X \n",
"X X O \n",
"X O X \n",
"Эпизод 81, Итоговая награда: 1\n",
"Средняя награда: 0.30\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" O \n",
" X \n",
" \n",
"Current turn: O\n",
" O \n",
" X X \n",
" \n",
"Current turn: X\n",
" O \n",
" X X \n",
"O \n",
"Current turn: O\n",
"X O \n",
" X X \n",
"O \n",
"Current turn: X\n",
"X O \n",
" X X \n",
"O O \n",
"Current turn: O\n",
"X O X \n",
" X X \n",
"O O \n",
"Current turn: X\n",
"X O X \n",
"O X X \n",
"O O \n",
"Current turn: O\n",
"X O X \n",
"O X X \n",
"O O X \n",
"Эпизод 82, Итоговая награда: 1\n",
"Средняя награда: 0.30\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" \n",
" O X \n",
" \n",
"Current turn: O\n",
"X \n",
" O X \n",
" \n",
"Current turn: X\n",
"X \n",
" O X \n",
" O \n",
"Current turn: O\n",
"X \n",
"X O X \n",
" O \n",
"Current turn: X\n",
"X O \n",
"X O X \n",
" O \n",
"Эпизод 83, Итоговая награда: -1\n",
"Средняя награда: 0.29\n",
"Current turn: O\n",
" \n",
" \n",
"X \n",
"Current turn: X\n",
" \n",
" \n",
"X O \n",
"Current turn: O\n",
" \n",
" X \n",
"X O \n",
"Current turn: X\n",
" \n",
"O X \n",
"X O \n",
"Current turn: O\n",
"X \n",
"O X \n",
"X O \n",
"Current turn: X\n",
"X \n",
"O X O \n",
"X O \n",
"Current turn: O\n",
"X X \n",
"O X O \n",
"X O \n",
"Эпизод 84, Итоговая награда: 1\n",
"Средняя награда: 0.30\n",
"Current turn: O\n",
" \n",
"X \n",
" \n",
"Current turn: X\n",
" \n",
"X O \n",
" \n",
"Current turn: O\n",
"X \n",
"X O \n",
" \n",
"Current turn: X\n",
"X \n",
"X O \n",
" O \n",
"Current turn: O\n",
"X X \n",
"X O \n",
" O \n",
"Current turn: X\n",
"X X \n",
"X O \n",
"O O \n",
"Current turn: O\n",
"X X \n",
"X O \n",
"O X O \n",
"Current turn: X\n",
"X X O \n",
"X O \n",
"O X O \n",
"Эпизод 85, Итоговая награда: -1\n",
"Средняя награда: 0.28\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
"O X \n",
" \n",
" \n",
"Current turn: O\n",
"O X \n",
" \n",
" X \n",
"Current turn: X\n",
"O X O \n",
" \n",
" X \n",
"Current turn: O\n",
"O X O \n",
"X \n",
" X \n",
"Current turn: X\n",
"O X O \n",
"X O \n",
" X \n",
"Current turn: O\n",
"O X O \n",
"X O X \n",
" X \n",
"Current turn: X\n",
"O X O \n",
"X O X \n",
"O X \n",
"Эпизод 86, Итоговая награда: -1\n",
"Средняя награда: 0.27\n",
"Current turn: O\n",
"X \n",
" \n",
" \n",
"Current turn: X\n",
"X \n",
" \n",
" O \n",
"Current turn: O\n",
"X \n",
" \n",
" X O \n",
"Current turn: X\n",
"X \n",
" O \n",
" X O \n",
"Current turn: O\n",
"X X \n",
" O \n",
" X O \n",
"Current turn: X\n",
"X X \n",
" O \n",
"O X O \n",
"Current turn: O\n",
"X X X \n",
" O \n",
"O X O \n",
"Эпизод 87, Итоговая награда: 1\n",
"Средняя награда: 0.28\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" \n",
"O X \n",
" \n",
"Current turn: O\n",
" \n",
"O X X \n",
" \n",
"Current turn: X\n",
" O \n",
"O X X \n",
" \n",
"Current turn: O\n",
"X O \n",
"O X X \n",
" \n",
"Current turn: X\n",
"X O \n",
"O X X \n",
" O \n",
"Current turn: O\n",
"X O \n",
"O X X \n",
"X O \n",
"Current turn: X\n",
"X O O \n",
"O X X \n",
"X O \n",
"Current turn: O\n",
"X O O \n",
"O X X \n",
"X O X \n",
"Эпизод 88, Итоговая награда: 1\n",
"Средняя награда: 0.28\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" O \n",
" \n",
"Current turn: O\n",
" X \n",
" O \n",
" X \n",
"Current turn: X\n",
" X \n",
" O \n",
" O X \n",
"Current turn: O\n",
" X \n",
"X O \n",
" O X \n",
"Current turn: X\n",
" X \n",
"X O O \n",
" O X \n",
"Current turn: O\n",
" X \n",
"X O O \n",
"X O X \n",
"Current turn: X\n",
" X O \n",
"X O O \n",
"X O X \n",
"Current turn: O\n",
"X X O \n",
"X O O \n",
"X O X \n",
"Эпизод 89, Итоговая награда: 1\n",
"Средняя награда: 0.29\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" \n",
" X \n",
"O \n",
"Current turn: O\n",
" \n",
" X \n",
"O X \n",
"Current turn: X\n",
" \n",
" O X \n",
"O X \n",
"Current turn: O\n",
" X \n",
" O X \n",
"O X \n",
"Эпизод 90, Итоговая награда: 1\n",
"Средняя награда: 0.30\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" \n",
"O X \n",
" \n",
"Current turn: O\n",
" \n",
"O X X \n",
" \n",
"Current turn: X\n",
"O \n",
"O X X \n",
" \n",
"Current turn: O\n",
"O \n",
"O X X \n",
" X \n",
"Current turn: X\n",
"O O \n",
"O X X \n",
" X \n",
"Current turn: O\n",
"O O X \n",
"O X X \n",
" X \n",
"Эпизод 91, Итоговая награда: 1\n",
"Средняя награда: 0.31\n",
"Current turn: O\n",
"X \n",
" \n",
" \n",
"Current turn: X\n",
"X \n",
"O \n",
" \n",
"Current turn: O\n",
"X X \n",
"O \n",
" \n",
"Current turn: X\n",
"X X \n",
"O O \n",
" \n",
"Current turn: O\n",
"X X \n",
"O O X \n",
" \n",
"Current turn: X\n",
"X X \n",
"O O X \n",
"O \n",
"Current turn: O\n",
"X X X \n",
"O O X \n",
"O \n",
"Эпизод 92, Итоговая награда: 1\n",
"Средняя награда: 0.32\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" O \n",
" X \n",
" \n",
"Current turn: O\n",
" O \n",
" X \n",
" X \n",
"Current turn: X\n",
" O \n",
" X \n",
"O X \n",
"Current turn: O\n",
"X O \n",
" X \n",
"O X \n",
"Current turn: X\n",
"X O \n",
" O X \n",
"O X \n",
"Current turn: O\n",
"X O X \n",
" O X \n",
"O X \n",
"Эпизод 93, Итоговая награда: 1\n",
"Средняя награда: 0.32\n",
"Current turn: O\n",
" \n",
"X \n",
" \n",
"Current turn: X\n",
" O \n",
"X \n",
" \n",
"Current turn: O\n",
" O \n",
"X \n",
" X \n",
"Current turn: X\n",
" O \n",
"X O \n",
" X \n",
"Current turn: O\n",
"X O \n",
"X O \n",
" X \n",
"Current turn: X\n",
"X O \n",
"X O \n",
"O X \n",
"Эпизод 94, Итоговая награда: -1\n",
"Средняя награда: 0.31\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" O \n",
" \n",
"Current turn: O\n",
" X X \n",
" O \n",
" \n",
"Current turn: X\n",
" X X \n",
" O \n",
" O \n",
"Current turn: O\n",
"X X X \n",
" O \n",
" O \n",
"Эпизод 95, Итоговая награда: 1\n",
"Средняя награда: 0.32\n",
"Current turn: O\n",
"X \n",
" \n",
" \n",
"Current turn: X\n",
"X O \n",
" \n",
" \n",
"Current turn: O\n",
"X O \n",
" \n",
" X \n",
"Current turn: X\n",
"X O \n",
" O \n",
" X \n",
"Current turn: O\n",
"X O \n",
" O X \n",
" X \n",
"Current turn: X\n",
"X O \n",
" O X \n",
" X O \n",
"Current turn: O\n",
"X O \n",
"X O X \n",
" X O \n",
"Current turn: X\n",
"X O O \n",
"X O X \n",
" X O \n",
"Current turn: O\n",
"X O O \n",
"X O X \n",
"X X O \n",
"Эпизод 96, Итоговая награда: 1\n",
"Средняя награда: 0.32\n",
"Current turn: O\n",
" \n",
" \n",
" X \n",
"Current turn: X\n",
" \n",
" O \n",
" X \n",
"Current turn: O\n",
" \n",
"X O \n",
" X \n",
"Current turn: X\n",
"O \n",
"X O \n",
" X \n",
"Current turn: O\n",
"O \n",
"X O X \n",
" X \n",
"Current turn: X\n",
"O O \n",
"X O X \n",
" X \n",
"Current turn: O\n",
"O O \n",
"X O X \n",
"X X \n",
"Current turn: X\n",
"O O O \n",
"X O X \n",
"X X \n",
"Эпизод 97, Итоговая награда: -1\n",
"Средняя награда: 0.31\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" \n",
" X \n",
"O \n",
"Current turn: O\n",
" \n",
"X X \n",
"O \n",
"Current turn: X\n",
" O \n",
"X X \n",
"O \n",
"Current turn: O\n",
" O X \n",
"X X \n",
"O \n",
"Current turn: X\n",
" O X \n",
"X X \n",
"O O \n",
"Current turn: O\n",
"X O X \n",
"X X \n",
"O O \n",
"Current turn: X\n",
"X O X \n",
"X X O \n",
"O O \n",
"Current turn: O\n",
"X O X \n",
"X X O \n",
"O O X \n",
"Эпизод 98, Итоговая награда: 1\n",
"Средняя награда: 0.32\n",
"Current turn: O\n",
" X \n",
" \n",
" \n",
"Current turn: X\n",
" X \n",
" \n",
" O \n",
"Current turn: O\n",
" X \n",
" \n",
"X O \n",
"Current turn: X\n",
" X \n",
" O \n",
"X O \n",
"Current turn: O\n",
"X X \n",
" O \n",
"X O \n",
"Current turn: X\n",
"X X \n",
" O O \n",
"X O \n",
"Current turn: O\n",
"X X \n",
"X O O \n",
"X O \n",
"Эпизод 99, Итоговая награда: 1\n",
"Средняя награда: 0.32\n",
"Current turn: O\n",
" \n",
" X \n",
" \n",
"Current turn: X\n",
" \n",
" X \n",
" O \n",
"Current turn: O\n",
" X \n",
" X \n",
" O \n",
"Current turn: X\n",
"O X \n",
" X \n",
" O \n",
"Current turn: O\n",
"O X \n",
" X \n",
"X O \n",
"Current turn: X\n",
"O X \n",
" X \n",
"X O O \n",
"Current turn: O\n",
"O X \n",
" X X \n",
"X O O \n",
"Current turn: X\n",
"O X \n",
"O X X \n",
"X O O \n",
"Current turn: O\n",
"O X X \n",
"O X X \n",
"X O O \n",
"Эпизод 100, Итоговая награда: 1\n",
"Средняя награда: 0.33\n"
]
}
],
"source": [
"# Main agent loop: play a series of episodes and record each episode's reward.\n",
"# NOTE(review): no explicit learning/update call is made in this cell; it only\n",
"# plays games and reports reward statistics.\n",
"\n",
"# Create the game environment.\n",
"game_env = TicTacToeEnv()\n",
"\n",
"# Create the agent that plays crosses (X).\n",
"# NOTE(review): the loop below asks this single agent for the move on every\n",
"# turn, and the turn is never passed in from here (the environment appears to\n",
"# track it itself, see the rendered 'Current turn' lines), so the agent is\n",
"# effectively choosing moves for both sides. Confirm this is intended.\n",
"player_agent = GameAgent(token=1)\n",
"\n",
"total_episodes = 100  # Number of episodes (games) to play\n",
"reward_history = []   # Total reward of every finished episode\n",
"\n",
"for episode in range(total_episodes):\n",
"    # Reset the game before each new episode. The returned values are not\n",
"    # needed here: legal moves are queried from the environment directly.\n",
"    game_env.reset()\n",
"\n",
"    # Total reward accumulated over the episode.\n",
"    episode_reward = 0\n",
"\n",
"    # Game loop: a 3x3 board allows at most 9 moves.\n",
"    for _ in range(9):\n",
"        moves = game_env.available_moves()  # Moves currently offered by the environment\n",
"\n",
"        # No moves left: the game is over.\n",
"        if not moves:\n",
"            break\n",
"\n",
"        # Let the agent choose; a single remaining move is forced.\n",
"        chosen_move = player_agent.select_move(moves) if len(moves) > 1 else moves[0]\n",
"\n",
"        # Apply the move; only the reward and the termination flag are used.\n",
"        _, reward, game_finished, _ = game_env.step(chosen_move)\n",
"        episode_reward += reward\n",
"\n",
"        # Show the current board.\n",
"        game_env.render()\n",
"\n",
"        # Stop as soon as the environment reports the game as finished.\n",
"        if game_finished:\n",
"            break\n",
"\n",
"    reward_history.append(episode_reward)\n",
"\n",
"    # Print running statistics.\n",
"    print(f\"Эпизод {episode + 1}, Итоговая награда: {episode_reward}\")\n",
"    avg_reward = sum(reward_history) / len(reward_history)\n",
"    print(f\"Средняя награда: {avg_reward:.2f}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}