import gymnasium as gym
from gymnasium import spaces


class TicTacToeEnv(gym.Env):
    """Two-player tic-tac-toe environment.

    State is a dict with two keys:

    * ``'board'``   -- list of 9 ints in row-major order
      (1 = X, -1 = O, 0 = empty);
    * ``'on_move'`` -- 1 or -1, the player who moves next.

    An action is a pair ``[player, square]`` with ``square`` in ``0..8``.

    NOTE(review): ``step`` returns ``(state, done, info)`` and ``reset``
    returns the bare state dict. This differs from the Gymnasium contract
    (``reset -> (obs, info)``, ``step -> 5-tuple``); the shapes are kept
    because existing callers unpack them this way. The declared
    ``action_space`` / ``observation_space`` also do not describe the
    ``[player, square]`` actions or the dict state -- confirm before
    wrapping this environment with Gymnasium tooling.
    """

    # Gymnasium reads the 'render_modes' key ('render.modes' was the old Gym name).
    metadata = {'render_modes': ['human']}

    # Indexed by cell value + 1: -1 -> 'O', 0 -> ' ', 1 -> 'X'.
    symbols = ['O', ' ', 'X']

    # Index triples that form a winning line: rows, columns, diagonals.
    _LINES = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 4, 8), (2, 4, 6),
    )

    def __init__(self):
        super().__init__()
        self.action_space = spaces.Discrete(9)
        self.observation_space = spaces.Discrete(9*3*2)  # flattened
        # Start from a valid state so step()/render() work before reset().
        self.state = self._new_state()

    @staticmethod
    def _new_state():
        """Return a fresh state: empty board, X (1) on move."""
        return {'board': [0] * 9, 'on_move': 1}

    def step(self, action):
        """Apply ``action`` and report whether the game has ended.

        Args:
            action: pair ``[player, square]``.

        Returns:
            ``(state, done, info)``. ``done`` is True after an illegal move,
            a winning move, or a move that fills the board. ``info`` holds
            ``'winner'`` (the winning player, or 0 for a draw) when the game
            ended by a legal move, and is empty otherwise.

        An illegal move (square out of range, square occupied, or wrong
        player) is reported on stdout and ends the episode without
        modifying the board or the turn.
        """
        p, square = action
        board = self.state['board']
        player = self.state['on_move']
        info = {}

        # check move legality -- each illegal case returns before any mutation
        if not 0 <= square < len(board):
            print("illegal move ", action, ". (square out of range): ", square)
            return self.state, True, info
        if board[square] != 0:
            print("illegal move ", action, ". (square occupied): ", square)
            return self.state, True, info
        if p != player:
            print("illegal move ", action, " not on move: ", p)
            return self.state, True, info

        board[square] = p
        self.state['on_move'] = -p

        # check game over: only the mover can have completed a line
        if any(all(board[i] == p for i in line) for line in self._LINES):
            info['winner'] = p
            return self.state, True, info
        if 0 not in board:
            # board is full and nobody completed a line: draw
            info['winner'] = 0
            return self.state, True, info

        return self.state, False, info

    def reset(self, *, seed=None, options=None):
        """Start a new game and return the state dict.

        Args:
            seed: forwarded to ``gym.Env.reset`` to seed the environment RNG.
            options: accepted for Gymnasium signature compatibility; unused.
        """
        super().reset(seed=seed)
        self.state = self._new_state()
        return self.state

    def render(self, mode='human', close=False):
        """Print the player on move followed by the 3x3 board."""
        if close:
            return
        print("on move: " , self.symbols[self.state['on_move']+1])
        board = self.state['board']
        for row in range(3):
            for cell in board[row * 3:row * 3 + 3]:
                print(self.symbols[cell + 1], end=" ")
            print()

    def move_generator(self):
        """Return every legal ``[player, square]`` move for the player on move."""
        p = self.state['on_move']
        return [[p, i] for i, cell in enumerate(self.state['board']) if cell == 0]
import random


class MyAgent:
    """Baseline player that picks a random move from the ones offered."""

    def __init__(self, symbol):
        # Side this agent plays: 1 for X, -1 for O.
        self.symbol = symbol

    def get_action(self, moves):
        """Return one randomly chosen element of ``moves``.

        ``moves`` must be a non-empty sequence; ``random.choice`` raises
        ``IndexError`` on an empty one.
        """
        picked = random.choice(moves)
        return picked
"on move: O\n", + " X \n", + "X X \n", + "O O \n", + "on move: X\n", + " X O \n", + "X X \n", + "O O \n", + "on move: O\n", + " X O \n", + "X X \n", + "O O X \n", + "on move: X\n", + "O X O \n", + "X X \n", + "O O X \n", + "on move: O\n", + "O X O \n", + "X X X \n", + "O O X \n", + "Episode 3 is done\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + " O \n", + "on move: O\n", + " \n", + " X \n", + "X O \n", + "on move: X\n", + "O \n", + " X \n", + "X O \n", + "on move: O\n", + "O X \n", + " X \n", + "X O \n", + "on move: X\n", + "O X O \n", + " X \n", + "X O \n", + "on move: O\n", + "O X O \n", + "X X \n", + "X O \n", + "on move: X\n", + "O X O \n", + "X X \n", + "X O O \n", + "on move: O\n", + "O X O \n", + "X X X \n", + "X O O \n", + "Episode 4 is done\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X O \n", + " \n", + "on move: O\n", + " \n", + "X X O \n", + " \n", + "on move: X\n", + " \n", + "X X O \n", + " O \n", + "on move: O\n", + " \n", + "X X O \n", + "X O \n", + "on move: X\n", + "O \n", + "X X O \n", + "X O \n", + "on move: O\n", + "O X \n", + "X X O \n", + "X O \n", + "Episode 5 is done\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + "O \n", + "on move: O\n", + " \n", + " X X \n", + "O \n", + "on move: X\n", + " \n", + " X X \n", + "O O \n", + "on move: O\n", + "X \n", + " X X \n", + "O O \n", + "on move: X\n", + "X \n", + " X X \n", + "O O O \n", + "Episode 6 is done\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X O \n", + " \n", + " \n", + "on move: O\n", + "X O \n", + " X \n", + " \n", + "on move: X\n", + "X O \n", + " X \n", + " O \n", + "on move: O\n", + "X O \n", + " X \n", + " X O \n", + "on move: X\n", + "X O \n", + " X \n", + "O X O \n", + "on move: O\n", + "X X O \n", + " X \n", + "O X O \n", + "on move: X\n", + "X X O \n", + " O X \n", + "O X O \n", + "Episode 7 is done\n", + "on move: O\n", + " X \n", 
+ " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + "X X \n", + " O \n", + " \n", + "on move: X\n", + "X X O \n", + " O \n", + " \n", + "on move: O\n", + "X X O \n", + "X O \n", + " \n", + "on move: X\n", + "X X O \n", + "X O \n", + " O \n", + "on move: O\n", + "X X O \n", + "X O \n", + "X O \n", + "Episode 8 is done\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + "O \n", + "X \n", + "on move: O\n", + " \n", + "O X \n", + "X \n", + "on move: X\n", + " \n", + "O X O \n", + "X \n", + "on move: O\n", + "X \n", + "O X O \n", + "X \n", + "on move: X\n", + "X \n", + "O X O \n", + "X O \n", + "on move: O\n", + "X \n", + "O X O \n", + "X O X \n", + "Episode 9 is done\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + "O \n", + "on move: O\n", + " \n", + " X X \n", + "O \n", + "on move: X\n", + " O \n", + " X X \n", + "O \n", + "on move: O\n", + " O \n", + "X X X \n", + "O \n", + "Episode 10 is done\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + "O \n", + " X \n", + "on move: O\n", + " \n", + "O X \n", + " X \n", + "on move: X\n", + " O \n", + "O X \n", + " X \n", + "on move: O\n", + " X O \n", + "O X \n", + " X \n", + "on move: X\n", + "O X O \n", + "O X \n", + " X \n", + "on move: O\n", + "O X O \n", + "O X \n", + " X X \n", + "on move: X\n", + "O X O \n", + "O X \n", + "O X X \n", + "Episode 11 is done\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + " X \n", + " X O \n", + " \n", + "on move: X\n", + " X \n", + " X O \n", + " O \n", + "on move: O\n", + "X X \n", + " X O \n", + " O \n", + "on move: X\n", + "X O X \n", + " X O \n", + " O \n", + "on move: O\n", + "X O X \n", + " X O \n", + "X O \n", + "Episode 12 is done\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + "O \n", + " \n", + "on move: O\n", + " X \n", + "O \n", + " X \n", 
+ "on move: X\n", + " X \n", + "O \n", + "O X \n", + "on move: O\n", + " X \n", + "O X \n", + "O X \n", + "on move: X\n", + " X O \n", + "O X \n", + "O X \n", + "on move: O\n", + " X O \n", + "O X \n", + "O X X \n", + "on move: X\n", + " X O \n", + "O O X \n", + "O X X \n", + "Episode 13 is done\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + "O \n", + "X \n", + " \n", + "on move: O\n", + "O \n", + "X \n", + "X \n", + "on move: X\n", + "O O \n", + "X \n", + "X \n", + "on move: O\n", + "O X O \n", + "X \n", + "X \n", + "on move: X\n", + "O X O \n", + "X \n", + "X O \n", + "on move: O\n", + "O X O \n", + "X X \n", + "X O \n", + "on move: X\n", + "O X O \n", + "X X \n", + "X O O \n", + "on move: O\n", + "O X O \n", + "X X X \n", + "X O O \n", + "Episode 14 is done\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " O X \n", + " \n", + "on move: O\n", + " \n", + " O X \n", + " X \n", + "on move: X\n", + " O \n", + " O X \n", + " X \n", + "on move: O\n", + " O X \n", + " O X \n", + " X \n", + "Episode 15 is done\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + " O \n", + " X \n", + "X \n", + "on move: X\n", + " O \n", + " X O \n", + "X \n", + "on move: O\n", + " O \n", + "X X O \n", + "X \n", + "on move: X\n", + " O \n", + "X X O \n", + "X O \n", + "on move: O\n", + "X O \n", + "X X O \n", + "X O \n", + "Episode 16 is done\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + "O \n", + " \n", + "on move: O\n", + "X \n", + "O \n", + " X \n", + "on move: X\n", + "X \n", + "O \n", + " X O \n", + "on move: O\n", + "X X \n", + "O \n", + " X O \n", + "on move: X\n", + "X X O \n", + "O \n", + " X O \n", + "on move: O\n", + "X X O \n", + "O \n", + "X X O \n", + "on move: X\n", + "X X O \n", + "O O \n", + "X X O \n", + "on move: O\n", + "X X O \n", + "O O X \n", + "X X O \n", + "Episode 17 is done\n", + "on move: O\n", + " \n", + " 
\n", + " X \n", + "on move: X\n", + " \n", + " O \n", + " X \n", + "on move: O\n", + " X \n", + " O \n", + " X \n", + "on move: X\n", + " X \n", + " O \n", + "O X \n", + "on move: O\n", + " X \n", + "X O \n", + "O X \n", + "on move: X\n", + " X \n", + "X O O \n", + "O X \n", + "on move: O\n", + " X \n", + "X O O \n", + "O X X \n", + "on move: X\n", + " O X \n", + "X O O \n", + "O X X \n", + "on move: O\n", + "X O X \n", + "X O O \n", + "O X X \n", + "Episode 18 is done\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + "O \n", + "X \n", + "on move: O\n", + " X \n", + "O \n", + "X \n", + "on move: X\n", + " X \n", + "O \n", + "X O \n", + "on move: O\n", + "X X \n", + "O \n", + "X O \n", + "on move: X\n", + "X X O \n", + "O \n", + "X O \n", + "on move: O\n", + "X X O \n", + "O X \n", + "X O \n", + "on move: X\n", + "X X O \n", + "O X \n", + "X O O \n", + "on move: O\n", + "X X O \n", + "O X X \n", + "X O O \n", + "Episode 19 is done\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X O \n", + " \n", + " \n", + "on move: O\n", + "X O \n", + "X \n", + " \n", + "on move: X\n", + "X O \n", + "X \n", + " O \n", + "on move: O\n", + "X O \n", + "X X \n", + " O \n", + "on move: X\n", + "X O \n", + "X X O \n", + " O \n", + "on move: O\n", + "X O X \n", + "X X O \n", + " O \n", + "on move: X\n", + "X O X \n", + "X X O \n", + "O O \n", + "on move: O\n", + "X O X \n", + "X X O \n", + "O X O \n", + "Episode 20 is done\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " \n", + "O \n", + "on move: O\n", + "X \n", + " \n", + "O X \n", + "on move: X\n", + "X O \n", + " \n", + "O X \n", + "on move: O\n", + "X O \n", + " \n", + "O X X \n", + "on move: X\n", + "X O O \n", + " \n", + "O X X \n", + "on move: O\n", + "X O O \n", + " X \n", + "O X X \n", + "on move: X\n", + "X O O \n", + "O X \n", + "O X X \n", + "on move: O\n", + "X O O \n", + "O X X \n", + "O X X \n", + "Episode 21 is done\n", + "on move: O\n", 
+ "X \n", + " \n", + " \n", + "on move: X\n", + "X O \n", + " \n", + " \n", + "on move: O\n", + "X O X \n", + " \n", + " \n", + "on move: X\n", + "X O X \n", + " O \n", + " \n", + "on move: O\n", + "X O X \n", + "X O \n", + " \n", + "on move: X\n", + "X O X \n", + "X O \n", + " O \n", + "on move: O\n", + "X O X \n", + "X O \n", + "X O \n", + "Episode 22 is done\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " O \n", + " \n", + "X \n", + "on move: O\n", + " O \n", + " \n", + "X X \n", + "on move: X\n", + " O \n", + " O \n", + "X X \n", + "on move: O\n", + " O \n", + "X O \n", + "X X \n", + "on move: X\n", + "O O \n", + "X O \n", + "X X \n", + "on move: O\n", + "O X O \n", + "X O \n", + "X X \n", + "on move: X\n", + "O X O \n", + "X O \n", + "X X O \n", + "Episode 23 is done\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " \n", + " O \n", + "on move: O\n", + " X \n", + " \n", + " X O \n", + "on move: X\n", + " X \n", + " O \n", + " X O \n", + "on move: O\n", + " X X \n", + " O \n", + " X O \n", + "on move: X\n", + "O X X \n", + " O \n", + " X O \n", + "Episode 24 is done\n", + "on move: O\n", + " X \n", + " \n", + " \n", + "on move: X\n", + " X \n", + " O \n", + " \n", + "on move: O\n", + " X \n", + " O X \n", + " \n", + "on move: X\n", + " X \n", + " O X \n", + " O \n", + "on move: O\n", + "X X \n", + " O X \n", + " O \n", + "on move: X\n", + "X X \n", + " O X \n", + "O O \n", + "on move: O\n", + "X X \n", + " O X \n", + "O X O \n", + "on move: X\n", + "X X \n", + "O O X \n", + "O X O \n", + "on move: O\n", + "X X X \n", + "O O X \n", + "O X O \n", + "Episode 25 is done\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " O X \n", + " \n", + "on move: O\n", + " X \n", + " O X \n", + " \n", + "on move: X\n", + " O X \n", + " O X \n", + " \n", + "on move: O\n", + " O X \n", + "X O X \n", + " \n", + "on move: X\n", + " O X \n", + "X O X \n", + "O \n", + "on move: O\n", + "X O X \n", 
+ "X O X \n", + "O \n", + "on move: X\n", + "X O X \n", + "X O X \n", + "O O \n", + "on move: O\n", + "X O X \n", + "X O X \n", + "O X O \n", + "Episode 26 is done\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " O X \n", + " \n", + "on move: O\n", + " \n", + " O X \n", + " X \n", + "on move: X\n", + " \n", + " O X \n", + " X O \n", + "on move: O\n", + "X \n", + " O X \n", + " X O \n", + "on move: X\n", + "X O \n", + " O X \n", + " X O \n", + "on move: O\n", + "X X O \n", + " O X \n", + " X O \n", + "on move: X\n", + "X X O \n", + " O X \n", + "O X O \n", + "Episode 27 is done\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " \n", + "O X \n", + "on move: O\n", + " \n", + " \n", + "O X X \n", + "on move: X\n", + " \n", + "O \n", + "O X X \n", + "on move: O\n", + " \n", + "O X \n", + "O X X \n", + "on move: X\n", + " O \n", + "O X \n", + "O X X \n", + "on move: O\n", + " X O \n", + "O X \n", + "O X X \n", + "Episode 28 is done\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X \n", + " O \n", + "on move: O\n", + " X \n", + "X \n", + " O \n", + "on move: X\n", + " X \n", + "X O \n", + " O \n", + "on move: O\n", + " X \n", + "X O \n", + " X O \n", + "on move: X\n", + " X \n", + "X O \n", + "O X O \n", + "on move: O\n", + " X \n", + "X X O \n", + "O X O \n", + "on move: X\n", + "O X \n", + "X X O \n", + "O X O \n", + "on move: O\n", + "O X X \n", + "X X O \n", + "O X O \n", + "Episode 29 is done\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X O \n", + " \n", + "on move: O\n", + " X \n", + "X O \n", + " \n", + "on move: X\n", + " X \n", + "X O \n", + "O \n", + "on move: O\n", + " X \n", + "X X O \n", + "O \n", + "on move: X\n", + "O X \n", + "X X O \n", + "O \n", + "on move: O\n", + "O X \n", + "X X O \n", + "O X \n", + "on move: X\n", + "O X \n", + "X X O \n", + "O O X \n", + "on move: O\n", + "O X X \n", + "X X O \n", + "O O X \n", + 
"Episode 30 is done\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " O \n", + "X \n", + " \n", + "on move: O\n", + " O \n", + "X \n", + " X \n", + "on move: X\n", + "O O \n", + "X \n", + " X \n", + "on move: O\n", + "O O X \n", + "X \n", + " X \n", + "on move: X\n", + "O O X \n", + "X \n", + "O X \n", + "on move: O\n", + "O O X \n", + "X \n", + "O X X \n", + "on move: X\n", + "O O X \n", + "X O \n", + "O X X \n", + "on move: O\n", + "O O X \n", + "X X O \n", + "O X X \n", + "Episode 31 is done\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " O \n", + " X \n", + " \n", + "on move: O\n", + "X O \n", + " X \n", + " \n", + "on move: X\n", + "X O \n", + " X \n", + " O \n", + "on move: O\n", + "X O \n", + " X \n", + "X O \n", + "on move: X\n", + "X O \n", + "O X \n", + "X O \n", + "on move: O\n", + "X O \n", + "O X X \n", + "X O \n", + "on move: X\n", + "X O O \n", + "O X X \n", + "X O \n", + "on move: O\n", + "X O O \n", + "O X X \n", + "X O X \n", + "Episode 32 is done\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + "O \n", + " \n", + "on move: O\n", + "X \n", + "O X \n", + " \n", + "on move: X\n", + "X \n", + "O X \n", + "O \n", + "on move: O\n", + "X \n", + "O X \n", + "O X \n", + "on move: X\n", + "X O \n", + "O X \n", + "O X \n", + "on move: O\n", + "X O X \n", + "O X \n", + "O X \n", + "on move: X\n", + "X O X \n", + "O X O \n", + "O X \n", + "on move: O\n", + "X O X \n", + "O X O \n", + "O X X \n", + "Episode 33 is done\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " O \n", + "X \n", + " \n", + "on move: O\n", + "X O \n", + "X \n", + " \n", + "on move: X\n", + "X O \n", + "X \n", + "O \n", + "on move: O\n", + "X O \n", + "X X \n", + "O \n", + "on move: X\n", + "X O \n", + "X X \n", + "O O \n", + "on move: O\n", + "X O \n", + "X X \n", + "O X O \n", + "on move: X\n", + "X O \n", + "X O X \n", + "O X O \n", + "on move: O\n", + "X O X \n", + "X O X \n", + "O X O 
\n", + "Episode 34 is done\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " O \n", + " \n", + "X \n", + "on move: O\n", + " O \n", + " \n", + "X X \n", + "on move: X\n", + "O O \n", + " \n", + "X X \n", + "on move: O\n", + "O X O \n", + " \n", + "X X \n", + "on move: X\n", + "O X O \n", + "O \n", + "X X \n", + "on move: O\n", + "O X O \n", + "O \n", + "X X X \n", + "Episode 35 is done\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " \n", + "O X \n", + "on move: O\n", + " \n", + "X \n", + "O X \n", + "on move: X\n", + " O \n", + "X \n", + "O X \n", + "on move: O\n", + " O \n", + "X \n", + "O X X \n", + "on move: X\n", + "O O \n", + "X \n", + "O X X \n", + "on move: O\n", + "O O \n", + "X X \n", + "O X X \n", + "on move: X\n", + "O O \n", + "X O X \n", + "O X X \n", + "Episode 36 is done\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + "O \n", + " X \n", + "on move: O\n", + " X \n", + "O \n", + " X \n", + "on move: X\n", + " X \n", + "O O \n", + " X \n", + "on move: O\n", + " X X \n", + "O O \n", + " X \n", + "on move: X\n", + "O X X \n", + "O O \n", + " X \n", + "on move: O\n", + "O X X \n", + "O O \n", + " X X \n", + "on move: X\n", + "O X X \n", + "O O \n", + "O X X \n", + "Episode 37 is done\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + "O \n", + " \n", + "on move: O\n", + "X \n", + "O \n", + " X \n", + "on move: X\n", + "X O \n", + "O \n", + " X \n", + "on move: O\n", + "X O \n", + "O X \n", + " X \n", + "on move: X\n", + "X O \n", + "O X \n", + " X O \n", + "on move: O\n", + "X X O \n", + "O X \n", + " X O \n", + "on move: X\n", + "X X O \n", + "O X \n", + "O X O \n", + "on move: O\n", + "X X O \n", + "O X X \n", + "O X O \n", + "Episode 38 is done\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + "O X \n", + " \n", + "on move: O\n", + " X \n", + "O X \n", + " \n", + "on move: X\n", + " X \n", + "O X \n", + "O 
\n", + "on move: O\n", + " X \n", + "O X \n", + "O X \n", + "on move: X\n", + " O X \n", + "O X \n", + "O X \n", + "on move: O\n", + " O X \n", + "O X \n", + "O X X \n", + "on move: X\n", + " O X \n", + "O X O \n", + "O X X \n", + "on move: O\n", + "X O X \n", + "O X O \n", + "O X X \n", + "Episode 39 is done\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " O \n", + "X \n", + "on move: O\n", + " \n", + "X O \n", + "X \n", + "on move: X\n", + " \n", + "X O O \n", + "X \n", + "on move: O\n", + " X \n", + "X O O \n", + "X \n", + "on move: X\n", + " O X \n", + "X O O \n", + "X \n", + "on move: O\n", + "X O X \n", + "X O O \n", + "X \n", + "Episode 40 is done\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + " O \n", + "on move: O\n", + " \n", + "X X \n", + " O \n", + "on move: X\n", + " O \n", + "X X \n", + " O \n", + "on move: O\n", + " X O \n", + "X X \n", + " O \n", + "on move: X\n", + "O X O \n", + "X X \n", + " O \n", + "on move: O\n", + "O X O \n", + "X X X \n", + " O \n", + "Episode 41 is done\n", + "on move: O\n", + " \n", + " \n", + " X \n", + "on move: X\n", + " \n", + " \n", + "O X \n", + "on move: O\n", + " \n", + " X \n", + "O X \n", + "on move: X\n", + " O \n", + " X \n", + "O X \n", + "on move: O\n", + "X O \n", + " X \n", + "O X \n", + "on move: X\n", + "X O \n", + " X O \n", + "O X \n", + "on move: O\n", + "X X O \n", + " X O \n", + "O X \n", + "Episode 42 is done\n", + "on move: O\n", + "X \n", + " \n", + " \n", + "on move: X\n", + "X \n", + " O \n", + " \n", + "on move: O\n", + "X \n", + " O \n", + "X \n", + "on move: X\n", + "X O \n", + " O \n", + "X \n", + "on move: O\n", + "X O X \n", + " O \n", + "X \n", + "on move: X\n", + "X O X \n", + " O \n", + "X O \n", + "on move: O\n", + "X O X \n", + " X O \n", + "X O \n", + "Episode 43 is done\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " O \n", + "X \n", + "on move: O\n", + " \n", + " X O \n", + 
"X \n", + "on move: X\n", + "O \n", + " X O \n", + "X \n", + "on move: O\n", + "O \n", + "X X O \n", + "X \n", + "on move: X\n", + "O \n", + "X X O \n", + "X O \n", + "on move: O\n", + "O X \n", + "X X O \n", + "X O \n", + "Episode 44 is done\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + " O \n", + "on move: O\n", + "X \n", + " X \n", + " O \n", + "on move: X\n", + "X O \n", + " X \n", + " O \n", + "on move: O\n", + "X O \n", + " X X \n", + " O \n", + "on move: X\n", + "X O \n", + " X X \n", + "O O \n", + "on move: O\n", + "X O \n", + "X X X \n", + "O O \n", + "Episode 45 is done\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + " \n", + "X \n", + "O \n", + "on move: O\n", + " X \n", + "X \n", + "O \n", + "on move: X\n", + " X \n", + "X O \n", + "O \n", + "on move: O\n", + " X X \n", + "X O \n", + "O \n", + "on move: X\n", + "O X X \n", + "X O \n", + "O \n", + "on move: O\n", + "O X X \n", + "X O X \n", + "O \n", + "on move: X\n", + "O X X \n", + "X O X \n", + "O O \n", + "Episode 46 is done\n", + "on move: O\n", + " \n", + "X \n", + " \n", + "on move: X\n", + "O \n", + "X \n", + " \n", + "on move: O\n", + "O \n", + "X X \n", + " \n", + "on move: X\n", + "O \n", + "X X \n", + " O \n", + "on move: O\n", + "O \n", + "X X X \n", + " O \n", + "Episode 47 is done\n", + "on move: O\n", + " \n", + " X \n", + " \n", + "on move: X\n", + " \n", + " X \n", + "O \n", + "on move: O\n", + " X \n", + " X \n", + "O \n", + "on move: X\n", + "O X \n", + " X \n", + "O \n", + "on move: O\n", + "O X \n", + " X \n", + "O X \n", + "on move: X\n", + "O X \n", + " X \n", + "O O X \n", + "on move: O\n", + "O X \n", + "X X \n", + "O O X \n", + "on move: X\n", + "O X \n", + "X X O \n", + "O O X \n", + "on move: O\n", + "O X X \n", + "X X O \n", + "O O X \n", + "Episode 48 is done\n", + "on move: O\n", + " \n", + " \n", + "X \n", + "on move: X\n", + " \n", + " \n", + "X O \n", + "on move: O\n", + " \n", + " X \n", + "X O \n", + 
# Create the tic-tac-toe environment
myenv = TicTacToeEnv()

# Create the agent (plays as X)
agent = MyAgent(symbol=1)

num_episodes = 50       # number of games to play
collected_rewards = []  # per-episode result from the agent's side: 1 win, -1 loss, 0 draw

for i in range(num_episodes):
    # reset() returns the state dict itself; tuple-unpacking it would bind
    # the dict's keys rather than the state.
    state = myenv.reset()

    done = False
    winner = 0  # stays 0 if the board fills up without a decisive move

    # At most 9 moves fit on a 3x3 board
    for _ in range(9):
        moves = myenv.move_generator()

        # No moves left: the game is over
        if not moves:
            break

        if len(moves) == 1:
            move = moves[0]  # only one legal move left, nothing to choose
        else:
            move = agent.get_action(moves)  # the agent picks a move

        # Play the move and take the updated state
        next_state, done, info = myenv.step(move)
        state = next_state

        # Show the current position
        myenv.render()

        if done:
            # Use the winner reported in info if there is one; otherwise the
            # game ended on this move, so credit the player who made it.
            winner = info.get("winner", move[0])
            break

    collected_rewards.append(winner * agent.symbol)
    print(f"Episode {i+1} is done")