Алексей Крюков 3e12d321f6 500 ашчув
2024-12-25 13:21:50 +04:00

1795 lines
38 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"репул чтобы 500 не было "
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"#500 fixed\n",
"import gymnasium as gym\n",
"from gymnasium import spaces\n",
"\n",
"class TicTacToeEnv(gym.Env):\n",
"    \"\"\"Two-player tic-tac-toe environment.\n",
"\n",
"    ``self.state`` is a dict with two keys:\n",
"      * 'board'   - list of 9 cells in row-major order (1 = X, -1 = O, 0 = empty)\n",
"      * 'on_move' - the player (1 or -1) whose turn it is\n",
"\n",
"    An action is a ``(player, square)`` pair, as produced by ``move_generator()``.\n",
"\n",
"    NOTE(review): ``step()`` returns ``(state, done, info)`` and ``reset()`` returns\n",
"    the bare state dict, which is not the current Gymnasium signature, and\n",
"    ``action_space`` / ``observation_space`` do not describe the action pairs or\n",
"    the state dict. They are kept as they were so existing callers keep working.\n",
"    \"\"\"\n",
"\n",
"    metadata = {'render_modes': ['human']}\n",
"\n",
"    # The printable symbol for a cell value v is symbols[v + 1]\n",
"    symbols = ['O', ' ', 'X']\n",
"\n",
"    # Every index triple that forms a winning line\n",
"    _WIN_LINES = (\n",
"        (0, 1, 2), (3, 4, 5), (6, 7, 8),  # rows\n",
"        (0, 3, 6), (1, 4, 7), (2, 5, 8),  # columns\n",
"        (0, 4, 8), (2, 4, 6),             # diagonals\n",
"    )\n",
"\n",
"    def __init__(self):\n",
"        self.action_space = spaces.Discrete(9)\n",
"        self.observation_space = spaces.Discrete(9*3*2)  # flattened\n",
"        # Start from a valid state so step()/render() work before an explicit reset()\n",
"        self.reset()\n",
"\n",
"    def step(self, action):\n",
"        \"\"\"Apply one move.\n",
"\n",
"        Args:\n",
"            action: ``(player, square)`` pair; ``square`` indexes the 9-cell board.\n",
"\n",
"        Returns:\n",
"            ``(state, done, info)``. ``done`` is True when the move was illegal,\n",
"            when it completed a line for ``player``, or when it filled the board.\n",
"            ``info`` is always an empty dict.\n",
"        \"\"\"\n",
"        p, square = action\n",
"        board = self.state['board']\n",
"\n",
"        # check move legality: an illegal move ends the episode and must not\n",
"        # change the board or the side to move\n",
"        illegal = False\n",
"        if board[square] != 0:\n",
"            print(\"illegal move \", action, \". (square occupied): \", square)\n",
"            illegal = True\n",
"        if p != self.state['on_move']:\n",
"            print(\"illegal move \", action, \" not on move: \", p)\n",
"            illegal = True\n",
"        if illegal:\n",
"            return self.state, True, {}\n",
"\n",
"        board[square] = p\n",
"        self.state['on_move'] = -p\n",
"\n",
"        # check game over: the mover completed a line, or no empty square is left (draw)\n",
"        won = any(all(board[i] == p for i in line) for line in self._WIN_LINES)\n",
"        done = won or 0 not in board\n",
"        return self.state, done, {}\n",
"\n",
"    def reset(self):\n",
"        \"\"\"Start a new game with an empty board and X (1) to move; return the state dict.\"\"\"\n",
"        self.state = {'board': [0] * 9, 'on_move': 1}\n",
"        return self.state\n",
"\n",
"    def render(self, mode='human', close=False):\n",
"        \"\"\"Print the side to move followed by the board, one row per line.\"\"\"\n",
"        if close:\n",
"            return\n",
"        print(\"on move: \" , self.symbols[self.state['on_move']+1])\n",
"        board = self.state['board']\n",
"        for row in range(0, 9, 3):\n",
"            # every symbol is followed by one space, including the last one in the row\n",
"            print(\" \".join(self.symbols[v + 1] for v in board[row:row + 3]) + \" \")\n",
"\n",
"    def move_generator(self):\n",
"        \"\"\"Return every empty square as a ``[player, square]`` move for the side to move.\"\"\"\n",
"        player = self.state['on_move']\n",
"        return [[player, i] for i, v in enumerate(self.state['board']) if v == 0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Ключевые функции Gymnasium\n",
"\n",
"`self.reset()` — инициализация среды.\n",
"\n",
"`self.step(action)` — выполнение действия и переход в новое состояние.\n",
"\n",
"`self.render()` — визуализация текущего состояния среды.\n",
"\n",
"## Реализация агента\n",
"\n",
"Агент принимает решение (выбирает действие), основываясь на текущем состоянии среды, и затем получает обратную связь (награду и новое состояние) от среды.\n",
"\n",
"### Функции агента\n",
"\n",
"Выбор действия: Использует алгоритмы или стратегии, чтобы определить, что делать дальше.\n",
"\n",
"Обучение: Обновляет свои знания или стратегию на основе опыта, чтобы лучше справляться с задачей.\n",
"\n",
"Интерактивность: Адаптируется к изменениям в среде.\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"import random\n",
"\n",
"class MyAgent:\n",
"    \"\"\"Baseline agent that picks a random move from the ones it is offered.\"\"\"\n",
"\n",
"    def __init__(self, symbol, seed=None):\n",
"        \"\"\"Create the agent.\n",
"\n",
"        Args:\n",
"            symbol: the side this agent plays (1 - X, -1 - O).\n",
"            seed: optional seed. When given, the agent draws from its own\n",
"                ``random.Random(seed)`` so its choices are reproducible. When\n",
"                omitted, the shared ``random`` module generator is used.\n",
"        \"\"\"\n",
"        self.symbol = symbol\n",
"        self._rng = random.Random(seed) if seed is not None else random\n",
"\n",
"    def get_action(self, moves):\n",
"        \"\"\"Return one element of ``moves`` chosen at random.\n",
"\n",
"        Raises:\n",
"            IndexError: if ``moves`` is empty.\n",
"        \"\"\"\n",
"        return self._rng.choice(moves)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Цикл обучения"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
" X \n",
" X \n",
"on move: X\n",
" O \n",
" X \n",
" X O \n",
"on move: O\n",
" O X \n",
" X \n",
" X O \n",
"on move: X\n",
" O X \n",
" O X \n",
" X O \n",
"on move: O\n",
"X O X \n",
" O X \n",
" X O \n",
"on move: X\n",
"X O X \n",
" O X \n",
"O X O \n",
"on move: O\n",
"X O X \n",
"X O X \n",
"O X O \n",
"Episode 1 is done\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
"O \n",
" X \n",
"on move: O\n",
" \n",
"O \n",
"X X \n",
"on move: X\n",
" O \n",
"O \n",
"X X \n",
"on move: O\n",
" O X \n",
"O \n",
"X X \n",
"on move: X\n",
"O O X \n",
"O \n",
"X X \n",
"on move: O\n",
"O O X \n",
"O X \n",
"X X \n",
"Episode 2 is done\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X O \n",
" \n",
" \n",
"on move: O\n",
"X O \n",
"X \n",
" \n",
"on move: X\n",
"X O O \n",
"X \n",
" \n",
"on move: O\n",
"X O O \n",
"X \n",
"X \n",
"Episode 3 is done\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
"O \n",
"X \n",
" \n",
"on move: O\n",
"O X \n",
"X \n",
" \n",
"on move: X\n",
"O X \n",
"X O \n",
" \n",
"on move: O\n",
"O X X \n",
"X O \n",
" \n",
"on move: X\n",
"O X X \n",
"X O O \n",
" \n",
"on move: O\n",
"O X X \n",
"X O O \n",
" X \n",
"on move: X\n",
"O X X \n",
"X O O \n",
"O X \n",
"on move: O\n",
"O X X \n",
"X O O \n",
"O X X \n",
"Episode 4 is done\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
" O \n",
"on move: O\n",
" X \n",
" \n",
"X O \n",
"on move: X\n",
" X \n",
" O \n",
"X O \n",
"on move: O\n",
"X X \n",
" O \n",
"X O \n",
"on move: X\n",
"X X O \n",
" O \n",
"X O \n",
"Episode 5 is done\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" \n",
"X O \n",
"on move: O\n",
" \n",
"X \n",
"X O \n",
"on move: X\n",
" \n",
"X \n",
"X O O \n",
"on move: O\n",
" X \n",
"X \n",
"X O O \n",
"on move: X\n",
" X \n",
"X O \n",
"X O O \n",
"on move: O\n",
" X X \n",
"X O \n",
"X O O \n",
"on move: X\n",
" X X \n",
"X O O \n",
"X O O \n",
"on move: O\n",
"X X X \n",
"X O O \n",
"X O O \n",
"Episode 6 is done\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X O \n",
" \n",
" \n",
"on move: O\n",
"X O \n",
" \n",
"X \n",
"on move: X\n",
"X O \n",
" \n",
"X O \n",
"on move: O\n",
"X O \n",
" \n",
"X X O \n",
"on move: X\n",
"X O O \n",
" \n",
"X X O \n",
"on move: O\n",
"X O O \n",
"X \n",
"X X O \n",
"Episode 7 is done\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
" O \n",
"X \n",
"on move: O\n",
" \n",
" O \n",
"X X \n",
"on move: X\n",
" \n",
"O O \n",
"X X \n",
"on move: O\n",
" \n",
"O O \n",
"X X X \n",
"Episode 8 is done\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
" X X \n",
" \n",
"on move: X\n",
" O \n",
" X X \n",
" O \n",
"on move: O\n",
" O \n",
" X X \n",
" X O \n",
"on move: X\n",
" O \n",
" X X \n",
"O X O \n",
"on move: O\n",
"X O \n",
" X X \n",
"O X O \n",
"on move: X\n",
"X O O \n",
" X X \n",
"O X O \n",
"on move: O\n",
"X O O \n",
"X X X \n",
"O X O \n",
"Episode 9 is done\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
"O \n",
" \n",
" X \n",
"on move: O\n",
"O \n",
" \n",
"X X \n",
"on move: X\n",
"O \n",
" \n",
"X O X \n",
"on move: O\n",
"O X \n",
" \n",
"X O X \n",
"on move: X\n",
"O O X \n",
" \n",
"X O X \n",
"on move: O\n",
"O O X \n",
" X \n",
"X O X \n",
"Episode 10 is done\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
"O \n",
"on move: O\n",
" X \n",
" X \n",
"O \n",
"on move: X\n",
"O X \n",
" X \n",
"O \n",
"on move: O\n",
"O X \n",
" X \n",
"O X \n",
"on move: X\n",
"O X \n",
" X \n",
"O X O \n",
"on move: O\n",
"O X \n",
" X X \n",
"O X O \n",
"on move: X\n",
"O X \n",
"O X X \n",
"O X O \n",
"Episode 11 is done\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
"O X \n",
" \n",
" \n",
"on move: O\n",
"O X \n",
"X \n",
" \n",
"on move: X\n",
"O X \n",
"X \n",
" O \n",
"on move: O\n",
"O X \n",
"X X \n",
" O \n",
"on move: X\n",
"O X \n",
"X X \n",
" O O \n",
"on move: O\n",
"O X \n",
"X X X \n",
" O O \n",
"Episode 12 is done\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
" O \n",
"X \n",
" X \n",
"on move: X\n",
"O O \n",
"X \n",
" X \n",
"on move: O\n",
"O O \n",
"X X \n",
" X \n",
"on move: X\n",
"O O \n",
"X X O \n",
" X \n",
"on move: O\n",
"O X O \n",
"X X O \n",
" X \n",
"Episode 13 is done\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
"O \n",
" \n",
"on move: O\n",
"X X \n",
"O \n",
" \n",
"on move: X\n",
"X X \n",
"O \n",
"O \n",
"on move: O\n",
"X X \n",
"O X \n",
"O \n",
"on move: X\n",
"X X \n",
"O X \n",
"O O \n",
"on move: O\n",
"X X X \n",
"O X \n",
"O O \n",
"Episode 14 is done\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
" O \n",
"on move: O\n",
" \n",
"X \n",
"X O \n",
"on move: X\n",
" \n",
"X O \n",
"X O \n",
"on move: O\n",
" \n",
"X O \n",
"X X O \n",
"on move: X\n",
" O \n",
"X O \n",
"X X O \n",
"on move: O\n",
" X O \n",
"X O \n",
"X X O \n",
"on move: X\n",
" X O \n",
"X O O \n",
"X X O \n",
"Episode 15 is done\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
" O \n",
"on move: O\n",
"X X \n",
" \n",
" O \n",
"on move: X\n",
"X X \n",
" \n",
"O O \n",
"on move: O\n",
"X X X \n",
" \n",
"O O \n",
"Episode 16 is done\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
" \n",
"X O \n",
" X \n",
"on move: X\n",
" \n",
"X O O \n",
" X \n",
"on move: O\n",
" \n",
"X O O \n",
" X X \n",
"on move: X\n",
" \n",
"X O O \n",
"O X X \n",
"on move: O\n",
" X \n",
"X O O \n",
"O X X \n",
"on move: X\n",
"O X \n",
"X O O \n",
"O X X \n",
"on move: O\n",
"O X X \n",
"X O O \n",
"O X X \n",
"Episode 17 is done\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
"O \n",
"on move: O\n",
" \n",
" X \n",
"O X \n",
"on move: X\n",
" \n",
" X O \n",
"O X \n",
"on move: O\n",
"X \n",
" X O \n",
"O X \n",
"Episode 18 is done\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
" O X \n",
" \n",
" X \n",
"on move: X\n",
" O X \n",
" \n",
"O X \n",
"on move: O\n",
" O X \n",
" X \n",
"O X \n",
"on move: X\n",
"O O X \n",
" X \n",
"O X \n",
"on move: O\n",
"O O X \n",
" X \n",
"O X X \n",
"on move: X\n",
"O O X \n",
"O X \n",
"O X X \n",
"Episode 19 is done\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
" X \n",
" X \n",
"on move: X\n",
"O O \n",
" X \n",
" X \n",
"on move: O\n",
"O O \n",
" X \n",
" X X \n",
"on move: X\n",
"O O \n",
" X \n",
"O X X \n",
"on move: O\n",
"O O \n",
" X X \n",
"O X X \n",
"on move: X\n",
"O O \n",
"O X X \n",
"O X X \n",
"Episode 20 is done\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" \n",
" O \n",
"on move: O\n",
"X X \n",
" \n",
" O \n",
"on move: X\n",
"X X \n",
" \n",
"O O \n",
"on move: O\n",
"X X \n",
" X \n",
"O O \n",
"on move: X\n",
"X X O \n",
" X \n",
"O O \n",
"on move: O\n",
"X X O \n",
" X X \n",
"O O \n",
"on move: X\n",
"X X O \n",
" X X \n",
"O O O \n",
"Episode 21 is done\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" \n",
"O \n",
"X \n",
"on move: O\n",
" \n",
"O X \n",
"X \n",
"on move: X\n",
" \n",
"O X \n",
"X O \n",
"on move: O\n",
" \n",
"O X X \n",
"X O \n",
"on move: X\n",
"O \n",
"O X X \n",
"X O \n",
"on move: O\n",
"O X \n",
"O X X \n",
"X O \n",
"on move: X\n",
"O X O \n",
"O X X \n",
"X O \n",
"on move: O\n",
"O X O \n",
"O X X \n",
"X O X \n",
"Episode 22 is done\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
"O \n",
"on move: O\n",
" X \n",
" X \n",
"O \n",
"on move: X\n",
" X \n",
" O X \n",
"O \n",
"on move: O\n",
" X \n",
"X O X \n",
"O \n",
"on move: X\n",
" X \n",
"X O X \n",
"O O \n",
"on move: O\n",
" X X \n",
"X O X \n",
"O O \n",
"on move: X\n",
"O X X \n",
"X O X \n",
"O O \n",
"on move: O\n",
"O X X \n",
"X O X \n",
"O O X \n",
"Episode 23 is done\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
"O \n",
" X \n",
" \n",
"on move: O\n",
"O X \n",
" X \n",
" \n",
"on move: X\n",
"O X \n",
" X \n",
" O \n",
"on move: O\n",
"O X \n",
"X X \n",
" O \n",
"on move: X\n",
"O X \n",
"X X \n",
" O O \n",
"on move: O\n",
"O X X \n",
"X X \n",
" O O \n",
"on move: X\n",
"O X X \n",
"X O X \n",
" O O \n",
"Episode 24 is done\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
"O \n",
"on move: O\n",
" X \n",
" \n",
"O X \n",
"on move: X\n",
" X \n",
" O \n",
"O X \n",
"on move: O\n",
" X \n",
" O \n",
"O X X \n",
"on move: X\n",
"O X \n",
" O \n",
"O X X \n",
"on move: O\n",
"O X \n",
"X O \n",
"O X X \n",
"on move: X\n",
"O X O \n",
"X O \n",
"O X X \n",
"Episode 25 is done\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" \n",
"X \n",
"O \n",
"on move: O\n",
" X \n",
"X \n",
"O \n",
"on move: X\n",
" X \n",
"X \n",
"O O \n",
"on move: O\n",
" X \n",
"X X \n",
"O O \n",
"on move: X\n",
" O X \n",
"X X \n",
"O O \n",
"on move: O\n",
" O X \n",
"X X X \n",
"O O \n",
"Episode 26 is done\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" O \n",
" \n",
" X \n",
"on move: O\n",
" X O \n",
" \n",
" X \n",
"on move: X\n",
" X O \n",
" O \n",
" X \n",
"on move: O\n",
" X O \n",
"X O \n",
" X \n",
"on move: X\n",
" X O \n",
"X O \n",
"O X \n",
"Episode 27 is done\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" \n",
"O X \n",
"on move: O\n",
" \n",
" \n",
"O X X \n",
"on move: X\n",
" O \n",
" \n",
"O X X \n",
"on move: O\n",
" O \n",
" X \n",
"O X X \n",
"on move: X\n",
"O O \n",
" X \n",
"O X X \n",
"on move: O\n",
"O O \n",
" X X \n",
"O X X \n",
"on move: X\n",
"O O \n",
"O X X \n",
"O X X \n",
"Episode 28 is done\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
"O \n",
" \n",
" X \n",
"on move: O\n",
"O \n",
" X \n",
" X \n",
"on move: X\n",
"O \n",
"O X \n",
" X \n",
"on move: O\n",
"O \n",
"O X \n",
"X X \n",
"on move: X\n",
"O \n",
"O X \n",
"X O X \n",
"on move: O\n",
"O \n",
"O X X \n",
"X O X \n",
"on move: X\n",
"O O \n",
"O X X \n",
"X O X \n",
"on move: O\n",
"O O X \n",
"O X X \n",
"X O X \n",
"Episode 29 is done\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
" \n",
" X O \n",
" X \n",
"on move: X\n",
" \n",
" X O \n",
" O X \n",
"on move: O\n",
" \n",
" X O \n",
"X O X \n",
"on move: X\n",
" \n",
"O X O \n",
"X O X \n",
"on move: O\n",
"X \n",
"O X O \n",
"X O X \n",
"Episode 30 is done\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
"O \n",
" X \n",
"on move: O\n",
"X \n",
"O \n",
" X \n",
"on move: X\n",
"X \n",
"O O \n",
" X \n",
"on move: O\n",
"X \n",
"O O \n",
" X X \n",
"on move: X\n",
"X O \n",
"O O \n",
" X X \n",
"on move: O\n",
"X O \n",
"O O X \n",
" X X \n",
"on move: X\n",
"X O \n",
"O O X \n",
"O X X \n",
"Episode 31 is done\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" \n",
" X O \n",
"on move: O\n",
" X \n",
" \n",
" X O \n",
"on move: X\n",
" X \n",
" O \n",
" X O \n",
"on move: O\n",
" X \n",
" O X \n",
" X O \n",
"on move: X\n",
" X \n",
"O O X \n",
" X O \n",
"on move: O\n",
"X X \n",
"O O X \n",
" X O \n",
"on move: X\n",
"X X \n",
"O O X \n",
"O X O \n",
"on move: O\n",
"X X X \n",
"O O X \n",
"O X O \n",
"Episode 32 is done\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
" O \n",
"on move: O\n",
" \n",
" X \n",
"X O \n",
"on move: X\n",
" \n",
"O X \n",
"X O \n",
"on move: O\n",
" \n",
"O X \n",
"X X O \n",
"on move: X\n",
"O \n",
"O X \n",
"X X O \n",
"on move: O\n",
"O X \n",
"O X \n",
"X X O \n",
"Episode 33 is done\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X O \n",
" \n",
" \n",
"on move: O\n",
"X X O \n",
" \n",
" \n",
"on move: X\n",
"X X O \n",
"O \n",
" \n",
"on move: O\n",
"X X O \n",
"O \n",
" X \n",
"on move: X\n",
"X X O \n",
"O \n",
" O X \n",
"on move: O\n",
"X X O \n",
"O X \n",
" O X \n",
"Episode 34 is done\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X \n",
" O \n",
"on move: O\n",
" \n",
" X \n",
" X O \n",
"on move: X\n",
"O \n",
" X \n",
" X O \n",
"on move: O\n",
"O \n",
"X X \n",
" X O \n",
"on move: X\n",
"O O \n",
"X X \n",
" X O \n",
"on move: O\n",
"O O \n",
"X X \n",
"X X O \n",
"on move: X\n",
"O O \n",
"X O X \n",
"X X O \n",
"Episode 35 is done\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
" \n",
" O X \n",
" X \n",
"on move: X\n",
"O \n",
" O X \n",
" X \n",
"on move: O\n",
"O \n",
" O X \n",
" X X \n",
"on move: X\n",
"O O \n",
" O X \n",
" X X \n",
"on move: O\n",
"O O \n",
" O X \n",
"X X X \n",
"Episode 36 is done\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
" \n",
" O \n",
" X X \n",
"on move: X\n",
" \n",
" O O \n",
" X X \n",
"on move: O\n",
" \n",
"X O O \n",
" X X \n",
"on move: X\n",
" O \n",
"X O O \n",
" X X \n",
"on move: O\n",
" O \n",
"X O O \n",
"X X X \n",
"Episode 37 is done\n",
"on move: O\n",
" \n",
" \n",
"X \n",
"on move: X\n",
" O \n",
" \n",
"X \n",
"on move: O\n",
" O \n",
" \n",
"X X \n",
"on move: X\n",
" O \n",
"O \n",
"X X \n",
"on move: O\n",
" O \n",
"O X \n",
"X X \n",
"on move: X\n",
" O \n",
"O X O \n",
"X X \n",
"on move: O\n",
"X O \n",
"O X O \n",
"X X \n",
"Episode 38 is done\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
"O \n",
" X \n",
" \n",
"on move: O\n",
"O \n",
"X X \n",
" \n",
"on move: X\n",
"O \n",
"X X \n",
" O \n",
"on move: O\n",
"O \n",
"X X \n",
" O X \n",
"on move: X\n",
"O \n",
"X X O \n",
" O X \n",
"on move: O\n",
"O \n",
"X X O \n",
"X O X \n",
"on move: X\n",
"O O \n",
"X X O \n",
"X O X \n",
"on move: O\n",
"O X O \n",
"X X O \n",
"X O X \n",
"Episode 39 is done\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
" X \n",
" X \n",
"on move: X\n",
" O \n",
"O X \n",
" X \n",
"on move: O\n",
" O \n",
"O X \n",
"X X \n",
"on move: X\n",
"O O \n",
"O X \n",
"X X \n",
"on move: O\n",
"O O \n",
"O X \n",
"X X X \n",
"Episode 40 is done\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" O \n",
"X \n",
" \n",
"on move: O\n",
" O \n",
"X \n",
" X \n",
"on move: X\n",
" O \n",
"X \n",
"O X \n",
"on move: O\n",
"X O \n",
"X \n",
"O X \n",
"on move: X\n",
"X O \n",
"X O \n",
"O X \n",
"on move: O\n",
"X O \n",
"X O X \n",
"O X \n",
"on move: X\n",
"X O \n",
"X O X \n",
"O X O \n",
"on move: O\n",
"X O X \n",
"X O X \n",
"O X O \n",
"Episode 41 is done\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
" X X \n",
" \n",
"on move: X\n",
" O \n",
" X X \n",
"O \n",
"on move: O\n",
" O \n",
" X X \n",
"O X \n",
"on move: X\n",
" O \n",
"O X X \n",
"O X \n",
"on move: O\n",
" O \n",
"O X X \n",
"O X X \n",
"on move: X\n",
"O O \n",
"O X X \n",
"O X X \n",
"Episode 42 is done\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" \n",
" X O \n",
" \n",
"on move: O\n",
" X \n",
" X O \n",
" \n",
"on move: X\n",
" X O \n",
" X O \n",
" \n",
"on move: O\n",
" X O \n",
"X X O \n",
" \n",
"on move: X\n",
" X O \n",
"X X O \n",
" O \n",
"on move: O\n",
"X X O \n",
"X X O \n",
" O \n",
"on move: X\n",
"X X O \n",
"X X O \n",
"O O \n",
"on move: O\n",
"X X O \n",
"X X O \n",
"O O X \n",
"Episode 43 is done\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
" O \n",
" X \n",
" \n",
"on move: O\n",
" O \n",
" X \n",
" X \n",
"on move: X\n",
" O \n",
" X \n",
" X O \n",
"on move: O\n",
" O X \n",
" X \n",
" X O \n",
"on move: X\n",
" O X \n",
"O X \n",
" X O \n",
"on move: O\n",
"X O X \n",
"O X \n",
" X O \n",
"on move: X\n",
"X O X \n",
"O X \n",
"O X O \n",
"on move: O\n",
"X O X \n",
"O X X \n",
"O X O \n",
"Episode 44 is done\n",
"on move: O\n",
" \n",
"X \n",
" \n",
"on move: X\n",
" O \n",
"X \n",
" \n",
"on move: O\n",
"X O \n",
"X \n",
" \n",
"on move: X\n",
"X O \n",
"X O \n",
" \n",
"on move: O\n",
"X O \n",
"X X O \n",
" \n",
"on move: X\n",
"X O O \n",
"X X O \n",
" \n",
"on move: O\n",
"X O O \n",
"X X O \n",
"X \n",
"Episode 45 is done\n",
"on move: O\n",
" \n",
" \n",
" X \n",
"on move: X\n",
" \n",
" O \n",
" X \n",
"on move: O\n",
" \n",
" O \n",
" X X \n",
"on move: X\n",
" \n",
" O O \n",
" X X \n",
"on move: O\n",
" \n",
" O O \n",
"X X X \n",
"Episode 46 is done\n",
"on move: O\n",
" \n",
" X \n",
" \n",
"on move: X\n",
"O \n",
" X \n",
" \n",
"on move: O\n",
"O \n",
" X \n",
" X \n",
"on move: X\n",
"O \n",
" X O \n",
" X \n",
"on move: O\n",
"O \n",
" X O \n",
" X X \n",
"on move: X\n",
"O O \n",
" X O \n",
" X X \n",
"on move: O\n",
"O O \n",
"X X O \n",
" X X \n",
"on move: X\n",
"O O O \n",
"X X O \n",
" X X \n",
"Episode 47 is done\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X O \n",
" \n",
" \n",
"on move: O\n",
" X O \n",
" X \n",
" \n",
"on move: X\n",
" X O \n",
" X \n",
"O \n",
"on move: O\n",
" X O \n",
" X \n",
"O X \n",
"on move: X\n",
" X O \n",
"O X \n",
"O X \n",
"on move: O\n",
" X O \n",
"O X X \n",
"O X \n",
"on move: X\n",
" X O \n",
"O X X \n",
"O O X \n",
"on move: O\n",
"X X O \n",
"O X X \n",
"O O X \n",
"Episode 48 is done\n",
"on move: O\n",
" X \n",
" \n",
" \n",
"on move: X\n",
" X \n",
" \n",
"O \n",
"on move: O\n",
" X \n",
"X \n",
"O \n",
"on move: X\n",
"O X \n",
"X \n",
"O \n",
"on move: O\n",
"O X \n",
"X \n",
"O X \n",
"on move: X\n",
"O X \n",
"X O \n",
"O X \n",
"on move: O\n",
"O X \n",
"X O \n",
"O X X \n",
"on move: X\n",
"O X \n",
"X O O \n",
"O X X \n",
"on move: O\n",
"O X X \n",
"X O O \n",
"O X X \n",
"Episode 49 is done\n",
"on move: O\n",
"X \n",
" \n",
" \n",
"on move: X\n",
"X \n",
" \n",
" O \n",
"on move: O\n",
"X \n",
" X \n",
" O \n",
"on move: X\n",
"X O \n",
" X \n",
" O \n",
"on move: O\n",
"X O \n",
" X \n",
"X O \n",
"on move: X\n",
"X O \n",
" X \n",
"X O O \n",
"on move: O\n",
"X O X \n",
" X \n",
"X O O \n",
"on move: X\n",
"X O X \n",
"O X \n",
"X O O \n",
"on move: O\n",
"X O X \n",
"O X X \n",
"X O O \n",
"Episode 50 is done\n"
]
}
],
"source": [
"# Play a series of tic-tac-toe games in which one random agent moves for both sides\n",
"myenv = TicTacToeEnv()\n",
"agent = MyAgent(symbol=1)\n",
"\n",
"num_episodes = 50  # number of episodes (games) to play\n",
"\n",
"for i in range(num_episodes):\n",
"    # reset() hands back the state dict itself, not an (obs, info) pair,\n",
"    # so there is nothing to unpack here\n",
"    myenv.reset()\n",
"\n",
"    # at most 9 moves, because the board is 3x3\n",
"    for _ in range(9):\n",
"        moves = myenv.move_generator()\n",
"\n",
"        # no moves left: the game is over\n",
"        if not moves:\n",
"            break\n",
"\n",
"        move = agent.get_action(moves)\n",
"\n",
"        # play the move, then show the resulting position\n",
"        _state, done, _info = myenv.step(move)\n",
"        myenv.render()\n",
"\n",
"        if done:\n",
"            break\n",
"\n",
"    print(f\"Episode {i+1} is done\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}