Compare commits

...

47 Commits

Author SHA1 Message Date
d
d2874ac257 7 laba 2024-01-13 18:38:15 +04:00
1cd312ba98 Merge pull request 'gordeeva_anna_lab_6' (#291) from gordeeva_anna_lab_6 into main
Reviewed-on: http://student.git.athene.tech/Alexey/IIS_2023_1/pulls/291
2024-01-10 09:40:07 +04:00
ea025d0b4a Merge pull request 'romanova_adelina_lab_7 is ready' (#290) from romanova_adelina_lab_7 into main
Reviewed-on: http://student.git.athene.tech/Alexey/IIS_2023_1/pulls/290
2024-01-10 09:39:54 +04:00
43ec6863e4 Merge pull request 'gordeeva_anna_lab7' (#292) from gordeeva_anna_lab_7 into main
Reviewed-on: http://student.git.athene.tech/Alexey/IIS_2023_1/pulls/292
2024-01-10 09:39:35 +04:00
Meowweasy
e88d1e7fcd itog 2024-01-09 16:55:23 +04:00
Meowweasy
915ec905c6 itog 2024-01-09 16:02:55 +04:00
4bdc8ea733 Merge pull request 'istyukov_timofey_lab_2 is ready' (#287) from istyukov_timofey_lab_2 into main
Reviewed-on: http://student.git.athene.tech/Alexey/IIS_2023_1/pulls/287
2024-01-09 11:30:11 +04:00
236483abf1 Merge pull request 'istyukov_timofey_lab_3 is ready' (#288) from istyukov_timofey_lab_3 into main
Reviewed-on: http://student.git.athene.tech/Alexey/IIS_2023_1/pulls/288
2024-01-09 11:29:58 +04:00
8673873a52 Merge pull request 'istyukov_timofey_lab_4 is ready' (#289) from istyukov_timofey_lab_4 into main
Reviewed-on: http://student.git.athene.tech/Alexey/IIS_2023_1/pulls/289
2024-01-09 11:29:35 +04:00
5035ca66da create README 2024-01-07 13:56:06 +04:00
Ctrl-Tim
b9503c8388 create README 2024-01-07 04:58:17 +04:00
Ctrl-Tim
7c2ddd98f6 create README 2024-01-07 04:56:54 +04:00
Ctrl-Tim
be4ae51c11 commit 1 2024-01-07 04:56:43 +04:00
Ctrl-Tim
5a83f61bd4 create README 2024-01-06 00:54:29 +04:00
2efc882a02 change README 2024-01-04 22:41:18 +04:00
Ctrl-Tim
676080d48d create README 2024-01-04 22:38:19 +04:00
Ctrl-Tim
96b55c6711 commit 1 2024-01-04 22:38:07 +04:00
4498fb5531 Merge pull request 'arzamaskina_milana_lab_7 is ready' (#282) from arzamaskina_milana_lab_7 into main
Reviewed-on: http://student.git.athene.tech/Alexey/IIS_2023_1/pulls/282
2023-12-28 10:37:15 +04:00
fdda9659eb Merge pull request 'zhukova_alina_lab_7 is ready' (#283) from zhukova_alina_lab_7 into main
Reviewed-on: http://student.git.athene.tech/Alexey/IIS_2023_1/pulls/283
2023-12-28 10:37:00 +04:00
099679a413 Merge pull request 'kochkareva_elizaveta_lab_7 is ready' (#284) from kochkareva_elizaveta_lab_7 into main
Reviewed-on: http://student.git.athene.tech/Alexey/IIS_2023_1/pulls/284
2023-12-28 10:36:37 +04:00
dfa7f803fd Merge pull request 'degtyarev_mikhail_lab_7_is_ready' (#286) from degtyarev_mikhail_lab_7 into main
Reviewed-on: http://student.git.athene.tech/Alexey/IIS_2023_1/pulls/286
2023-12-28 10:36:23 +04:00
c527892559 Merge pull request 'basharin_sevastyan_lab_7' (#281) from basharin_sevastyan_lab_7 into main
Reviewed-on: http://student.git.athene.tech/Alexey/IIS_2023_1/pulls/281
2023-12-28 10:35:14 +04:00
d915c4d712 Merge pull request 'romanova_adelina_lab_6 is ready' (#280) from romanova_adelina_lab_6 into main
Reviewed-on: http://student.git.athene.tech/Alexey/IIS_2023_1/pulls/280
2023-12-28 10:34:49 +04:00
b5fc91cfdb Merge pull request 'degtyarev_mikhail_lab_6_is_ready' (#285) from degtyarev_mikhail_lab_6 into main
Reviewed-on: http://student.git.athene.tech/Alexey/IIS_2023_1/pulls/285
2023-12-28 10:32:58 +04:00
ac68008d93 Merge pull request 'romanova_adelina_lab_5 is ready' (#279) from romanova_adelina_lab_5 into main
Reviewed-on: http://student.git.athene.tech/Alexey/IIS_2023_1/pulls/279
2023-12-28 10:32:44 +04:00
75e614aa55 Merge pull request 'romanova_adelina_lab_4 is ready' (#278) from romanova_adelina_lab_4 into main
Reviewed-on: http://student.git.athene.tech/Alexey/IIS_2023_1/pulls/278
2023-12-28 10:32:31 +04:00
45dc8c70ea Merge pull request 'romanova_adelina_lab_3 is ready' (#277) from romanova_adelina_lab_3 into main
Reviewed-on: http://student.git.athene.tech/Alexey/IIS_2023_1/pulls/277
2023-12-28 10:30:23 +04:00
86d0b82b5a Merge pull request 'kutygin_andrey_lab_3_ready' (#151) from kutygin_andrey_lab_3 into main
Reviewed-on: http://student.git.athene.tech/Alexey/IIS_2023_1/pulls/151
2023-12-28 10:30:05 +04:00
23e62553d2 Merge pull request 'istyukov_timofey_lab_1 is ready' (#276) from istyukov_timofey_lab_1 into main
Reviewed-on: http://student.git.athene.tech/Alexey/IIS_2023_1/pulls/276
2023-12-28 10:28:51 +04:00
7d5463198a romanova_adelina_lab_7 is ready 2023-12-25 01:19:51 +04:00
b04582b80e degtyarev_mikhail_lab_7_is_ready 2023-12-23 02:04:24 +04:00
d8470fb939 degtyarev_mikhail_lab_6_is_ready 2023-12-23 01:05:21 +04:00
04c0621a05 zhukova_alina_lab_7 is ready 2023-12-18 19:25:37 +04:00
0ce611b443 arzamaskina_milana_lab_7 is ready 2023-12-18 00:28:59 +04:00
fc5942cdb1 basharin_sevastyan_lab_7 is ready 2023-12-14 22:33:48 +04:00
68d1b445a2 romanova_adelina_lab_6 is ready 2023-12-11 16:58:00 +04:00
eb27f1410a romanova_adelina_lab_5 is ready 2023-12-11 14:07:16 +04:00
4bec95e80f romanova_adelina_lab_4 is ready 2023-12-11 12:57:15 +04:00
d0c010c491 romanova_adelina_lab_3 is ready 2023-12-11 12:28:44 +04:00
Ctrl-Tim
790641d82f create README 2023-12-10 16:01:29 +04:00
Ctrl-Tim
ccc3352aa2 commit 3 2023-12-10 15:42:09 +04:00
Ctrl-Tim
fece83fa1a commit 2 2023-12-10 15:35:33 +04:00
Ctrl-Tim
ba4a6f1402 commit 1 2023-12-10 14:22:51 +04:00
71b16e78b7 в процессе 2023-12-07 22:35:52 +04:00
97493ae413 fix part2 2023-12-07 21:30:41 +04:00
b58da2aab4 fix part1 2023-12-07 21:30:27 +04:00
4f479043f1 kutygin_andrey_lab_3_ready 2023-11-13 20:58:11 +04:00
95 changed files with 107040 additions and 23 deletions

.idea/workspace.xml (generated, 143 changed lines)

@@ -4,10 +4,9 @@
<option name="autoReloadType" value="SELECTIVE" />
</component>
<component name="ChangeListManager">
<list default="true" id="0ceb130e-88da-4a20-aad6-17f5ab4226ac" name="Changes" comment="">
<change beforePath="$PROJECT_DIR$/.idea/IIS_2023_1.iml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/IIS_2023_1.iml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/misc.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/misc.xml" afterDir="false" />
<list default="true" id="0ceb130e-88da-4a20-aad6-17f5ab4226ac" name="Changes" comment="commit 3">
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/istyukov_timofey_lab1/lab1.py" beforeDir="false" afterPath="$PROJECT_DIR$/istyukov_timofey_lab1/lab1.py" afterDir="false" />
</list>
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
@@ -43,29 +42,50 @@
<option name="hideEmptyMiddlePackages" value="true" />
<option name="showLibraryContents" value="true" />
</component>
<component name="PropertiesComponent">{
&quot;keyToString&quot;: {
&quot;RunOnceActivity.OpenProjectViewOnStart&quot;: &quot;true&quot;,
&quot;RunOnceActivity.ShowReadmeOnStart&quot;: &quot;true&quot;,
&quot;WebServerToolWindowFactoryState&quot;: &quot;false&quot;,
&quot;git-widget-placeholder&quot;: &quot;senkin__alexander__lab__1&quot;,
&quot;last_opened_file_path&quot;: &quot;D:/ulstukek/Course4/IIS/labs&quot;,
&quot;node.js.detected.package.eslint&quot;: &quot;true&quot;,
&quot;node.js.detected.package.tslint&quot;: &quot;true&quot;,
&quot;node.js.selected.package.eslint&quot;: &quot;(autodetect)&quot;,
&quot;node.js.selected.package.tslint&quot;: &quot;(autodetect)&quot;,
&quot;nodejs_package_manager_path&quot;: &quot;npm&quot;,
&quot;settings.editor.selected.configurable&quot;: &quot;reference.settings.ide.settings.new.ui&quot;,
&quot;vue.rearranger.settings.migration&quot;: &quot;true&quot;
<component name="PropertiesComponent"><![CDATA[{
"keyToString": {
"RunOnceActivity.OpenProjectViewOnStart": "true",
"RunOnceActivity.ShowReadmeOnStart": "true",
"WebServerToolWindowFactoryState": "false",
"git-widget-placeholder": "senkin__alexander__lab__1",
"last_opened_file_path": "D:/ulstukek/Course4/IIS/labs",
"node.js.detected.package.eslint": "true",
"node.js.detected.package.tslint": "true",
"node.js.selected.package.eslint": "(autodetect)",
"node.js.selected.package.tslint": "(autodetect)",
"nodejs_package_manager_path": "npm",
"settings.editor.selected.configurable": "preferences.sourceCode",
"vue.rearranger.settings.migration": "true"
}
}</component>
}]]></component>
<component name="RecentsManager">
<key name="CopyFile.RECENT_KEYS">
<recent name="D:\ulstukek\Course4\IIS\IISLabs\IIS_2023_1\zavrazhnova_svetlana_lab_3" />
<recent name="D:\ulstukek\Course4\IIS\IISLabs\IIS_2023_1\zavrazhnova_svetlana_lab_1" />
</key>
</component>
<component name="RunManager">
<component name="RunManager" selected="Python.lab1">
<configuration name="lab1" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
<module name="IIS_2023_1" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="E:\Programms\Python\python.exe" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/istyukov_timofey_lab1" />
<option name="IS_MODULE_SDK" value="false" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/istyukov_timofey_lab1/lab1.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
<configuration name="zavrazhnova_svetlana_lab3_2" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
<module name="IIS_2023_1" />
<option name="INTERPRETER_OPTIONS" value="" />
@@ -132,13 +152,19 @@
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
<list>
<item itemvalue="Python.lab1" />
<item itemvalue="Python.zavrazhnova_svetlana_lab3_2" />
<item itemvalue="Python.zavrazhnova_svetlana_lab_2" />
<item itemvalue="Python.zavrazhnova_svetlana_lab_3_1" />
</list>
<recent_temporary>
<list>
<item itemvalue="Python.lab1" />
<item itemvalue="Python.zavrazhnova_svetlana_lab_3_1" />
<item itemvalue="Python.zavrazhnova_svetlana_lab_2" />
<item itemvalue="Python.zavrazhnova_svetlana_lab3_2" />
<item itemvalue="Python.zavrazhnova_svetlana_lab3_2" />
<item itemvalue="Python.zavrazhnova_svetlana_lab_3_1" />
</list>
</recent_temporary>
</component>
@@ -153,6 +179,35 @@
<workItem from="1697735437405" duration="1706000" />
<workItem from="1697740229646" duration="3802000" />
</task>
<task id="LOCAL-00001" summary="commit 1">
<created>1702203771661</created>
<option name="number" value="00001" />
<option name="presentableId" value="LOCAL-00001" />
<option name="project" value="LOCAL" />
<updated>1702203771661</updated>
</task>
<task id="LOCAL-00002" summary="commit 2">
<created>1702208133904</created>
<option name="number" value="00002" />
<option name="presentableId" value="LOCAL-00002" />
<option name="project" value="LOCAL" />
<updated>1702208133904</updated>
</task>
<task id="LOCAL-00003" summary="create README">
<created>1702208193675</created>
<option name="number" value="00003" />
<option name="presentableId" value="LOCAL-00003" />
<option name="project" value="LOCAL" />
<updated>1702208193675</updated>
</task>
<task id="LOCAL-00004" summary="commit 3">
<created>1702208529340</created>
<option name="number" value="00004" />
<option name="presentableId" value="LOCAL-00004" />
<option name="project" value="LOCAL" />
<updated>1702208529340</updated>
</task>
<option name="localTasksCounter" value="5" />
<servers />
</component>
<component name="TypeScriptGeneratedFilesManager">
@@ -169,7 +224,14 @@
<entry key="branch">
<value>
<list>
<option value="HEAD" />
<option value="istyukov_timofey_lab_1" />
</list>
</value>
</entry>
<entry key="user">
<value>
<list>
<option value="*" />
</list>
</value>
</entry>
@@ -180,8 +242,43 @@
</entry>
</map>
</option>
<option name="RECENT_FILTERS">
<map>
<entry key="Branch">
<value>
<list>
<RecentGroup>
<option name="FILTER_VALUES">
<option value="istyukov_timofey_lab_1" />
</option>
</RecentGroup>
<RecentGroup>
<option name="FILTER_VALUES">
<option value="HEAD" />
</option>
</RecentGroup>
</list>
</value>
</entry>
<entry key="User">
<value>
<list>
<RecentGroup>
<option name="FILTER_VALUES">
<option value="*" />
</option>
</RecentGroup>
</list>
</value>
</entry>
</map>
</option>
</component>
<component name="com.intellij.coverage.CoverageDataManagerImpl">
<SUITE FILE_PATH="coverage/PyCharmProjects$senkin_alexander_lab_1.coverage" NAME="senkin_alexander_lab_1 Coverage Results" MODIFIED="1697744262965" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/senkin_alexander_lab_1" />
<component name="VcsManagerConfiguration">
<MESSAGE value="commit 1" />
<MESSAGE value="commit 2" />
<MESSAGE value="create README" />
<MESSAGE value="commit 3" />
<option name="LAST_COMMIT_MESSAGE" value="commit 3" />
</component>
</project>


@@ -0,0 +1,46 @@
# Laboratory work No. 7
## Recurrent neural network and the task of text generation
#### PIbd-41, Arzamaskina Milana
#### Variant 2
### Technologies used:
Libraries used:
* numpy
* keras
* tensorflow
### How to run:
* install python, numpy, keras and tensorflow
* run the project (the entry point is main.py)
### What the program does:
A recurrent neural network is trained on selected literary texts to solve the text generation task.
The architecture and parameters have to be chosen so that the output gets as close as possible to meaningful text.
* Reads the text from files (english.txt, russian.txt)
* Builds the input and output data (X, y), the vocabulary size and the tokenizer; Tokenizer is used with the char_level=True setting
* Creates a Sequential model (a sequential recurrent neural network) and adds two LSTM layers. Dropout is a regularization technique for neural networks and deep learning models that addresses overfitting. A Dense layer with a softmax activation is used to predict the next character (see the sketch after this list)
* Compiles the model
* Trains the model
* Generates text
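A minimal sketch of the model described above, assuming the char-level Tokenizer has already produced training windows of length seq_length over a vocabulary of size vocab_size. The Dropout rate here is an illustrative value (the committed main.py builds the same LSTM stack without a Dropout layer); the full training script appears further below.

```python
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense

seq_length = 10    # length of each input character window
vocab_size = 100   # placeholder: len(tokenizer.word_index) + 1 in the real script

# Two stacked LSTM layers, then a softmax over the vocabulary, so the network
# outputs a probability for every possible next character.
model = Sequential()
model.add(LSTM(256, input_shape=(seq_length, 1), return_sequences=True))
model.add(Dropout(0.2))   # regularization against overfitting (illustrative rate)
model.add(LSTM(128))
model.add(Dense(vocab_size, activation='softmax'))
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
```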
#### Generated texts:
Generation in Russian:
![Result](img1.png)
Generation in English:
![Result](img2.png)
### Conclusion:
The program is able to generate meaningful text in both cases.


@@ -0,0 +1,8 @@
The cloud shuddered with blue flame. Thunder rumbled slowly.
It either intensified or almost died down. And the rain, obeying the thunder, began to fall harder at times and rustle widely through the leaves, then stopped.
Soon the sun broke through the clouds. The old Pushkin Park in Mikhailovskoye and the steep banks of Soroti were ablaze with red clay and wet grass.
A slender rainbow lit up across the cloudy distance. It sparkled and smoked, surrounded by wisps of ashen clouds.
The rainbow looked like an arch erected on the border of a protected land. Here, in Pushkin's places, thoughts about the Russian language arose with particular force.
Here Pushkin wandered with his head uncovered, with his cold hair tangled by the autumn wind, listening to the wet hum of the pine tops, looking, squinting,
from where the autumn clouds were racing; he jostled about the fairs. Here wonderful words overwhelmed him, oppressed his soul and, finally, were composed, one by one, with the stub of a goose feather, into ringing stanzas.

Binary file not shown (new image, 106 KiB).

Binary file not shown (new image, 103 KiB).


@@ -0,0 +1,62 @@
import numpy as np
from keras.layers import LSTM, Dense
from keras.models import Sequential
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer

# Read the text from a file
# with open('russian.txt', 'r', encoding='utf-8') as file:
#     text = file.read()
with open('english.txt', 'r', encoding='utf-8') as file:
    text = file.read()

# Fit the Tokenizer on the text (character level)
tokenizer = Tokenizer(char_level=True)
tokenizer.fit_on_texts([text])
sequences = tokenizer.texts_to_sequences([text])[0]

# Build the X, y sequences: each sample is seq_length characters, the target is the next character
X_data, y_data = [], []
seq_length = 10
for i in range(seq_length, len(sequences)):
    sequence = sequences[i - seq_length:i]
    target = sequences[i]
    X_data.append(sequence)
    y_data.append(target)

# Convert to arrays; the LSTM layers expect 3-D input (samples, timesteps, features)
X_mass = pad_sequences(X_data, maxlen=seq_length)
X_mass = np.expand_dims(X_mass, axis=-1)
y_mass = np.array(y_data)

# Build the model
vocab_size = len(tokenizer.word_index) + 1
model = Sequential()
model.add(LSTM(256, input_shape=(seq_length, 1), return_sequences=True))
model.add(LSTM(128))
model.add(Dense(vocab_size, activation='softmax'))

# Compile
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train
model.fit(X_mass, y_mass, epochs=100, verbose=1)


# Generation function: repeatedly predict the next character and slide the window forward
def generate_text(_text, gen_length):
    generated_text = _text
    for _ in range(gen_length):
        seq = tokenizer.texts_to_sequences([_text])[0]
        seq = pad_sequences([seq], maxlen=seq_length)
        seq = np.expand_dims(seq, axis=-1)
        prediction = model.predict(seq)[0]
        predicted_index = np.argmax(prediction)
        predicted_char = tokenizer.index_word[predicted_index]
        generated_text += predicted_char
        _text += predicted_char
        _text = _text[1:]
    return generated_text


# Generate text from a seed phrase
# _text = "Она сверкала"
_text = "It sparkled and smoked"
generated = generate_text(_text, 250)
print(generated)
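Note: generate_text above always takes the single most probable next character (np.argmax), which can make long samples repetitive. A common variation, not part of the committed script, is to sample the next character from the softmax output with a temperature parameter; a minimal sketch, assuming the prediction vector returned by model.predict(seq)[0]:

import numpy as np

def sample_next_index(prediction, temperature=0.8):
    # prediction: softmax output over the vocabulary;
    # lower temperature -> more conservative choices, higher -> more random
    logits = np.log(prediction + 1e-8) / temperature
    probs = np.exp(logits) / np.sum(np.exp(logits))
    return int(np.random.choice(len(probs), p=probs))

# Inside generate_text, this would replace the argmax line:
# predicted_index = sample_next_index(prediction, temperature=0.8)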


@@ -0,0 +1,7 @@
Тучу передернуло синим пламенем. Медленно загремел гром.
Он то усиливался, то почти затихал. И дождь, подчиняясь грому, начал временами идти сильнее и широко шуметь по листве, потом останавливался.
Вскоре сквозь тучи пробилось солнце. Старый пушкинский парк в Михайловском и крутые берега Сороти запылали рыжей глиной и мокрой травой.
Стройная радуга зажглась над пасмурной далью. Она сверкала и дымилась, окруженная космами пепельных туч.
Радуга была похожа на арку, воздвигнутую на границе заповедной земли. С особенной силой здесь, в пушкинских местах, возникали мысли о русском языке.
Здесь Пушкин бродил с непокрытой головой, со спутанными осенним ветром холодными волосами, слушал влажный гул сосновых вершин, смотрел, прищурившись,
откуда несутся осенние тучи, толкался по ярмаркам. Здесь чудесные слова переполняли его, стесняли его душу и, наконец, слагались по огрызком гусиного пера в звенящие строфы.


@@ -0,0 +1,136 @@
Annotation
The Fellowship of the Ring is the first part of J.R.R.Tolkien's epic adventure, The Lord Of The Rings.
Sauron, the Dark Lord, has gathered to him all the Rings of Power - the means by which he intends to rule Middle-earth. All he lacks in his plans for dominion is the One Ring - the ring that rules them all - which has fallen into the hands of the hobbit Bilbo Baggins.
In a sleepy village in the Shire, young Frodo Baggins finds himself faced with an immense task, as his elderly cousin Bilbo entrusts the Ring to his care. Frodo must leave his home and make a perilous journey across Middle-earh to the Cracks of Doom, there to destroy the Ring and foil the Dark Lord in his evil purpose.
* * *
JRR Tolkien The Lord of the Ring 1 - The Fellowship of the Ring
Table of Contents
Foreword
This tale grew in the telling, until it became a history of the Great War of the Ring and included many glimpses of the yet more ancient history that preceded it. It was begun soon afterThe Hobbit was written and before its publication in 1937; but I did not go on with this sequel, for I wished first to complete and set in order the mythology and legends of the Elder Days, which had then been taking shape for some years. I desired to do this for my own satisfaction, and I had little hope that other people would be interested in this work, especially since it was primarily linguistic in inspiration and was begun in order to provide the necessary background of 'history' for Elvish tongues.
When those whose advice and opinion I sought correctedlittle hope tono hope, I went back to the sequel, encouraged by requests from readers for more information concerning hobbits and their adventures. But the story was drawn irresistibly towards the older world, and became an account, as it were, of its end and passing away before its beginning and middle had been told. The process had begun in the writing ofThe Hobbit, in which there were already some references to the older matter: Elrond, Gondolin, the High-elves, and the orcs, as well as glimpses that had arisen unbidden of things higher or deeper or darker than its surface: Durin, Moria, Gandalf, the Necromancer, the Ring. The discovery of the significance of these glimpses and of their relation to the ancient histories revealed the Third Age and its culmination in the War of the Ring.
Those who had asked for more information about hobbits eventually got it, but they had to wait a long time; for the composition ofThe Lord of the Rings went on at intervals during the years 1936 to 1949, a period in which I had many duties that I did not neglect, and many other interests as a learner and teacher that often absorbed me. The delay was, of course, also increased by the outbreak of war in 1939, by the end of which year the tale had not yet reached the end of Book One. In spite of the darkness of the next five years I found that the story could not now be wholly abandoned, and I plodded on, mostly by night, till I stood by Balin's tomb in Moria. There I halted for a long while. It was almost a year later when I went on and so came to Lothlorien and the Great River late in 1941. In the next year I wrote the first drafts of the matter that now stands as Book Three, and the beginnings of chapters I and III of Book Five; and there as the beacons flared in Anorien and Theoden came to Harrowdale I stopped. Foresight had failed and there was no time for thought.
It was during 1944 that, leaving the loose ends and perplexities of a war which it was my task to conduct, or at least to report, 1 forced myself to tackle the journey of Frodo to Mordor. These chapters, eventually to become Book Four, were written and sent out as a serial to my son, Christopher, then in South Africa with the RAF. Nonetheless it took another five years before the tale was brought to its present end; in that time I changed my house, my chair, and my college, and the days though less dark were no less laborious. Then when the 'end' had at last been reached the whole story had to be revised, and indeed largely re-written backwards. And it had to be typed, and re-typed: by me; the cost of professional typing by the ten-fingered was beyond my means.
The Lord of the Ringshas been read by many people since it finally appeared in print; and I should like to say something here with reference to the many opinions or guesses that I have received or have read concerning the motives and meaning of the tale. The prime motive was the desire of a tale-teller to try his hand at a really long story that would hold the attention of readers, amuse them, delight them, and at times maybe excite them or deeply move them. As a guide I had only my own feelings for what is appealing or moving, and for many the guide was inevitably often at fault. Some who have read the book, or at any rate have reviewed it, have found it boring, absurd, or contemptible; and I have no cause to complain, since I have similar opinions of their works, or of the kinds of writing that they evidently prefer. But even from the points of view of many who have enjoyed my story there is much that fails to please. It is perhaps not possible in a long tale to please everybody at all points, nor to displease everybody at the same points; for I find from the letters that I have received that the passages or chapters that are to some a blemish are all by others specially approved. The most critical reader of all, myself, now finds many defects, minor and major, but being fortunately under no obligation either to review the book or to write it again, he will pass over these in silence, except one that has been noted by others: the book is too short.
As for any inner meaning or 'message', it has in the intention of the author none. It is neither allegorical nor topical. As the story grew it put down roots (into the past) and threw out unexpected branches: but its main theme was settled from the outset by the inevitable choice of the Ring as the link between it andThe Hobbit. The crucial chapter, "The Shadow of the Past', is one of the oldest parts of the tale. It was written long before the foreshadow of 1939 had yet become a threat of inevitable disaster, and from that point the story would have developed along essentially the same lines, if that disaster had been averted. Its sources are things long before in mind, or in some cases already written, and little or nothing in it was modified by the war that began in 1939 or its sequels.
The real war does not resemble the legendary war in its process or its conclusion. If it had inspired or directed the development of the legend, then certainly the Ring would have been seized and used against Sauron; he would not have been annihilated but enslaved, and Barad-dur would not have been destroyed but occupied. Saruman, failing to get possession of the Ring, would m the confusion and treacheries of the time have found in Mordor the missing links in his own researches into Ring-lore, and before long he would have made a Great Ring of his own with which to challenge the self-styled Ruler of Middle-earth. In that conflict both sides would have held hobbits in hatred and contempt: they would not long have survived even as slaves.
Other arrangements could be devised according to the tastes or views of those who like allegory or topical reference. But I cordially dislike allegory in all its manifestations, and always have done so since I grew old and wary enough to detect its presence. I much prefer history, true or feigned, with its varied applicability to the thought and experience of readers. I think that many confuse 'applicability' with 'allegory'; but the one resides in the freedom of the reader, and the other in the purposed domination of the author.
An author cannot of course remain wholly unaffected by his experience, but the ways in which a story-germ uses the soil of experience are extremely complex, and attempts to define the process are at best guesses from evidence that is inadequate and ambiguous. It is also false, though naturally attractive, when the lives of an author and critic have overlapped, to suppose that the movements of thought or the events of times common to both were necessarily the most powerful influences. One has indeed personally to come under the shadow of war to feel fully its oppression; but as the years go by it seems now often forgotten that to be caught in youth by 1914 was no less hideous an experience than to be involved in 1939 and the following years. By 1918 all but one of my close friends were dead. Or to take a less grievous matter: it has been supposed by some that "The Scouring of the Shire' reflects the situation in England at the time when I was finishing my tale. It does not. It is an essential part of the plot, foreseen from the outset, though in the event modified by the character of Saruman as developed in the story without, need I say, any allegorical significance or contemporary political reference whatsoever. It has indeed some basis in experience, though slender (for the economic situation was entirely different), and much further back. The country in which I lived in childhood was being shabbily destroyed before I was ten, in days when motor-cars were rare objects (I had never seen one) and men were still building suburban railways. Recently I saw in a paper a picture of the last decrepitude of the once thriving corn-mill beside its pool that long ago seemed to me so important. I never liked the looks of the Young miller, but his father, the Old miller, had a black beard, and he was not named Sandyman.
The Lord of the Ringsis now issued in a new edition, and the opportunity has been taken of revising it. A number of errors and inconsistencies that still remained in the text have been corrected, and an attempt has been made to provide information on a few points which attentive readers have raised. I have considered all their comments and enquiries, and if some seem to have been passed over that may be because I have failed to keep my notes in order; but many enquiries could only be answered by additional appendices, or indeed by the production of an accessory volume containing much of the material that I did not include in the original edition, in particular more detailed linguistic information. In the meantime this edition offers this Foreword, an addition to the Prologue, some notes, and an index of the names of persons and places. This index is in intention complete in items but not in references, since for the present purpose it has been necessary to reduce its bulk. A complete index, making full use of the material prepared for me by Mrs. N. Smith, belongs rather to the accessory volume.
Prologue
This book is largely concerned with Hobbits, and from its pages a reader may discover much of their character and a little of their history. Further information will also be found in the selection from the Red Book of Westmarch that has already been published, under the title ofThe Hobbit . That story was derived from the earlier chapters of the Red Book, composed by Bilbo himself, the first Hobbit to become famous in the world at large, and called by himThere and Back Again, since they told of his journey into the East and his return: an adventure which later involved all the Hobbits in the great events of that Age that are here related.
Many, however, may wish to know more about this remarkable people from the outset, while some may not possess the earlier book. For such readers a few notes on the more important points are here collected from Hobbit-lore, and the first adventure is briefly recalled.
Hobbits are an unobtrusive but very ancient people, more numerous formerly than they are today; for they love peace and quiet and good tilled earth: a well-ordered and well-farmed countryside was their favourite haunt. They do not and did not understand or like machines more complicated than a forge-bellows, a water-mill, or a hand-loom, though they were skilful with tools. Even in ancient days they were, as a rule, shy of 'the Big Folk', as they call us, and now they avoid us with dismay and are becoming hard to find. They are quick of hearing and sharp-eyed, and though they are inclined to be fat and do not hurry unnecessarily, they are nonetheless nimble and deft in their movements. They possessed from the first the art of disappearing swiftly and silently, when large folk whom they do not wish to meet come blundering by; and this an they have developed until to Men it may seem magical. But Hobbits have never, in fact, studied magic of any kind, and their elusiveness is due solely to a professional skill that heredity and practice, and a close friendship with the earth, have rendered inimitable by bigger and clumsier races.
For they are a little people, smaller than Dwarves: less tout and stocky, that is, even when they are not actually much shorter. Their height is variable, ranging between two and four feet of our measure. They seldom now reach three feet; but they hive dwindled, they say, and in ancient days they were taller. According to the Red Book, Bandobras Took (Bullroarer), son of Isengrim the Second, was four foot five and able to ride a horse. He was surpassed in all Hobbit records only by two famous characters of old; but that curious matter is dealt with in this book.
As for the Hobbits of the Shire, with whom these tales are concerned, in the days of their peace and prosperity they were a merry folk. They dressed in bright colours, being notably fond of yellow and green; but they seldom wore shoes, since their feet had tough leathery soles and were clad in a thick curling hair, much like the hair of their heads, which was commonly brown. Thus, the only craft little practised among them was shoe-making; but they had long and skilful fingers and could make many other useful and comely things. Their faces were as a rule good-natured rather than beautiful, broad, bright-eyed, red-cheeked, with mouths apt to laughter, and to eating and drinking. And laugh they did, and eat, and drink, often and heartily, being fond of simple jests at all times, and of six meals a day (when they could get them). They were hospitable and delighted in parties, and in presents, which they gave away freely and eagerly accepted.
It is plain indeed that in spite of later estrangement Hobbits are relatives of ours: far nearer to us than Elves, or even than Dwarves. Of old they spoke the languages of Men, after their own fashion, and liked and disliked much the same things as Men did. But what exactly our relationship is can no longer be discovered. The beginning of Hobbits lies far back in the Elder Days that are now lost and forgotten. Only the Elves still preserve any records of that vanished time, and their traditions are concerned almost entirely with their own history, in which Men appear seldom and Hobbits are not mentioned at all. Yet it is clear that Hobbits had, in fact, lived quietly in Middle-earth for many long years before other folk became even aware of them. And the world being after all full of strange creatures beyond count, these little people seemed of very little importance. But in the days of Bilbo, and of Frodo his heir, they suddenly became, by no wish of their own, both important and renowned, and troubled the counsels of the Wise and the Great.
Those days, the Third Age of Middle-earth, are now long past, and the shape of all lands has been changed; but the regions in which Hobbits then lived were doubtless the same as those in which they still linger: the North-West of the Old World, east of the Sea. Of their original home the Hobbits in Bilbo's time preserved no knowledge. A love of learning (other than genealogical lore) was far from general among them, but there remained still a few in the older families who studied their own books, and even gathered reports of old times and distant lands from Elves, Dwarves, and Men. Their own records began only after the settlement of the Shire, and their most ancient legends hardly looked further back than their Wandering Days. It is clear, nonetheless, from these legends, and from the evidence of their peculiar words and customs, that like many other folk Hobbits had in the distant past moved westward. Their earliest tales seem to glimpse a time when they dwelt in the upper vales of Anduin, between the eaves of Greenwood the Great and the Misty Mountains. Why they later undertook the hard and perilous crossing of the mountains into Eriador is no longer certain. Their own accounts speak of the multiplying of Men in the land, and of a shadow that fell on the forest, so that it became darkened and its new name was Mirkwood.
Before the crossing of the mountains the Hobbits had already become divided into three somewhat different breeds: Harfoots, Stoors, and Fallohides. The Harfoots were browner of skin, smaller, and shorter, and they were beardless and bootless; their hands and feet were neat and nimble; and they preferred highlands and hillsides. The Stoors were broader, heavier in build; their feet and hands were larger, and they preferred flat lands and riversides. The Fallohides were fairer of skin and also of hair, and they were taller and slimmer than the others; they were lovers of trees and of woodlands.
The Harfoots had much to do with Dwarves in ancient times, and long lived in the foothills of the mountains. They moved westward early, and roamed over Eriador as far as Weathertop while the others were still in the Wilderland. They were the most normal and representative variety of Hobbit, and far the most numerous. They were the most inclined to settle in one place, and longest preserved their ancestral habit of living in tunnels and holes.
The Stoors lingered long by the banks of the Great River Anduin, and were less shy of Men. They came west after the Harfoots and followed the course of the Loudwater southwards; and there many of them long dwelt between Tharbad and the borders of Dunland before they moved north again.
The Fallohides, the least numerous, were a northerly branch. They were more friendly with Elves than the other Hobbits were, and had more skill in language and song than in handicrafts; and of old they preferred hunting to tilling. They crossed the mountains north of Rivendell and came down the River Hoarwell. In Eriador they soon mingled with the other kinds that had preceded them, but being somewhat bolder and more adventurous, they were often found as leaders or chieftains among clans of Harfoots or Stoors. Even in Bilbo's time the strong Fallohidish strain could still be noted among the greater families, such as the Tooks and the Masters of Buckland.
In the westlands of Eriador, between the Misty Mountains and the Mountains of Lune, the Hobbits found both Men and Elves. Indeed, a remnant still dwelt there of the Dunedain, the kings of Men that came over the Sea out of Westernesse; but they were dwindling fast and the lands of their North Kingdom were falling far and wide into waste. There was room and to spare for incomers, and ere long the Hobbits began to settle in ordered communities. Most of their earlier settlements had long disappeared and been forgotten in Bilbo's time; but one of the first to become important still endured, though reduced in size; this was at Bree and in the Chetwood that lay round about, some forty miles east of the Shire.
It was in these early days, doubtless, that the Hobbits learned their letters and began to write after the manner of the Dunedain, who had in their turn long before learned the art from the Elves. And in those days also they forgot whatever languages they had used before, and spoke ever after the Common Speech, the Westron as it was named, that was current through all the lands of the kings from Arnor to Gondor, and about all the coasts of the Sea from Belfalas to Lune. Yet they kept a few words of their own, as well as their own names of months and days, and a great store of personal names out of the past.
About this time legend among the Hobbits first becomes history with a reckoning of years. For it was in the one thousand six hundred and first year of the Third Age that the Fallohide brothers, Marcho and Blanco, set out from Bree; and having obtained permission from the high king at Fornost, they crossed the brown river Baranduin with a great following of Hobbits. They passed over the Bridge of Stonebows, that had been built in the days of the power of the North Kingdom, and they took ail the land beyond to dwell in, between the river and the Far Downs. All that was demanded of them was that they should keep the Great Bridge in repair, and all other bridges and roads, speed the king's messengers, and acknowledge his lordship.
Thus began theShire-reckoning, for the year of the crossing of the Brandywine (as the Hobbits turned the name) became Year One of the Shire, and all later dates were reckoned from it. At once the western Hobbits fell in love with their new land, and they remained there, and soon passed once more out of the history of Men and of Elves. While there was still a king they were in name his subjects, but they were, in fact, ruled by their own chieftains and meddled not at all with events in the world outside. To the last battle at Fornost with the Witch-lord of Angmar they sent some bowmen to the aid of the king, or so they maintained, though no tales of Men record it. But in that war the North Kingdom ended; and then the Hobbits took the land for their own, and they chose from their own chiefs a Thain to hold the authority of the king that was gone. There for a thousand years they were little troubled by wars, and they prospered and multiplied after the Dark Plague (S.R. 37) until the disaster of the Long Winter and the famine that followed it. Many thousands then perished, but the Days of Dearth (1158-60) were at the time of this tale long past and the Hobbits had again become accustomed to plenty. The land was rich and kindly, and though it had long been deserted when they entered it, it had before been well tilled, and there the king had once had many farms, cornlands, vineyards, and woods.
Forty leagues it stretched from the Far Downs to the Brandywine Bridge, and fifty from the northern moors to the marshes in the south. The Hobbits named it the Shire, as the region of the authority of their Thain, and a district of well-ordered business; and there in that pleasant comer of the world they plied their well-ordered business of living, and they heeded less and less the world outside where dark things moved, until they came to think that peace and plenty were the rule in Middle-earth and the right of all sensible folk. They forgot or ignored what little they had ever known of the Guardians, and of the labours of those that made possible the long peace of the Shire. They were, in fact, sheltered, but they had ceased to remember it.
At no time had Hobbits of any kind been warlike, and they had never fought among themselves. In olden days they had, of course, been often obliged to fight to maintain themselves in a hard world; but in Bilbo's time that was very ancient history. The last battle, before this story opens, and indeed the only one that had ever been fought within the borders of the Shire, was beyond living memory: the Battle of Greenfields, S.R. 1147, in which Bandobras Took routed an invasion of Orcs. Even the weathers had grown milder, and the wolves that had once come ravening out of the North in bitter white winters were now only a grandfather's tale. So, though there was still some store of weapons in the Shire, these were used mostly as trophies, hanging above hearths or on walls, or gathered into the museum at Michel Delving. The Mathom-house it was called; for anything that Hobbits had no immediate use for, but were unwilling to throw away, they called amathom . Their dwellings were apt to become rather crowded with mathoms, and many of the presents that passed from hand to hand were of that son.
Nonetheless, ease and peace had left this people still curiously tough. They were, if it came to it, difficult to daunt or to kill; and they were, perhaps, so unwearyingly fond of good things not least because they could, when put to it, do without them, and could survive rough handling by grief, foe, or weather in a way that astonished those who did not know them well and looked no further than their bellies and their well-fed faces. Though slow to quarrel, and for sport killing nothing that lived, they were doughty at bay, and at need could still handle arms. They shot well with the bow, for they were keen-eyed and sure at the mark. Not only with bows and arrows. If any Hobbit stooped for a stone, it was well to get quickly under cover, as all trespassing beasts knew very well.
All Hobbits had originally lived in holes in the ground, or so they believed, and in such dwellings they still felt most at home; but in the course of time they had been obliged to adopt other forms of abode. Actually in the Shire in Bilbo's days it was, as a rule, only the richest and the poorest Hobbits that maintained the old custom. The poorest went on living in burrows of the most primitive kind, mere holes indeed, with only one window or none; while the well-to-do still constructed more luxurious versions of the simple diggings of old. But suitable sites for these large and ramifying tunnels (orsmials as they called them) were not everywhere to be found; and in the flats and the low-lying districts the Hobbits, as they multiplied, began to build above ground. Indeed, even in the hilly regions and the older villages, such as Hobbiton or Tuckborough, or in the chief township of the Shire, Michel Delving on the White Downs, there were now many houses of wood, brick, or stone. These were specially favoured by millers, smiths, ropers, and cartwrights, and others of that sort; for even when they had holes to live in. Hobbits had long been accustomed to build sheds and workshops.
The habit of building farmhouses and barns was said to have begun among the inhabitants of the Marish down by the Brandywine. The Hobbits of that quarter, the Eastfarthing, were rather large and heavy-legged, and they wore dwarf-boots in muddy weather. But they were well known to be Stoors in a large part of their blood, as indeed was shown by the down that many grew on their chins. No Harfoot or Fallohide had any trace of a beard. Indeed, the folk of the Marish, and of Buckland, east of the River, which they afterwards occupied, came for the most part later into the Shire up from south-away; and they still had many peculiar names and strange words not found elsewhere in the Shire.
It is probable that the craft of building, as many other crafts beside, was derived from the Dunedain. But the Hobbits may have learned it direct from the Elves, the teachers of Men in their youth. For the Elves of the High Kindred had not yet forsaken Middle-earth, and they dwelt still at that time at the Grey Havens away to the west, and in other places within reach of the Shire. Three Elf-towers of immemorial age were still to be seen on the Tower Hills beyond the western marches. They shone far off in the moonlight. The tallest was furthest away, standing alone upon a green mound. The Hobbits of the Westfarthing said that one could see the Sea from the lop of that tower; but no Hobbit had ever been known to climb it. Indeed, few Hobbits had ever seen or sailed upon the Sea, and fewer still had ever returned to report it. Most Hobbits regarded even rivers and small boats with deep misgivings, and not many of them could swim. And as the days of the Shire lengthened they spoke less and less with the Elves, and grew afraid of them, and distrustful of those that had dealings with them; and the Sea became a word of fear among them, and a token of death, and they turned their faces away from the hills in the west.
The craft of building may have come from Elves or Men, but the Hobbits used it in their own fashion. They did not go in for towers. Their houses were usually long, low, and comfortable. The oldest kind were, indeed, no more than built imitations ofsmials, thatched with dry grass or straw, or roofed with turves, and having walls somewhat bulged. That stage, however, belonged to the early days of the Shire, and hobbit-building had long since been altered, improved by devices, learned from Dwarves, or discovered by themselves. A preference for round windows, and even round doors, was the chief remaining peculiarity of hobbit-architecture.
The houses and the holes of Shire-hobbits were often large, and inhabited by large families. (Bilbo and Frodo Baggins were as bachelors very exceptional, as they were also in many other ways, such as their friendship with the Elves.) Sometimes, as in the case of the Tooks of Great Smials, or the Brandybucks of Brandy Hall, many generations of relatives lived in (comparative) peace together in one ancestral and many-tunnelled mansion. All Hobbits were, in any case, clannish and reckoned up their relationships with great care. They drew long and elaborate family-trees with innumerable branches. In dealing with Hobbits it is important to remember who is related to whom, and in what degree. It would be impossible in this book to set out a family-tree that included even the more important members of the more important families at the time which these tales tell of. The genealogical trees at the end of the Red Book of Westmarch are a small book in themselves, and all but Hobbits would find them exceedingly dull. Hobbits delighted in such things, if they were accurate: they liked to have books filled with things that they already knew, set out fair and square with no contradictions.
There is another astonishing thing about Hobbits of old that must be mentioned, an astonishing habit: they imbibed or inhaled, through pipes of clay or wood, the smoke of the burning leaves of a herb, which they calledpipe-weed orleaf, a variety probably ofNicotiana. A great deal of mystery surrounds the origin of this peculiar custom, or 'art' as the Hobbits preferred to call it. All that could be discovered about it in antiquity was put together by Meriadoc Brandybuck (later Master of Buckland), and since he and the tobacco of the Southfarthing play a part in the history that follows, his remarks in the introduction to hisHerblore of the Shire may be quoted.
"This," he says, 'is the one art that we can certainly claim to be our own invention. When Hobbits first began to smoke is not known, all the legends and family histories take it for granted; for ages folk in the Shire smoked various herbs, some fouler, some sweeter. But all accounts agree that Tobold Hornblower of Longbottom in the Southfarthing first grew the true pipe-weed in his gardens in the days of Isengrim the Second, about the year 1070 of Shire-reckoning. The best home-grown still comes from that district, especially the varieties now known as Longbottom Leaf, Old Toby, and Southern Star.
"How Old Toby came by the plant is not recorded, for to his dying day he would not tell. He knew much about herbs, but he was no traveller. It is said that in his youth he went often to Bree, though he certainly never went further from the Shire than that. It is thus quite possible that he learned of this plant in Bree, where now, at any rate, it grows well on the south slopes of the hill. The Bree-hobbits claim to have been the first actual smokers of the pipe-weed. They claim, of course, to have done everything before the people of the Shire, whom they refer to as "colonists"; but in this case their claim is, I think, likely to be true. And certainly it was from Bree that the art of smoking the genuine weed spread in the recent centuries among Dwarves and such other folk, Rangers, Wizards, or wanderers, as still passed to and fro through that ancient road-meeting. The home and centre of the an is thus to be found in the old inn of Bree,The Prancing Pony, that has been kept by the family of Butterbur from time beyond record.
"All the same, observations that I have made on my own many journeys south have convinced me that the weed itself is not native to our parts of the world, but came northward from the lower Anduin, whither it was, I suspect, originally brought over Sea by the Men of Westernesse. It grows abundantly in Gondor, and there is richer and larger than in the North, where it is never found wild, and flourishes only in warm sheltered places like Longbottom. The Men of Gondor call itsweet galenas, and esteem it only for the fragrance of its flowers. From that land it must have been carried up the Greenway during the long centuries between the coming of Elendil and our own day. But even the Dunedain of Gondor allow us this credit: Hobbits first put it into pipes. Not even the Wizards first thought of that before we did. Though one Wizard that I knew took up the art long ago, and became as skilful in it as in all other things that he put his mind to."
The Shire was divided into four quarters, the Farthings already referred to. North, South, East, and West; and these again each into a number of folklands, which still bore the names of some of the old leading families, although by the time of this history these names were no longer found only in their proper folklands. Nearly all Tooks still lived in the Tookland, but that was not true of many other families, such as the Bagginses or the Boffins. Outside the Farthings were the East and West Marches: the Buckland (see beginning of Chapter V, Book I); and the Westmarch added to the Shire in S.R. 1462.
The Shire at this time had hardly any 'government'. Families for the most part managed their own affairs. Growing food and eating it occupied most of their time. In other matters they were, as a rule, generous and not greedy, but contented and moderate, so that estates, farms, workshops, and small trades tended to remain unchanged for generations.
There remained, of course, the ancient tradition concerning the high king at Fornost, or Norbury as they called it, away north of the Shire. But there had been no king for nearly a thousand years, and even the ruins of Kings' Norbury were covered with grass. Yet the Hobbits still said of wild folk and wicked things (such as trolls) that they had not heard of the king. For they attributed to the king of old all their essential laws; and usually they kept the laws of free will, because they were The Rules (as they said), both ancient and just.
It is true that the Took family had long been pre-eminent; for the office of Thain had passed to them (from the Oldbucks) some centuries before, and the chief Took had borne that title ever since. The Thain was the master of the Shire-moot, and captain of the Shire-muster and the Hobbitry-in-arms, but as muster and moot were only held in times of emergency, which no longer occurred, the Thainship had ceased to be more than a nominal dignity. The Took family was still, indeed, accorded a special respect, for it remained both numerous and exceedingly wealthy, and was liable to produce in every generation strong characters of peculiar habits and even adventurous temperament. The latter qualities, however, were now rather tolerated (in the rich) than generally approved. The custom endured, nonetheless, of referring to the head of the family as The Took, and of adding to his name, if required, a number: such as Isengrim the Second, for instance.
The only real official in the Shire at this date was the Mayor of Michel Delving (or of the Shire), who was elected every seven years at the Free Fair on the White Downs at the Lithe, that is at Midsummer. As mayor almost his only duty was to preside at banquets, given on the Shire-holidays, which occurred at frequent intervals. But the offices of Postmaster and First Shirriff were attached to the mayoralty, so that he managed both the Messenger Service and the Watch. These were the only Shire-services, and the Messengers were the most numerous, and much the busier of the two. By no means all Hobbits were lettered, but those who were wrote constantly to all their friends (and a selection of their relations) who lived further off than an afternoon's walk.
The Shirriffs was the name that the Hobbits gave to their police, or the nearest equivalent that they possessed. They had, of course, no uniforms (such things being quite unknown), only a feather in their caps; and they were in practice rather haywards than policemen, more concerned with the strayings of beasts than of people. There were in all the Shire only twelve of them, three in each Farthing, for Inside Work. A rather larger body, varying at need, was employed to 'beat the bounds', and to see that Outsiders of any kind, great or small, did not make themselves a nuisance.
At the time when this story begins the Bounders, as they were called, had been greatly increased. There were many reports and complaints of strange persons and creatures prowling about the borders, or over them: the first sign that all was not quite as it should be, and always had been except in tales and legends of long ago. Few heeded the sign, and not even Bilbo yet had any notion of what it portended. Sixty years had passed since he set out on his memorable journey, and he was old even for Hobbits, who reached a hundred as often as not; but much evidently still remained of the considerable wealth that he had brought back. How much or how little he revealed to no one, not even to Frodo his favourite 'nephew'. And he still kept secret the ring that he bad found.
As is told in The Hobbit, there came one day to Bilbo's door the great Wizard, Gandalf the Grey, and thirteen dwarves with him: none other, indeed, than Thorin Oakenshield, descendant of kings, and his twelve companions in exile. With them he set out, to his own lasting astonishment, on a morning of April, it being then the year 1341 Shire-reckoning, on a quest of great treasure, the dwarf-hoards of the Kings under the Mountain, beneath Erebor in Dale, far off in the East. The quest was successful, and the Dragon that guarded the hoard was destroyed. Yet, though before all was won the Battle of Five Armies was fought, and Thorin was slain, and many deeds of renown were done, the matter would scarcely have concerned later history, or earned more than a note in the long annals of the Third Age, but for an 'accident' by the way. The party was assailed by Orcs in a high pass of the Misty Mountains as they went towards Wilderland; and so it happened that Bilbo was lost for a while in the black orc-mines deep under the mountains, and there, as he groped in vain in the dark, he put his hand on a ring, lying on the floor of a tunnel. He put it in his pocket. It seemed then like mere luck.
Trying to find his way out, Bilbo went on down to the roots of the mountains, until he could go no further. At the bottom of the tunnel lay a cold lake far from the light, and on an island of rock in the water lived Gollum. He was a loathsome little creature: he paddled a small boat with his large flat feet, peering with pale luminous eyes and catching blind fish with his long fingers, and eating them raw. He ate any living thing, even orc, if he could catch it and strangle it without a struggle. He possessed a secret treasure that had come to him long ages ago, when he still lived in the light: a ring of gold that made its wearer invisible. It was the one thing he loved, his 'precious', and he talked to it, even when it was not with him. For he kept it hidden safe in a hole on his island, except when he was hunting or spying on the orcs of the mines.
Maybe he would have attacked Bilbo at once, if the ring had been on him when they met; but it was not, and the hobbit held in his hand an Elvish knife, which served him as a sword. So to gain time Gollum challenged Bilbo to the Riddle-game, saying that if he asked a riddle which Bilbo could not guess, then he would kill him and eat him; but if Bilbo defeated him, then he would do as Bilbo wished: he would lead him to a way out of the tunnels.
Since he was lost in the dark without hope, and could neither go on nor back, Bilbo accepted the challenge; and they asked one another many riddles. In the end Bilbo won the game, more by luck (as it seemed) than by wits; for he was stumped at last for a riddle to ask, and cried out, as his hand came upon the ring he had picked up and forgotten: What have I got in my pocket? This Gollum failed to answer, though he demanded three guesses.
The Authorities, it is true, differ whether this last question was a mere 'question' and not a 'riddle' according to the strict rules of the Game; but all agree that, after accepting it and trying to guess the answer, Gollum was bound by his promise. And Bilbo pressed him to keep his word; for the thought came to him that this slimy creature might prove false, even though such promises were held sacred, and of old all but the wickedest things feared to break them. But after ages alone in the dark Gollum's heart was black, and treachery was in it. He slipped away, and returned to the island, of which Bilbo knew nothing, not far off in the dark water. There, he thought, lay his ring. He was hungry now, and angry, and once his 'precious' was with him he would not fear any weapon at all.
But the ring was not on the island; he had lost it, it was gone. His screech sent a shiver down Bilbo's back, though he did not yet understand what had happened. But Gollum had at last leaped to a guess, too late. What has it got in its pocketses? he cried. The light in his eyes was like a green flame as he sped back to murder the hobbit and recover his 'precious'. Just in time Bilbo saw his peril, and he fled blindly up the passage away from the water; and once more he was saved by his luck. For just as he ran he put his hand in his pocket, and the ring slipped quietly on to his finger. So it was that Gollum passed him without seeing him, and went to guard the way out, lest the 'thief' should escape. Warily Bilbo followed him, as he went along, cursing, and talking to himself about his 'precious'; from which talk at last even Bilbo guessed the truth, and hope came to him in the darkness: he himself had found the marvellous ring and a chance of escape from the orcs and from Gollum.
At length they came to a halt before an unseen opening that led to the lower gates of the mines, on the eastward side of the mountains. There Gollum crouched at bay, smelling and listening; and Bilbo was tempted to slay him with his sword. But pity stayed him, and though he kept the ring, in which his only hope lay, he would not use it to help him kill the wretched creature at a disadvantage. In the end, gathering his courage, he leaped over Gollum in the dark, and fled away down the passage, pursued by his enemy's cries of hate and despair: Thief, thief! Baggins! We hates it for ever!
Now it is a curious fact that this is not the story as Bilbo first told it to his companions. To them his account was that Gollum had promised to give him a present, if he won the game; but when Gollum went to fetch it from his island he found the treasure was gone: a magic ring, which had been given to him long ago on his birthday. Bilbo guessed that this was the very ring that he had found, and as he had won the game, it was already his by right. But being in a tight place, he said nothing about it, and made Gollum show him the way out, as a reward instead of a present. This account Bilbo set down in his memoirs, and he seems never to have altered it himself, not even after the Council of Elrond. Evidently it still appeared in the original Red Book, as it did in several of the copies and abstracts. But many copies contain the true account (as an alternative), derived no doubt from notes by Frodo or Samwise, both of whom learned the truth, though they seem to have been unwilling to delete anything actually written by the old hobbit himself.
Gandalf, however, disbelieved Bilbo's first story, as soon as he heard it, and he continued to be very curious about the ring. Eventually he got the true tale out of Bilbo after much questioning, which for a while strained their friendship; but the wizard seemed to think the truth important. Though he did not say so to Bilbo, he also thought it important, and disturbing, to find that the good hobbit had not told the truth from the first: quite contrary to his habit. The idea of a 'present' was not mere hobbitlike invention, all the same. It was suggested to Bilbo, as he confessed, by Gollum's talk that he overheard; for Gollum did, in fact, call the ring his 'birthday present', many times. That also Gandalf thought strange and suspicious; but he did not discover the truth in this point for many more years, as will be seen in this book.
Of Bilbo's later adventures little more need be said here. With the help of the ring he escaped from the orc-guards at the gate and rejoined his companions. He used the ring many times on his quest, chiefly for the help of his friends; but he kept it secret from them as long as he could. After his return to his home he never spoke of it again to anyone, save Gandalf and Frodo; and no one else in the Shire knew of its existence, or so he believed. Only to Frodo did he show the account of his Journey that he was writing.
His sword, Sting, Bilbo hung over his fireplace, and his coat of marvellous mail, the gift of the Dwarves from the Dragon-hoard, he lent to a museum, to the Michel Delving Mathom-house in fact. But he kept in a drawer at Bag End the old cloak and hood that he had worn on his travels; and the ring, secured by a fine chain, remained in his pocket.
He returned to his home at Bag End on June the 22nd in his fifty-second year (S.R. 1342), and nothing very notable occurred in the Shire until Mr. Baggins began the preparations for the celebration of his hundred-and-eleventh birthday (S.R. 1401). At this point this History begins.
At the end of the Third Age the part played by the Hobbits in the great events that led to the inclusion of the Shire in the Reunited Kingdom awakened among them a more widespread interest in their own history; and many of their traditions, up to that time still mainly oral, were collected and written down. The greater families were also concerned with events in the Kingdom at large, and many of their members studied its ancient histories and legends. By the end of the first century of the Fourth Age there were already to be found in the Shire several libraries that contained many historical books and records.
The largest of these collections were probably at Undertowers, at Great Smials, and at Brandy Hall. This account of the end of the Third Age is drawn mainly from the Red Book of Westmarch. That most important source for the history of the War of the Ring was so called because it was long preserved at Undertowers, the home of the Fairbairns, Wardens of the Westmarch. It was in origin Bilbo's private diary, which he took with him to Rivendell. Frodo brought it back to the Shire, together with many loose leaves of notes, and during S.R. 1420-1 he nearly filled its pages with his account of the War. But annexed to it and preserved with it, probably in a single red case, were the three large volumes, bound in red leather, that Bilbo gave to him as a parting gift. To these four volumes there was added in Westmarch a fifth containing commentaries, genealogies, and various other matter concerning the hobbit members of the Fellowship.
The original Red Book has not been preserved, but many copies were made, especially of the first volume, for the use of the descendants of the children of Master Samwise. The most important copy, however, has a different history. It was kept at Great Smials, but it was written in Gondor, probably at the request of the great-grandson of Peregrin, and completed in S.R. 1592 (F.A. 172). Its southern scribe appended this note: Findegil, King's Writer, finished this work in IV 172. It is an exact copy in all details of the Thain's Book in Minas Tirith. That book was a copy, made at the request of King Elessar, of the Red Book of the Periannath, and was brought to him by the Thain Peregrin when he retired to Gondor in IV 64.
The Thain's Book was thus the first copy made of the Red Book and contained much that was later omitted or lost. In Minas Tirith it received much annotation, and many corrections, especially of names, words, and quotations in the Elvish languages; and there was added to it an abbreviated version of those parts of The Tale of Aragorn and Arwen which lie outside the account of the War. The full tale is stated to have been written by Barahir, grandson of the Steward Faramir, some time after the passing of the King. But the chief importance of Findegil's copy is that it alone contains the whole of Bilbo's 'Translations from the Elvish'. These three volumes were found to be a work of great skill and learning in which, between 1403 and 1418, he had used all the sources available to him in Rivendell, both living and written. But since they were little used by Frodo, being almost entirely concerned with the Elder Days, no more is said of them here.
Since Meriadoc and Peregrin became the heads of their great families, and at the same time kept up their connexions with Rohan and Gondor, the libraries at Bucklebury and Tuckborough contained much that did not appear in the Red Book. In Brandy Hall there were many works dealing with Eriador and the history of Rohan. Some of these were composed or begun by Meriadoc himself, though in the Shire he was chiefly remembered for his Herblore of the Shire, and for his Reckoning of Years in which he discussed the relation of the calendars of the Shire and Bree to those of Rivendell, Gondor, and Rohan. He also wrote a short treatise on Old Words and Names in the Shire, having special interest in discovering the kinship with the language of the Rohirrim of such 'shire-words' as mathom and old elements in place names.
At Great Smials the books were of less interest to Shire-folk, though more important for larger history. None of them was written by Peregrin, but he and his successors collected many manuscripts written by scribes of Gondor: mainly copies or summaries of histories or legends relating to Elendil and his heirs. Only here in the Shire were to be found extensive materials for the history of Numenor and the arising of Sauron. It was probably at Great Smials that The Tale of Years was put together, with the assistance of material collected by Meriadoc. Though the dates given are often conjectural, especially for the Second Age, they deserve attention. It is probable that Meriadoc obtained assistance and information from Rivendell, which he visited more than once. There, though Elrond had departed, his sons long remained, together with some of the High-elven folk. It is said that Celeborn went to dwell there after the departure of Galadriel; but there is no record of the day when at last he sought the Grey Havens, and with him went the last living memory of the Elder Days in Middle-earth.

View File

@@ -0,0 +1,21 @@
## Laboratory work 7. Variant 5.
### Task
Choose a literary text (even variants use a Russian-language text, odd variants an English-language one) and train a recurrent
neural network on it for a text-generation task. Tune the architecture and parameters so as to get as close as possible to a
meaningful result.
Finally, find a compromise architecture that handles both kinds of text reasonably well.
### Progress
The prologue of The Lord of the Rings was taken for the English model. Although this model turned out better than the Russian one,
training took a little over an hour.
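For the English model the same script is presumably reused with a different corpus and seed. The fragment below is only an illustrative sketch, not part of this diff: the file name `en.txt` and the seed phrase are assumptions, and `generate_text`, `model` and `max_sequence_length` are the names defined in the `main.py` shown later in this compare view.

```python
# Hypothetical English run: only the corpus file and the seed phrase change.
with open('en.txt', 'r', encoding='utf-8') as file:   # 'en.txt' is an assumed file name
    text = file.read()

# ...tokenization, n-gram preparation, model definition and model.fit()
# then proceed exactly as in main.py below...

seed_text = "the hobbits were"   # assumed English seed phrase
print(generate_text(seed_text, 50, model, max_sequence_length))
```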
#### Result (rus)
здесь был человек прежде всего всего обманывает самого себя ибо он думает что успешно соврал а люди поняли и из
деликатности промолчали промолчали промолчали промолчали промолчали какие его неудачи могут его постигнуть не тому
помочь много ли людей не нуждаются в помощи помощи было врать врать врать молчания молчания а внести то
#### Result (eng)
the harfoots were browner of skin smaller and shorter and they were beardless and bootless their hands and feet were
neat and nimble and they preferred highlands and hillsides the stoors were broader heavier in build their feet and
hands were larger and they preferred flat lands and riversides

View File

@@ -0,0 +1,70 @@
import numpy as np
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense
from keras.utils import to_categorical

# Load the training corpus
with open('ru.txt', "r", encoding='utf-8') as file:
    text = file.read()

# Text preprocessing (adapt to your task)
# Build a vocabulary mapping words to indices and back
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])
total_words = len(tokenizer.word_index) + 1

# Prepare the training data (adapt to your task):
# every line is expanded into all of its n-gram prefixes
input_sequences = []
for line in text.split('\n'):
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i+1]
        input_sequences.append(n_gram_sequence)

max_sequence_length = max([len(x) for x in input_sequences])
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_length, padding='pre')
X, y = input_sequences[:, :-1], input_sequences[:, -1]
y = to_categorical(y, num_classes=total_words)

# Define the model architecture
model = Sequential()
model.add(Embedding(total_words, 50, input_length=max_sequence_length-1))
model.add(LSTM(100))
model.add(Dense(total_words, activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X, y, epochs=100, verbose=2)

# Text generation with the trained model (greedy argmax decoding)
def generate_text(seed_text, next_words, model_, max_sequence_length):
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_length - 1, padding='pre')
        predicted_probs = model_.predict(token_list, verbose=0)[0]
        predicted_index = np.argmax(predicted_probs)
        output_word = ""
        for word, index in tokenizer.word_index.items():
            if index == predicted_index:
                output_word = word
                break
        seed_text += " " + output_word
    return seed_text

# Example generation (replace seed_text and next_words with your own values)
seed_text = "здесь был"
next_words = 50
generated_text = generate_text(seed_text, next_words, model, max_sequence_length)
print(generated_text)

View File

@@ -0,0 +1,9 @@
Когда человек сознательно или интуитивно выбирает себе в жизни какую-то цель, жизненную задачу, он невольно дает себе оценку. По тому, ради чего человек живет, можно судить и о его самооценке - низкой или высокой.
Если человек живет, чтобы приносить людям добро, облегчать их страдания, давать людям радость, то он оценивает себя на уровне этой своей человечности. Он ставит себе цель, достойную человека.
Только такая цель позволяет человеку прожить свою жизнь с достоинством и получить настоящую радость. Да, радость! Подумайте: если человек ставит себе задачей увеличивать в жизни добро, приносить людям счастье, какие неудачи могут его постигнуть? Не тому помочь? Но много ли людей не нуждаются в помощи?
Если жить только для себя, своими мелкими заботами о собственном благополучии, то от прожитого не останется и следа. Если же жить для других, то другие сберегут то, чему служил, чему отдавал силы.
Можно по-разному определять цель своего существования, но цель должна быть. Надо иметь и принципы в жизни. Одно правило в жизни должно быть у каждого человека, в его цели жизни, в его принципах жизни, в его поведении: надо прожить жизнь с достоинством, чтобы не стыдно было вспоминать.
Достоинство требует доброты, великодушия, умения не быть эгоистом, быть правдивым, хорошим другом, находить радость в помощи другим.
Ради достоинства жизни надо уметь отказываться от мелких удовольствий и немалых тоже… Уметь извиняться, признавать перед другими ошибку - лучше, чем врать.
Обманывая, человек прежде всего обманывает самого себя, ибо он думает, что успешно соврал, а люди поняли и из деликатности промолчали.
Жизнь - прежде всего творчество, но это не значит, что каждый человек, чтобы жить, должен родиться художником, балериной или ученым. Можно творить просто добрую атмосферу вокруг себя. Человек может принести с собой атмосферу подозрительности, какого-то тягостного молчания, а может внести сразу радость, свет. Вот это и есть творчество.

View File

@@ -0,0 +1,60 @@
# Laboratory work 6
## Variant 9
## Task
Use an MLPClassifier neural network on the data from Table 1 for your variant, formulating the problem yourself. Interpret the results and assess how well the network suits the problem you formulated.
Problem statement:
Use MLPClassifier to predict salary based on work experience (experience_level), employment type (employment_type), company location (company_location) and company size (company_size). Assess how well the neural network suits this task.
## Program Description
The program is an example of using MLPClassifier to predict salary from various features.
### Libraries Used
- `pandas`: a data processing and analysis library, used to load and preprocess the data.
- `scikit-learn`:
    - `train_test_split`: used to split the data into training and test sets.
    - `StandardScaler`: used to normalise the numeric features.
    - `OneHotEncoder`: used to encode the categorical features.
    - `MLPClassifier`: a multilayer perceptron classifier (neural network).
    - `accuracy_score`: used to evaluate classification accuracy.
### Program Steps
1. **Loading the data:**
    - The data is loaded from `ds_salaries.csv` using pandas.
2. **Defining salary categories:**
    - Salary categories are created from bins using `pd.cut`.
3. **Adding the category column:**
    - The category column is added to the data.
4. **Preprocessing:**
    - The categorical features ('experience_level', 'employment_type', 'job_title', 'employee_residence', 'company_location', 'company_size') are encoded with OneHotEncoder.
    - The numeric features ('work_year', 'remote_ratio') are normalised with StandardScaler.
    - These steps are combined in a ColumnTransformer and used as the data preprocessor.
5. **Feature selection:**
    - The features used to train the model are defined.
6. **Splitting the data:**
    - The data is split into training and test sets in an 80/20 ratio with `train_test_split`.
7. **Training the model:**
    - An MLPClassifier is combined with the preprocessor in a Pipeline.
8. **Evaluating the model:**
    - The model's accuracy is computed and printed with `accuracy_score` (a hedged sketch of these steps follows the list).
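The lab's `main.py` for this task is not included in this excerpt, so the following is only a minimal sketch reconstructed from the steps above; the bin edges, category labels, hidden-layer size, `max_iter` and `random_state` values are assumptions, not the lab's actual settings.

```python
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# 1. Load the data
df = pd.read_csv('ds_salaries.csv')

# 2-3. Derive salary categories with pd.cut and attach them as the target column
bins = [0, 50_000, 100_000, 150_000, df['salary_in_usd'].max()]          # assumed bin edges
df['salary_category'] = pd.cut(df['salary_in_usd'], bins=bins,
                               labels=['low', 'medium', 'high', 'very_high'])

# 4. Preprocessing: one-hot encode categorical features, scale numeric ones
categorical = ['experience_level', 'employment_type', 'job_title',
               'employee_residence', 'company_location', 'company_size']
numeric = ['work_year', 'remote_ratio']
preprocessor = ColumnTransformer([
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical),
    ('num', StandardScaler(), numeric),
])

# 5-6. Select features and split 80/20
X = df[categorical + numeric]
y = df['salary_category']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 7. MLPClassifier combined with the preprocessor in a Pipeline
clf = Pipeline([
    ('prep', preprocessor),
    ('mlp', MLPClassifier(hidden_layer_sizes=(100,), max_iter=500, random_state=42)),
])
clf.fit(X_train, y_train)

# 8. Evaluate with accuracy_score on the held-out test set
print('accuracy:', accuracy_score(y_test, clf.predict(X_test)))
```

Run in this form, the printed value is the same kind of single accuracy number as the result reported below, though the exact figure depends on the assumed hyperparameters and split.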
### Running the Program
- Clone or download the `main.py` code.
- Run the file in an environment that can execute Python: `python main.py`
### Results
- The model is evaluated with the accuracy metric, which can be printed to the console or used for visualisation.
In this case the accuracy was 0.5901639344262295.
The closer the result is to one, the better; this result of about 59% can be considered average.

View File

@@ -0,0 +1,608 @@
,work_year,experience_level,employment_type,job_title,salary,salary_currency,salary_in_usd,employee_residence,remote_ratio,company_location,company_size
0,2020,MI,FT,Data Scientist,70000,EUR,79833,DE,0,DE,L
1,2020,SE,FT,Machine Learning Scientist,260000,USD,260000,JP,0,JP,S
2,2020,SE,FT,Big Data Engineer,85000,GBP,109024,GB,50,GB,M
3,2020,MI,FT,Product Data Analyst,20000,USD,20000,HN,0,HN,S
4,2020,SE,FT,Machine Learning Engineer,150000,USD,150000,US,50,US,L
5,2020,EN,FT,Data Analyst,72000,USD,72000,US,100,US,L
6,2020,SE,FT,Lead Data Scientist,190000,USD,190000,US,100,US,S
7,2020,MI,FT,Data Scientist,11000000,HUF,35735,HU,50,HU,L
8,2020,MI,FT,Business Data Analyst,135000,USD,135000,US,100,US,L
9,2020,SE,FT,Lead Data Engineer,125000,USD,125000,NZ,50,NZ,S
10,2020,EN,FT,Data Scientist,45000,EUR,51321,FR,0,FR,S
11,2020,MI,FT,Data Scientist,3000000,INR,40481,IN,0,IN,L
12,2020,EN,FT,Data Scientist,35000,EUR,39916,FR,0,FR,M
13,2020,MI,FT,Lead Data Analyst,87000,USD,87000,US,100,US,L
14,2020,MI,FT,Data Analyst,85000,USD,85000,US,100,US,L
15,2020,MI,FT,Data Analyst,8000,USD,8000,PK,50,PK,L
16,2020,EN,FT,Data Engineer,4450000,JPY,41689,JP,100,JP,S
17,2020,SE,FT,Big Data Engineer,100000,EUR,114047,PL,100,GB,S
18,2020,EN,FT,Data Science Consultant,423000,INR,5707,IN,50,IN,M
19,2020,MI,FT,Lead Data Engineer,56000,USD,56000,PT,100,US,M
20,2020,MI,FT,Machine Learning Engineer,299000,CNY,43331,CN,0,CN,M
21,2020,MI,FT,Product Data Analyst,450000,INR,6072,IN,100,IN,L
22,2020,SE,FT,Data Engineer,42000,EUR,47899,GR,50,GR,L
23,2020,MI,FT,BI Data Analyst,98000,USD,98000,US,0,US,M
24,2020,MI,FT,Lead Data Scientist,115000,USD,115000,AE,0,AE,L
25,2020,EX,FT,Director of Data Science,325000,USD,325000,US,100,US,L
26,2020,EN,FT,Research Scientist,42000,USD,42000,NL,50,NL,L
27,2020,SE,FT,Data Engineer,720000,MXN,33511,MX,0,MX,S
28,2020,EN,CT,Business Data Analyst,100000,USD,100000,US,100,US,L
29,2020,SE,FT,Machine Learning Manager,157000,CAD,117104,CA,50,CA,L
30,2020,MI,FT,Data Engineering Manager,51999,EUR,59303,DE,100,DE,S
31,2020,EN,FT,Big Data Engineer,70000,USD,70000,US,100,US,L
32,2020,SE,FT,Data Scientist,60000,EUR,68428,GR,100,US,L
33,2020,MI,FT,Research Scientist,450000,USD,450000,US,0,US,M
34,2020,MI,FT,Data Analyst,41000,EUR,46759,FR,50,FR,L
35,2020,MI,FT,Data Engineer,65000,EUR,74130,AT,50,AT,L
36,2020,MI,FT,Data Science Consultant,103000,USD,103000,US,100,US,L
37,2020,EN,FT,Machine Learning Engineer,250000,USD,250000,US,50,US,L
38,2020,EN,FT,Data Analyst,10000,USD,10000,NG,100,NG,S
39,2020,EN,FT,Machine Learning Engineer,138000,USD,138000,US,100,US,S
40,2020,MI,FT,Data Scientist,45760,USD,45760,PH,100,US,S
41,2020,EX,FT,Data Engineering Manager,70000,EUR,79833,ES,50,ES,L
42,2020,MI,FT,Machine Learning Infrastructure Engineer,44000,EUR,50180,PT,0,PT,M
43,2020,MI,FT,Data Engineer,106000,USD,106000,US,100,US,L
44,2020,MI,FT,Data Engineer,88000,GBP,112872,GB,50,GB,L
45,2020,EN,PT,ML Engineer,14000,EUR,15966,DE,100,DE,S
46,2020,MI,FT,Data Scientist,60000,GBP,76958,GB,100,GB,S
47,2020,SE,FT,Data Engineer,188000,USD,188000,US,100,US,L
48,2020,MI,FT,Data Scientist,105000,USD,105000,US,100,US,L
49,2020,MI,FT,Data Engineer,61500,EUR,70139,FR,50,FR,L
50,2020,EN,FT,Data Analyst,450000,INR,6072,IN,0,IN,S
51,2020,EN,FT,Data Analyst,91000,USD,91000,US,100,US,L
52,2020,EN,FT,AI Scientist,300000,DKK,45896,DK,50,DK,S
53,2020,EN,FT,Data Engineer,48000,EUR,54742,PK,100,DE,L
54,2020,SE,FL,Computer Vision Engineer,60000,USD,60000,RU,100,US,S
55,2020,SE,FT,Principal Data Scientist,130000,EUR,148261,DE,100,DE,M
56,2020,MI,FT,Data Scientist,34000,EUR,38776,ES,100,ES,M
57,2020,MI,FT,Data Scientist,118000,USD,118000,US,100,US,M
58,2020,SE,FT,Data Scientist,120000,USD,120000,US,50,US,L
59,2020,MI,FT,Data Scientist,138350,USD,138350,US,100,US,M
60,2020,MI,FT,Data Engineer,110000,USD,110000,US,100,US,L
61,2020,MI,FT,Data Engineer,130800,USD,130800,ES,100,US,M
62,2020,EN,PT,Data Scientist,19000,EUR,21669,IT,50,IT,S
63,2020,SE,FT,Data Scientist,412000,USD,412000,US,100,US,L
64,2020,SE,FT,Machine Learning Engineer,40000,EUR,45618,HR,100,HR,S
65,2020,EN,FT,Data Scientist,55000,EUR,62726,DE,50,DE,S
66,2020,EN,FT,Data Scientist,43200,EUR,49268,DE,0,DE,S
67,2020,SE,FT,Data Science Manager,190200,USD,190200,US,100,US,M
68,2020,EN,FT,Data Scientist,105000,USD,105000,US,100,US,S
69,2020,SE,FT,Data Scientist,80000,EUR,91237,AT,0,AT,S
70,2020,MI,FT,Data Scientist,55000,EUR,62726,FR,50,LU,S
71,2020,MI,FT,Data Scientist,37000,EUR,42197,FR,50,FR,S
72,2021,EN,FT,Research Scientist,60000,GBP,82528,GB,50,GB,L
73,2021,EX,FT,BI Data Analyst,150000,USD,150000,IN,100,US,L
74,2021,EX,FT,Head of Data,235000,USD,235000,US,100,US,L
75,2021,SE,FT,Data Scientist,45000,EUR,53192,FR,50,FR,L
76,2021,MI,FT,BI Data Analyst,100000,USD,100000,US,100,US,M
77,2021,MI,PT,3D Computer Vision Researcher,400000,INR,5409,IN,50,IN,M
78,2021,MI,CT,ML Engineer,270000,USD,270000,US,100,US,L
79,2021,EN,FT,Data Analyst,80000,USD,80000,US,100,US,M
80,2021,SE,FT,Data Analytics Engineer,67000,EUR,79197,DE,100,DE,L
81,2021,MI,FT,Data Engineer,140000,USD,140000,US,100,US,L
82,2021,MI,FT,Applied Data Scientist,68000,CAD,54238,GB,50,CA,L
83,2021,MI,FT,Machine Learning Engineer,40000,EUR,47282,ES,100,ES,S
84,2021,EX,FT,Director of Data Science,130000,EUR,153667,IT,100,PL,L
85,2021,MI,FT,Data Engineer,110000,PLN,28476,PL,100,PL,L
86,2021,EN,FT,Data Analyst,50000,EUR,59102,FR,50,FR,M
87,2021,MI,FT,Data Analytics Engineer,110000,USD,110000,US,100,US,L
88,2021,SE,FT,Lead Data Analyst,170000,USD,170000,US,100,US,L
89,2021,SE,FT,Data Analyst,80000,USD,80000,BG,100,US,S
90,2021,SE,FT,Marketing Data Analyst,75000,EUR,88654,GR,100,DK,L
91,2021,EN,FT,Data Science Consultant,65000,EUR,76833,DE,100,DE,S
92,2021,MI,FT,Lead Data Analyst,1450000,INR,19609,IN,100,IN,L
93,2021,SE,FT,Lead Data Engineer,276000,USD,276000,US,0,US,L
94,2021,EN,FT,Data Scientist,2200000,INR,29751,IN,50,IN,L
95,2021,MI,FT,Cloud Data Engineer,120000,SGD,89294,SG,50,SG,L
96,2021,EN,PT,AI Scientist,12000,USD,12000,BR,100,US,S
97,2021,MI,FT,Financial Data Analyst,450000,USD,450000,US,100,US,L
98,2021,EN,FT,Computer Vision Software Engineer,70000,USD,70000,US,100,US,M
99,2021,MI,FT,Computer Vision Software Engineer,81000,EUR,95746,DE,100,US,S
100,2021,MI,FT,Data Analyst,75000,USD,75000,US,0,US,L
101,2021,SE,FT,Data Engineer,150000,USD,150000,US,100,US,L
102,2021,MI,FT,BI Data Analyst,11000000,HUF,36259,HU,50,US,L
103,2021,MI,FT,Data Analyst,62000,USD,62000,US,0,US,L
104,2021,MI,FT,Data Scientist,73000,USD,73000,US,0,US,L
105,2021,MI,FT,Data Analyst,37456,GBP,51519,GB,50,GB,L
106,2021,MI,FT,Research Scientist,235000,CAD,187442,CA,100,CA,L
107,2021,SE,FT,Data Engineer,115000,USD,115000,US,100,US,S
108,2021,SE,FT,Data Engineer,150000,USD,150000,US,100,US,M
109,2021,EN,FT,Data Engineer,2250000,INR,30428,IN,100,IN,L
110,2021,SE,FT,Machine Learning Engineer,80000,EUR,94564,DE,50,DE,L
111,2021,SE,FT,Director of Data Engineering,82500,GBP,113476,GB,100,GB,M
112,2021,SE,FT,Lead Data Engineer,75000,GBP,103160,GB,100,GB,S
113,2021,EN,PT,AI Scientist,12000,USD,12000,PK,100,US,M
114,2021,MI,FT,Data Engineer,38400,EUR,45391,NL,100,NL,L
115,2021,EN,FT,Machine Learning Scientist,225000,USD,225000,US,100,US,L
116,2021,MI,FT,Data Scientist,50000,USD,50000,NG,100,NG,L
117,2021,MI,FT,Data Science Engineer,34000,EUR,40189,GR,100,GR,M
118,2021,EN,FT,Data Analyst,90000,USD,90000,US,100,US,S
119,2021,MI,FT,Data Engineer,200000,USD,200000,US,100,US,L
120,2021,MI,FT,Big Data Engineer,60000,USD,60000,ES,50,RO,M
121,2021,SE,FT,Principal Data Engineer,200000,USD,200000,US,100,US,M
122,2021,EN,FT,Data Analyst,50000,USD,50000,US,100,US,M
123,2021,EN,FT,Applied Data Scientist,80000,GBP,110037,GB,0,GB,L
124,2021,EN,PT,Data Analyst,8760,EUR,10354,ES,50,ES,M
125,2021,MI,FT,Principal Data Scientist,151000,USD,151000,US,100,US,L
126,2021,SE,FT,Machine Learning Scientist,120000,USD,120000,US,50,US,S
127,2021,MI,FT,Data Scientist,700000,INR,9466,IN,0,IN,S
128,2021,EN,FT,Machine Learning Engineer,20000,USD,20000,IN,100,IN,S
129,2021,SE,FT,Lead Data Scientist,3000000,INR,40570,IN,50,IN,L
130,2021,EN,FT,Machine Learning Developer,100000,USD,100000,IQ,50,IQ,S
131,2021,EN,FT,Data Scientist,42000,EUR,49646,FR,50,FR,M
132,2021,MI,FT,Applied Machine Learning Scientist,38400,USD,38400,VN,100,US,M
133,2021,SE,FT,Computer Vision Engineer,24000,USD,24000,BR,100,BR,M
134,2021,EN,FT,Data Scientist,100000,USD,100000,US,0,US,S
135,2021,MI,FT,Data Analyst,90000,USD,90000,US,100,US,M
136,2021,MI,FT,ML Engineer,7000000,JPY,63711,JP,50,JP,S
137,2021,MI,FT,ML Engineer,8500000,JPY,77364,JP,50,JP,S
138,2021,SE,FT,Principal Data Scientist,220000,USD,220000,US,0,US,L
139,2021,EN,FT,Data Scientist,80000,USD,80000,US,100,US,M
140,2021,MI,FT,Data Analyst,135000,USD,135000,US,100,US,L
141,2021,SE,FT,Data Science Manager,240000,USD,240000,US,0,US,L
142,2021,SE,FT,Data Engineering Manager,150000,USD,150000,US,0,US,L
143,2021,MI,FT,Data Scientist,82500,USD,82500,US,100,US,S
144,2021,MI,FT,Data Engineer,100000,USD,100000,US,100,US,L
145,2021,SE,FT,Machine Learning Engineer,70000,EUR,82744,BE,50,BE,M
146,2021,MI,FT,Research Scientist,53000,EUR,62649,FR,50,FR,M
147,2021,MI,FT,Data Engineer,90000,USD,90000,US,100,US,L
148,2021,SE,FT,Data Engineering Manager,153000,USD,153000,US,100,US,L
149,2021,SE,FT,Cloud Data Engineer,160000,USD,160000,BR,100,US,S
150,2021,SE,FT,Director of Data Science,168000,USD,168000,JP,0,JP,S
151,2021,MI,FT,Data Scientist,150000,USD,150000,US,100,US,M
152,2021,MI,FT,Data Scientist,95000,CAD,75774,CA,100,CA,L
153,2021,EN,FT,Data Scientist,13400,USD,13400,UA,100,UA,L
154,2021,SE,FT,Data Science Manager,144000,USD,144000,US,100,US,L
155,2021,SE,FT,Data Science Engineer,159500,CAD,127221,CA,50,CA,L
156,2021,MI,FT,Data Scientist,160000,SGD,119059,SG,100,IL,M
157,2021,MI,FT,Applied Machine Learning Scientist,423000,USD,423000,US,50,US,L
158,2021,SE,FT,Data Analytics Manager,120000,USD,120000,US,100,US,M
159,2021,EN,FT,Machine Learning Engineer,125000,USD,125000,US,100,US,S
160,2021,EX,FT,Head of Data,230000,USD,230000,RU,50,RU,L
161,2021,EX,FT,Head of Data Science,85000,USD,85000,RU,0,RU,M
162,2021,MI,FT,Data Engineer,24000,EUR,28369,MT,50,MT,L
163,2021,EN,FT,Data Science Consultant,54000,EUR,63831,DE,50,DE,L
164,2021,EX,FT,Director of Data Science,110000,EUR,130026,DE,50,DE,M
165,2021,SE,FT,Data Specialist,165000,USD,165000,US,100,US,L
166,2021,EN,FT,Data Engineer,80000,USD,80000,US,100,US,L
167,2021,EX,FT,Director of Data Science,250000,USD,250000,US,0,US,L
168,2021,EN,FT,BI Data Analyst,55000,USD,55000,US,50,US,S
169,2021,MI,FT,Data Architect,150000,USD,150000,US,100,US,L
170,2021,MI,FT,Data Architect,170000,USD,170000,US,100,US,L
171,2021,MI,FT,Data Engineer,60000,GBP,82528,GB,100,GB,L
172,2021,EN,FT,Data Analyst,60000,USD,60000,US,100,US,S
173,2021,SE,FT,Principal Data Scientist,235000,USD,235000,US,100,US,L
174,2021,SE,FT,Research Scientist,51400,EUR,60757,PT,50,PT,L
175,2021,SE,FT,Data Engineering Manager,174000,USD,174000,US,100,US,L
176,2021,MI,FT,Data Scientist,58000,MXN,2859,MX,0,MX,S
177,2021,MI,FT,Data Scientist,30400000,CLP,40038,CL,100,CL,L
178,2021,EN,FT,Machine Learning Engineer,81000,USD,81000,US,50,US,S
179,2021,MI,FT,Data Scientist,420000,INR,5679,IN,100,US,S
180,2021,MI,FT,Big Data Engineer,1672000,INR,22611,IN,0,IN,L
181,2021,MI,FT,Data Scientist,76760,EUR,90734,DE,50,DE,L
182,2021,MI,FT,Data Engineer,22000,EUR,26005,RO,0,US,L
183,2021,SE,FT,Finance Data Analyst,45000,GBP,61896,GB,50,GB,L
184,2021,MI,FL,Machine Learning Scientist,12000,USD,12000,PK,50,PK,M
185,2021,MI,FT,Data Engineer,4000,USD,4000,IR,100,IR,M
186,2021,SE,FT,Data Analytics Engineer,50000,USD,50000,VN,100,GB,M
187,2021,EX,FT,Data Science Consultant,59000,EUR,69741,FR,100,ES,S
188,2021,SE,FT,Data Engineer,65000,EUR,76833,RO,50,GB,S
189,2021,MI,FT,Machine Learning Engineer,74000,USD,74000,JP,50,JP,S
190,2021,SE,FT,Data Science Manager,152000,USD,152000,US,100,FR,L
191,2021,EN,FT,Machine Learning Engineer,21844,USD,21844,CO,50,CO,M
192,2021,MI,FT,Big Data Engineer,18000,USD,18000,MD,0,MD,S
193,2021,SE,FT,Data Science Manager,174000,USD,174000,US,100,US,L
194,2021,SE,FT,Research Scientist,120500,CAD,96113,CA,50,CA,L
195,2021,MI,FT,Data Scientist,147000,USD,147000,US,50,US,L
196,2021,EN,FT,BI Data Analyst,9272,USD,9272,KE,100,KE,S
197,2021,SE,FT,Machine Learning Engineer,1799997,INR,24342,IN,100,IN,L
198,2021,SE,FT,Data Science Manager,4000000,INR,54094,IN,50,US,L
199,2021,EN,FT,Data Science Consultant,90000,USD,90000,US,100,US,S
200,2021,MI,FT,Data Scientist,52000,EUR,61467,DE,50,AT,M
201,2021,SE,FT,Machine Learning Infrastructure Engineer,195000,USD,195000,US,100,US,M
202,2021,MI,FT,Data Scientist,32000,EUR,37825,ES,100,ES,L
203,2021,SE,FT,Research Scientist,50000,USD,50000,FR,100,US,S
204,2021,MI,FT,Data Scientist,160000,USD,160000,US,100,US,L
205,2021,MI,FT,Data Scientist,69600,BRL,12901,BR,0,BR,S
206,2021,SE,FT,Machine Learning Engineer,200000,USD,200000,US,100,US,L
207,2021,SE,FT,Data Engineer,165000,USD,165000,US,0,US,M
208,2021,MI,FL,Data Engineer,20000,USD,20000,IT,0,US,L
209,2021,SE,FT,Data Analytics Manager,120000,USD,120000,US,0,US,L
210,2021,MI,FT,Machine Learning Engineer,21000,EUR,24823,SI,50,SI,L
211,2021,MI,FT,Research Scientist,48000,EUR,56738,FR,50,FR,S
212,2021,MI,FT,Data Engineer,48000,GBP,66022,HK,50,GB,S
213,2021,EN,FT,Big Data Engineer,435000,INR,5882,IN,0,CH,L
214,2021,EN,FT,Machine Learning Engineer,21000,EUR,24823,DE,50,DE,M
215,2021,SE,FT,Principal Data Engineer,185000,USD,185000,US,100,US,L
216,2021,EN,PT,Computer Vision Engineer,180000,DKK,28609,DK,50,DK,S
217,2021,MI,FT,Data Scientist,76760,EUR,90734,DE,50,DE,L
218,2021,MI,FT,Machine Learning Engineer,75000,EUR,88654,BE,100,BE,M
219,2021,SE,FT,Data Analytics Manager,140000,USD,140000,US,100,US,L
220,2021,MI,FT,Machine Learning Engineer,180000,PLN,46597,PL,100,PL,L
221,2021,MI,FT,Data Scientist,85000,GBP,116914,GB,50,GB,L
222,2021,MI,FT,Data Scientist,2500000,INR,33808,IN,0,IN,M
223,2021,MI,FT,Data Scientist,40900,GBP,56256,GB,50,GB,L
224,2021,SE,FT,Machine Learning Scientist,225000,USD,225000,US,100,CA,L
225,2021,EX,CT,Principal Data Scientist,416000,USD,416000,US,100,US,S
226,2021,SE,FT,Data Scientist,110000,CAD,87738,CA,100,CA,S
227,2021,MI,FT,Data Scientist,75000,EUR,88654,DE,50,DE,L
228,2021,SE,FT,Data Scientist,135000,USD,135000,US,0,US,L
229,2021,SE,FT,Data Analyst,90000,CAD,71786,CA,100,CA,M
230,2021,EN,FT,Big Data Engineer,1200000,INR,16228,IN,100,IN,L
231,2021,SE,FT,ML Engineer,256000,USD,256000,US,100,US,S
232,2021,SE,FT,Director of Data Engineering,200000,USD,200000,US,100,US,L
233,2021,SE,FT,Data Analyst,200000,USD,200000,US,100,US,L
234,2021,MI,FT,Data Architect,180000,USD,180000,US,100,US,L
235,2021,MI,FT,Head of Data Science,110000,USD,110000,US,0,US,S
236,2021,MI,FT,Research Scientist,80000,CAD,63810,CA,100,CA,M
237,2021,MI,FT,Data Scientist,39600,EUR,46809,ES,100,ES,M
238,2021,EN,FT,Data Scientist,4000,USD,4000,VN,0,VN,M
239,2021,EN,FT,Data Engineer,1600000,INR,21637,IN,50,IN,M
240,2021,SE,FT,Data Scientist,130000,CAD,103691,CA,100,CA,L
241,2021,MI,FT,Data Analyst,80000,USD,80000,US,100,US,L
242,2021,MI,FT,Data Engineer,110000,USD,110000,US,100,US,L
243,2021,SE,FT,Data Scientist,165000,USD,165000,US,100,US,L
244,2021,EN,FT,AI Scientist,1335000,INR,18053,IN,100,AS,S
245,2021,MI,FT,Data Engineer,52500,GBP,72212,GB,50,GB,L
246,2021,EN,FT,Data Scientist,31000,EUR,36643,FR,50,FR,L
247,2021,MI,FT,Data Engineer,108000,TRY,12103,TR,0,TR,M
248,2021,SE,FT,Data Engineer,70000,GBP,96282,GB,50,GB,L
249,2021,SE,FT,Principal Data Analyst,170000,USD,170000,US,100,US,M
250,2021,MI,FT,Data Scientist,115000,USD,115000,US,50,US,L
251,2021,EN,FT,Data Scientist,90000,USD,90000,US,100,US,S
252,2021,EX,FT,Principal Data Engineer,600000,USD,600000,US,100,US,L
253,2021,EN,FT,Data Scientist,2100000,INR,28399,IN,100,IN,M
254,2021,MI,FT,Data Analyst,93000,USD,93000,US,100,US,L
255,2021,SE,FT,Big Data Architect,125000,CAD,99703,CA,50,CA,M
256,2021,MI,FT,Data Engineer,200000,USD,200000,US,100,US,L
257,2021,SE,FT,Principal Data Scientist,147000,EUR,173762,DE,100,DE,M
258,2021,SE,FT,Machine Learning Engineer,185000,USD,185000,US,50,US,L
259,2021,EX,FT,Director of Data Science,120000,EUR,141846,DE,0,DE,L
260,2021,MI,FT,Data Scientist,130000,USD,130000,US,50,US,L
261,2021,SE,FT,Data Analyst,54000,EUR,63831,DE,50,DE,L
262,2021,MI,FT,Data Scientist,1250000,INR,16904,IN,100,IN,S
263,2021,SE,FT,Machine Learning Engineer,4900000,INR,66265,IN,0,IN,L
264,2021,MI,FT,Data Scientist,21600,EUR,25532,RS,100,DE,S
265,2021,SE,FT,Lead Data Engineer,160000,USD,160000,PR,50,US,S
266,2021,MI,FT,Data Engineer,93150,USD,93150,US,0,US,M
267,2021,MI,FT,Data Engineer,111775,USD,111775,US,0,US,M
268,2021,MI,FT,Data Engineer,250000,TRY,28016,TR,100,TR,M
269,2021,EN,FT,Data Engineer,55000,EUR,65013,DE,50,DE,M
270,2021,EN,FT,Data Engineer,72500,USD,72500,US,100,US,L
271,2021,SE,FT,Computer Vision Engineer,102000,BRL,18907,BR,0,BR,M
272,2021,EN,FT,Data Science Consultant,65000,EUR,76833,DE,0,DE,L
273,2021,EN,FT,Machine Learning Engineer,85000,USD,85000,NL,100,DE,S
274,2021,SE,FT,Data Scientist,65720,EUR,77684,FR,50,FR,M
275,2021,EN,FT,Data Scientist,100000,USD,100000,US,100,US,M
276,2021,EN,FT,Data Scientist,58000,USD,58000,US,50,US,L
277,2021,SE,FT,AI Scientist,55000,USD,55000,ES,100,ES,L
278,2021,SE,FT,Data Scientist,180000,TRY,20171,TR,50,TR,L
279,2021,EN,FT,Business Data Analyst,50000,EUR,59102,LU,100,LU,L
280,2021,MI,FT,Data Engineer,112000,USD,112000,US,100,US,L
281,2021,EN,FT,Research Scientist,100000,USD,100000,JE,0,CN,L
282,2021,MI,PT,Data Engineer,59000,EUR,69741,NL,100,NL,L
283,2021,SE,CT,Staff Data Scientist,105000,USD,105000,US,100,US,M
284,2021,MI,FT,Research Scientist,69999,USD,69999,CZ,50,CZ,L
285,2021,SE,FT,Data Science Manager,7000000,INR,94665,IN,50,IN,L
286,2021,SE,FT,Head of Data,87000,EUR,102839,SI,100,SI,L
287,2021,MI,FT,Data Scientist,109000,USD,109000,US,50,US,L
288,2021,MI,FT,Machine Learning Engineer,43200,EUR,51064,IT,50,IT,L
289,2022,SE,FT,Data Engineer,135000,USD,135000,US,100,US,M
290,2022,SE,FT,Data Analyst,155000,USD,155000,US,100,US,M
291,2022,SE,FT,Data Analyst,120600,USD,120600,US,100,US,M
292,2022,MI,FT,Data Scientist,130000,USD,130000,US,0,US,M
293,2022,MI,FT,Data Scientist,90000,USD,90000,US,0,US,M
294,2022,MI,FT,Data Engineer,170000,USD,170000,US,100,US,M
295,2022,MI,FT,Data Engineer,150000,USD,150000,US,100,US,M
296,2022,SE,FT,Data Analyst,102100,USD,102100,US,100,US,M
297,2022,SE,FT,Data Analyst,84900,USD,84900,US,100,US,M
298,2022,SE,FT,Data Scientist,136620,USD,136620,US,100,US,M
299,2022,SE,FT,Data Scientist,99360,USD,99360,US,100,US,M
300,2022,SE,FT,Data Scientist,90000,GBP,117789,GB,0,GB,M
301,2022,SE,FT,Data Scientist,80000,GBP,104702,GB,0,GB,M
302,2022,SE,FT,Data Scientist,146000,USD,146000,US,100,US,M
303,2022,SE,FT,Data Scientist,123000,USD,123000,US,100,US,M
304,2022,EN,FT,Data Engineer,40000,GBP,52351,GB,100,GB,M
305,2022,SE,FT,Data Analyst,99000,USD,99000,US,0,US,M
306,2022,SE,FT,Data Analyst,116000,USD,116000,US,0,US,M
307,2022,MI,FT,Data Analyst,106260,USD,106260,US,0,US,M
308,2022,MI,FT,Data Analyst,126500,USD,126500,US,0,US,M
309,2022,EX,FT,Data Engineer,242000,USD,242000,US,100,US,M
310,2022,EX,FT,Data Engineer,200000,USD,200000,US,100,US,M
311,2022,MI,FT,Data Scientist,50000,GBP,65438,GB,0,GB,M
312,2022,MI,FT,Data Scientist,30000,GBP,39263,GB,0,GB,M
313,2022,MI,FT,Data Engineer,60000,GBP,78526,GB,0,GB,M
314,2022,MI,FT,Data Engineer,40000,GBP,52351,GB,0,GB,M
315,2022,SE,FT,Data Scientist,165220,USD,165220,US,100,US,M
316,2022,EN,FT,Data Engineer,35000,GBP,45807,GB,100,GB,M
317,2022,SE,FT,Data Scientist,120160,USD,120160,US,100,US,M
318,2022,SE,FT,Data Analyst,90320,USD,90320,US,100,US,M
319,2022,SE,FT,Data Engineer,181940,USD,181940,US,0,US,M
320,2022,SE,FT,Data Engineer,132320,USD,132320,US,0,US,M
321,2022,SE,FT,Data Engineer,220110,USD,220110,US,0,US,M
322,2022,SE,FT,Data Engineer,160080,USD,160080,US,0,US,M
323,2022,SE,FT,Data Scientist,180000,USD,180000,US,0,US,L
324,2022,SE,FT,Data Scientist,120000,USD,120000,US,0,US,L
325,2022,SE,FT,Data Analyst,124190,USD,124190,US,100,US,M
326,2022,EX,FT,Data Analyst,130000,USD,130000,US,100,US,M
327,2022,EX,FT,Data Analyst,110000,USD,110000,US,100,US,M
328,2022,SE,FT,Data Analyst,170000,USD,170000,US,100,US,M
329,2022,MI,FT,Data Analyst,115500,USD,115500,US,100,US,M
330,2022,SE,FT,Data Analyst,112900,USD,112900,US,100,US,M
331,2022,SE,FT,Data Analyst,90320,USD,90320,US,100,US,M
332,2022,SE,FT,Data Analyst,112900,USD,112900,US,100,US,M
333,2022,SE,FT,Data Analyst,90320,USD,90320,US,100,US,M
334,2022,SE,FT,Data Engineer,165400,USD,165400,US,100,US,M
335,2022,SE,FT,Data Engineer,132320,USD,132320,US,100,US,M
336,2022,MI,FT,Data Analyst,167000,USD,167000,US,100,US,M
337,2022,SE,FT,Data Engineer,243900,USD,243900,US,100,US,M
338,2022,SE,FT,Data Analyst,136600,USD,136600,US,100,US,M
339,2022,SE,FT,Data Analyst,109280,USD,109280,US,100,US,M
340,2022,SE,FT,Data Engineer,128875,USD,128875,US,100,US,M
341,2022,SE,FT,Data Engineer,93700,USD,93700,US,100,US,M
342,2022,EX,FT,Head of Data Science,224000,USD,224000,US,100,US,M
343,2022,EX,FT,Head of Data Science,167875,USD,167875,US,100,US,M
344,2022,EX,FT,Analytics Engineer,175000,USD,175000,US,100,US,M
345,2022,SE,FT,Data Engineer,156600,USD,156600,US,100,US,M
346,2022,SE,FT,Data Engineer,108800,USD,108800,US,0,US,M
347,2022,SE,FT,Data Scientist,95550,USD,95550,US,0,US,M
348,2022,SE,FT,Data Engineer,113000,USD,113000,US,0,US,L
349,2022,SE,FT,Data Analyst,135000,USD,135000,US,100,US,M
350,2022,SE,FT,Data Science Manager,161342,USD,161342,US,100,US,M
351,2022,SE,FT,Data Science Manager,137141,USD,137141,US,100,US,M
352,2022,SE,FT,Data Scientist,167000,USD,167000,US,100,US,M
353,2022,SE,FT,Data Scientist,123000,USD,123000,US,100,US,M
354,2022,SE,FT,Data Engineer,60000,GBP,78526,GB,0,GB,M
355,2022,SE,FT,Data Engineer,50000,GBP,65438,GB,0,GB,M
356,2022,SE,FT,Data Scientist,150000,USD,150000,US,0,US,M
357,2022,SE,FT,Data Scientist,211500,USD,211500,US,100,US,M
358,2022,SE,FT,Data Architect,192400,USD,192400,CA,100,CA,M
359,2022,SE,FT,Data Architect,90700,USD,90700,CA,100,CA,M
360,2022,SE,FT,Data Analyst,130000,USD,130000,CA,100,CA,M
361,2022,SE,FT,Data Analyst,61300,USD,61300,CA,100,CA,M
362,2022,SE,FT,Data Analyst,130000,USD,130000,CA,100,CA,M
363,2022,SE,FT,Data Analyst,61300,USD,61300,CA,100,CA,M
364,2022,SE,FT,Data Engineer,160000,USD,160000,US,0,US,L
365,2022,SE,FT,Data Scientist,138600,USD,138600,US,100,US,M
366,2022,SE,FT,Data Engineer,136000,USD,136000,US,0,US,M
367,2022,MI,FT,Data Analyst,58000,USD,58000,US,0,US,S
368,2022,EX,FT,Analytics Engineer,135000,USD,135000,US,100,US,M
369,2022,SE,FT,Data Scientist,170000,USD,170000,US,100,US,M
370,2022,SE,FT,Data Scientist,123000,USD,123000,US,100,US,M
371,2022,SE,FT,Machine Learning Engineer,189650,USD,189650,US,0,US,M
372,2022,SE,FT,Machine Learning Engineer,164996,USD,164996,US,0,US,M
373,2022,MI,FT,ETL Developer,50000,EUR,54957,GR,0,GR,M
374,2022,MI,FT,ETL Developer,50000,EUR,54957,GR,0,GR,M
375,2022,EX,FT,Lead Data Engineer,150000,CAD,118187,CA,100,CA,S
376,2022,SE,FT,Data Analyst,132000,USD,132000,US,0,US,M
377,2022,SE,FT,Data Engineer,165400,USD,165400,US,100,US,M
378,2022,SE,FT,Data Architect,208775,USD,208775,US,100,US,M
379,2022,SE,FT,Data Architect,147800,USD,147800,US,100,US,M
380,2022,SE,FT,Data Engineer,136994,USD,136994,US,100,US,M
381,2022,SE,FT,Data Engineer,101570,USD,101570,US,100,US,M
382,2022,SE,FT,Data Analyst,128875,USD,128875,US,100,US,M
383,2022,SE,FT,Data Analyst,93700,USD,93700,US,100,US,M
384,2022,EX,FT,Head of Machine Learning,6000000,INR,79039,IN,50,IN,L
385,2022,SE,FT,Data Engineer,132320,USD,132320,US,100,US,M
386,2022,EN,FT,Machine Learning Engineer,28500,GBP,37300,GB,100,GB,L
387,2022,SE,FT,Data Analyst,164000,USD,164000,US,0,US,M
388,2022,SE,FT,Data Engineer,155000,USD,155000,US,100,US,M
389,2022,MI,FT,Machine Learning Engineer,95000,GBP,124333,GB,0,GB,M
390,2022,MI,FT,Machine Learning Engineer,75000,GBP,98158,GB,0,GB,M
391,2022,MI,FT,AI Scientist,120000,USD,120000,US,0,US,M
392,2022,SE,FT,Data Analyst,112900,USD,112900,US,100,US,M
393,2022,SE,FT,Data Analyst,90320,USD,90320,US,100,US,M
394,2022,SE,FT,Data Analytics Manager,145000,USD,145000,US,100,US,M
395,2022,SE,FT,Data Analytics Manager,105400,USD,105400,US,100,US,M
396,2022,MI,FT,Machine Learning Engineer,80000,EUR,87932,FR,100,DE,M
397,2022,MI,FT,Data Engineer,90000,GBP,117789,GB,0,GB,M
398,2022,SE,FT,Data Scientist,215300,USD,215300,US,100,US,L
399,2022,SE,FT,Data Scientist,158200,USD,158200,US,100,US,L
400,2022,SE,FT,Data Engineer,209100,USD,209100,US,100,US,L
401,2022,SE,FT,Data Engineer,154600,USD,154600,US,100,US,L
402,2022,SE,FT,Data Analyst,115934,USD,115934,US,0,US,M
403,2022,SE,FT,Data Analyst,81666,USD,81666,US,0,US,M
404,2022,SE,FT,Data Engineer,175000,USD,175000,US,100,US,M
405,2022,MI,FT,Data Engineer,75000,GBP,98158,GB,0,GB,M
406,2022,MI,FT,Data Analyst,58000,USD,58000,US,0,US,S
407,2022,SE,FT,Data Engineer,183600,USD,183600,US,100,US,L
408,2022,MI,FT,Data Analyst,40000,GBP,52351,GB,100,GB,M
409,2022,SE,FT,Data Scientist,180000,USD,180000,US,100,US,M
410,2022,MI,FT,Data Scientist,55000,GBP,71982,GB,0,GB,M
411,2022,MI,FT,Data Scientist,35000,GBP,45807,GB,0,GB,M
412,2022,MI,FT,Data Engineer,60000,EUR,65949,GR,100,GR,M
413,2022,MI,FT,Data Engineer,45000,EUR,49461,GR,100,GR,M
414,2022,MI,FT,Data Engineer,60000,GBP,78526,GB,100,GB,M
415,2022,MI,FT,Data Engineer,45000,GBP,58894,GB,100,GB,M
416,2022,SE,FT,Data Scientist,260000,USD,260000,US,100,US,M
417,2022,SE,FT,Data Science Engineer,60000,USD,60000,AR,100,MX,L
418,2022,MI,FT,Data Engineer,63900,USD,63900,US,0,US,M
419,2022,MI,FT,Machine Learning Scientist,160000,USD,160000,US,100,US,L
420,2022,MI,FT,Machine Learning Scientist,112300,USD,112300,US,100,US,L
421,2022,MI,FT,Data Science Manager,241000,USD,241000,US,100,US,M
422,2022,MI,FT,Data Science Manager,159000,USD,159000,US,100,US,M
423,2022,SE,FT,Data Scientist,180000,USD,180000,US,0,US,M
424,2022,SE,FT,Data Scientist,80000,USD,80000,US,0,US,M
425,2022,MI,FT,Data Engineer,82900,USD,82900,US,0,US,M
426,2022,SE,FT,Data Engineer,100800,USD,100800,US,100,US,L
427,2022,MI,FT,Data Engineer,45000,EUR,49461,ES,100,ES,M
428,2022,SE,FT,Data Scientist,140400,USD,140400,US,0,US,L
429,2022,MI,FT,Data Analyst,30000,GBP,39263,GB,100,GB,M
430,2022,MI,FT,Data Analyst,40000,EUR,43966,ES,100,ES,M
431,2022,MI,FT,Data Analyst,30000,EUR,32974,ES,100,ES,M
432,2022,MI,FT,Data Engineer,80000,EUR,87932,ES,100,ES,M
433,2022,MI,FT,Data Engineer,70000,EUR,76940,ES,100,ES,M
434,2022,MI,FT,Data Engineer,80000,GBP,104702,GB,100,GB,M
435,2022,MI,FT,Data Engineer,70000,GBP,91614,GB,100,GB,M
436,2022,MI,FT,Data Engineer,60000,EUR,65949,ES,100,ES,M
437,2022,MI,FT,Data Engineer,80000,EUR,87932,GR,100,GR,M
438,2022,SE,FT,Machine Learning Engineer,189650,USD,189650,US,0,US,M
439,2022,SE,FT,Machine Learning Engineer,164996,USD,164996,US,0,US,M
440,2022,MI,FT,Data Analyst,40000,EUR,43966,GR,100,GR,M
441,2022,MI,FT,Data Analyst,30000,EUR,32974,GR,100,GR,M
442,2022,MI,FT,Data Engineer,75000,GBP,98158,GB,100,GB,M
443,2022,MI,FT,Data Engineer,60000,GBP,78526,GB,100,GB,M
444,2022,SE,FT,Data Scientist,215300,USD,215300,US,0,US,L
445,2022,MI,FT,Data Engineer,70000,EUR,76940,GR,100,GR,M
446,2022,SE,FT,Data Engineer,209100,USD,209100,US,100,US,L
447,2022,SE,FT,Data Engineer,154600,USD,154600,US,100,US,L
448,2022,SE,FT,Data Engineer,180000,USD,180000,US,100,US,M
449,2022,EN,FT,ML Engineer,20000,EUR,21983,PT,100,PT,L
450,2022,SE,FT,Data Engineer,80000,USD,80000,US,100,US,M
451,2022,MI,FT,Machine Learning Developer,100000,CAD,78791,CA,100,CA,M
452,2022,EX,FT,Director of Data Science,250000,CAD,196979,CA,50,CA,L
453,2022,MI,FT,Machine Learning Engineer,120000,USD,120000,US,100,US,S
454,2022,EN,FT,Computer Vision Engineer,125000,USD,125000,US,0,US,M
455,2022,MI,FT,NLP Engineer,240000,CNY,37236,US,50,US,L
456,2022,SE,FT,Data Engineer,105000,USD,105000,US,100,US,M
457,2022,SE,FT,Lead Machine Learning Engineer,80000,EUR,87932,DE,0,DE,M
458,2022,MI,FT,Business Data Analyst,1400000,INR,18442,IN,100,IN,M
459,2022,MI,FT,Data Scientist,2400000,INR,31615,IN,100,IN,L
460,2022,MI,FT,Machine Learning Infrastructure Engineer,53000,EUR,58255,PT,50,PT,L
461,2022,EN,FT,Financial Data Analyst,100000,USD,100000,US,50,US,L
462,2022,MI,PT,Data Engineer,50000,EUR,54957,DE,50,DE,L
463,2022,EN,FT,Data Scientist,1400000,INR,18442,IN,100,IN,M
464,2022,SE,FT,Principal Data Scientist,148000,EUR,162674,DE,100,DE,M
465,2022,EN,FT,Data Engineer,120000,USD,120000,US,100,US,M
466,2022,SE,FT,Research Scientist,144000,USD,144000,US,50,US,L
467,2022,SE,FT,Data Scientist,104890,USD,104890,US,100,US,M
468,2022,SE,FT,Data Engineer,100000,USD,100000,US,100,US,M
469,2022,SE,FT,Data Scientist,140000,USD,140000,US,100,US,M
470,2022,MI,FT,Data Analyst,135000,USD,135000,US,100,US,M
471,2022,MI,FT,Data Analyst,50000,USD,50000,US,100,US,M
472,2022,SE,FT,Data Scientist,220000,USD,220000,US,100,US,M
473,2022,SE,FT,Data Scientist,140000,USD,140000,US,100,US,M
474,2022,MI,FT,Data Scientist,140000,GBP,183228,GB,0,GB,M
475,2022,MI,FT,Data Scientist,70000,GBP,91614,GB,0,GB,M
476,2022,SE,FT,Data Scientist,185100,USD,185100,US,100,US,M
477,2022,SE,FT,Machine Learning Engineer,220000,USD,220000,US,100,US,M
478,2022,MI,FT,Data Scientist,200000,USD,200000,US,100,US,M
479,2022,MI,FT,Data Scientist,120000,USD,120000,US,100,US,M
480,2022,SE,FT,Machine Learning Engineer,120000,USD,120000,AE,100,AE,S
481,2022,SE,FT,Machine Learning Engineer,65000,USD,65000,AE,100,AE,S
482,2022,EX,FT,Data Engineer,324000,USD,324000,US,100,US,M
483,2022,EX,FT,Data Engineer,216000,USD,216000,US,100,US,M
484,2022,SE,FT,Data Engineer,210000,USD,210000,US,100,US,M
485,2022,SE,FT,Machine Learning Engineer,120000,USD,120000,US,100,US,M
486,2022,SE,FT,Data Scientist,230000,USD,230000,US,100,US,M
487,2022,EN,PT,Data Scientist,100000,USD,100000,DZ,50,DZ,M
488,2022,MI,FL,Data Scientist,100000,USD,100000,CA,100,US,M
489,2022,EN,CT,Applied Machine Learning Scientist,29000,EUR,31875,TN,100,CZ,M
490,2022,SE,FT,Head of Data,200000,USD,200000,MY,100,US,M
491,2022,MI,FT,Principal Data Analyst,75000,USD,75000,CA,100,CA,S
492,2022,MI,FT,Data Scientist,150000,PLN,35590,PL,100,PL,L
493,2022,SE,FT,Machine Learning Developer,100000,CAD,78791,CA,100,CA,M
494,2022,SE,FT,Data Scientist,100000,USD,100000,BR,100,US,M
495,2022,MI,FT,Machine Learning Scientist,153000,USD,153000,US,50,US,M
496,2022,EN,FT,Data Engineer,52800,EUR,58035,PK,100,DE,M
497,2022,SE,FT,Data Scientist,165000,USD,165000,US,100,US,M
498,2022,SE,FT,Research Scientist,85000,EUR,93427,FR,50,FR,L
499,2022,EN,FT,Data Scientist,66500,CAD,52396,CA,100,CA,L
500,2022,SE,FT,Machine Learning Engineer,57000,EUR,62651,NL,100,NL,L
501,2022,MI,FT,Head of Data,30000,EUR,32974,EE,100,EE,S
502,2022,EN,FT,Data Scientist,40000,USD,40000,JP,100,MY,L
503,2022,MI,FT,Machine Learning Engineer,121000,AUD,87425,AU,100,AU,L
504,2022,SE,FT,Data Engineer,115000,USD,115000,US,100,US,M
505,2022,EN,FT,Data Scientist,120000,AUD,86703,AU,50,AU,M
506,2022,MI,FT,Applied Machine Learning Scientist,75000,USD,75000,BO,100,US,L
507,2022,MI,FT,Research Scientist,59000,EUR,64849,AT,0,AT,L
508,2022,EN,FT,Research Scientist,120000,USD,120000,US,100,US,L
509,2022,MI,FT,Applied Data Scientist,157000,USD,157000,US,100,US,L
510,2022,EN,FT,Computer Vision Software Engineer,150000,USD,150000,AU,100,AU,S
511,2022,MI,FT,Business Data Analyst,90000,CAD,70912,CA,50,CA,L
512,2022,EN,FT,Data Engineer,65000,USD,65000,US,100,US,S
513,2022,SE,FT,Machine Learning Engineer,65000,EUR,71444,IE,100,IE,S
514,2022,EN,FT,Data Analytics Engineer,20000,USD,20000,PK,0,PK,M
515,2022,MI,FT,Data Scientist,48000,USD,48000,RU,100,US,S
516,2022,SE,FT,Data Science Manager,152500,USD,152500,US,100,US,M
517,2022,MI,FT,Data Engineer,62000,EUR,68147,FR,100,FR,M
518,2022,MI,FT,Data Scientist,115000,CHF,122346,CH,0,CH,L
519,2022,SE,FT,Applied Data Scientist,380000,USD,380000,US,100,US,L
520,2022,MI,FT,Data Scientist,88000,CAD,69336,CA,100,CA,M
521,2022,EN,FT,Computer Vision Engineer,10000,USD,10000,PT,100,LU,M
522,2022,MI,FT,Data Analyst,20000,USD,20000,GR,100,GR,S
523,2022,SE,FT,Data Analytics Lead,405000,USD,405000,US,100,US,L
524,2022,MI,FT,Data Scientist,135000,USD,135000,US,100,US,L
525,2022,SE,FT,Applied Data Scientist,177000,USD,177000,US,100,US,L
526,2022,MI,FT,Data Scientist,78000,USD,78000,US,100,US,M
527,2022,SE,FT,Data Analyst,135000,USD,135000,US,100,US,M
528,2022,SE,FT,Data Analyst,100000,USD,100000,US,100,US,M
529,2022,SE,FT,Data Analyst,90320,USD,90320,US,100,US,M
530,2022,MI,FT,Data Analyst,85000,USD,85000,CA,0,CA,M
531,2022,MI,FT,Data Analyst,75000,USD,75000,CA,0,CA,M
532,2022,SE,FT,Machine Learning Engineer,214000,USD,214000,US,100,US,M
533,2022,SE,FT,Machine Learning Engineer,192600,USD,192600,US,100,US,M
534,2022,SE,FT,Data Architect,266400,USD,266400,US,100,US,M
535,2022,SE,FT,Data Architect,213120,USD,213120,US,100,US,M
536,2022,SE,FT,Data Analyst,112900,USD,112900,US,100,US,M
537,2022,SE,FT,Data Engineer,155000,USD,155000,US,100,US,M
538,2022,MI,FT,Data Scientist,141300,USD,141300,US,0,US,M
539,2022,MI,FT,Data Scientist,102100,USD,102100,US,0,US,M
540,2022,SE,FT,Data Analyst,115934,USD,115934,US,100,US,M
541,2022,SE,FT,Data Analyst,81666,USD,81666,US,100,US,M
542,2022,MI,FT,Data Engineer,206699,USD,206699,US,0,US,M
543,2022,MI,FT,Data Engineer,99100,USD,99100,US,0,US,M
544,2022,SE,FT,Data Engineer,130000,USD,130000,US,100,US,M
545,2022,SE,FT,Data Engineer,115000,USD,115000,US,100,US,M
546,2022,SE,FT,Data Engineer,110500,USD,110500,US,100,US,M
547,2022,SE,FT,Data Engineer,130000,USD,130000,US,100,US,M
548,2022,SE,FT,Data Analyst,99050,USD,99050,US,100,US,M
549,2022,SE,FT,Data Engineer,160000,USD,160000,US,100,US,M
550,2022,SE,FT,Data Scientist,205300,USD,205300,US,0,US,L
551,2022,SE,FT,Data Scientist,140400,USD,140400,US,0,US,L
552,2022,SE,FT,Data Scientist,176000,USD,176000,US,100,US,M
553,2022,SE,FT,Data Scientist,144000,USD,144000,US,100,US,M
554,2022,SE,FT,Data Engineer,200100,USD,200100,US,100,US,M
555,2022,SE,FT,Data Engineer,160000,USD,160000,US,100,US,M
556,2022,SE,FT,Data Engineer,145000,USD,145000,US,100,US,M
557,2022,SE,FT,Data Engineer,70500,USD,70500,US,0,US,M
558,2022,SE,FT,Data Scientist,205300,USD,205300,US,0,US,M
559,2022,SE,FT,Data Scientist,140400,USD,140400,US,0,US,M
560,2022,SE,FT,Analytics Engineer,205300,USD,205300,US,0,US,M
561,2022,SE,FT,Analytics Engineer,184700,USD,184700,US,0,US,M
562,2022,SE,FT,Data Engineer,175100,USD,175100,US,100,US,M
563,2022,SE,FT,Data Engineer,140250,USD,140250,US,100,US,M
564,2022,SE,FT,Data Analyst,116150,USD,116150,US,100,US,M
565,2022,SE,FT,Data Engineer,54000,USD,54000,US,0,US,M
566,2022,SE,FT,Data Analyst,170000,USD,170000,US,100,US,M
567,2022,MI,FT,Data Analyst,50000,GBP,65438,GB,0,GB,M
568,2022,SE,FT,Data Analyst,80000,USD,80000,US,100,US,M
569,2022,SE,FT,Data Scientist,140000,USD,140000,US,100,US,M
570,2022,SE,FT,Data Scientist,210000,USD,210000,US,100,US,M
571,2022,SE,FT,Data Scientist,140000,USD,140000,US,100,US,M
572,2022,SE,FT,Data Analyst,100000,USD,100000,US,100,US,M
573,2022,SE,FT,Data Analyst,69000,USD,69000,US,100,US,M
574,2022,SE,FT,Data Scientist,210000,USD,210000,US,100,US,M
575,2022,SE,FT,Data Scientist,140000,USD,140000,US,100,US,M
576,2022,SE,FT,Data Scientist,210000,USD,210000,US,100,US,M
577,2022,SE,FT,Data Analyst,150075,USD,150075,US,100,US,M
578,2022,SE,FT,Data Engineer,100000,USD,100000,US,100,US,M
579,2022,SE,FT,Data Engineer,25000,USD,25000,US,100,US,M
580,2022,SE,FT,Data Analyst,126500,USD,126500,US,100,US,M
581,2022,SE,FT,Data Analyst,106260,USD,106260,US,100,US,M
582,2022,SE,FT,Data Engineer,220110,USD,220110,US,100,US,M
583,2022,SE,FT,Data Engineer,160080,USD,160080,US,100,US,M
584,2022,SE,FT,Data Analyst,105000,USD,105000,US,100,US,M
585,2022,SE,FT,Data Analyst,110925,USD,110925,US,100,US,M
586,2022,MI,FT,Data Analyst,35000,GBP,45807,GB,0,GB,M
587,2022,SE,FT,Data Scientist,140000,USD,140000,US,100,US,M
588,2022,SE,FT,Data Analyst,99000,USD,99000,US,0,US,M
589,2022,SE,FT,Data Analyst,60000,USD,60000,US,100,US,M
590,2022,SE,FT,Data Architect,192564,USD,192564,US,100,US,M
591,2022,SE,FT,Data Architect,144854,USD,144854,US,100,US,M
592,2022,SE,FT,Data Scientist,230000,USD,230000,US,100,US,M
593,2022,SE,FT,Data Scientist,150000,USD,150000,US,100,US,M
594,2022,SE,FT,Data Analytics Manager,150260,USD,150260,US,100,US,M
595,2022,SE,FT,Data Analytics Manager,109280,USD,109280,US,100,US,M
596,2022,SE,FT,Data Scientist,210000,USD,210000,US,100,US,M
597,2022,SE,FT,Data Analyst,170000,USD,170000,US,100,US,M
598,2022,MI,FT,Data Scientist,160000,USD,160000,US,100,US,M
599,2022,MI,FT,Data Scientist,130000,USD,130000,US,100,US,M
600,2022,EN,FT,Data Analyst,67000,USD,67000,CA,0,CA,M
601,2022,EN,FT,Data Analyst,52000,USD,52000,CA,0,CA,M
602,2022,SE,FT,Data Engineer,154000,USD,154000,US,100,US,M
603,2022,SE,FT,Data Engineer,126000,USD,126000,US,100,US,M
604,2022,SE,FT,Data Analyst,129000,USD,129000,US,0,US,M
605,2022,SE,FT,Data Analyst,150000,USD,150000,US,100,US,M
606,2022,MI,FT,AI Scientist,200000,USD,200000,IN,100,US,L
1 work_year experience_level employment_type job_title salary salary_currency salary_in_usd employee_residence remote_ratio company_location company_size
2 0 2020 MI FT Data Scientist 70000 EUR 79833 DE 0 DE L
3 1 2020 SE FT Machine Learning Scientist 260000 USD 260000 JP 0 JP S
4 2 2020 SE FT Big Data Engineer 85000 GBP 109024 GB 50 GB M
5 3 2020 MI FT Product Data Analyst 20000 USD 20000 HN 0 HN S
6 4 2020 SE FT Machine Learning Engineer 150000 USD 150000 US 50 US L
7 5 2020 EN FT Data Analyst 72000 USD 72000 US 100 US L
8 6 2020 SE FT Lead Data Scientist 190000 USD 190000 US 100 US S
9 7 2020 MI FT Data Scientist 11000000 HUF 35735 HU 50 HU L
10 8 2020 MI FT Business Data Analyst 135000 USD 135000 US 100 US L
11 9 2020 SE FT Lead Data Engineer 125000 USD 125000 NZ 50 NZ S
12 10 2020 EN FT Data Scientist 45000 EUR 51321 FR 0 FR S
13 11 2020 MI FT Data Scientist 3000000 INR 40481 IN 0 IN L
14 12 2020 EN FT Data Scientist 35000 EUR 39916 FR 0 FR M
15 13 2020 MI FT Lead Data Analyst 87000 USD 87000 US 100 US L
16 14 2020 MI FT Data Analyst 85000 USD 85000 US 100 US L
17 15 2020 MI FT Data Analyst 8000 USD 8000 PK 50 PK L
18 16 2020 EN FT Data Engineer 4450000 JPY 41689 JP 100 JP S
19 17 2020 SE FT Big Data Engineer 100000 EUR 114047 PL 100 GB S
20 18 2020 EN FT Data Science Consultant 423000 INR 5707 IN 50 IN M
21 19 2020 MI FT Lead Data Engineer 56000 USD 56000 PT 100 US M
22 20 2020 MI FT Machine Learning Engineer 299000 CNY 43331 CN 0 CN M
23 21 2020 MI FT Product Data Analyst 450000 INR 6072 IN 100 IN L
24 22 2020 SE FT Data Engineer 42000 EUR 47899 GR 50 GR L
25 23 2020 MI FT BI Data Analyst 98000 USD 98000 US 0 US M
26 24 2020 MI FT Lead Data Scientist 115000 USD 115000 AE 0 AE L
27 25 2020 EX FT Director of Data Science 325000 USD 325000 US 100 US L
28 26 2020 EN FT Research Scientist 42000 USD 42000 NL 50 NL L
29 27 2020 SE FT Data Engineer 720000 MXN 33511 MX 0 MX S
30 28 2020 EN CT Business Data Analyst 100000 USD 100000 US 100 US L
31 29 2020 SE FT Machine Learning Manager 157000 CAD 117104 CA 50 CA L
32 30 2020 MI FT Data Engineering Manager 51999 EUR 59303 DE 100 DE S
33 31 2020 EN FT Big Data Engineer 70000 USD 70000 US 100 US L
34 32 2020 SE FT Data Scientist 60000 EUR 68428 GR 100 US L
35 33 2020 MI FT Research Scientist 450000 USD 450000 US 0 US M
36 34 2020 MI FT Data Analyst 41000 EUR 46759 FR 50 FR L
37 35 2020 MI FT Data Engineer 65000 EUR 74130 AT 50 AT L
38 36 2020 MI FT Data Science Consultant 103000 USD 103000 US 100 US L
39 37 2020 EN FT Machine Learning Engineer 250000 USD 250000 US 50 US L
40 38 2020 EN FT Data Analyst 10000 USD 10000 NG 100 NG S
41 39 2020 EN FT Machine Learning Engineer 138000 USD 138000 US 100 US S
42 40 2020 MI FT Data Scientist 45760 USD 45760 PH 100 US S
43 41 2020 EX FT Data Engineering Manager 70000 EUR 79833 ES 50 ES L
44 42 2020 MI FT Machine Learning Infrastructure Engineer 44000 EUR 50180 PT 0 PT M
45 43 2020 MI FT Data Engineer 106000 USD 106000 US 100 US L
46 44 2020 MI FT Data Engineer 88000 GBP 112872 GB 50 GB L
47 45 2020 EN PT ML Engineer 14000 EUR 15966 DE 100 DE S
48 46 2020 MI FT Data Scientist 60000 GBP 76958 GB 100 GB S
49 47 2020 SE FT Data Engineer 188000 USD 188000 US 100 US L
50 48 2020 MI FT Data Scientist 105000 USD 105000 US 100 US L
51 49 2020 MI FT Data Engineer 61500 EUR 70139 FR 50 FR L
52 50 2020 EN FT Data Analyst 450000 INR 6072 IN 0 IN S
53 51 2020 EN FT Data Analyst 91000 USD 91000 US 100 US L
54 52 2020 EN FT AI Scientist 300000 DKK 45896 DK 50 DK S
55 53 2020 EN FT Data Engineer 48000 EUR 54742 PK 100 DE L
56 54 2020 SE FL Computer Vision Engineer 60000 USD 60000 RU 100 US S
57 55 2020 SE FT Principal Data Scientist 130000 EUR 148261 DE 100 DE M
58 56 2020 MI FT Data Scientist 34000 EUR 38776 ES 100 ES M
59 57 2020 MI FT Data Scientist 118000 USD 118000 US 100 US M
60 58 2020 SE FT Data Scientist 120000 USD 120000 US 50 US L
61 59 2020 MI FT Data Scientist 138350 USD 138350 US 100 US M
62 60 2020 MI FT Data Engineer 110000 USD 110000 US 100 US L
63 61 2020 MI FT Data Engineer 130800 USD 130800 ES 100 US M
64 62 2020 EN PT Data Scientist 19000 EUR 21669 IT 50 IT S
65 63 2020 SE FT Data Scientist 412000 USD 412000 US 100 US L
66 64 2020 SE FT Machine Learning Engineer 40000 EUR 45618 HR 100 HR S
67 65 2020 EN FT Data Scientist 55000 EUR 62726 DE 50 DE S
68 66 2020 EN FT Data Scientist 43200 EUR 49268 DE 0 DE S
69 67 2020 SE FT Data Science Manager 190200 USD 190200 US 100 US M
70 68 2020 EN FT Data Scientist 105000 USD 105000 US 100 US S
71 69 2020 SE FT Data Scientist 80000 EUR 91237 AT 0 AT S
72 70 2020 MI FT Data Scientist 55000 EUR 62726 FR 50 LU S
73 71 2020 MI FT Data Scientist 37000 EUR 42197 FR 50 FR S
74 72 2021 EN FT Research Scientist 60000 GBP 82528 GB 50 GB L
75 73 2021 EX FT BI Data Analyst 150000 USD 150000 IN 100 US L
76 74 2021 EX FT Head of Data 235000 USD 235000 US 100 US L
77 75 2021 SE FT Data Scientist 45000 EUR 53192 FR 50 FR L
78 76 2021 MI FT BI Data Analyst 100000 USD 100000 US 100 US M
79 77 2021 MI PT 3D Computer Vision Researcher 400000 INR 5409 IN 50 IN M
80 78 2021 MI CT ML Engineer 270000 USD 270000 US 100 US L
81 79 2021 EN FT Data Analyst 80000 USD 80000 US 100 US M
82 80 2021 SE FT Data Analytics Engineer 67000 EUR 79197 DE 100 DE L
83 81 2021 MI FT Data Engineer 140000 USD 140000 US 100 US L
84 82 2021 MI FT Applied Data Scientist 68000 CAD 54238 GB 50 CA L
85 83 2021 MI FT Machine Learning Engineer 40000 EUR 47282 ES 100 ES S
86 84 2021 EX FT Director of Data Science 130000 EUR 153667 IT 100 PL L
87 85 2021 MI FT Data Engineer 110000 PLN 28476 PL 100 PL L
88 86 2021 EN FT Data Analyst 50000 EUR 59102 FR 50 FR M
89 87 2021 MI FT Data Analytics Engineer 110000 USD 110000 US 100 US L
90 88 2021 SE FT Lead Data Analyst 170000 USD 170000 US 100 US L
91 89 2021 SE FT Data Analyst 80000 USD 80000 BG 100 US S
92 90 2021 SE FT Marketing Data Analyst 75000 EUR 88654 GR 100 DK L
93 91 2021 EN FT Data Science Consultant 65000 EUR 76833 DE 100 DE S
94 92 2021 MI FT Lead Data Analyst 1450000 INR 19609 IN 100 IN L
95 93 2021 SE FT Lead Data Engineer 276000 USD 276000 US 0 US L
96 94 2021 EN FT Data Scientist 2200000 INR 29751 IN 50 IN L
97 95 2021 MI FT Cloud Data Engineer 120000 SGD 89294 SG 50 SG L
98 96 2021 EN PT AI Scientist 12000 USD 12000 BR 100 US S
99 97 2021 MI FT Financial Data Analyst 450000 USD 450000 US 100 US L
100 98 2021 EN FT Computer Vision Software Engineer 70000 USD 70000 US 100 US M
101 99 2021 MI FT Computer Vision Software Engineer 81000 EUR 95746 DE 100 US S
102 100 2021 MI FT Data Analyst 75000 USD 75000 US 0 US L
103 101 2021 SE FT Data Engineer 150000 USD 150000 US 100 US L
104 102 2021 MI FT BI Data Analyst 11000000 HUF 36259 HU 50 US L
105 103 2021 MI FT Data Analyst 62000 USD 62000 US 0 US L
106 104 2021 MI FT Data Scientist 73000 USD 73000 US 0 US L
107 105 2021 MI FT Data Analyst 37456 GBP 51519 GB 50 GB L
108 106 2021 MI FT Research Scientist 235000 CAD 187442 CA 100 CA L
109 107 2021 SE FT Data Engineer 115000 USD 115000 US 100 US S
110 108 2021 SE FT Data Engineer 150000 USD 150000 US 100 US M
111 109 2021 EN FT Data Engineer 2250000 INR 30428 IN 100 IN L
112 110 2021 SE FT Machine Learning Engineer 80000 EUR 94564 DE 50 DE L
113 111 2021 SE FT Director of Data Engineering 82500 GBP 113476 GB 100 GB M
114 112 2021 SE FT Lead Data Engineer 75000 GBP 103160 GB 100 GB S
115 113 2021 EN PT AI Scientist 12000 USD 12000 PK 100 US M
116 114 2021 MI FT Data Engineer 38400 EUR 45391 NL 100 NL L
117 115 2021 EN FT Machine Learning Scientist 225000 USD 225000 US 100 US L
118 116 2021 MI FT Data Scientist 50000 USD 50000 NG 100 NG L
119 117 2021 MI FT Data Science Engineer 34000 EUR 40189 GR 100 GR M
120 118 2021 EN FT Data Analyst 90000 USD 90000 US 100 US S
121 119 2021 MI FT Data Engineer 200000 USD 200000 US 100 US L
122 120 2021 MI FT Big Data Engineer 60000 USD 60000 ES 50 RO M
123 121 2021 SE FT Principal Data Engineer 200000 USD 200000 US 100 US M
124 122 2021 EN FT Data Analyst 50000 USD 50000 US 100 US M
125 123 2021 EN FT Applied Data Scientist 80000 GBP 110037 GB 0 GB L
126 124 2021 EN PT Data Analyst 8760 EUR 10354 ES 50 ES M
127 125 2021 MI FT Principal Data Scientist 151000 USD 151000 US 100 US L
128 126 2021 SE FT Machine Learning Scientist 120000 USD 120000 US 50 US S
129 127 2021 MI FT Data Scientist 700000 INR 9466 IN 0 IN S
130 128 2021 EN FT Machine Learning Engineer 20000 USD 20000 IN 100 IN S
131 129 2021 SE FT Lead Data Scientist 3000000 INR 40570 IN 50 IN L
132 130 2021 EN FT Machine Learning Developer 100000 USD 100000 IQ 50 IQ S
133 131 2021 EN FT Data Scientist 42000 EUR 49646 FR 50 FR M
134 132 2021 MI FT Applied Machine Learning Scientist 38400 USD 38400 VN 100 US M
135 133 2021 SE FT Computer Vision Engineer 24000 USD 24000 BR 100 BR M
136 134 2021 EN FT Data Scientist 100000 USD 100000 US 0 US S
137 135 2021 MI FT Data Analyst 90000 USD 90000 US 100 US M
138 136 2021 MI FT ML Engineer 7000000 JPY 63711 JP 50 JP S
139 137 2021 MI FT ML Engineer 8500000 JPY 77364 JP 50 JP S
140 138 2021 SE FT Principal Data Scientist 220000 USD 220000 US 0 US L
141 139 2021 EN FT Data Scientist 80000 USD 80000 US 100 US M
142 140 2021 MI FT Data Analyst 135000 USD 135000 US 100 US L
143 141 2021 SE FT Data Science Manager 240000 USD 240000 US 0 US L
144 142 2021 SE FT Data Engineering Manager 150000 USD 150000 US 0 US L
145 143 2021 MI FT Data Scientist 82500 USD 82500 US 100 US S
146 144 2021 MI FT Data Engineer 100000 USD 100000 US 100 US L
147 145 2021 SE FT Machine Learning Engineer 70000 EUR 82744 BE 50 BE M
148 146 2021 MI FT Research Scientist 53000 EUR 62649 FR 50 FR M
149 147 2021 MI FT Data Engineer 90000 USD 90000 US 100 US L
150 148 2021 SE FT Data Engineering Manager 153000 USD 153000 US 100 US L
151 149 2021 SE FT Cloud Data Engineer 160000 USD 160000 BR 100 US S
152 150 2021 SE FT Director of Data Science 168000 USD 168000 JP 0 JP S
153 151 2021 MI FT Data Scientist 150000 USD 150000 US 100 US M
154 152 2021 MI FT Data Scientist 95000 CAD 75774 CA 100 CA L
155 153 2021 EN FT Data Scientist 13400 USD 13400 UA 100 UA L
156 154 2021 SE FT Data Science Manager 144000 USD 144000 US 100 US L
157 155 2021 SE FT Data Science Engineer 159500 CAD 127221 CA 50 CA L
158 156 2021 MI FT Data Scientist 160000 SGD 119059 SG 100 IL M
159 157 2021 MI FT Applied Machine Learning Scientist 423000 USD 423000 US 50 US L
160 158 2021 SE FT Data Analytics Manager 120000 USD 120000 US 100 US M
161 159 2021 EN FT Machine Learning Engineer 125000 USD 125000 US 100 US S
162 160 2021 EX FT Head of Data 230000 USD 230000 RU 50 RU L
163 161 2021 EX FT Head of Data Science 85000 USD 85000 RU 0 RU M
164 162 2021 MI FT Data Engineer 24000 EUR 28369 MT 50 MT L
165 163 2021 EN FT Data Science Consultant 54000 EUR 63831 DE 50 DE L
166 164 2021 EX FT Director of Data Science 110000 EUR 130026 DE 50 DE M
167 165 2021 SE FT Data Specialist 165000 USD 165000 US 100 US L
168 166 2021 EN FT Data Engineer 80000 USD 80000 US 100 US L
169 167 2021 EX FT Director of Data Science 250000 USD 250000 US 0 US L
170 168 2021 EN FT BI Data Analyst 55000 USD 55000 US 50 US S
171 169 2021 MI FT Data Architect 150000 USD 150000 US 100 US L
172 170 2021 MI FT Data Architect 170000 USD 170000 US 100 US L
173 171 2021 MI FT Data Engineer 60000 GBP 82528 GB 100 GB L
174 172 2021 EN FT Data Analyst 60000 USD 60000 US 100 US S
175 173 2021 SE FT Principal Data Scientist 235000 USD 235000 US 100 US L
176 174 2021 SE FT Research Scientist 51400 EUR 60757 PT 50 PT L
177 175 2021 SE FT Data Engineering Manager 174000 USD 174000 US 100 US L
178 176 2021 MI FT Data Scientist 58000 MXN 2859 MX 0 MX S
179 177 2021 MI FT Data Scientist 30400000 CLP 40038 CL 100 CL L
180 178 2021 EN FT Machine Learning Engineer 81000 USD 81000 US 50 US S
181 179 2021 MI FT Data Scientist 420000 INR 5679 IN 100 US S
182 180 2021 MI FT Big Data Engineer 1672000 INR 22611 IN 0 IN L
183 181 2021 MI FT Data Scientist 76760 EUR 90734 DE 50 DE L
184 182 2021 MI FT Data Engineer 22000 EUR 26005 RO 0 US L
185 183 2021 SE FT Finance Data Analyst 45000 GBP 61896 GB 50 GB L
186 184 2021 MI FL Machine Learning Scientist 12000 USD 12000 PK 50 PK M
187 185 2021 MI FT Data Engineer 4000 USD 4000 IR 100 IR M
188 186 2021 SE FT Data Analytics Engineer 50000 USD 50000 VN 100 GB M
189 187 2021 EX FT Data Science Consultant 59000 EUR 69741 FR 100 ES S
190 188 2021 SE FT Data Engineer 65000 EUR 76833 RO 50 GB S
191 189 2021 MI FT Machine Learning Engineer 74000 USD 74000 JP 50 JP S
192 190 2021 SE FT Data Science Manager 152000 USD 152000 US 100 FR L
193 191 2021 EN FT Machine Learning Engineer 21844 USD 21844 CO 50 CO M
194 192 2021 MI FT Big Data Engineer 18000 USD 18000 MD 0 MD S
195 193 2021 SE FT Data Science Manager 174000 USD 174000 US 100 US L
196 194 2021 SE FT Research Scientist 120500 CAD 96113 CA 50 CA L
197 195 2021 MI FT Data Scientist 147000 USD 147000 US 50 US L
198 196 2021 EN FT BI Data Analyst 9272 USD 9272 KE 100 KE S
199 197 2021 SE FT Machine Learning Engineer 1799997 INR 24342 IN 100 IN L
200 198 2021 SE FT Data Science Manager 4000000 INR 54094 IN 50 US L
201 199 2021 EN FT Data Science Consultant 90000 USD 90000 US 100 US S
202 200 2021 MI FT Data Scientist 52000 EUR 61467 DE 50 AT M
203 201 2021 SE FT Machine Learning Infrastructure Engineer 195000 USD 195000 US 100 US M
204 202 2021 MI FT Data Scientist 32000 EUR 37825 ES 100 ES L
205 203 2021 SE FT Research Scientist 50000 USD 50000 FR 100 US S
206 204 2021 MI FT Data Scientist 160000 USD 160000 US 100 US L
207 205 2021 MI FT Data Scientist 69600 BRL 12901 BR 0 BR S
208 206 2021 SE FT Machine Learning Engineer 200000 USD 200000 US 100 US L
209 207 2021 SE FT Data Engineer 165000 USD 165000 US 0 US M
210 208 2021 MI FL Data Engineer 20000 USD 20000 IT 0 US L
211 209 2021 SE FT Data Analytics Manager 120000 USD 120000 US 0 US L
212 210 2021 MI FT Machine Learning Engineer 21000 EUR 24823 SI 50 SI L
213 211 2021 MI FT Research Scientist 48000 EUR 56738 FR 50 FR S
214 212 2021 MI FT Data Engineer 48000 GBP 66022 HK 50 GB S
215 213 2021 EN FT Big Data Engineer 435000 INR 5882 IN 0 CH L
216 214 2021 EN FT Machine Learning Engineer 21000 EUR 24823 DE 50 DE M
217 215 2021 SE FT Principal Data Engineer 185000 USD 185000 US 100 US L
218 216 2021 EN PT Computer Vision Engineer 180000 DKK 28609 DK 50 DK S
219 217 2021 MI FT Data Scientist 76760 EUR 90734 DE 50 DE L
220 218 2021 MI FT Machine Learning Engineer 75000 EUR 88654 BE 100 BE M
221 219 2021 SE FT Data Analytics Manager 140000 USD 140000 US 100 US L
222 220 2021 MI FT Machine Learning Engineer 180000 PLN 46597 PL 100 PL L
223 221 2021 MI FT Data Scientist 85000 GBP 116914 GB 50 GB L
224 222 2021 MI FT Data Scientist 2500000 INR 33808 IN 0 IN M
225 223 2021 MI FT Data Scientist 40900 GBP 56256 GB 50 GB L
226 224 2021 SE FT Machine Learning Scientist 225000 USD 225000 US 100 CA L
227 225 2021 EX CT Principal Data Scientist 416000 USD 416000 US 100 US S
228 226 2021 SE FT Data Scientist 110000 CAD 87738 CA 100 CA S
229 227 2021 MI FT Data Scientist 75000 EUR 88654 DE 50 DE L
230 228 2021 SE FT Data Scientist 135000 USD 135000 US 0 US L
231 229 2021 SE FT Data Analyst 90000 CAD 71786 CA 100 CA M
232 230 2021 EN FT Big Data Engineer 1200000 INR 16228 IN 100 IN L
233 231 2021 SE FT ML Engineer 256000 USD 256000 US 100 US S
234 232 2021 SE FT Director of Data Engineering 200000 USD 200000 US 100 US L
235 233 2021 SE FT Data Analyst 200000 USD 200000 US 100 US L
236 234 2021 MI FT Data Architect 180000 USD 180000 US 100 US L
237 235 2021 MI FT Head of Data Science 110000 USD 110000 US 0 US S
238 236 2021 MI FT Research Scientist 80000 CAD 63810 CA 100 CA M
239 237 2021 MI FT Data Scientist 39600 EUR 46809 ES 100 ES M
240 238 2021 EN FT Data Scientist 4000 USD 4000 VN 0 VN M
241 239 2021 EN FT Data Engineer 1600000 INR 21637 IN 50 IN M
242 240 2021 SE FT Data Scientist 130000 CAD 103691 CA 100 CA L
243 241 2021 MI FT Data Analyst 80000 USD 80000 US 100 US L
244 242 2021 MI FT Data Engineer 110000 USD 110000 US 100 US L
245 243 2021 SE FT Data Scientist 165000 USD 165000 US 100 US L
246 244 2021 EN FT AI Scientist 1335000 INR 18053 IN 100 AS S
247 245 2021 MI FT Data Engineer 52500 GBP 72212 GB 50 GB L
248 246 2021 EN FT Data Scientist 31000 EUR 36643 FR 50 FR L
249 247 2021 MI FT Data Engineer 108000 TRY 12103 TR 0 TR M
250 248 2021 SE FT Data Engineer 70000 GBP 96282 GB 50 GB L
251 249 2021 SE FT Principal Data Analyst 170000 USD 170000 US 100 US M
252 250 2021 MI FT Data Scientist 115000 USD 115000 US 50 US L
253 251 2021 EN FT Data Scientist 90000 USD 90000 US 100 US S
254 252 2021 EX FT Principal Data Engineer 600000 USD 600000 US 100 US L
255 253 2021 EN FT Data Scientist 2100000 INR 28399 IN 100 IN M
256 254 2021 MI FT Data Analyst 93000 USD 93000 US 100 US L
257 255 2021 SE FT Big Data Architect 125000 CAD 99703 CA 50 CA M
258 256 2021 MI FT Data Engineer 200000 USD 200000 US 100 US L
259 257 2021 SE FT Principal Data Scientist 147000 EUR 173762 DE 100 DE M
260 258 2021 SE FT Machine Learning Engineer 185000 USD 185000 US 50 US L
261 259 2021 EX FT Director of Data Science 120000 EUR 141846 DE 0 DE L
262 260 2021 MI FT Data Scientist 130000 USD 130000 US 50 US L
263 261 2021 SE FT Data Analyst 54000 EUR 63831 DE 50 DE L
264 262 2021 MI FT Data Scientist 1250000 INR 16904 IN 100 IN S
265 263 2021 SE FT Machine Learning Engineer 4900000 INR 66265 IN 0 IN L
266 264 2021 MI FT Data Scientist 21600 EUR 25532 RS 100 DE S
267 265 2021 SE FT Lead Data Engineer 160000 USD 160000 PR 50 US S
268 266 2021 MI FT Data Engineer 93150 USD 93150 US 0 US M
269 267 2021 MI FT Data Engineer 111775 USD 111775 US 0 US M
270 268 2021 MI FT Data Engineer 250000 TRY 28016 TR 100 TR M
271 269 2021 EN FT Data Engineer 55000 EUR 65013 DE 50 DE M
272 270 2021 EN FT Data Engineer 72500 USD 72500 US 100 US L
273 271 2021 SE FT Computer Vision Engineer 102000 BRL 18907 BR 0 BR M
274 272 2021 EN FT Data Science Consultant 65000 EUR 76833 DE 0 DE L
275 273 2021 EN FT Machine Learning Engineer 85000 USD 85000 NL 100 DE S
276 274 2021 SE FT Data Scientist 65720 EUR 77684 FR 50 FR M
277 275 2021 EN FT Data Scientist 100000 USD 100000 US 100 US M
278 276 2021 EN FT Data Scientist 58000 USD 58000 US 50 US L
279 277 2021 SE FT AI Scientist 55000 USD 55000 ES 100 ES L
280 278 2021 SE FT Data Scientist 180000 TRY 20171 TR 50 TR L
281 279 2021 EN FT Business Data Analyst 50000 EUR 59102 LU 100 LU L
282 280 2021 MI FT Data Engineer 112000 USD 112000 US 100 US L
283 281 2021 EN FT Research Scientist 100000 USD 100000 JE 0 CN L
284 282 2021 MI PT Data Engineer 59000 EUR 69741 NL 100 NL L
285 283 2021 SE CT Staff Data Scientist 105000 USD 105000 US 100 US M
286 284 2021 MI FT Research Scientist 69999 USD 69999 CZ 50 CZ L
287 285 2021 SE FT Data Science Manager 7000000 INR 94665 IN 50 IN L
288 286 2021 SE FT Head of Data 87000 EUR 102839 SI 100 SI L
289 287 2021 MI FT Data Scientist 109000 USD 109000 US 50 US L
290 288 2021 MI FT Machine Learning Engineer 43200 EUR 51064 IT 50 IT L
291 289 2022 SE FT Data Engineer 135000 USD 135000 US 100 US M
292 290 2022 SE FT Data Analyst 155000 USD 155000 US 100 US M
293 291 2022 SE FT Data Analyst 120600 USD 120600 US 100 US M
294 292 2022 MI FT Data Scientist 130000 USD 130000 US 0 US M
295 293 2022 MI FT Data Scientist 90000 USD 90000 US 0 US M
296 294 2022 MI FT Data Engineer 170000 USD 170000 US 100 US M
297 295 2022 MI FT Data Engineer 150000 USD 150000 US 100 US M
298 296 2022 SE FT Data Analyst 102100 USD 102100 US 100 US M
299 297 2022 SE FT Data Analyst 84900 USD 84900 US 100 US M
300 298 2022 SE FT Data Scientist 136620 USD 136620 US 100 US M
301 299 2022 SE FT Data Scientist 99360 USD 99360 US 100 US M
302 300 2022 SE FT Data Scientist 90000 GBP 117789 GB 0 GB M
303 301 2022 SE FT Data Scientist 80000 GBP 104702 GB 0 GB M
304 302 2022 SE FT Data Scientist 146000 USD 146000 US 100 US M
305 303 2022 SE FT Data Scientist 123000 USD 123000 US 100 US M
306 304 2022 EN FT Data Engineer 40000 GBP 52351 GB 100 GB M
307 305 2022 SE FT Data Analyst 99000 USD 99000 US 0 US M
308 306 2022 SE FT Data Analyst 116000 USD 116000 US 0 US M
309 307 2022 MI FT Data Analyst 106260 USD 106260 US 0 US M
310 308 2022 MI FT Data Analyst 126500 USD 126500 US 0 US M
311 309 2022 EX FT Data Engineer 242000 USD 242000 US 100 US M
312 310 2022 EX FT Data Engineer 200000 USD 200000 US 100 US M
313 311 2022 MI FT Data Scientist 50000 GBP 65438 GB 0 GB M
314 312 2022 MI FT Data Scientist 30000 GBP 39263 GB 0 GB M
315 313 2022 MI FT Data Engineer 60000 GBP 78526 GB 0 GB M
316 314 2022 MI FT Data Engineer 40000 GBP 52351 GB 0 GB M
317 315 2022 SE FT Data Scientist 165220 USD 165220 US 100 US M
318 316 2022 EN FT Data Engineer 35000 GBP 45807 GB 100 GB M
319 317 2022 SE FT Data Scientist 120160 USD 120160 US 100 US M
320 318 2022 SE FT Data Analyst 90320 USD 90320 US 100 US M
321 319 2022 SE FT Data Engineer 181940 USD 181940 US 0 US M
322 320 2022 SE FT Data Engineer 132320 USD 132320 US 0 US M
323 321 2022 SE FT Data Engineer 220110 USD 220110 US 0 US M
324 322 2022 SE FT Data Engineer 160080 USD 160080 US 0 US M
325 323 2022 SE FT Data Scientist 180000 USD 180000 US 0 US L
326 324 2022 SE FT Data Scientist 120000 USD 120000 US 0 US L
327 325 2022 SE FT Data Analyst 124190 USD 124190 US 100 US M
328 326 2022 EX FT Data Analyst 130000 USD 130000 US 100 US M
329 327 2022 EX FT Data Analyst 110000 USD 110000 US 100 US M
330 328 2022 SE FT Data Analyst 170000 USD 170000 US 100 US M
331 329 2022 MI FT Data Analyst 115500 USD 115500 US 100 US M
332 330 2022 SE FT Data Analyst 112900 USD 112900 US 100 US M
333 331 2022 SE FT Data Analyst 90320 USD 90320 US 100 US M
334 332 2022 SE FT Data Analyst 112900 USD 112900 US 100 US M
335 333 2022 SE FT Data Analyst 90320 USD 90320 US 100 US M
336 334 2022 SE FT Data Engineer 165400 USD 165400 US 100 US M
337 335 2022 SE FT Data Engineer 132320 USD 132320 US 100 US M
338 336 2022 MI FT Data Analyst 167000 USD 167000 US 100 US M
339 337 2022 SE FT Data Engineer 243900 USD 243900 US 100 US M
340 338 2022 SE FT Data Analyst 136600 USD 136600 US 100 US M
341 339 2022 SE FT Data Analyst 109280 USD 109280 US 100 US M
342 340 2022 SE FT Data Engineer 128875 USD 128875 US 100 US M
343 341 2022 SE FT Data Engineer 93700 USD 93700 US 100 US M
344 342 2022 EX FT Head of Data Science 224000 USD 224000 US 100 US M
345 343 2022 EX FT Head of Data Science 167875 USD 167875 US 100 US M
346 344 2022 EX FT Analytics Engineer 175000 USD 175000 US 100 US M
347 345 2022 SE FT Data Engineer 156600 USD 156600 US 100 US M
348 346 2022 SE FT Data Engineer 108800 USD 108800 US 0 US M
349 347 2022 SE FT Data Scientist 95550 USD 95550 US 0 US M
350 348 2022 SE FT Data Engineer 113000 USD 113000 US 0 US L
351 349 2022 SE FT Data Analyst 135000 USD 135000 US 100 US M
352 350 2022 SE FT Data Science Manager 161342 USD 161342 US 100 US M
353 351 2022 SE FT Data Science Manager 137141 USD 137141 US 100 US M
354 352 2022 SE FT Data Scientist 167000 USD 167000 US 100 US M
355 353 2022 SE FT Data Scientist 123000 USD 123000 US 100 US M
356 354 2022 SE FT Data Engineer 60000 GBP 78526 GB 0 GB M
357 355 2022 SE FT Data Engineer 50000 GBP 65438 GB 0 GB M
358 356 2022 SE FT Data Scientist 150000 USD 150000 US 0 US M
359 357 2022 SE FT Data Scientist 211500 USD 211500 US 100 US M
360 358 2022 SE FT Data Architect 192400 USD 192400 CA 100 CA M
361 359 2022 SE FT Data Architect 90700 USD 90700 CA 100 CA M
362 360 2022 SE FT Data Analyst 130000 USD 130000 CA 100 CA M
363 361 2022 SE FT Data Analyst 61300 USD 61300 CA 100 CA M
364 362 2022 SE FT Data Analyst 130000 USD 130000 CA 100 CA M
365 363 2022 SE FT Data Analyst 61300 USD 61300 CA 100 CA M
366 364 2022 SE FT Data Engineer 160000 USD 160000 US 0 US L
367 365 2022 SE FT Data Scientist 138600 USD 138600 US 100 US M
368 366 2022 SE FT Data Engineer 136000 USD 136000 US 0 US M
369 367 2022 MI FT Data Analyst 58000 USD 58000 US 0 US S
370 368 2022 EX FT Analytics Engineer 135000 USD 135000 US 100 US M
371 369 2022 SE FT Data Scientist 170000 USD 170000 US 100 US M
372 370 2022 SE FT Data Scientist 123000 USD 123000 US 100 US M
373 371 2022 SE FT Machine Learning Engineer 189650 USD 189650 US 0 US M
374 372 2022 SE FT Machine Learning Engineer 164996 USD 164996 US 0 US M
375 373 2022 MI FT ETL Developer 50000 EUR 54957 GR 0 GR M
376 374 2022 MI FT ETL Developer 50000 EUR 54957 GR 0 GR M
377 375 2022 EX FT Lead Data Engineer 150000 CAD 118187 CA 100 CA S
378 376 2022 SE FT Data Analyst 132000 USD 132000 US 0 US M
379 377 2022 SE FT Data Engineer 165400 USD 165400 US 100 US M
380 378 2022 SE FT Data Architect 208775 USD 208775 US 100 US M
381 379 2022 SE FT Data Architect 147800 USD 147800 US 100 US M
382 380 2022 SE FT Data Engineer 136994 USD 136994 US 100 US M
383 381 2022 SE FT Data Engineer 101570 USD 101570 US 100 US M
384 382 2022 SE FT Data Analyst 128875 USD 128875 US 100 US M
385 383 2022 SE FT Data Analyst 93700 USD 93700 US 100 US M
386 384 2022 EX FT Head of Machine Learning 6000000 INR 79039 IN 50 IN L
387 385 2022 SE FT Data Engineer 132320 USD 132320 US 100 US M
388 386 2022 EN FT Machine Learning Engineer 28500 GBP 37300 GB 100 GB L
389 387 2022 SE FT Data Analyst 164000 USD 164000 US 0 US M
390 388 2022 SE FT Data Engineer 155000 USD 155000 US 100 US M
391 389 2022 MI FT Machine Learning Engineer 95000 GBP 124333 GB 0 GB M
392 390 2022 MI FT Machine Learning Engineer 75000 GBP 98158 GB 0 GB M
393 391 2022 MI FT AI Scientist 120000 USD 120000 US 0 US M
394 392 2022 SE FT Data Analyst 112900 USD 112900 US 100 US M
395 393 2022 SE FT Data Analyst 90320 USD 90320 US 100 US M
396 394 2022 SE FT Data Analytics Manager 145000 USD 145000 US 100 US M
397 395 2022 SE FT Data Analytics Manager 105400 USD 105400 US 100 US M
398 396 2022 MI FT Machine Learning Engineer 80000 EUR 87932 FR 100 DE M
399 397 2022 MI FT Data Engineer 90000 GBP 117789 GB 0 GB M
400 398 2022 SE FT Data Scientist 215300 USD 215300 US 100 US L
401 399 2022 SE FT Data Scientist 158200 USD 158200 US 100 US L
402 400 2022 SE FT Data Engineer 209100 USD 209100 US 100 US L
403 401 2022 SE FT Data Engineer 154600 USD 154600 US 100 US L
404 402 2022 SE FT Data Analyst 115934 USD 115934 US 0 US M
405 403 2022 SE FT Data Analyst 81666 USD 81666 US 0 US M
406 404 2022 SE FT Data Engineer 175000 USD 175000 US 100 US M
407 405 2022 MI FT Data Engineer 75000 GBP 98158 GB 0 GB M
408 406 2022 MI FT Data Analyst 58000 USD 58000 US 0 US S
409 407 2022 SE FT Data Engineer 183600 USD 183600 US 100 US L
410 408 2022 MI FT Data Analyst 40000 GBP 52351 GB 100 GB M
411 409 2022 SE FT Data Scientist 180000 USD 180000 US 100 US M
412 410 2022 MI FT Data Scientist 55000 GBP 71982 GB 0 GB M
413 411 2022 MI FT Data Scientist 35000 GBP 45807 GB 0 GB M
414 412 2022 MI FT Data Engineer 60000 EUR 65949 GR 100 GR M
415 413 2022 MI FT Data Engineer 45000 EUR 49461 GR 100 GR M
416 414 2022 MI FT Data Engineer 60000 GBP 78526 GB 100 GB M
417 415 2022 MI FT Data Engineer 45000 GBP 58894 GB 100 GB M
418 416 2022 SE FT Data Scientist 260000 USD 260000 US 100 US M
419 417 2022 SE FT Data Science Engineer 60000 USD 60000 AR 100 MX L
420 418 2022 MI FT Data Engineer 63900 USD 63900 US 0 US M
421 419 2022 MI FT Machine Learning Scientist 160000 USD 160000 US 100 US L
422 420 2022 MI FT Machine Learning Scientist 112300 USD 112300 US 100 US L
423 421 2022 MI FT Data Science Manager 241000 USD 241000 US 100 US M
424 422 2022 MI FT Data Science Manager 159000 USD 159000 US 100 US M
425 423 2022 SE FT Data Scientist 180000 USD 180000 US 0 US M
426 424 2022 SE FT Data Scientist 80000 USD 80000 US 0 US M
427 425 2022 MI FT Data Engineer 82900 USD 82900 US 0 US M
428 426 2022 SE FT Data Engineer 100800 USD 100800 US 100 US L
429 427 2022 MI FT Data Engineer 45000 EUR 49461 ES 100 ES M
430 428 2022 SE FT Data Scientist 140400 USD 140400 US 0 US L
431 429 2022 MI FT Data Analyst 30000 GBP 39263 GB 100 GB M
432 430 2022 MI FT Data Analyst 40000 EUR 43966 ES 100 ES M
433 431 2022 MI FT Data Analyst 30000 EUR 32974 ES 100 ES M
434 432 2022 MI FT Data Engineer 80000 EUR 87932 ES 100 ES M
435 433 2022 MI FT Data Engineer 70000 EUR 76940 ES 100 ES M
436 434 2022 MI FT Data Engineer 80000 GBP 104702 GB 100 GB M
437 435 2022 MI FT Data Engineer 70000 GBP 91614 GB 100 GB M
438 436 2022 MI FT Data Engineer 60000 EUR 65949 ES 100 ES M
439 437 2022 MI FT Data Engineer 80000 EUR 87932 GR 100 GR M
440 438 2022 SE FT Machine Learning Engineer 189650 USD 189650 US 0 US M
441 439 2022 SE FT Machine Learning Engineer 164996 USD 164996 US 0 US M
442 440 2022 MI FT Data Analyst 40000 EUR 43966 GR 100 GR M
443 441 2022 MI FT Data Analyst 30000 EUR 32974 GR 100 GR M
444 442 2022 MI FT Data Engineer 75000 GBP 98158 GB 100 GB M
445 443 2022 MI FT Data Engineer 60000 GBP 78526 GB 100 GB M
446 444 2022 SE FT Data Scientist 215300 USD 215300 US 0 US L
447 445 2022 MI FT Data Engineer 70000 EUR 76940 GR 100 GR M
448 446 2022 SE FT Data Engineer 209100 USD 209100 US 100 US L
449 447 2022 SE FT Data Engineer 154600 USD 154600 US 100 US L
450 448 2022 SE FT Data Engineer 180000 USD 180000 US 100 US M
451 449 2022 EN FT ML Engineer 20000 EUR 21983 PT 100 PT L
452 450 2022 SE FT Data Engineer 80000 USD 80000 US 100 US M
453 451 2022 MI FT Machine Learning Developer 100000 CAD 78791 CA 100 CA M
454 452 2022 EX FT Director of Data Science 250000 CAD 196979 CA 50 CA L
455 453 2022 MI FT Machine Learning Engineer 120000 USD 120000 US 100 US S
456 454 2022 EN FT Computer Vision Engineer 125000 USD 125000 US 0 US M
457 455 2022 MI FT NLP Engineer 240000 CNY 37236 US 50 US L
458 456 2022 SE FT Data Engineer 105000 USD 105000 US 100 US M
459 457 2022 SE FT Lead Machine Learning Engineer 80000 EUR 87932 DE 0 DE M
460 458 2022 MI FT Business Data Analyst 1400000 INR 18442 IN 100 IN M
461 459 2022 MI FT Data Scientist 2400000 INR 31615 IN 100 IN L
462 460 2022 MI FT Machine Learning Infrastructure Engineer 53000 EUR 58255 PT 50 PT L
463 461 2022 EN FT Financial Data Analyst 100000 USD 100000 US 50 US L
464 462 2022 MI PT Data Engineer 50000 EUR 54957 DE 50 DE L
465 463 2022 EN FT Data Scientist 1400000 INR 18442 IN 100 IN M
466 464 2022 SE FT Principal Data Scientist 148000 EUR 162674 DE 100 DE M
467 465 2022 EN FT Data Engineer 120000 USD 120000 US 100 US M
468 466 2022 SE FT Research Scientist 144000 USD 144000 US 50 US L
469 467 2022 SE FT Data Scientist 104890 USD 104890 US 100 US M
470 468 2022 SE FT Data Engineer 100000 USD 100000 US 100 US M
471 469 2022 SE FT Data Scientist 140000 USD 140000 US 100 US M
472 470 2022 MI FT Data Analyst 135000 USD 135000 US 100 US M
473 471 2022 MI FT Data Analyst 50000 USD 50000 US 100 US M
474 472 2022 SE FT Data Scientist 220000 USD 220000 US 100 US M
475 473 2022 SE FT Data Scientist 140000 USD 140000 US 100 US M
476 474 2022 MI FT Data Scientist 140000 GBP 183228 GB 0 GB M
477 475 2022 MI FT Data Scientist 70000 GBP 91614 GB 0 GB M
478 476 2022 SE FT Data Scientist 185100 USD 185100 US 100 US M
479 477 2022 SE FT Machine Learning Engineer 220000 USD 220000 US 100 US M
480 478 2022 MI FT Data Scientist 200000 USD 200000 US 100 US M
481 479 2022 MI FT Data Scientist 120000 USD 120000 US 100 US M
482 480 2022 SE FT Machine Learning Engineer 120000 USD 120000 AE 100 AE S
483 481 2022 SE FT Machine Learning Engineer 65000 USD 65000 AE 100 AE S
484 482 2022 EX FT Data Engineer 324000 USD 324000 US 100 US M
485 483 2022 EX FT Data Engineer 216000 USD 216000 US 100 US M
486 484 2022 SE FT Data Engineer 210000 USD 210000 US 100 US M
487 485 2022 SE FT Machine Learning Engineer 120000 USD 120000 US 100 US M
488 486 2022 SE FT Data Scientist 230000 USD 230000 US 100 US M
489 487 2022 EN PT Data Scientist 100000 USD 100000 DZ 50 DZ M
490 488 2022 MI FL Data Scientist 100000 USD 100000 CA 100 US M
491 489 2022 EN CT Applied Machine Learning Scientist 29000 EUR 31875 TN 100 CZ M
492 490 2022 SE FT Head of Data 200000 USD 200000 MY 100 US M
493 491 2022 MI FT Principal Data Analyst 75000 USD 75000 CA 100 CA S
494 492 2022 MI FT Data Scientist 150000 PLN 35590 PL 100 PL L
495 493 2022 SE FT Machine Learning Developer 100000 CAD 78791 CA 100 CA M
496 494 2022 SE FT Data Scientist 100000 USD 100000 BR 100 US M
497 495 2022 MI FT Machine Learning Scientist 153000 USD 153000 US 50 US M
498 496 2022 EN FT Data Engineer 52800 EUR 58035 PK 100 DE M
499 497 2022 SE FT Data Scientist 165000 USD 165000 US 100 US M
500 498 2022 SE FT Research Scientist 85000 EUR 93427 FR 50 FR L
501 499 2022 EN FT Data Scientist 66500 CAD 52396 CA 100 CA L
502 500 2022 SE FT Machine Learning Engineer 57000 EUR 62651 NL 100 NL L
503 501 2022 MI FT Head of Data 30000 EUR 32974 EE 100 EE S
504 502 2022 EN FT Data Scientist 40000 USD 40000 JP 100 MY L
505 503 2022 MI FT Machine Learning Engineer 121000 AUD 87425 AU 100 AU L
506 504 2022 SE FT Data Engineer 115000 USD 115000 US 100 US M
507 505 2022 EN FT Data Scientist 120000 AUD 86703 AU 50 AU M
508 506 2022 MI FT Applied Machine Learning Scientist 75000 USD 75000 BO 100 US L
509 507 2022 MI FT Research Scientist 59000 EUR 64849 AT 0 AT L
510 508 2022 EN FT Research Scientist 120000 USD 120000 US 100 US L
511 509 2022 MI FT Applied Data Scientist 157000 USD 157000 US 100 US L
512 510 2022 EN FT Computer Vision Software Engineer 150000 USD 150000 AU 100 AU S
513 511 2022 MI FT Business Data Analyst 90000 CAD 70912 CA 50 CA L
514 512 2022 EN FT Data Engineer 65000 USD 65000 US 100 US S
515 513 2022 SE FT Machine Learning Engineer 65000 EUR 71444 IE 100 IE S
516 514 2022 EN FT Data Analytics Engineer 20000 USD 20000 PK 0 PK M
517 515 2022 MI FT Data Scientist 48000 USD 48000 RU 100 US S
518 516 2022 SE FT Data Science Manager 152500 USD 152500 US 100 US M
519 517 2022 MI FT Data Engineer 62000 EUR 68147 FR 100 FR M
520 518 2022 MI FT Data Scientist 115000 CHF 122346 CH 0 CH L
521 519 2022 SE FT Applied Data Scientist 380000 USD 380000 US 100 US L
522 520 2022 MI FT Data Scientist 88000 CAD 69336 CA 100 CA M
523 521 2022 EN FT Computer Vision Engineer 10000 USD 10000 PT 100 LU M
524 522 2022 MI FT Data Analyst 20000 USD 20000 GR 100 GR S
525 523 2022 SE FT Data Analytics Lead 405000 USD 405000 US 100 US L
526 524 2022 MI FT Data Scientist 135000 USD 135000 US 100 US L
527 525 2022 SE FT Applied Data Scientist 177000 USD 177000 US 100 US L
528 526 2022 MI FT Data Scientist 78000 USD 78000 US 100 US M
529 527 2022 SE FT Data Analyst 135000 USD 135000 US 100 US M
530 528 2022 SE FT Data Analyst 100000 USD 100000 US 100 US M
531 529 2022 SE FT Data Analyst 90320 USD 90320 US 100 US M
532 530 2022 MI FT Data Analyst 85000 USD 85000 CA 0 CA M
533 531 2022 MI FT Data Analyst 75000 USD 75000 CA 0 CA M
534 532 2022 SE FT Machine Learning Engineer 214000 USD 214000 US 100 US M
535 533 2022 SE FT Machine Learning Engineer 192600 USD 192600 US 100 US M
536 534 2022 SE FT Data Architect 266400 USD 266400 US 100 US M
537 535 2022 SE FT Data Architect 213120 USD 213120 US 100 US M
538 536 2022 SE FT Data Analyst 112900 USD 112900 US 100 US M
539 537 2022 SE FT Data Engineer 155000 USD 155000 US 100 US M
540 538 2022 MI FT Data Scientist 141300 USD 141300 US 0 US M
541 539 2022 MI FT Data Scientist 102100 USD 102100 US 0 US M
542 540 2022 SE FT Data Analyst 115934 USD 115934 US 100 US M
543 541 2022 SE FT Data Analyst 81666 USD 81666 US 100 US M
544 542 2022 MI FT Data Engineer 206699 USD 206699 US 0 US M
545 543 2022 MI FT Data Engineer 99100 USD 99100 US 0 US M
546 544 2022 SE FT Data Engineer 130000 USD 130000 US 100 US M
547 545 2022 SE FT Data Engineer 115000 USD 115000 US 100 US M
548 546 2022 SE FT Data Engineer 110500 USD 110500 US 100 US M
549 547 2022 SE FT Data Engineer 130000 USD 130000 US 100 US M
550 548 2022 SE FT Data Analyst 99050 USD 99050 US 100 US M
551 549 2022 SE FT Data Engineer 160000 USD 160000 US 100 US M
552 550 2022 SE FT Data Scientist 205300 USD 205300 US 0 US L
553 551 2022 SE FT Data Scientist 140400 USD 140400 US 0 US L
554 552 2022 SE FT Data Scientist 176000 USD 176000 US 100 US M
555 553 2022 SE FT Data Scientist 144000 USD 144000 US 100 US M
556 554 2022 SE FT Data Engineer 200100 USD 200100 US 100 US M
557 555 2022 SE FT Data Engineer 160000 USD 160000 US 100 US M
558 556 2022 SE FT Data Engineer 145000 USD 145000 US 100 US M
559 557 2022 SE FT Data Engineer 70500 USD 70500 US 0 US M
560 558 2022 SE FT Data Scientist 205300 USD 205300 US 0 US M
561 559 2022 SE FT Data Scientist 140400 USD 140400 US 0 US M
562 560 2022 SE FT Analytics Engineer 205300 USD 205300 US 0 US M
563 561 2022 SE FT Analytics Engineer 184700 USD 184700 US 0 US M
564 562 2022 SE FT Data Engineer 175100 USD 175100 US 100 US M
565 563 2022 SE FT Data Engineer 140250 USD 140250 US 100 US M
566 564 2022 SE FT Data Analyst 116150 USD 116150 US 100 US M
567 565 2022 SE FT Data Engineer 54000 USD 54000 US 0 US M
568 566 2022 SE FT Data Analyst 170000 USD 170000 US 100 US M
569 567 2022 MI FT Data Analyst 50000 GBP 65438 GB 0 GB M
570 568 2022 SE FT Data Analyst 80000 USD 80000 US 100 US M
571 569 2022 SE FT Data Scientist 140000 USD 140000 US 100 US M
572 570 2022 SE FT Data Scientist 210000 USD 210000 US 100 US M
573 571 2022 SE FT Data Scientist 140000 USD 140000 US 100 US M
574 572 2022 SE FT Data Analyst 100000 USD 100000 US 100 US M
575 573 2022 SE FT Data Analyst 69000 USD 69000 US 100 US M
576 574 2022 SE FT Data Scientist 210000 USD 210000 US 100 US M
577 575 2022 SE FT Data Scientist 140000 USD 140000 US 100 US M
578 576 2022 SE FT Data Scientist 210000 USD 210000 US 100 US M
579 577 2022 SE FT Data Analyst 150075 USD 150075 US 100 US M
580 578 2022 SE FT Data Engineer 100000 USD 100000 US 100 US M
581 579 2022 SE FT Data Engineer 25000 USD 25000 US 100 US M
582 580 2022 SE FT Data Analyst 126500 USD 126500 US 100 US M
583 581 2022 SE FT Data Analyst 106260 USD 106260 US 100 US M
584 582 2022 SE FT Data Engineer 220110 USD 220110 US 100 US M
585 583 2022 SE FT Data Engineer 160080 USD 160080 US 100 US M
586 584 2022 SE FT Data Analyst 105000 USD 105000 US 100 US M
587 585 2022 SE FT Data Analyst 110925 USD 110925 US 100 US M
588 586 2022 MI FT Data Analyst 35000 GBP 45807 GB 0 GB M
589 587 2022 SE FT Data Scientist 140000 USD 140000 US 100 US M
590 588 2022 SE FT Data Analyst 99000 USD 99000 US 0 US M
591 589 2022 SE FT Data Analyst 60000 USD 60000 US 100 US M
592 590 2022 SE FT Data Architect 192564 USD 192564 US 100 US M
593 591 2022 SE FT Data Architect 144854 USD 144854 US 100 US M
594 592 2022 SE FT Data Scientist 230000 USD 230000 US 100 US M
595 593 2022 SE FT Data Scientist 150000 USD 150000 US 100 US M
596 594 2022 SE FT Data Analytics Manager 150260 USD 150260 US 100 US M
597 595 2022 SE FT Data Analytics Manager 109280 USD 109280 US 100 US M
598 596 2022 SE FT Data Scientist 210000 USD 210000 US 100 US M
599 597 2022 SE FT Data Analyst 170000 USD 170000 US 100 US M
600 598 2022 MI FT Data Scientist 160000 USD 160000 US 100 US M
601 599 2022 MI FT Data Scientist 130000 USD 130000 US 100 US M
602 600 2022 EN FT Data Analyst 67000 USD 67000 CA 0 CA M
603 601 2022 EN FT Data Analyst 52000 USD 52000 CA 0 CA M
604 602 2022 SE FT Data Engineer 154000 USD 154000 US 100 US M
605 603 2022 SE FT Data Engineer 126000 USD 126000 US 100 US M
606 604 2022 SE FT Data Analyst 129000 USD 129000 US 0 US M
607 605 2022 SE FT Data Analyst 150000 USD 150000 US 100 US M
608 606 2022 MI FT AI Scientist 200000 USD 200000 IN 100 US L

View File

@@ -0,0 +1,60 @@
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt

# Load the data
file_path = 'ds_salaries.csv'
data = pd.read_csv(file_path)

# Preprocessing: scale the numeric feature, one-hot encode the categorical ones
categorical_features = ['experience_level', 'employment_type', 'company_location', 'company_size']
numeric_features = ['work_year']
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])

# Feature selection
features = ['work_year', 'experience_level', 'employment_type', 'company_location', 'company_size']
X = data[features]
y = data['salary_in_usd']

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build and train the model with the preprocessor in a single pipeline
alpha = 0.01
lasso_model = Pipeline([
    ('preprocessor', preprocessor),
    ('lasso', Lasso(alpha=alpha))
])
lasso_model.fit(X_train, y_train)

# Get the predictions
y_pred = lasso_model.predict(X_test)

# Evaluate the model
accuracy = lasso_model.score(X_test, y_test)
mse = mean_squared_error(y_test, y_pred)
print(f"R^2 Score: {accuracy:.2f}")
print(f"Mean Squared Error: {mse:.2f}")

# Print predicted vs. actual values
predictions_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(predictions_df)

# Visualize the model weights (coefficients);
# get_feature_names_out() replaces the get_feature_names() method removed in newer scikit-learn
ohe = lasso_model.named_steps['preprocessor'].transformers_[1][1]
feature_names = numeric_features + list(ohe.get_feature_names_out(categorical_features))
coefficients = pd.Series(lasso_model.named_steps['lasso'].coef_, index=feature_names)
plt.figure(figsize=(10, 6))
coefficients.sort_values().plot(kind='barh')
plt.title('Lasso Regression Coefficients')
plt.show()

View File

@@ -0,0 +1,55 @@
# Lab 7
## Variant 9
## Task
Choose a literary text (even variants use a Russian-language text, odd variants an English-language one) and train a recurrent neural network on it to generate text. Tune the architecture and parameters to get as close to a meaningful result as possible. Then form even-odd pairs, exchange the developed networks and check how your partner's architecture handles your text.
## Program Description
The program is an example of using a recurrent neural network (LSTM) to generate text based on a literary work.
### Libraries used
- `numpy`: a library for working with multidimensional arrays and mathematical functions.
- `keras`:
  - `Sequential`: a neural network model built as a linear stack of layers.
  - `Embedding`: a layer that turns integers (word indices) into dense vectors of fixed size.
  - `LSTM`: a long short-term memory recurrent layer.
  - `Dense`: a fully connected layer with softmax activation that produces a probability distribution over the words.
  - `Tokenizer`, `pad_sequences`: tools for tokenizing and padding text sequences.
### Program steps
1. **Loading the data:**
   - The text is loaded from the file `text.txt` (an English-language text) using standard Python facilities.
2. **Preparing the training data:**
   - The text is split into token sequences for training the recurrent neural network.
   - A `Tokenizer` builds the vocabulary and converts the text into a numeric representation.
   - The sequences are padded to the maximum length with `pad_sequences`.
3. **Building and compiling the model:**
   - A sequential model is created with an embedding layer, an LSTM recurrent layer and a fully connected output layer.
   - The model is compiled with categorical cross-entropy as the loss function and the Adam optimizer.
4. **Training the model:**
   - The model is trained on the prepared data for 100 epochs.
5. **Evaluating the model:**
   - The final loss on the training data is printed.
6. **Generating text** (see the sampling sketch after this list):
   - The seed text "Amidst the golden hues of autumn leaves" is created.
   - The model is used to predict the next word in the sequence.
   - The generated text is printed to the screen.
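
The generation loop in `main.py` always picks the most probable next word. A minimal sketch of an alternative, temperature-based sampling step; the helper name and the `temperature` parameter are illustrative assumptions rather than part of `main.py`, and a trained `model`, a fitted `tokenizer` and `max_sequence_length` as in `main.py` (same Keras version) are assumed:

```python
import numpy as np
from keras.preprocessing.sequence import pad_sequences

def sample_next_word(model, tokenizer, seed_text, max_sequence_length, temperature=0.8):
    # Encode and pad the seed text exactly as during training
    token_list = tokenizer.texts_to_sequences([seed_text])[0]
    token_list = pad_sequences([token_list], maxlen=max_sequence_length - 1, padding='pre')
    # Predicted probability distribution over the vocabulary
    probs = model.predict(token_list, verbose=0)[0]
    # Re-weight by the temperature and sample an index from the adjusted distribution
    probs = np.log(probs + 1e-9) / temperature
    probs = np.exp(probs) / np.sum(np.exp(probs))
    index = np.random.choice(len(probs), p=probs)
    # Map the sampled index back to a word ('' corresponds to the padding index)
    return tokenizer.index_word.get(index, '')
```

With `temperature` close to 0 this behaves like the greedy choice used in `main.py`; larger values give more varied, less predictable text.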
### Running the program
- Replace `'text.txt'` with the actual path or name of your English-language text file.
- Clone or download the code from the file `main.py`.
- Run the file in an environment that can execute Python: `python main.py`
### Results
The loss on the training data came out fairly small: 0.029374321853453274327
Generated English-language text:
In the quietude of the woods, mystical creatures stirred, their silhouettes dancing in the dappling sunlight. A mysterious energy enveloped the surroundings, as if the very essence of nature had come alive. The rustling leaves seemed to carry ancient tales, whispered secrets of times long past. Each step through the foliage unveiled a new chapter in the enchanted story of the woodland realm.

View File

@@ -0,0 +1,60 @@
import numpy as np
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

# Load the text
with open('text.txt', 'r', encoding='utf-8') as file:
    text = file.read()

tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])
total_words = len(tokenizer.word_index) + 1

# Create the sequences of training data (n-grams of each line)
input_sequences = []
for line in text.split('\n'):
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i + 1]
        input_sequences.append(n_gram_sequence)

# Pad the sequences to a common length
max_sequence_length = max([len(seq) for seq in input_sequences])
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_length, padding='pre')

# Create input data (all tokens but the last) and output data (the last token, one-hot encoded)
X, y = input_sequences[:, :-1], input_sequences[:, -1]
y = np.eye(total_words)[y]

# Create the model
model = Sequential()
model.add(Embedding(total_words, 50, input_length=max_sequence_length - 1))
model.add(LSTM(100))
model.add(Dense(total_words, activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
history = model.fit(X, y, epochs=100, verbose=2)
print(f"Final Loss on Training Data: {history.history['loss'][-1]}")

# Generate text word by word from the seed
seed_text = "Amidst the golden hues of autumn leaves"
next_words = 100
for _ in range(next_words):
    token_list = tokenizer.texts_to_sequences([seed_text])[0]
    token_list = pad_sequences([token_list], maxlen=max_sequence_length - 1, padding='pre')
    # predict_classes() was removed from Keras; take the argmax of the predicted distribution instead
    predicted = np.argmax(model.predict(token_list, verbose=0), axis=-1)[0]
    output_word = ""
    for word, index in tokenizer.word_index.items():
        if index == predicted:
            output_word = word
            break
    seed_text += " " + output_word
print(seed_text)

View File

@@ -0,0 +1 @@
Amidst the golden hues of autumn leaves, a gentle breeze whispered through the trees. The air was filled with the sweet fragrance of blooming flowers, and the sun cast a warm glow on the peaceful landscape. Birds chirped melodiously, creating a symphony of nature's harmonious melodies. As the day unfolded, the sky painted itself in vibrant shades of orange and pink, showcasing the breathtaking beauty of the changing seasons.

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

View File

@@ -0,0 +1,23 @@
## Data
The following data is used:
* Link to an image of the painting
* Size of the painting in cm
* Average review rating
* Number of orders
* Price
To make the analysis more specific, the following fields were added manually:
* Genre (e.g. landscape, animals, portrait, etc.)
* Subgenre (e.g. cityscape, cats, dogs, etc.)
## Task and classification solution (neural network)
The task is to recommend/predict a subgenre for the user based on the chosen genre and price category. There is no need to bin the target into groups, since it is already a categorical parameter. For classification, all categorical parameters are converted to numbers (a minimal sketch of this encoding follows below). The model accuracy does not exceed 0.30, so the model cannot be considered successful. The main reason is that the data contains quite a lot of classes, which makes the task harder. The prediction results are shown in figures 5 and 6.
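
A minimal sketch of the category-to-integer encoding described above, using the same column names as the script; the sample rows are made up purely for illustration:

```python
import pandas as pd

# Toy data with the columns used in the script (the values are invented for the example)
data = pd.DataFrame({
    'Жанр': ['пейзаж', 'животные', 'пейзаж'],
    'Поджанр': ['городской пейзаж', 'коты', 'горы'],
    'Категория стоимости': ['низкая', 'средняя', 'высокая'],
})

# Assign every unique category value an integer code, column by column
for column in ['Жанр', 'Поджанр', 'Категория стоимости']:
    mapping = {value: code for code, value in enumerate(data[column].unique())}
    data[column] = data[column].map(mapping)

print(data)  # all three columns are now integer codes
```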
## Result
![Alt text](cAofDwrO6o4.jpg "Optional Title")
![Alt text](1aIk7s_b66s.jpg "Optional Title")

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

View File

@@ -0,0 +1,74 @@
import pandas as pd
import streamlit as st
import statsmodels.api as sm
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import numpy as np

data = pd.read_csv('222.csv')
genre_mapping = {genre: code for code, genre in enumerate(data['Жанр'].unique())}
subgenre_mapping = {subgenre: code for code, subgenre in enumerate(data['Поджанр'].unique())}
price_mapping = {price: code for code, price in enumerate(data['Категория стоимости'].unique())}

# Convert the categorical values to integer codes
data['Жанр'] = data['Жанр'].map(genre_mapping)
data['Поджанр'] = data['Поджанр'].map(subgenre_mapping)
data['Категория стоимости'] = data['Категория стоимости'].map(price_mapping)
columns_to_check = ['Размер', 'Жанр', 'Поджанр', 'Категория стоимости']
data = data.dropna(subset=columns_to_check)

# Split the data into features (X) and the target variable (y)
X = data[['Жанр', 'Категория стоимости']]
y = data['Поджанр']

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize the MLPClassifier
clf = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=500, alpha=0.1, solver='adam', random_state=42)
# Train the model
clf.fit(X_train, y_train)
# Predict on the test set
predictions = clf.predict(X_test)
# Evaluate the model accuracy
accuracy = accuracy_score(y_test, predictions)
st.write(f"Точность модели: {accuracy}")

on_pred = st.toggle('')
if on_pred:
    selected_genre = st.selectbox('Выберите жанр:', genre_mapping)
    selected_price = st.selectbox('Выберите категорию стоимости:', price_mapping)
    new_data = pd.DataFrame({'Жанр': [selected_genre], 'Категория стоимости': [selected_price]}, index=[0])
    new_data['Жанр'] = new_data['Жанр'].map(genre_mapping)
    new_data['Категория стоимости'] = new_data['Категория стоимости'].map(price_mapping)
    new_data_normalized = scaler.transform(new_data.values)
    new_predictions = clf.predict(new_data_normalized)
    # Build the reverse dictionary to map numeric subgenre codes back to text labels
    reverse_subgenre_mapping = {code: subgenre for subgenre, code in subgenre_mapping.items()}
    # Convert the numeric predictions back to subgenre labels
    predicted_subgenres = [reverse_subgenre_mapping[code] for code in new_predictions]
    # Show the predicted subgenres for the new data
    st.write("Предсказанный поджанр:")
    for subgenre in predicted_subgenres:
        if isinstance(subgenre, float) and np.isnan(subgenre):
            st.write("Не удалось предсказать, мало данных по данному жанру")
        else:
            st.write(subgenre)

View File

@@ -0,0 +1,43 @@
## Task
Choose a literary text and train a recurrent neural network on it to generate text.
## Dependencies
The application requires the following Python libraries:
* NumPy
* TensorFlow
* Streamlit
* python-docx (the script reads the source texts from Word files)
## Running
```bash
streamlit run laba7.py
```
## Code description
1. Importing the libraries:
The required libraries are imported: docx for reading the texts from Word files, streamlit for the web application, and numpy, tensorflow and keras for training the neural networks.
2. Extracting the text from the Word files:
The extract_text_from_docx function extracts the text from two Word files, a Russian one (textru) and an English one (texten), using the docx library.
3. Preparing the training data (see the sketch after this list):
The text from each file is split into sequences of a fixed length (maxlen) that are used to train recurrent neural networks (LSTM) on the Russian and English texts.
4. Building and training the models:
Two separate model instances (model_russian and model_english) are created and trained on the Russian and English data respectively.
5. Generating text from the trained models:
The generate_text function generates text from the trained models; the generated text is displayed in the web application via streamlit.
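
A minimal sketch of the sliding-window preparation described in step 3; maxlen=40 and step=3 match laba7.py, while the toy token sequence is made up for illustration:

```python
import numpy as np

# Toy character-level token sequence (in laba7.py this comes from the fitted Tokenizer)
tokenized_text = list(range(1, 101))  # pretend ids of 100 characters
maxlen = 40   # length of each input window
step = 3      # shift between consecutive windows

sentences, next_chars = [], []
for i in range(0, len(tokenized_text) - maxlen, step):
    sentences.append(tokenized_text[i: i + maxlen])   # input window
    next_chars.append(tokenized_text[i + maxlen])     # character the model must predict

x = np.array(sentences)   # shape: (num_windows, maxlen)
y = np.array(next_chars)  # shape: (num_windows,)
print(x.shape, y.shape)
```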
## Result
Generated Russian text:
Ты к моему несчастью верь как в святыню верит монах как в чудо чудо верит дева как верят в вечернюю печальные странники в пути
Generated English text:
In the to my distress as the monk believes in a shrine as the maiden believes in a miracle as weary travelers believe in the evening star on their journey

View File

@@ -0,0 +1,99 @@
import docx
import streamlit as st
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding

def extract_text_from_docx(file_path):
    doc = docx.Document(file_path)
    full_text = []
    for para in doc.paragraphs:
        full_text.append(para.text)
    return '\n'.join(full_text)

file_path1 = 'C:/Users/79084/Desktop/textru.doc'
file_path2 = 'C:/Users/79084/Desktop/texten.doc'
# Extract the text from the files
textru = extract_text_from_docx(file_path1)
texten = extract_text_from_docx(file_path2)
# Preprocess the text (character-level tokenization)
tokenizer_russian = tf.keras.preprocessing.text.Tokenizer(char_level=True)
tokenizer_russian.fit_on_texts([textru])
tokenized_text_russian = tokenizer_russian.texts_to_sequences([textru])[0]
tokenizer_english = tf.keras.preprocessing.text.Tokenizer(char_level=True)
tokenizer_english.fit_on_texts([texten])
tokenized_text_english = tokenizer_english.texts_to_sequences([texten])[0]
# Build overlapping training sequences
maxlen = 40
step = 3
sentences_russian = []
next_chars_russian = []
sentences_english = []
next_chars_english = []
for i in range(0, len(tokenized_text_russian) - maxlen, step):
    sentences_russian.append(tokenized_text_russian[i: i + maxlen])
    next_chars_russian.append(tokenized_text_russian[i + maxlen])
for i in range(0, len(tokenized_text_english) - maxlen, step):
    sentences_english.append(tokenized_text_english[i: i + maxlen])
    next_chars_english.append(tokenized_text_english[i + maxlen])
# Convert the data to numpy arrays
x_russian = np.array(sentences_russian)
y_russian = np.array(next_chars_russian)
x_english = np.array(sentences_english)
y_english = np.array(next_chars_english)
# Build the model for the Russian text
model_russian = Sequential()
model_russian.add(Embedding(len(tokenizer_russian.word_index) + 1, 128))
model_russian.add(LSTM(128))
model_russian.add(Dense(len(tokenizer_russian.word_index) + 1, activation='softmax'))
model_russian.compile(loss='sparse_categorical_crossentropy', optimizer='adam')
# Train the model on the Russian text
model_russian.fit(x_russian, y_russian, batch_size=128, epochs=50)
# Build the model for the English text
model_english = Sequential()
model_english.add(Embedding(len(tokenizer_english.word_index) + 1, 128))
model_english.add(LSTM(128))
model_english.add(Dense(len(tokenizer_english.word_index) + 1, activation='softmax'))
model_english.compile(loss='sparse_categorical_crossentropy', optimizer='adam')
# Train the model on the English text
model_english.fit(x_english, y_english, batch_size=128, epochs=50)
# Generate text from a trained model
def generate_text(model, tokenizer, seed_text, maxlen, temperature=1.0, num_chars=400):
    generated_text = seed_text
    for _ in range(num_chars):
        encoded = tokenizer.texts_to_sequences([seed_text])[0]
        # The model expects a batch of sequences, so keep at most the last maxlen tokens
        # and add a batch dimension
        encoded = np.array([encoded[-maxlen:]])
        predicted_probs = model.predict(encoded, verbose=0)[0]
        # Use the temperature to get more varied predictions
        predicted_probs = np.log(predicted_probs) / temperature
        exp_preds = np.exp(predicted_probs)
        predicted_probs = exp_preds / np.sum(exp_preds)
        predicted = np.random.choice(len(predicted_probs), p=predicted_probs)
        next_char = tokenizer.index_word.get(predicted, '')
        generated_text += next_char
        seed_text += next_char
        seed_text = seed_text[1:]
    return generated_text

generated_russian_text = generate_text(model_russian, tokenizer_russian, 'Ты к моему', maxlen, temperature=0.5, num_chars=400)
st.write(generated_russian_text)
generated_english_text = generate_text(model_english, tokenizer_english, 'In the', maxlen, temperature=0.5, num_chars=400)
st.write(generated_english_text)

View File

@@ -0,0 +1,5 @@
Believe in me, to my distress,
As the monk believes in a shrine,
As the maiden believes in a miracle,
As weary travelers believe
In the evening star on their journey.

View File

@@ -0,0 +1,5 @@
Ты, к моему несчастью, верь,
Как в святыню, верит монах,
Как в чудо, верит дева,
Как верят в вечернюю звезду
Печальные странники в пути.

Binary file not shown.

After

Width:  |  Height:  |  Size: 62 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 60 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 65 KiB

View File

@@ -0,0 +1,61 @@
# Лабораторная работа №1. Работа с типовыми наборами данных и различными моделями
## 12 вариант
___
### Задание:
Используя код из пункта «Регуляризация и сеть прямого распространения», сгенерируйте определенный тип данных и сравните на нем 3 модели (по варианту). Постройте графики, отобразите качество моделей, объясните полученные результаты.
### Данные по варианту:
- make_classification (n_samples=500, n_features=2, n_redundant=0, n_informative=2, random_state=rs, n_clusters_per_class=1)
### Модели по варианту:
- Линейная регрессия
- Персептрон
- Гребневая полиномиальная регрессия (со степенью 4, alpha = 1.0)
___
### Запуск
- Запустить файл lab1.py
### Используемые технологии
- Язык программирования **Python**
- Среда разработки **PyCharm**
- Библиотеки:
* numpy
* sklearn
* matplotlib
### Описание программы
Программа генерирует набор данных с помощью функции make_classification()
с заданными по варианту параметрами. После этого происходит вывод в консоль
качества данных моделей по варианту и построение графиков для этих моделей.
Оценка точности происходит при помощи встроенного в модели метода
**.score()**, который вычисляет правильность модели для набора данных.
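
A minimal side-by-side sketch of this comparison (assuming the variant's `make_classification` parameters; unlike the lab script below, it scores on a hold-out split):
```
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LinearRegression, Perceptron, Ridge
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures

X, y = make_classification(n_samples=500, n_features=2, n_redundant=0,
                           n_informative=2, random_state=0, n_clusters_per_class=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=40)

models = {
    "linear regression": LinearRegression(),
    "perceptron": Perceptron(),
    "poly ridge (degree=4, alpha=1.0)": make_pipeline(PolynomialFeatures(degree=4), Ridge(alpha=1.0)),
}
for name, model in models.items():
    model.fit(X_train, y_train)
    # .score() returns accuracy for classifiers and R^2 for regressors
    print(name, model.score(X_test, y_test))
```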
___
### Пример работы
![Graphics](1_linear_regression.png)
```text
===> Линейная регрессия <===
Оценка точности: 0.4513003751817972
```
___
![Graphics](2_perceptron.png)
```text
===> Персептрон <===
Оценка точности: 0.7591836734693878
```
___
![Graphics](3_poly_ridge.png)
```text
===> Гребневая полиномиальная регрессия <===
Оценка точности: 0.5312017992195672
```
### Вывод
Согласно выводу в консоль оценок точности, лучший результат показала модель **персептрона**

View File

@@ -0,0 +1,101 @@
# 12 вариант
# Данные: make_classification (n_samples=500, n_features=2, n_redundant=0,
# n_informative=2, random_state=rs, n_clusters_per_class=1)
# Модели:
# -- Линейную регрессию
# -- Персептрон
# -- Гребневую полиномиальную регрессию (со степенью 4, alpha = 1.0)
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LinearRegression, Perceptron, Ridge
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap
cm_bright_1 = ListedColormap(['#7FFFD4', '#00FFFF'])
cm_bright_2 = ListedColormap(['#FF69B4', '#FF1493'])
def main():
X, y = make_classification(
n_samples=500,
n_features=2,
n_redundant=0,
n_informative=2,
random_state=0,
n_clusters_per_class=1)
rng = np.random.RandomState(2)
X += 2 * rng.uniform(size=X.shape)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=10, random_state=40)
# модели на основе сгенерированных данных
my_linear_regression(X_train, X_test, y_train, y_test)
my_perceptron(X_train, X_test, y_train, y_test)
my_poly_ridge(X_train, X_test, y_train, y_test)
# Линейная регрессия
def my_linear_regression(X_train, X_test, y_train, y_test):
lin_reg_model = LinearRegression() # создание модели регрессии
lin_reg_model.fit(X_train, y_train) # обучение
y_pred = lin_reg_model.predict(X_test) # предсказание по тестовым данным
# вывод в консоль
print()
print('===> Линейная регрессия <===')
print('Оценка точности: ', lin_reg_model.score(X_train, y_train))
# вывод в график
plt.title('Линейная регрессия')
plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright_1)
plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright_2, alpha=0.8)
plt.plot(X_test, y_pred, color='red', linewidth=1)
plt.savefig('1_linear_regression.png')
plt.show()
# Персептрон
def my_perceptron(X_train, X_test, y_train, y_test):
perceptron_model = Perceptron()
perceptron_model.fit(X_train, y_train)
y_pred = perceptron_model.predict(X_test)
# вывод в консоль
print()
print('===> Персептрон <===')
print('Оценка точности: ', perceptron_model.score(X_train, y_train))
# вывод в график
plt.title('Персептрон')
plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright_1)
plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright_2, alpha=0.8)
plt.plot(X_test, y_pred, color='red', linewidth=1)
plt.savefig('2_perceptron.png')
plt.show()
# Гребневая полиномиальная регрессия (степень=4, alpha=1.0)
def my_poly_ridge(X_train, X_test, y_train, y_test):
poly_rige_model = make_pipeline(PolynomialFeatures(degree=4), Ridge(alpha=1.0))
poly_rige_model.fit(X_train, y_train)
y_pred = poly_rige_model.predict(X_test)
# вывод в консоль
print()
print('===> Гребневая полиномиальная регрессия <===')
print('Оценка точности: ', poly_rige_model.score(X_train, y_train))
# вывод в график
plt.title('Гребневая полиномиальная регрессия')
plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright_1)
plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright_2, alpha=0.8)
plt.plot(X_test, y_pred, color='red', linewidth=1)
plt.savefig('3_poly_ridge.png')
plt.show()
main()

View File

@@ -0,0 +1,71 @@
# Лабораторная работа №2. Ранжирование признаков
## 12 вариант
___
### Задание:
Используя код из пункта «Решение задачи ранжирования признаков»,
выполните ранжирование признаков с помощью указанных по варианту моделей.
Отобразите получившиеся значения\оценки каждого признака каждым методом\моделью
и среднюю оценку. Проведите анализ получившихся результатов.
Какие четыре признака оказались самыми важными по среднему значению?
(Названия\индексы признаков и будут ответом на задание).
### Модели по варианту:
- Лассо (Lasso)
- Рекурсивное сокращение признаков (Recursive Feature Elimination RFE)
- Линейная корреляция (f_regression)
___
### Запуск
- Запустить файл lab2.py
### Используемые технологии
- Язык программирования **Python**
- Среда разработки **PyCharm**
- Библиотеки:
* numpy
* sklearn
### Описание программы
В качестве примера взята регрессионная проблема Фридмана. На вход
моделей подано 15 факторов. Выход рассчитывается по формуле, использующей
только первые пять факторов, при этом факторы x12-x15 являются зашумлёнными копиями x1-x4 и потому взаимозависимы с ними.
Последовательность действий:
1. Генерация данных по Фридману
2. Создание и обучение моделей по варианту
3. Ранжирование признаков по этим моделям с присвоением имён этим признакам
4. Вывод признаков моделей по убыванию значения оценки
5. Вывод среднего значения по каждому признаку по убыванию
Программа показывает, как разные виды регрессий оценят важности
факторов и какой из них будет иметь наибольшую среднюю значимость
по всем трём моделям по варианту.
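A compact sketch of the ranking scheme described above (illustrative scores, not the lab's actual numbers): each model's absolute scores are min-max scaled to [0, 1] and then averaged per feature.
```
import numpy as np
from sklearn.preprocessing import MinMaxScaler

def rank_to_dict(scores, names):
    # absolute values scaled to [0, 1], as in the lab's rank_to_dict
    scaled = MinMaxScaler().fit_transform(np.abs(scores).reshape(-1, 1)).ravel()
    return dict(zip(names, np.round(scaled, 2)))

names = ["x1", "x2", "x3"]
ranks = {
    "Lasso": rank_to_dict(np.array([0.8, 0.1, 0.0]), names),
    "F_reg": rank_to_dict(np.array([12.0, 3.0, 0.5]), names),
}
mean = {n: round(np.mean([r[n] for r in ranks.values()]), 2) for n in names}
print(sorted(mean.items(), key=lambda kv: kv[1], reverse=True))
```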
---
### Пример работы
![Graphics](result.jpg)
```text
---> Lasso <---
[('x15', 1.0), ('x2', 0.88), ('x1', 0.82), ('x4', 0.38), ('x5', 0.38), ('x11', 0.01), ('x3', 0.0), ('x6', 0.0), ('x7', 0.0), ('x8', 0.0), ('x9', 0.0), ('x10', 0.0), ('x12', 0.0), ('x13', 0.0), ('x14', 0.0)]
---> RFE <---
[('x9', 1.0), ('x12', 0.88), ('x10', 0.75), ('x6', 0.62), ('x7', 0.5), ('x11', 0.38), ('x8', 0.25), ('x4', 0.12), ('x1', 0.0), ('x2', 0.0), ('x3', 0.0), ('x5', 0.0), ('x13', 0.0), ('x14', 0.0), ('x15', 0.0)]
---> F_reg <---
[('x4', 1.0), ('x15', 1.0), ('x2', 0.34), ('x13', 0.34), ('x1', 0.3), ('x12', 0.29), ('x5', 0.07), ('x6', 0.01), ('x3', 0.0), ('x7', 0.0), ('x8', 0.0), ('x9', 0.0), ('x10', 0.0), ('x11', 0.0), ('x14', 0.0)]
Средние значения по каждому признаку:
[('x15', 0.67), ('x4', 0.5), ('x2', 0.41), ('x12', 0.39), ('x1', 0.37), ('x9', 0.33), ('x10', 0.25), ('x6', 0.21), ('x7', 0.17), ('x5', 0.15), ('x11', 0.13), ('x13', 0.11), ('x8', 0.08), ('x3', 0.0), ('x14', 0.0)]
```
---
### Вывод
Согласно выводу в консоль ранжированных признаков, был выявлен топ-4 самых важных признаков по среднему значению:
1. **x15**
2. **x4**
3. **x2**
4. **x12**

View File

@@ -0,0 +1,114 @@
"""
Используя код из пункта «Решение задачи ранжирования признаков»,
выполните ранжирование признаков с помощью указанных по варианту моделей.
Отобразите получившиеся значения\оценки каждого признака каждым методом\моделью и среднюю оценку.
Проведите анализ получившихся результатов. Какие четыре признака оказались самыми важными по среднему значению?
(Названия\индексы признаков и будут ответом на задание)
"""
# 12 вариант
# Лассо (Lasso)
# Рекурсивное сокращение признаков (Recursive Feature Elimination RFE)
# Линейная корреляция (f_regression)
import numpy as np
from sklearn.linear_model import Lasso, LinearRegression
from sklearn.feature_selection import RFE
from sklearn.feature_selection import f_regression
from sklearn.preprocessing import MinMaxScaler
def main():
X, Y = friedman_regression_problem(800)
''' Создание и обучение моделей '''
# Лассо
lasso_model = Lasso(alpha=.05)
lasso_model.fit(X, Y)
# Рекурсивное сокращение признаков
lr = LinearRegression()
lr.fit(X, Y)
rfe_model = RFE(estimator=lr)
rfe_model.fit(X, Y)
# Линейная корреляция
f, p_val = f_regression(X, Y)
# список имён признаков
names = ["x%s" % i for i in range(1, 16)]
# словарь вызова функций моделей
ranks = {}
ranks["Lasso"] = rank_to_dict(lasso_model.coef_, names)
ranks["RFE"] = rank_to_dict(rfe_model.ranking_, names)
ranks["F_reg"] = rank_to_dict(f, names)
# вывод признаков и оценок каждой модели
print_sorted_model(ranks)
# пустой список данных
mean = {}
# Формирование среднего по каждому признаку
for key, value in ranks.items():
for item in value.items():
if item[0] not in mean: #если элемента с текущим ключом нет
mean[item[0]] = 0 #добавляем
mean[item[0]] += item[1] #суммируем значения по каждому ключу-имени признака
# Поиск среднего по каждому признаку
for key, value in mean.items():
res = value / len(ranks)
mean[key] = round(res, 2)
# Сортировка и распечатка списка
mean = sorted(mean.items(), key=lambda item: item[1], reverse=True)
print("\033[92mСредния значения по каждому признаку:\033[00m")
print(mean)
# Генерация набора данных по регрессионной проблеме Фридмана
def friedman_regression_problem(size):
# генерируем исходные данные: 800 строк-наблюдений и 15 столбцов-признаков
np.random.seed(0)
X = np.random.uniform(0, 1, (size, 15))
# Задание функции-выхода (регрессионная проблема Фридмана)
Y = (10 * np.sin(np.pi * X[:,0] * X[:,1]) + 20 * (X[:,2] - .5)**2 + 10*X[:,3] + 5*X[:,4]**5) + np.random.normal(0, 1)
# Добавление в зависимость признаков
X[:,11:] = X[:,:4] + np.random.normal(0, .025, (size, 4))
return X, Y
# Функция формирования словаря пар "имя_признака: оценка признака"
def rank_to_dict(ranks, names):
ranks = np.abs(ranks) #получение абсолютных значений оценок
r_array = np.array(ranks) #создание массива списка оценок
r_array = r_array.reshape(15, 1) #переформирование строк и столбцов в массиве
minmax = MinMaxScaler() # экземпляр для нормализации данных
ranks = minmax.fit_transform(r_array) #обучение и преобразование данных
ranks = ranks.ravel() #преобразование двумерного массива в одномерный
ranks = map(lambda x: round(x, 2), ranks) #округление каждого элемента массива до сотых
return dict(zip(names, ranks))
# Функция вывода признаков моделей по убыванию значения оценки
def print_sorted_model(ranks):
ranks_copy = dict(ranks)
for key, value in ranks_copy.items():
ranks_copy[key] = sorted(value.items(), key=lambda item: item[1], reverse=True)
for key, value in ranks_copy.items():
print("\033[92m---> {} <---\033[00m" .format(key))
print(value)
print()
if __name__ == "__main__":
main()

Binary file not shown.

After

Width:  |  Height:  |  Size: 60 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 45 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 41 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

View File

@@ -0,0 +1,73 @@
# Лабораторная работа №3. Деревья решений
## 12 вариант
___
### Задание:
Решите с помощью библиотечной реализации дерева решений задачу из
лабораторной работы «Веб-сервис «Дерево решений» по предмету
«Методы искусственного интеллекта» на 99% ваших данных.
Проверьте работу модели на оставшемся проценте, сделайте вывод.
### Вариант набора данных по курсовой работе:
- Прогнозирование музыкальных жанров
___
### Запуск
- Запустить файл lab3.py
### Используемые технологии
- Язык программирования **Python**
- Среда разработки **PyCharm**
- Библиотеки:
* pandas
* sklearn
### Описание программы
**Набор данных (Kaggle):** Полный список жанров, включенных в CSV:
«Электронная музыка», «Аниме», «Джаз», «Альтернатива», «Кантри», «Рэп»,
«Блюз», «Рок», «Классика», «Хип-хоп».
**Задача, решаемая деревом решений:** Классификация музыкальных треков на
основе их характеристик, таких как темп, инструментальность, акустичность,
речевость, танцевальность, энергичность, живость. Дерево решений может
предсказывать жанр трека, основываясь на его характеристиках.
**Задачи оценки:** оценить качество работы модели дерева решений и выявить
наиболее значимые признаки набора данных.
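A minimal sketch of the described setup, assuming `music_genre.csv` with the feature columns listed above (the full lab script follows below):
```
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

df = pd.read_csv('music_genre.csv')
features = ['tempo', 'instrumentalness', 'acousticness', 'speechiness',
            'danceability', 'energy', 'liveness']
df = df[df['tempo'] != '?'].dropna(subset=features + ['music_genre'])
X = df[features].astype(float)
y = df['music_genre']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.01)
tree = DecisionTreeClassifier().fit(X_train, y_train)
print('accuracy:', accuracy_score(y_test, tree.predict(X_test)))
# feature_importances_ gives the relative weight of each feature in the splits
print(dict(zip(features, tree.feature_importances_)))
```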
---
### Пример работы
*Датасет, сформированный из случайных строк csv-файла.*
![Graphics](1_dataset.jpg)
---
*Сравнение на оставшихся неиспользованных 0,5% строк датасета
предсказанных и действительных жанров.*
![Graphics](2_accuracy_score.jpg)
---
*Вычисленные коэффициенты влияния признаков на прогноз жанра*
![Graphics](3_feature_importances.jpg)
---
### Вывод
Посредством предобработки датасета дерево решений без проблем обучилось и
частично верно предсказало некоторые жанры (в частности, Электро, Классику
и Рэп). Также модель показала оценку влиятельности признаков на прогноз
жанра. Самым влиятельным признаком оказалась **акустичность** музыкального
трека. Менее значимыми оказались речевость (преобладание голосов в треке) и
инструментальность (преобладание живых инструментов в треке), что звучит
вполне разумно.
На практике дерево решений по качеству классификации уступает некоторым
другим методам. Помимо этого, небольшие изменения в данных могут существенно
изменять построенное дерево решений. На примере моего датасета дерево решений
справилось не очень успешно. Это можно объяснить тем, что данных в нём
недостаточно для предсказания жанра. Но также стоит отметить, что
жанр - одно из самых неоднозначных и многосоставных музыкальных понятий.

View File

@@ -0,0 +1,69 @@
"""
Решите с помощью библиотечной реализации дерева решений задачу из лабораторной работы
«Веб-сервис «Дерево решений» по предмету «Методы искусственного интеллекта» на 99% ваших данных.
Проверьте работу модели на оставшемся проценте, сделайте вывод.
"""
"""
Задача, решаемая деревом решений: Классификация музыкальных треков на основе их характеристик,
таких как акустика, танцевальность, инструментальность, темп и т.д.
Дерево решений может предсказывать жанр трека, основываясь на его характеристиках.
"""
# 12 вариант
# Набор данных по курсовой: "Prediction of music genre"
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
DATASET_FILE = 'music_genre.csv'
def main():
df = open_dataset(DATASET_FILE)
df = df.sample(frac=.1) # отбираем 10% рандомных строк с набора данных, т.к. он большой
print("\033[92m[-----> Набор данных <-----]\033[00m")
print(df)
X = df.drop(columns=['music_genre']) # набор числовых признаков
y = df['music_genre'] # набор соответствующих им жанров
# Разделение датасета на тренировочные (99,5%) и тестовые данные (0,5%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.005)
# Создание и обучение дерева решений
model = DecisionTreeClassifier()
model.fit(X_train.values, y_train)
# Прогнозирование жанра на тестовых данных
y_pred = model.predict(X_test.values)
print("\033[92m\n\n\n[-----> Сравнение жанров <-----]\033[00m")
df_result = pd.DataFrame({'Прогноз': y_pred, 'Реальность': y_test})
print(df_result)
score = accuracy_score(y_test, y_pred)
print("\033[92m\n> Оценка точности модели: {}\033[00m" .format(round(score, 2)))
print("\033[92m\n\n\n[-----> Оценки важности признаков <-----]\033[00m")
df_feature = pd.DataFrame({'Признак': X.columns, "Важность": model.feature_importances_})
print(df_feature)
# Функция считывания и очищения csv-файла
def open_dataset(csv_file):
# открываем файл с указанием знака-отделителя
df_genres = pd.read_csv(csv_file, delimiter=',')
# выбираем необходимые признаки
df_genres = df_genres[['tempo', 'instrumentalness', 'acousticness', 'speechiness', 'danceability', 'energy', 'liveness', 'music_genre']]
# очищаем набор данных от пустых и неподходящих значений
df_genres = df_genres[df_genres['tempo'] != '?']
df_genres = df_genres.dropna()
return df_genres
if __name__ == "__main__":
main()

File diff suppressed because it is too large Load Diff

Binary file not shown.

After

Width:  |  Height:  |  Size: 37 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 36 KiB

View File

@@ -0,0 +1,78 @@
# Лабораторная работа №4. Кластеризация
## 12 вариант
___
### Задание:
Использовать для своих данных метод кластеризации по варианту,
самостоятельно сформулировав задачу. Интерпретировать результаты и оценить,
насколько хорошо он подходит для решения сформулированной вами задачи.
### Вариант:
- Алгоритм кластеризации: **linkage**
### Вариант набора данных по курсовой работе:
- Прогнозирование музыкальных жанров ("Prediction of music genre")
___
### Запуск
- Запустить файл lab4.py
### Используемые технологии
- Язык программирования **Python**
- Среда разработки **PyCharm**
- Библиотеки:
* pandas
* scipy
* matplotlib
### Описание программы
**Набор данных (Kaggle):** Полный список жанров, включенных в CSV:
«Электронная музыка», «Аниме», «Джаз», «Альтернатива», «Кантри», «Рэп»,
«Блюз», «Рок», «Классика», «Хип-хоп».
**Задача, решаемая алгоритмом кластеризации:**
Группировка музыкальных треков на основе их характеристик с целью создания
кластеров треков со схожими характеристиками. Алгоритм кластеризации может
помочь в создании плейлистов и рекомендаций, основанных на схожести
музыкальных треков по некоторым характеристикам.
**Задача оценки:**
Анализ получившейся иерархической структуры с помощью дендрограммы.
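A minimal sketch of this pipeline on synthetic data (the real lab script below works on the prepared track features): Ward `linkage`, a `dendrogram` for the hierarchy, and `fcluster` to cut it into clusters.
```
import numpy as np
from scipy.cluster.hierarchy import linkage, dendrogram, fcluster
import matplotlib.pyplot as plt

rng = np.random.default_rng(0)
X = rng.normal(size=(60, 5))                         # stand-in for the numeric track features

Z = linkage(X, method='ward', metric='euclidean')    # hierarchical merge tree
labels = fcluster(Z, t=8.0, criterion='distance')    # cut the tree at a distance threshold
print('clusters:', labels.max())

dendrogram(Z, truncate_mode='lastp', p=20)
plt.show()
```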
---
### Пример работы
*Датасет, сформированный из случайных строк csv-файла.*
![Graphics](2_dataset.jpg)
---
*Визуализация дерева, представляющего иерархическое слияние кластеров,
в виде дендрограммы. Это может быть полезно для понимания структуры данных.*
![Graphics](1_dendrogram.png)
---
*Вывод первых 10 музыкальных треков из датасета с их
принадлежностью к кластеру*
![Graphics](3_clusters.jpg)
### Вывод
С моими данными алгоритм справляется довольно успешно. На результате выше
можно сравнить два трека — "Gake No Ue No Ponyo" и "He Would Have Laughed".
В результате работы программы они были отнесены к кластеру №10.
При этом первый трек отнесён к жанру "Anime", а второй — к "Alternative".
Тем не менее, эти две песни похожи преобладанием инструментала в них
(в особенности перкуссии), а также наличием ирландских мотивов в них.
В ходе работы было проверено 8 пар музыкальных треков, принадлежащих
к разным кластерам. Как итог, больше половины пар действительно имели
много схожего в звучании или концепте аранжировки, несмотря на различия
по некоторым характеристикам (в том числе жанр).
Из плюсов иерархической кластеризации можно выделить отсутствие
конкретного количества кластеров, для поиска похожей музыки это
явно преимущество. Из минусов же — слишком медленная работа
на больших наборах данных (из-за чего и было взято 50% от всего датасета).

View File

@@ -0,0 +1,85 @@
"""
Использовать для своих данных метод кластеризации по варианту, самостоятельно сформулировав задачу.
Интерпретировать результаты и оценить, насколько хорошо он подходит для решения сформулированной вами задачи.
"""
"""
Задача, решаемая алгоритмом кластеризации:
Группировка музыкальных треков на основе их характеристик с целью создания кластеров треков
с схожими характеристиками. Алгоритм кластеризации может помочь в создании плейлистов и рекомендаций,
основанных на схожести музыкальных треков по некоторым характеристикам.
"""
# 12 вариант
# Набор данных по курсовой: "Prediction of music genre"
# Алгоритм кластеризации: linkage
import pandas as pd
from scipy.cluster.hierarchy import linkage, dendrogram, fcluster
import matplotlib.pyplot as plt
DATASET_FILE = 'music_genre.csv'
def main():
df = open_dataset(DATASET_FILE)
df = df.sample(frac=.5) # отбираем 50% рандомных строк с набора данных, т.к. он большой
print("\033[92m[-----> Набор данных <-----]\033[00m")
print(df)
# Перевод жанров и ладов (минор/мажор) в числовые признаки
df_genres = pd.get_dummies(df['music_genre'])
df_modes = pd.get_dummies(df['mode'])
# Объединение основной таблицы с числовыми признаками
df_music = pd.concat([df, df_genres, df_modes], axis=1).reindex(df.index)
# Удаление строковых столбцов, которые заменили на числовые признаки
df_music = df_music.drop(columns=['music_genre', 'mode'])
# Датасет для работы с кластеризацией (без исполнителя и названия трека)
X = df_music.drop(columns=['artist_name', 'track_name'])
# Иерархическая кластеризация с связью ward
# (минимизация суммы квадратов разностей во всех кластерах)
linkage_matrix = linkage(X, method='ward', metric='euclidean')
# Формирование кластеров из матрицы связей
cluster_label = fcluster(Z=linkage_matrix, t=300, criterion='distance')
# Присвоение кластера треку
df['cluster'] = cluster_label
# Установка опции показа 3 столбцов при выводе
pd.set_option('display.max_columns', 3)
# Вывод результата кластеризации
print("\033[92m\nЫ[-----> Результат иерархической кластеризации <-----]\033[00m")
print(df[['artist_name', 'track_name', 'cluster']].head(10))
print("\033[92mКоличество кластеров: {}\033[00m" .format(cluster_label.max()))
# Дендрограмма
plt.figure(figsize=(12, 6))
dendrogram(linkage_matrix, truncate_mode='lastp', p=20, leaf_rotation=90., leaf_font_size=8., show_contracted=True)
plt.title('Дендрограмма иерархической кластеризации музыкальных треков')
plt.xlabel('Количество треков в узле')
plt.ylabel('Евклидово расстояние между треками')
plt.savefig('1_dendrogram')
plt.show()
# Функция считывания и очищения csv-файла
def open_dataset(csv_file):
# открываем файл с указанием знака-отделителя
df = pd.read_csv(csv_file, delimiter=',')
# выбираем необходимые признаки
df = df[['artist_name', 'track_name', 'mode', 'tempo', 'instrumentalness', 'acousticness',
'speechiness', 'danceability', 'energy', 'liveness', 'valence', 'music_genre']]
# очищаем набор данных от пустых и неподходящих значений
df = df[df['tempo'] != '?']
df = df.dropna()
return df
if __name__ == "__main__":
main()

File diff suppressed because it is too large Load Diff

Binary file not shown.

After

Width:  |  Height:  |  Size: 76 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 184 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 43 KiB

View File

@@ -0,0 +1,77 @@
# Лабораторная работа №3. Вариант 21
## Тема:
Деревья решений
## Модель:
Decision Tree Classifier
## Как запустить программу:
Установить *python, numpy, matplotlib, sklearn*
```
python main.py
```
## Какие технологии использовались:
Язык программирования Python, библиотеки numpy, matplotlib, sklearn
Среда разработки VSCode
# Что делает лабораторная работа:
Использует данные из набора "UCI Heart Disease Data" и обучает модель: ```Decision Tree Classifier```
Датасет UCI Heart Disease Data содержит информацию о различных клинических признаках, таких как возраст, пол, артериальное давление, холестерин, наличие электрокардиографических признаков и другие, а также целевую переменную, отражающую наличие или отсутствие заболевания сердца.
Для начала нужно предобработать данные, чтобы модель могла принимать их на вход. Изначально данные имеют следующий вид:
![](1.png "")
Так как модели машинного обучения умеют работать исключительно с числовыми значениями, то нужно свести все данных к данному формату и использовать только полные строки, значение признаков которых не являются пустыми значениями. Это происходит с использованием функции, представленной ниже:
![](2.png "")
Далее нужно привести целевое значение к бинарному виду, т.к. изначально данное поле принимает 4 значения. После этого применяется подход, называемый “feature engineering”, для получения большего количества признаков, которые, возможно, помогут модели при решении задачи: в машинном и глубоком обучении обычно действует логика «больше данных - лучше результат». Получение новых признаков происходит с помощью функции ниже, после чего обновлённый набор данных снова преобразуется к численному формату.
```
def fe_creation(df):
# Feature engineering (FE)
df['age2'] = df['age']//10
df['trestbps2'] = df['trestbps']//10
df['chol2'] = df['chol']//60
df['thalch2'] = df['thalch']//40
df['oldpeak2'] = df['oldpeak']//0.4
for i in ['sex', 'age2', 'fbs', 'restecg', 'exang']:
for j in ['cp','trestbps2', 'chol2', 'thalch2', 'oldpeak2', 'slope']:
df[i + "_" + j] = df[i].astype('str') + "_" + df[j].astype('str')
return df
```
После применения данной функции количество признаков увеличилось с 12 до 47. Далее все признаки стандартизируются по формуле z = (x - mean) / std, где x - текущее значение признака, mean - математическое ожидание столбца с этим признаком, std - стандартное отклонение данного признака, а z - соответственно новое значение признака x. После всех описанных действий данные готовы к обучению деревьев.
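A quick illustrative check that `StandardScaler` computes exactly this z = (x - mean) / std transformation:
```
import numpy as np
from sklearn.preprocessing import StandardScaler

x = np.array([[1.0], [2.0], [3.0], [4.0]])
z_manual = (x - x.mean()) / x.std()           # z = (x - mean) / std
z_scaler = StandardScaler().fit_transform(x)  # same result, computed column-wise
print(np.allclose(z_manual, z_scaler))        # True
```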
```Decision Tree Classifier```- это алгоритм машинного обучения, который использует структуру дерева для принятия решений. Каждый узел дерева представляет собой тест по какому-то признаку, а каждая ветвь представляет возможный результат этого теста. Цель - разделить данные на подгруппы так, чтобы в каждой подгруппе преобладал один класс.
```
decision_tree = DecisionTreeClassifier()
param_grid = {'min_samples_leaf': [i for i in range(2,12)]}
decision_tree_CV = GridSearchCV(decision_tree, param_grid=param_grid, cv=cv_train, verbose=False)
decision_tree_CV.fit(train, train_target)
print(decision_tree_CV.best_params_)
acc_all = acc_metrics_calc(0, acc_all, decision_tree_CV, train, valid, train_target, valid_target, title="Decision Tree Classifier")
plot_learning_curve(decision_tree_CV, "Decision Tree", train, train_target, cv=cv_train)
feature_importances_dt = decision_tree_CV.best_estimator_.feature_importances_
plot_feature_importance(feature_importances_dt, data.columns, "Decision Tree")
```
Первым был обучен Decision Tree Classifier, который с помощью алгоритма GridSearch нашел наилучшие гиперпараметры для решения задачи. Ниже приведены графики, отображающие качество и процесс обучения данного классификатора.
![](3.png "")
На следующем графике мы можем увидеть какие признаки модель посчитала наиболее важными:
![](4.png "")
## Вывод
На обучающих данных мы в большинстве случаев предсказываем правильно, а на валидационных появляется проблема с выявлением второго класса, который отображает наличие заболевания.

View File

@@ -0,0 +1,302 @@
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
from sklearn.preprocessing import (LabelEncoder,
StandardScaler,
MinMaxScaler,
RobustScaler)
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold, learning_curve, ShuffleSplit
from sklearn.model_selection import cross_val_predict as cvp
from sklearn import metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error, accuracy_score, confusion_matrix, explained_variance_score
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import RandomForestClassifier
def str_features_to_numeric(data):
# Преобразовывает все строковые признаки в числовые.
# Определение категориальных признаков
categorical_columns = []
numerics = ['int8', 'int16', 'int32', 'int64', 'float16', 'float32', 'float64']
features = data.columns.values.tolist()
for col in features:
if data[col].dtype in numerics: continue
categorical_columns.append(col)
# Кодирование категориальных признаков
for col in categorical_columns:
if col in data.columns:
le = LabelEncoder()
le.fit(list(data[col].astype(str).values))
data[col] = le.transform(list(data[col].astype(str).values))
return data
def fe_creation(df):
# Feature engineering (FE)
df['age2'] = df['age']//10
df['trestbps2'] = df['trestbps']//10
df['chol2'] = df['chol']//60
df['thalch2'] = df['thalch']//40
df['oldpeak2'] = df['oldpeak']//0.4
for i in ['sex', 'age2', 'fbs', 'restecg', 'exang']:
for j in ['cp','trestbps2', 'chol2', 'thalch2', 'oldpeak2', 'slope']:
df[i + "_" + j] = df[i].astype('str') + "_" + df[j].astype('str')
return df
def acc_d(y_meas, y_pred):
# Относительная погрешность между прогнозируемыми значениями y_pred и измеренными значениями y_meas
return mean_absolute_error(y_meas, y_pred)*len(y_meas)/sum(abs(y_meas))
def acc_rmse(y_meas, y_pred):
# Среднеквадратичная ошибка между прогнозируемыми значениями y_pred и измеренными значениями y_meas
return (mean_squared_error(y_meas, y_pred))**0.5
def plot_cm(train_target, train_target_pred, valid_target, valid_target_pred, title):
# Построение матриц ошибок
def cm_calc(y_true, y_pred):
cm = confusion_matrix(y_true, y_pred, labels=np.unique(y_true))
cm_sum = np.sum(cm, axis=1, keepdims=True)
cm_perc = cm / cm_sum.astype(float) * 100
annot = np.empty_like(cm).astype(str)
nrows, ncols = cm.shape
for i in range(nrows):
for j in range(ncols):
c = cm[i, j]
p = cm_perc[i, j]
if i == j:
s = cm_sum[i]
annot[i, j] = '%.1f%%\n%d/%d' % (p, c, s)
elif c == 0:
annot[i, j] = ''
else:
annot[i, j] = '%.1f%%\n%d' % (p, c)
cm = pd.DataFrame(cm, index=np.unique(y_true), columns=np.unique(y_true))
cm.index.name = 'Actual'
cm.columns.name = 'Predicted'
return cm, annot
# Построение матриц ошибок
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 6), sharex=True)
# Обучающие данные
ax = axes[0]
ax.set_title("for training data")
cm0, annot0 = cm_calc(train_target, train_target_pred)
sns.heatmap(cm0, cmap= "YlGnBu", annot=annot0, fmt='', ax=ax)
# Тестовые данные
ax = axes[1]
ax.set_title("for test (validation) data")
cm1, annot1 = cm_calc(valid_target, valid_target_pred)
sns.heatmap(cm1, cmap= "YlGnBu", annot=annot1, fmt='', ax=ax)
fig.suptitle(f'CONFUSION MATRICES for {title}')
plt.savefig(f'CONFUSION MATRICES for {title}.png')
plt.show()
def acc_metrics_calc(num, acc_all, model, train, valid, train_target, valid_target, title):
# Этап выбора моделей
# Расчет точности модели по различным показателям
ytrain = model.predict(train).astype(int)
yvalid = model.predict(valid).astype(int)
print('train_target = ', train_target[:5].values)
print('ytrain = ', ytrain[:5])
print('valid_target =', valid_target[:5].values)
print('yvalid =', yvalid[:5])
num_acc = 0
for x in metrics_now:
if x == 1:
#критерий точности score
acc_train = round(metrics.accuracy_score(train_target, ytrain), 2)
acc_valid = round(metrics.accuracy_score(valid_target, yvalid), 2)
elif x == 2:
# rmse критерий
acc_train = round(acc_rmse(train_target, ytrain), 2)
acc_valid = round(acc_rmse(valid_target, yvalid), 2)
elif x == 3:
# критерий относительной погрешности
acc_train = round(acc_d(train_target, ytrain) * 100, 2)
acc_valid = round(acc_d(valid_target, yvalid) * 100, 2)
print('acc of', metrics_all[x], 'for train =', acc_train)
print('acc of', metrics_all[x], 'for valid =', acc_valid)
acc_all[num_acc].append(acc_train) #train
acc_all[num_acc+1].append(acc_valid) #valid
num_acc += 2
# Построение матриц
plot_cm(train_target, ytrain, valid_target, yvalid, title)
return acc_all
def plot_feature_importance(feature_importances, feature_names, model_name):
import matplotlib.pyplot as plt
import seaborn as sns
# Создание цветовой палитры
colors = sns.color_palette('viridis', len(feature_importances))
# Сортировка индексов важностей признаков
indices = feature_importances.argsort()[::-1]
# Создание стильного барплота
plt.figure(figsize=(12, 8))
ax = sns.barplot(x=feature_importances[indices], y=feature_names[indices], palette=colors)
# Добавление декораций
plt.xlabel('Важность признака', fontsize=14)
plt.ylabel('Признаки', fontsize=14)
plt.title(f'Важность признаков в модели {model_name}', fontsize=16)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
# Добавление цветовой шкалы и ее описания
cbar = plt.colorbar(plt.cm.ScalarMappable(cmap='viridis'), ax=ax)
cbar.set_label('Уровень важности', rotation=270, labelpad=15, fontsize=12)
# Добавление сетки для лучшей читаемости
plt.grid(axis='x', linestyle='--', alpha=0.6)
# Сохранение графика в файл
plt.savefig('feature_importance_plot.png', bbox_inches='tight')
# Отображение графика
plt.savefig(f'feature_importances_{model_name}.png')
plt.show()
def plot_learning_curve(estimator, title, X, y, cv=None, axes=None, ylim=None,
n_jobs=None, train_sizes=np.linspace(.1, 1.0, 5), random_state=0):
fig, axes = plt.subplots(2, 1, figsize=(20, 10))
if axes is None:
_, axes = plt.subplots(1, 2, figsize=(20, 5))
axes[0].set_title(title)
if ylim is not None:
axes[0].set_ylim(*ylim)
axes[0].set_xlabel("Training examples")
axes[0].set_ylabel("Score")
cv_train = ShuffleSplit(n_splits=cv_n_split, test_size=test_train_split_part, random_state=random_state)
train_sizes, train_scores, test_scores, fit_times, _ = \
learning_curve(estimator=estimator, X=X, y=y, cv=cv,
train_sizes=train_sizes,
return_times=True)
train_scores_mean = np.mean(train_scores, axis=1)
train_scores_std = np.std(train_scores, axis=1)
test_scores_mean = np.mean(test_scores, axis=1)
test_scores_std = np.std(test_scores, axis=1)
fit_times_mean = np.mean(fit_times, axis=1)
fit_times_std = np.std(fit_times, axis=1)
# Plot learning curve
axes[0].grid()
axes[0].fill_between(train_sizes, train_scores_mean - train_scores_std,
train_scores_mean + train_scores_std, alpha=0.1,
color="r")
axes[0].fill_between(train_sizes, test_scores_mean - test_scores_std,
test_scores_mean + test_scores_std, alpha=0.1,
color="g")
axes[0].plot(train_sizes, train_scores_mean, 'o-', color="r",
label="Training score")
axes[0].plot(train_sizes, test_scores_mean, 'o-', color="g",
label="Cross-validation score")
axes[0].legend(loc="best")
# Plot n_samples vs fit_times
axes[1].grid()
axes[1].plot(train_sizes, fit_times_mean, 'o-')
axes[1].fill_between(train_sizes, fit_times_mean - fit_times_std,
fit_times_mean + fit_times_std, alpha=0.1)
axes[1].set_xlabel("Training examples")
axes[1].set_ylabel("fit_times")
axes[1].set_title("Scalability of the model")
plt.savefig(f'{title}.png')
plt.show()
return
if __name__ == "__main__":
# Загрузка данных
# Преобразование данных и предобработка
# Обучение моделей Decision Tree Classifier и Random Forest Classifier
# Расчет метрик и построение графиков
cv_n_split = 5
random_state = 42
test_train_split_part = 0.25
metrics_all = {1: 'acc', 2 : 'rmse', 3 : 're'}
metrics_now = [1, 2, 3]
data = pd.read_csv("..//heart_disease_uci.csv")
data['target'] = data['num']
data = data.drop(columns=['id', 'dataset', 'ca', 'thal', 'num'])
data = data[(data['chol'] <= 420) & (data['oldpeak'] >=0) & (data['oldpeak'] <=4)].reset_index(drop=True)
data = data.dropna().reset_index(drop=True)
print(data.info())
data = str_features_to_numeric(data)
data = data[data['target'].isin([0, 1])] # приводим столбец с целевыми значениями к бинарному виду
data = fe_creation(data)
data = str_features_to_numeric(data)
dataset = data.copy() # original data
target_name = 'target'
target = data.pop(target_name)
# Model standartization
# The standard score of a sample x is calculated as:
# z = (x - мат.ож.) / (стандартное отклонение)
scaler = StandardScaler()
data = pd.DataFrame(scaler.fit_transform(data), columns = data.columns)
train, valid, train_target, valid_target = train_test_split(data, target, test_size=test_train_split_part, random_state=random_state)
# list of accuracy of all model - amount of metrics_now * 2 (train & valid datasets)
num_models = 6
acc_train = []
acc_valid = []
acc_all = np.empty((len(metrics_now)*2, 0)).tolist()
acc_all
acc_all_pred = np.empty((len(metrics_now), 0)).tolist()
acc_all_pred
cv_train = ShuffleSplit(n_splits=cv_n_split, test_size=test_train_split_part, random_state=random_state)
decision_tree = DecisionTreeClassifier()
param_grid = {'min_samples_leaf': [i for i in range(2,12)]}
decision_tree_CV = GridSearchCV(decision_tree, param_grid=param_grid, cv=cv_train, verbose=False)
decision_tree_CV.fit(train, train_target)
print(decision_tree_CV.best_params_)
acc_all = acc_metrics_calc(0, acc_all, decision_tree_CV, train, valid, train_target, valid_target, title="Decision Tree Classifier")
plot_learning_curve(decision_tree_CV, "Decision Tree", train, train_target, cv=cv_train)
feature_importances_dt = decision_tree_CV.best_estimator_.feature_importances_
plot_feature_importance(feature_importances_dt, data.columns, "Decision Tree")

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 60 KiB

View File

@@ -0,0 +1,76 @@
# Лабораторная работа №4. Вариант 21
## Тема:
Кластеризация
## Модель:
KMeans
## Как запустить программу:
Установить *python, numpy, matplotlib, sklearn*
```
python main.py
```
## Какие технологии использовались:
Язык программирования Python, библиотеки numpy, matplotlib, sklearn
Среда разработки VSCode
# Что делает лабораторная работа:
Задача кластеризации заключается в разделении множества данных на группы, называемые кластерами, таким образом, чтобы объекты внутри одного кластера были более похожи друг на друга, чем на объекты из других кластеров. Это позволяет выявлять скрытые структуры данных, облегчая последующий анализ и принятие решений.
В данной работе была рассмотрена модель ```KMeans```.
### Описание:
```KMeans``` разбивает данные на K кластеров, где K - заранее заданное число. Он минимизирует сумму квадратов расстояний между точками данных и центрами своих соответствующих кластеров. Этот алгоритм прост в реализации и хорошо работает для сферических кластеров.
Кластеризация данных - это мощный инструмент для выделения закономерностей в больших объемах информации, и выбор конкретного алгоритма зависит от характера данных и поставленных задач. В данной работе мы рассмотрим эти алгоритмы более подробно, выявим их преимущества и недостатки, и проиллюстрируем их применение на практике.
Процесс получения кластеров происходит по следующему алгоритму:
&nbsp; 1. &nbsp;Получаем исходные данные
&nbsp; 2. &nbsp;Приводим их все к численному формату
&nbsp; 3. &nbsp;Обучаем модель на подготовленных данных
```
def clustering_df(X, n, m, output_hist, title='clusters_by'):
X_columns = X.columns
scaler = StandardScaler()
scaler.fit(X)
X = pd.DataFrame(scaler.transform(X), columns = X_columns)
cl = generate_clustering_algorithms(X, n, m)
cl.fit(X)
if hasattr(cl, 'labels_'):
labels = cl.labels_.astype(np.uint8)
else:
labels = cl.predict(X)
clusters=pd.concat([X, pd.DataFrame({'cluster':labels})], axis=1)
```
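To illustrate the claim above that `KMeans` minimizes the within-cluster sum of squared distances, here is a minimal sketch on synthetic points (not the lab's dataset); the minimized quantity is exposed as `inertia_`:
```
import numpy as np
from sklearn.cluster import KMeans

rng = np.random.default_rng(0)
X = np.vstack([rng.normal(0, 1, (50, 2)), rng.normal(5, 1, (50, 2))])

km = KMeans(n_clusters=2, n_init=10, random_state=0).fit(X)
# inertia_ is the within-cluster sum of squared distances that KMeans minimizes
print(km.labels_[:5], km.inertia_)
```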
Для кластеризации были выбраны все столбцы, часть кода представлена ниже:
```
print(data.select_dtypes(include='object').columns.tolist())
for column in data.select_dtypes(include='object').columns.tolist():
data[column] = pd.factorize(data[column])[0]
```
Программа генерирует диаграммы для каждого кластера относительно всех признаков. Для меня наиболее интересным показались признаки возраста и наличия заболевания человека.
![](1.png "")
![](2.png "")
Изучая графики выше, мы можем сделать вывод, что люди из кластера №3 почти все болеют и большинство имеет 2,3 и 4 стадии. А возраст этих людей от 45 до 70 лет.
Ниже приложен результат обучения алгоритма кластеризации:
![](3.png "")
## Вывод
Я думаю, что алгоритм ```KMeans``` справился достаточно хорошо, т.к. в нем каждый кластер получился обособленным, то есть более отличным от других кластеров. Следовательно, это может означать, что именно этот алгоритм смог понять ключевые признаки для каждого кластера.

View File

@@ -0,0 +1,166 @@
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import cluster, mixture
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans, DBSCAN, OPTICS
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import kneighbors_graph
from itertools import cycle, islice
import warnings
warnings.simplefilter('ignore')
def generate_clustering_algorithms(Z, n_clusters, m):
# Generate clustering algorithms:
# m = 'MeanShift', 'KMeans', 'MiniBatchKMeans'
# The minimal percentage of similarity of the clustered feature with "Survived" for inclusion in the final dataset
limit_opt = 0.7
params = {'quantile': .2,
'eps': .3,
'damping': .9,
'preference': -200,
'n_neighbors': 10,
'n_clusters': n_clusters,
'min_samples': 3,
'xi': 0.05,
'min_cluster_size': 0.05}
# estimate bandwidth for mean shift
bandwidth = cluster.estimate_bandwidth(Z, quantile=params['quantile'])
# connectivity matrix for structured Ward
connectivity = kneighbors_graph(
Z, n_neighbors=params['n_neighbors'], include_self=False)
# make connectivity symmetric
connectivity = 0.5 * (connectivity + connectivity.T)
# ============
# Create cluster objects
# ============
if m == 'MeanShift':
cl = cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True)
elif m == 'KMeans':
cl = cluster.KMeans(n_clusters=n_clusters, random_state = 1000)
elif m == 'MiniBatchKMeans':
cl = cluster.MiniBatchKMeans(n_clusters=n_clusters)
return cl
def clustering_df(X, n, m, output_hist, title='clusters_by'):
# Standardization
X_columns = X.columns
scaler = StandardScaler()
scaler.fit(X)
X = pd.DataFrame(scaler.transform(X), columns = X_columns)
cl = generate_clustering_algorithms(X, n, m)
cl.fit(X)
if hasattr(cl, 'labels_'):
labels = cl.labels_.astype(np.uint8)
else:
labels = cl.predict(X)
clusters=pd.concat([X, pd.DataFrame({'cluster':labels})], axis=1)
# Inverse Standardization
X_inv = pd.DataFrame(scaler.inverse_transform(X), columns = X_columns)
clusters_inv=pd.concat([X_inv, pd.DataFrame({'cluster':labels})], axis=1)
# Number of points in clusters
print("Number of points in clusters:\n", clusters['cluster'].value_counts())
# Data in clusters - thanks to https://www.kaggle.com/sabanasimbutt/clustering-visualization-of-clusters-using-pca
if output_hist:
for c in clusters:
grid = sns.FacetGrid(clusters_inv, col='cluster')
grid.map(plt.hist, c)
plt.savefig(f'{title}_by_method_{m}.png')
return clusters, clusters_inv
def plot_draw(X, title, m):
# Drawing a plot with clusters on the plane (using PCA transformation)
# Thanks to https://www.kaggle.com/sabanasimbutt/clustering-visualization-of-clusters-using-pca
dist = 1 - cosine_similarity(X)
# PCA transform
pca = PCA(2)
pca.fit(dist)
X_PCA = pca.transform(dist)
# Generate point numbers and colors for clusters
hsv = plt.get_cmap('hsv')
n_clusters = max(X['cluster'].value_counts().index)-min(X['cluster'].value_counts().index)+2
colors = list(hsv(np.linspace(0, 1, n_clusters)))
colors_num = list(np.linspace(min(X['cluster'].value_counts().index), max(X['cluster'].value_counts().index), n_clusters))
colors_num = [int(x) for x in colors_num]
colors_str = [str(x) for x in colors_num]
names_dict = dict(zip(colors_num, colors_str))
colors_dict = dict(zip(colors_num, colors))
# Visualization
x, y = X_PCA[:, 0], X_PCA[:, 1]
df = pd.DataFrame({'x': x, 'y':y, 'label':X['cluster'].tolist()})
groups = df.groupby('label')
fig, ax = plt.subplots(figsize=(12, 8))
for name, group in groups:
ax.plot(group.x, group.y, marker='o', linestyle='', ms=10,
color=colors_dict[name],
label=names_dict[name],
mec='none')
ax.set_aspect('auto')
ax.tick_params(axis='x',which='both',bottom='off',top='off',labelbottom='off')
ax.tick_params(axis= 'y',which='both',left='off',top='off',labelleft='off')
ax.legend(loc='upper right')
ax.set_title(f"{title} by method {m}")
plt.savefig(f'{title}_by_method_{m}.png')
plt.show()
if __name__ == "__main__":
data = pd.read_csv("..//heart_disease_uci.csv")
data = data.drop_duplicates().reset_index(drop=True)
print(data.select_dtypes(include='object').columns.tolist())
for column in data.select_dtypes(include='object').columns.tolist():
data[column] = pd.factorize(data[column])[0]
# print(pd.factorize(data[column])[0])
methods_all = ['KMeans', 'MiniBatchKMeans', 'MeanShift']
n_default = 6
data = data[data.notna().all(axis=1)]
res = dict(zip(methods_all, [False]*len(methods_all)))
n_clust = dict(zip(methods_all, [1]*len(methods_all)))
for method in methods_all:
print(f"Method - {method}")
Y, Y_inv = clustering_df(data.copy(), n_default, method, True)
# If the number of clusters is less than 2, then the clustering is not successful
n_cl = len(Y['cluster'].value_counts())
if n_cl > 1:
res[method] = True
n_clust[method] = n_cl
plot_draw(Y, "Data clustering", method)
else:
print('Clustering is not successful because all data is in one cluster!\n')

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

View File

@@ -0,0 +1,79 @@
# Лабораторная работа №5. Вариант 21
## Тема:
Регрессия
## Модель:
LinearRegression
## Как запустить программу:
Установить *python, numpy, matplotlib, sklearn*
```
python lab.py
```
## Какие технологии использовались:
Язык программирования Python, библиотеки numpy, matplotlib, sklearn
Среда разработки VSCode
# Что делает лабораторная работа:
Поскольку артериальное давление пациента в состоянии покоя является важным медицинским показателем, оно было выбрано для предсказания на основе доступных признаков, таких как возраст, пол и других.
Внедрение линейной регрессии в решение задачи прогнозирования артериального давления в состоянии покоя приносит несколько ключевых преимуществ.
Линейная регрессия является мощным инструментом в области статистики и машинного обучения, широко применяемым для анализа и моделирования связей между зависимыми и независимыми переменными. Ее основная цель — построить линейную функцию, наилучшим образом приближающую отношение между входными данными и целевой переменной. Это позволяет предсказывать значения целевой переменной на основе новых входных данных.
### Описание:
```LinearRegression``` основана на методе наименьших квадратов: он стремится минимизировать сумму квадратов разностей между фактическими и предсказанными значениями. Этот алгоритм предоставляет аналитическое решение для определения коэффициентов линейной модели, что делает его эффективным и простым для понимания.
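As a small illustration of the least-squares idea (toy data, not the lab's dataset), the coefficients found by `LinearRegression` coincide with the ordinary least-squares solution computed directly with `np.linalg.lstsq`:
```
import numpy as np
from sklearn.linear_model import LinearRegression

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 3))
y = X @ np.array([2.0, -1.0, 0.5]) + 3.0 + rng.normal(scale=0.1, size=100)

reg = LinearRegression().fit(X, y)
# least-squares solution for the same design matrix with an explicit intercept column
w, *_ = np.linalg.lstsq(np.c_[np.ones(len(X)), X], y, rcond=None)
print(reg.intercept_, reg.coef_)  # ~3.0, ~[2.0, -1.0, 0.5]
print(w)                          # same values: [intercept, coefficients...]
```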
Процесс обучения линейной регрессии требует выполнения следующих шагов:
&nbsp; 1. &nbsp;Получить исходные данные
&nbsp; 2. &nbsp;Выбрать целевое значение, которые нужно предсказывать
&nbsp; 3. &nbsp;Обработать данные таким образом, чтобы все признаки имели только числовой формат, и добавить нормализацию, или иначе, стандартизацию данных
&nbsp; 4. &nbsp;Провести обучение выбранной модели на подготовленных данных
Обработка данных происходит с помощью функции ```str_features_to_numeric```:
```
def str_features_to_numeric(data):
# Преобразовывает все строковые признаки в числовые.
# Определение категориальных признаков
categorical_columns = []
numerics = ['int8', 'int16', 'int32', 'int64', 'float16', 'float32', 'float64']
features = data.columns.values.tolist()
for col in features:
if data[col].dtype in numerics: continue
categorical_columns.append(col)
# Кодирование категориальных признаков
for col in categorical_columns:
if col in data.columns:
le = LabelEncoder()
le.fit(list(data[col].astype(str).values))
data[col] = le.transform(list(data[col].astype(str).values))
return data
```
Далее происходит нормализация с помощью ```StandardScaler```.
В качестве целевого признака было выбрано артериальное давление в состоянии покоя ```trestbps``` (в мм рт. ст. при поступлении в больницу). Обработанные данные поступают на вход обучения модели линейной регрессии:
![](1.png "")
- reg.score(X, y) - отображает качество (точность) модели на переданных данных
- reg.coef_ - отображает коэффициенты при признаках, расположенных по порядку
- reg.intercept_ - показывает параметр смещения (в английской литературе bias); короткий пример обращения к этим атрибутам приведён ниже
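
A minimal usage sketch of these attributes on toy data (hypothetical values, not the lab's results):
```
import numpy as np
from sklearn.linear_model import LinearRegression

X = np.array([[0.0], [1.0], [2.0], [3.0]])
y = np.array([1.0, 3.0, 5.0, 7.0])   # y = 2*x + 1

reg = LinearRegression().fit(X, y)
print(reg.score(X, y))    # R^2 on the given data (1.0 here)
print(reg.coef_)          # [2.0]
print(reg.intercept_)     # 1.0
```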
## Вывод
На основе полученных результатов, можно сказать, что классическая модель линейной регрессии является более чем подходящей для решения именно этой конкретной задачи

View File

@@ -0,0 +1,87 @@
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
from sklearn.linear_model import LinearRegression, SGDRegressor, Ridge
from sklearn.preprocessing import (LabelEncoder,
StandardScaler,
MinMaxScaler,
RobustScaler)
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold, learning_curve, ShuffleSplit
def str_features_to_numeric(data):
# Преобразовывает все строковые признаки в числовые.
# Определение категориальных признаков
categorical_columns = []
numerics = ['int8', 'int16', 'int32', 'int64', 'float16', 'float32', 'float64']
features = data.columns.values.tolist()
for col in features:
if data[col].dtype in numerics: continue
categorical_columns.append(col)
# Кодирование категориальных признаков
for col in categorical_columns:
if col in data.columns:
le = LabelEncoder()
le.fit(list(data[col].astype(str).values))
data[col] = le.transform(list(data[col].astype(str).values))
return data
if __name__ == "__main__":
data = pd.read_csv("..//heart_disease_uci.csv")
data['target'] = data['trestbps']
data = data.drop(columns=['id', 'dataset', 'trestbps'])
data_wo_null = data.dropna()
print(len(data_wo_null))
encoded_data_wo_null = str_features_to_numeric(data_wo_null)
print(len(encoded_data_wo_null))
# Model standartization
# The standard score of a sample x is calculated as:
# z = (x - мат.ож.) / (стандартное отклонение)
scaler = StandardScaler()
new_data = pd.DataFrame(scaler.fit_transform(encoded_data_wo_null), columns = encoded_data_wo_null.columns)
dataset = data_wo_null.copy() # original data
target_name = 'target'
target = data_wo_null.pop(target_name)
test_train_split_part = 0.2
random_state = 42
train, valid, train_target, valid_target = train_test_split(new_data, target,
test_size=test_train_split_part,
random_state=random_state)
reg = LinearRegression().fit(train, train_target)
print("---"*15, " LinearRegression ", "---"*15)
print(f"Accuracy: {reg.score(valid, valid_target)}")
print(f"коэффициенты: {reg.coef_}")
print(f"Смещение относительно начала координат (bias): {reg.intercept_}")
SGD_reg = SGDRegressor(max_iter=1000, tol=1e-3)
SGD_reg.fit(train, train_target)
print("---"*15, " SGDRegressor ", "---"*15)
print(f"Accuracy: {SGD_reg.score(valid, valid_target)}")
print(f"коэффициенты: {SGD_reg.coef_}")
print(f"Смещение относительно начала координат (bias): {SGD_reg.intercept_}")
Ridge_clf = Ridge(alpha=1.0)
Ridge_clf.fit(train, train_target)
print("---"*15, " Ridge ", "---"*15)
print(f"Accuracy: {Ridge_clf.score(valid, valid_target)}")
print(f"коэффициенты: {Ridge_clf.coef_}")
print(f"Смещение относительно начала координат (bias): {Ridge_clf.intercept_}")

Binary file not shown.

After

Width:  |  Height:  |  Size: 158 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 98 KiB

View File

@@ -0,0 +1,47 @@
# Лабораторная работа №6. Вариант 21
## Тема:
Нейронная сеть
## Модель:
MLPClassifier
## Как запустить программу:
Установить *python, numpy, matplotlib, sklearn*
```
python lab.py
```
## Какие технологии использовались:
Язык программирования Python, библиотеки numpy, matplotlib, sklearn
Среда разработки VSCode
# Что делает лабораторная работа:
В ходе исследования нейронных сетей, в особенности многослойных перцептронов (MLP), был проведен тщательный анализ влияния архитектуры сети на её производительность в задаче классификации стадий сердечных заболеваний. Эксперименты с различными конфигурациями слоев и их размерами позволили более глубоко понять, какие параметры сети оказывают наибольшее влияние на точность прогнозов.
В качестве MLP в коде использовался класс ```sklearn.neural_network.MLPClassifier```, и целевой задачей являлось предсказание наличия болезни сердца (0 - отсутствует, а 1, 2, 3, 4 - стадии)
Процесс подготовки данных и обучения MLP представлен на изображении ниже; ```качество оценки составило 0.83``` - данное число представляет точность и вычисляется как отношение правильных ответов к общему количеству ответов. Важно отметить, что данный MLP состоял только из ```одного скрытого слоя с размером = 100```.
![](1.png "")
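For reference, a tiny sketch of how this accuracy value is computed (the ratio of correct predictions to all predictions, the same quantity that `MLPClassifier.score` reports for classification):
```
import numpy as np

y_true = np.array([0, 1, 1, 0, 1, 0])
y_pred = np.array([0, 1, 0, 0, 1, 1])
accuracy = (y_true == y_pred).mean()   # correct answers / all answers
print(accuracy)                        # 0.666...
```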
При MLP, содержащем два скрытых слоя с размерами ```300``` и ```100``` соответственно, получилось добиться ```точности примерно 0.92```.
![](2.png "")
При MLP, содержащем четыре скрытых слоя с размерами ```150, 100, 50 и 50``` соответственно, получилось добиться ```точности в 0.95```.
![](3.png "")
MLP, который содержит 5 скрытых слоев с размерами ```100, 400, 600, 400, 100```, то есть самая большая с точки зрения архитектуры модель, имеет наилучший показатель точности.
![](4.png "")
## Вывод
На основе проведенных экспериментов можно сделать вывод, что при усложнении архитектуры нейронной сети мы получаем улучшение в ее качестве.
![](res.png "")

View File

@@ -0,0 +1,86 @@
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
from sklearn.neural_network import MLPClassifier
import argparse
from sklearn.preprocessing import (LabelEncoder,
StandardScaler,
MinMaxScaler,
RobustScaler)
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold, learning_curve, ShuffleSplit
def get_arguments():
parser = argparse.ArgumentParser()
parser.add_argument('--id_pred', type=int, default=1, help='Какой id из тестовой выборки будем предсказывать')
args = parser.parse_args()
return args
def str_features_to_numeric(data):
# Преобразовывает все строковые признаки в числовые.
# Определение категориальных признаков
categorical_columns = []
numerics = ['int8', 'int16', 'int32', 'int64', 'float16', 'float32', 'float64']
features = data.columns.values.tolist()
for col in features:
if data[col].dtype in numerics: continue
categorical_columns.append(col)
# Кодирование категориальных признаков
for col in categorical_columns:
if col in data.columns:
le = LabelEncoder()
le.fit(list(data[col].astype(str).values))
data[col] = le.transform(list(data[col].astype(str).values))
return data
if __name__ == "__main__":
args = get_arguments()
data = pd.read_csv("..//heart_disease_uci.csv")
data['target'] = data['num']
data = data.drop(columns=['id', 'dataset', 'num'])
data_wo_null = data.dropna()
print(len(data_wo_null))
data_wo_null.head(3)
encoded_data_wo_null = str_features_to_numeric(data_wo_null)
scaler = StandardScaler()
new_data = pd.DataFrame(scaler.fit_transform(encoded_data_wo_null), columns = encoded_data_wo_null.columns)
dataset = data_wo_null.copy() # original data
target_name = 'target'
target = data_wo_null.pop(target_name)
X_train, X_test, y_train, y_test = train_test_split(new_data, target, test_size=0.2, random_state=42)
clf = MLPClassifier(random_state=42, max_iter=300, hidden_layer_sizes=(100)).fit(X_train, y_train)
print("---"*15, " MLPClassifier(100) ", "---"*15)
print(f"Accuracy: {clf.score(X_test, y_test)}")
clf2 = MLPClassifier(random_state=42, max_iter=300, hidden_layer_sizes=(300, 100)).fit(X_train, y_train)
print("---"*15, " MLPClassifier(300, 100) ", "---"*15)
print(f"Accuracy: {clf2.score(X_test, y_test)}")
clf3 = MLPClassifier(random_state=42, max_iter=300, hidden_layer_sizes=(150, 100, 50, 50)).fit(X_train, y_train)
print("---"*15, " MLPClassifier(150, 100, 50, 50) ", "---"*15)
print(f"Accuracy: {clf3.score(X_test, y_test)}")
clf4 = MLPClassifier(random_state=42, max_iter=300, hidden_layer_sizes=(100, 400, 600, 400, 100)).fit(X_train, y_train)
print("---"*15, " MLPClassifier(100, 400, 600, 400, 100) ", "---"*15)
print(f"Accuracy: {clf4.score(X_test, y_test)}")
print("---"*15, f" Предсказание элемента под id = {args.id_pred}", "---"*15)
print(f"Предсказанное значение: {clf3.predict(np.array(list(X_test.iloc[args.id_pred])).reshape(1, -1))}")
print(f"Настоящее значение {y_test.iloc[args.id_pred]}")

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

View File

@@ -0,0 +1,52 @@
# Laboratory work No. 7. Variant 21
## Topic
Recurrent neural network and the text generation task
## Task
- Choose a literary text and train a recurrent neural network on it to solve the text generation task.
- Tune the architecture and parameters so that the output is as meaningful as possible.
## Resources used
&nbsp;1. &nbsp;A literary text in English, ```wonderland.txt```
&nbsp;2. &nbsp;Python scripts: ```generate.py```, ```model.py```, ```train.py```.
## Description of the work
### Data preparation:
The file ```train.py``` implements the ```get_data``` function, which loads the literary text, converts it to lower case, and builds a mapping from characters to integer values.
The text is split into sequences of fixed length ```seq_length```, and each sequence is paired with the character that follows it.
The data are converted to PyTorch tensors and normalized for training the model.
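A condensed sketch of this windowing step is shown below (illustrative only; ```raw_text``` and ```char_to_int``` are assumed to be built as in ```get_data``` from ```train.py```):

```python
# Condensed sketch of the sliding-window preparation (full version: get_data in train.py).
# raw_text and char_to_int are assumed to exist already.
import torch

seq_length = 100
dataX, dataY = [], []
for i in range(len(raw_text) - seq_length):
    dataX.append([char_to_int[c] for c in raw_text[i:i + seq_length]])  # input window
    dataY.append(char_to_int[raw_text[i + seq_length]])                 # the character that follows
X = torch.tensor(dataX, dtype=torch.float32).reshape(-1, seq_length, 1) / len(char_to_int)
y = torch.tensor(dataY)
```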
### Model architecture:
The file ```model.py``` defines the ```CharModel``` class, which inherits from ```nn.Module``` and represents a recurrent neural network.
The architecture consists of a single LSTM layer with a hidden state size of 256, a dropout layer for regularization, and a linear layer that produces the output.
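A quick shape check of this architecture (a sketch with a hypothetical vocabulary of 50 characters and the ```seq_length``` of 100 used in this lab):

```python
# Shape check for CharModel: a batch of windows -> logits over the vocabulary.
# The vocabulary size of 50 is a hypothetical value for illustration.
import torch
from model import CharModel

model = CharModel(n_vocab=50)
dummy = torch.zeros(8, 100, 1)   # 8 windows of 100 characters, one feature per character
print(model(dummy).shape)        # torch.Size([8, 50])
```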
### Model training:
The file ```train.py``` implements the training script. The Adam optimizer is used; the loss function is ```CrossEntropyLoss```.
Training runs on the GPU when one is available. The model is trained for several epochs, with validation after each epoch, and the best model state is saved.
Model training process:
![](train_process.png "")
### Text generation:
In ```generate.py``` the model is loaded from its saved state. A random prompt is taken from the source text, and the model is used to predict the next character in a loop.
## Conclusion:
![](generated_text.png "")
The generated text contains meaningful passages, so we can conclude that the model has learned the structure of the text reasonably well.

View File

@@ -0,0 +1,46 @@
import numpy as np
import torch

from model import CharModel

if __name__ == "__main__":
    # Load the best model state and the character mapping saved by train.py
    best_model, char_to_int = torch.load("single-char.pth")
    n_vocab = len(char_to_int)
    int_to_char = dict((i, c) for c, i in char_to_int.items())
    model = CharModel(n_vocab)
    model.load_state_dict(best_model)

    # Randomly pick a prompt from the source text
    filename = "wonderland.txt"
    seq_length = 100
    raw_text = open(filename, 'r', encoding='utf-8').read()
    raw_text = raw_text.lower()
    start = np.random.randint(0, len(raw_text) - seq_length)
    prompt = raw_text[start:start + seq_length]
    pattern = [char_to_int[c] for c in prompt]

    model.eval()
    print(f'Prompt:\n{prompt}')
    print("===" * 15, "Generated result", "===" * 15, sep=" ")
    with torch.no_grad():
        for i in range(1000):
            # format the input array of ints as a PyTorch tensor
            x = np.reshape(pattern, (1, len(pattern), 1)) / float(n_vocab)
            x = torch.tensor(x, dtype=torch.float32)
            # generate logits as output from the model
            prediction = model(x)
            # convert logits into one character
            index = int(prediction.argmax())
            result = int_to_char[index]
            print(result, end="")
            # append the new character to the pattern for the next iteration
            pattern.append(index)
            pattern = pattern[1:]
    print()
    print("===" * 30)
    print("Done.")


View File

@@ -0,0 +1,16 @@
import torch.nn as nn


class CharModel(nn.Module):
    def __init__(self, n_vocab):
        super().__init__()
        self.lstm = nn.LSTM(input_size=1, hidden_size=256, num_layers=1, batch_first=True)
        self.dropout = nn.Dropout(0.2)
        self.linear = nn.Linear(256, n_vocab)

    def forward(self, x):
        x, _ = self.lstm(x)
        # take only the last output
        x = x[:, -1, :]
        # produce the output logits
        x = self.linear(self.dropout(x))
        return x


View File

@@ -0,0 +1,86 @@
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data

from model import CharModel


def get_data(filename="wonderland.txt"):
    # load the dataset and convert it to lower case
    raw_text = open(filename, 'r', encoding='utf-8').read()
    raw_text = raw_text.lower()

    # map each character to an integer value
    chars = sorted(list(set(raw_text)))
    char_to_int = dict((c, i) for i, c in enumerate(chars))

    # statistics of the training data
    n_chars = len(raw_text)
    n_vocab = len(chars)
    print("Total Characters: ", n_chars)
    print("Total Vocab: ", n_vocab)

    # prepare the dataset: sliding windows of seq_length characters -> the next character
    seq_length = 100
    dataX = []
    dataY = []
    for i in range(0, n_chars - seq_length, 1):
        seq_in = raw_text[i:i + seq_length]
        seq_out = raw_text[i + seq_length]
        dataX.append([char_to_int[char] for char in seq_in])
        dataY.append(char_to_int[seq_out])
    n_patterns = len(dataX)
    print("Total Patterns: ", n_patterns)

    # convert the data to tensors so that PyTorch can work with them
    X = torch.tensor(dataX, dtype=torch.float32).reshape(n_patterns, seq_length, 1)
    X = X / float(n_vocab)
    y = torch.tensor(dataY)
    print(X.shape, y.shape)
    return X, y, char_to_int


def main():
    X, y, char_to_int = get_data()
    n_epochs = 40
    batch_size = 128

    model = CharModel(len(char_to_int))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(f"device: {device}")
    model.to(device)

    optimizer = optim.Adam(model.parameters())
    loss_fn = nn.CrossEntropyLoss(reduction="sum")
    loader = data.DataLoader(data.TensorDataset(X, y), shuffle=True, batch_size=batch_size)

    best_model = None
    best_loss = np.inf
    for epoch in range(n_epochs):
        # training
        model.train()
        for X_batch, y_batch in loader:
            y_pred = model(X_batch.to(device))
            loss = loss_fn(y_pred, y_batch.to(device))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # validation: keep the model state with the lowest total loss
        model.eval()
        loss = 0
        with torch.no_grad():
            for X_batch, y_batch in loader:
                y_pred = model(X_batch.to(device))
                loss += loss_fn(y_pred, y_batch.to(device))
            if loss < best_loss:
                best_loss = loss
                best_model = model.state_dict()
            print("Epoch %d: Cross-entropy: %.4f" % (epoch, loss))

    torch.save([best_model, char_to_int], "single-char.pth")


if __name__ == "__main__":
    main()


File diff suppressed because it is too large

View File

@@ -0,0 +1 @@
Amidst the bustling cityscape, where the rhythm of life beats in harmony with the urban pulse, each dawn brings forth a cascade of city lights painting the skyline in hues of gold and amber. Strangers pass with nods and smiles, creating a tapestry of diverse connections. Skyscrapers line the streets, reflecting the vibrant energy of a metropolis in constant motion. As night falls, the city's heartbeat resonates in lively gatherings at eclectic eateries, where stories are exchanged, and the city's vibrant spirit comes alive.

View File

@@ -0,0 +1 @@
In the bustling cityscape where the rhythm of life beats in harmony with the urban pulse each dawn brings forth a cascade of city lights painting the skyline in hues of gold and amber strangers pass with nods and smiles creating a tapestry of diverse connections skyscrapers line the streets reflecting the vibrant energy of a metropolis in constant motion as night falls the city's heartbeat resonates in lively gatherings at

BIN
verina_daria_lab_7/img.png Normal file




View File

@@ -0,0 +1,68 @@
import numpy as np
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


def prepare_and_train_model(file_path, epochs):
    # Read the training data from the file
    with open(file_path, encoding='utf-8') as f:
        data = f.read()

    # Create the tokenizer
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts([data])

    # Convert the text into sequences of integers
    sequences = tokenizer.texts_to_sequences([data])

    # Build the training samples as growing n-gram sequences
    input_sequences = []
    for sequence in sequences:
        for i in range(1, len(sequence)):
            n_gram_sequence = sequence[:i + 1]
            input_sequences.append(n_gram_sequence)

    # Pad so that all sequences have the same length
    max_sequence_len = max([len(sequence) for sequence in input_sequences])
    input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')

    # Split into inputs and targets
    x, y = input_sequences[:, :-1], input_sequences[:, -1]

    # Build the recurrent neural network
    model = keras.Sequential([
        keras.layers.Embedding(len(tokenizer.word_index) + 1, 100, input_length=max_sequence_len - 1),
        keras.layers.Dropout(0.2),
        keras.layers.LSTM(150),
        keras.layers.Dense(len(tokenizer.word_index) + 1, activation='softmax')
    ])

    # Compile and train the model
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.fit(x, y, epochs=epochs, verbose=1)
    return model, tokenizer, max_sequence_len


def generate_text_from_model(model, tokenizer, max_sequence_len, seed_text, next_words):
    # Generate next_words words starting from seed_text
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1, padding='pre')
        predicted = model.predict(token_list)
        predict_index = np.argmax(predicted, axis=-1)
        word = tokenizer.index_word.get(predict_index[0], '')
        seed_text += " " + word
    return seed_text


model_rus, tokenizer_rus, max_sequence_len_rus = prepare_and_train_model('russian.txt', 150)
rus_text_generated = generate_text_from_model(model_rus, tokenizer_rus, max_sequence_len_rus, "В", 55)

model_eng, tokenizer_eng, max_sequence_len_eng = prepare_and_train_model('english.txt', 150)
eng_text_generated = generate_text_from_model(model_eng, tokenizer_eng, max_sequence_len_eng, "In the", 69)

with open('russian_generated.txt', 'w', encoding='utf-8') as f_rus:
    f_rus.write(rus_text_generated)
with open('english_generated.txt', 'w', encoding='utf-8') as f_eng:
    f_eng.write(eng_text_generated)

View File

@@ -0,0 +1,35 @@
# Text Generator Based on Recurrent Neural Networks
## General task
A literary English-language text was chosen for training a recurrent neural network (RNN) to generate text. The task includes selecting an architecture and parameters to get as close as possible to meaningful results. The next step is to exchange the trained networks with a partner, check how the partner's architecture copes with your text, and finally choose a compromise architecture that handles both kinds of text well.
## Task by variant
Variant: odd variant (literary English-language text).
## Running the program
The program can be run via the app.py file.
## Technologies
Programming language: Python
Libraries: TensorFlow, Keras, Flask
## Description of the program
The program implements text generation using recurrent neural networks (RNN) with the TensorFlow and Keras libraries. Flask is used to build a web application that interacts with the RNN model. The user enters a seed text through the web interface, after which the program sends a request to the server, which in turn uses the model to generate the continuation of the text based on the entered seed.
**Input data:**
A text file (for example, 'your_text_file.txt') containing the training data.
A web interface for entering the seed text.
**Output data:**
The generated text, displayed in the web interface.
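A minimal sketch of what such an endpoint could look like (the route and parameter names are assumptions; ```generate_text_from_model``` and the trained ```model_eng```, ```tokenizer_eng```, ```max_sequence_len_eng``` are taken from the training script in this lab):

```python
# Illustrative sketch of the web flow described above; the route and the "seed" parameter are assumptions.
from flask import Flask, request

app = Flask(__name__)

@app.route("/generate")
def generate():
    seed_text = request.args.get("seed", "In the")
    # model_eng, tokenizer_eng, max_sequence_len_eng and generate_text_from_model
    # are assumed to come from the training script shown in this lab
    return generate_text_from_model(model_eng, tokenizer_eng, max_sequence_len_eng, seed_text, 50)

if __name__ == "__main__":
    app.run(debug=True)
```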
## Console output:
![img_2.png](img_2.png)
![img_1.png](img_1.png)
![img.png](img.png)
## Resulting text:
In the bustling cityscape where the rhythm of life beats in harmony with the urban pulse each dawn brings forth a cascade of city lights painting the skyline in hues of gold and amber strangers pass with nods and smiles creating a tapestry of diverse connections skyscrapers line the streets reflecting the vibrant energy of a metropolis in constant motion as night falls the city's heartbeat resonates in lively gatherings at
## Conclusion:
As a result of this laboratory work, recurrent neural networks (RNN) for text generation in Russian and English were successfully built and trained.

View File

@@ -0,0 +1 @@
В захватывающем мире исследований глубокого космоса, где звезды танцуют свой бескрайний вальс, каждое утро начинается с таинственного свечения далеких галактик, окрашивая космическую панораму в оттенках изумрудных и сапфировых лучей. Космические путешественники встречают друг друга с уважением, обмениваясь впечатлениями о чудесах вселенной. Межзвездные аллеи украшены мерцающими астероидами, создавая ощущение бескрайнего волнения и удивления. По наступлении ночи исследователи созвездий собираются в космических кафе, где звездные истории обретают новые оттенки в мистической атмосфере.

View File

@@ -0,0 +1 @@
В захватывающем мире исследований глубокого где где звезды танцуют свой бескрайний вальс каждое каждое начинается с таинственного свечения далеких галактик окрашивая космическую панораму в оттенках изумрудных и сапфировых лучей космические путешественники встречают друг друга с уважением обмениваясь впечатлениями о чудесах вселенной межзвездные аллеи украшены мерцающими астероидами создавая ощущение бескрайнего волнения и удивления по наступлении ночи исследователи

View File

@@ -0,0 +1,120 @@
import numpy as np
from flask import Flask
from keras.layers import Dense, LSTM, Embedding
from keras.models import load_model, Sequential
from keras_preprocessing.text import Tokenizer
from keras_preprocessing.sequence import pad_sequences

app = Flask(__name__)


@app.route("/")
def home():
    return "<html>" \
           "<h1>Жукова Алина ПИбд-41</h1>" \
           "<h1>Laboratory work No. 7</h1>" \
           "<table>" \
           "<td>" \
           "<form Action='http://127.0.0.1:5000/k4_1_task_7' Method=get>" \
           "<input type=submit value='Text generation'>" \
           "</form>" \
           "</td>" \
           "</table>" \
           "</html>"


# Recurrent neural network, text generation
# Variant 10
@app.route("/k4_1_task_7", methods=['GET'])
def k4_1_task_7():
    # Load the text from a file
    # Russian text
    # with open('lab_4_1__7_text_rus.txt', 'r', encoding='utf-8') as file:
    #     text = file.read()
    # English text
    with open('lab_4_1__7_text_eng.txt', 'r', encoding='utf-8') as file:
        text = file.read()

    # Create the Tokenizer and tokenize the text at character level
    tokenizer = Tokenizer(char_level=True)
    tokenizer.fit_on_texts(text)

    # Convert the text into a sequence of numbers
    sequences = tokenizer.texts_to_sequences(text)

    # Prepare the training data
    seq_length = 100
    dataX, dataY = [], []
    for i in range(0, len(sequences) - seq_length):
        seq_in = sequences[i:i + seq_length]
        seq_out = sequences[i + seq_length]
        dataX.append(seq_in)
        dataY.append(seq_out)
    dataX = np.array(dataX)
    dataY = np.array(dataY)

    # Build the model
    vocab_size = len(tokenizer.word_index) + 1
    embedding_dim = 256
    rnn_units = 1024
    model = Sequential()
    model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=seq_length))
    model.add(LSTM(units=rnn_units))
    model.add(Dense(units=vocab_size, activation='softmax'))
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

    # Load a previously trained model
    # Russian model
    # model = load_model('lab_4_1__7_model.keras')
    # English model
    model = load_model('lab_4_1__7_model_eng.keras')
    print("Loaded model from disk")

    # Train the model
    # batch_size = 64
    # model.fit(dataX, dataY, epochs=15, batch_size=batch_size)

    def generate_text(seed_text, gen_length):
        # Generate gen_length characters one at a time, feeding each prediction back in
        generated_text = seed_text
        for _ in range(gen_length):
            sequence = tokenizer.texts_to_sequences([seed_text])[0]
            sequence = pad_sequences([sequence], maxlen=seq_length)
            prediction = model.predict(sequence)[0]
            predicted_index = np.argmax(prediction)
            predicted_char = tokenizer.index_word[predicted_index]
            generated_text += predicted_char
            seed_text += predicted_char
            seed_text = seed_text[1:]
        return generated_text

    # Usage example
    start_phraze = "Black cat"
    # Russian
    # generated_text = generate_text("Невероятный котик", 250)
    # English
    generated_text = generate_text(start_phraze, 250)

    # Save the model
    # Russian model
    # model.save('C:/Users/Alina/PycharmProjects/lab1/lab_4_1__7_model.keras')
    # English model
    # model.save('C:/Users/Alina/PycharmProjects/lab1/lab_4_1__7_model_eng.keras')
    # print("Saved model to disk")

    return "<html>" \
           "<h1></h1>" \
           "<h2>Variant 10. Task 7 - Text generation</h2>" \
           "<h2>Generated text, seed phrase " + start_phraze + ": " + str(generated_text) + " </h2>" \
           "</html>"


if __name__ == "__main__":
    app.run(debug=True)


View File

@@ -0,0 +1 @@
Cats are wonderful pets that many people love. They come in different colors and sizes. Cats are known for being playful and sometimes a bit lazy. These furry friends have soft paws and sharp claws. They use their claws for various things, like scratching to keep their claws healthy and marking their territory. Cats also have a special ability to land on their feet if they fall, which is really impressive. Cats enjoy their independence. They often like to explore their surroundings and might hide in cozy spots. They clean themselves by licking their fur and are usually very clean animals. Cats like to communicate with us using different sounds, like meowing, purring, and even hissing if theyre scared. They also use their tails to show how they feel. A wagging tail might mean theyre excited, while a puffed-up tail can mean theyre scared. Playing with cats using toys like balls or strings is lots of fun, and it keeps them active. They also like to nap a lot during the day. If youre thinking of having a cat as a pet, remember to give them love, care, and a cozy place to sleep. In short, cats are lovely pets with soft fur and sharp claws. Theyre independent, playful, and great at keeping clean. Cats talk to us with sounds and tails, and they enjoy playing and napping. If you have a cat, make sure to give them care and affection. I have a cat. Her name is Matilda. She is seven years old. She is grey with a few dark spots. Matilda has green eyes. She is quite fluffy and big. When we brought Matilda home, she was a little two-month-old kitten. At first, she was scared to leave a little blanket that my mom laid for her in the kitchen. A few days later, she started to explore the surroundings. Matilda is a very smart pet. She is a bit shy and always hides somewhere when we have guests. She is very independent and likes solitude. When Matilda was a little kitten she loved to play with different toys, balls and strings. Now most of the time she spends lying on the sofa or the armchair. We buy her cat food in the shop because this is all she eats. Apart from that she is also fond of fresh cucumbers. There are two dishes for her in the kitchen. One is filled with food, and the other contains water. My cat eats three times a day and likes drinking directly from the tap. Matilda rarely shows affection. Sometimes it seems that my mom is her most loved person. She likes to climb my moms lap and stay there for some time. Matilda purrs when my mom strokes her. It is hard to make Matilda sit on somebody elses lap. Nevertheless, I love Matilda very much! She is an amazing cat with immaculate manners.

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,80 @@
## Task
Recurrent neural network and the text generation task
Choose texts in Russian and English.
Train a neural network and tune its parameters
so as to get as close as possible to a meaningful result.
Interpret the results
Variant No. 10
## Technologies used
The following libraries were used in this laboratory work:
+ keras - used for working with the recurrent neural network and machine learning methods
+ numpy - provides work with arrays and matrices
+ Flask - provides a quick way to create web pages for visualizing the application
## Components used
+ Tokenizer - a text tokenization tool that counts word occurrences
+ Sequential - provides a linear stack of neural network layers
+ Embedding - a Keras layer that converts integer sequences into dense vectors
+ LSTM - a special recurrent neural network structure capable of learning long-term dependencies
+ Dense - provides dense (fully connected) neural network layers
## How to run
Run the flask-server file, which starts a local server
and makes the program available in the browser at [http://127.0.0.1:5000/](http://127.0.0.1:5000/)
## What the program does
Depending on the parameters in the code, the program trains a model and generates Russian or English text.
First, the text read from a file is tokenized and the neural network model is built.
The resulting recurrent neural network is then trained, after which it generates 250 characters
starting from a seed phrase.
## Analysis
The model gives the best results after 15 training iterations.
With too few iterations, the model generates not words but the same combination of letters over and over,
separating these combinations with spaces.
With 15 iterations on a text of about 3,400 characters, training took around 5-6 hours.
For this reason the program implements saving and loading of models, so that the network does not have to be
trained from scratch but can be fine-tuned on the same or new data (see the sketch after this paragraph).
With the chosen network architecture, the quality of the model dropped noticeably when new data were introduced.
On the same data, 15 iterations are enough for the network to generate words from the source text;
however, a meaningful text could not be achieved. This is evidently due to the chosen architecture,
the lack of data and insufficient training. We can conclude that training a neural network to generate
meaningful text on the available hardware is, if possible at all, very expensive.
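A sketch of the save/load/fine-tune pattern mentioned above (the epoch count and batch size are illustrative; ```model```, ```dataX``` and ```dataY``` come from the lab script):

```python
# Sketch: save the trained model, reload it in a later run and continue training
# instead of retraining from scratch. Epoch count and batch size are illustrative.
from keras.models import load_model

model.save('lab_4_1__7_model_eng.keras')           # save after the initial training
model = load_model('lab_4_1__7_model_eng.keras')   # reload later without retraining from scratch
model.fit(dataX, dataY, epochs=5, batch_size=64)   # fine-tune on the same or new data
```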
## Screenshots of the program
**Best result of Russian text generation.**
Russian text generated with sufficient training. Almost all generated words are real words;
however, the resulting text still cannot be called meaningful.
![img.png](img_screen_2.png)
**Other cases**
Russian text generated with insufficient training. Instead of words, the model produces letter combinations
that very quickly start to repeat in a loop.
![img.png](img_screen_1.png)
Russian text generated with overfitting. The network does not generate new text; it reproduces
a fragment of the training data.
![img.png](img_screen_3.png)
The same happens with the English text.
**Best result of English text generation.**
English text generated with sufficient training. Almost all generated words are real words;
however, the resulting text still cannot be called meaningful.
![img.png](img_screen_4.png)
When generating English text, the looping sets in sooner, possibly because fewer distinct characters are used.
![img.png](img_screen_5.png)