diff --git a/dz_les_8.ipynb b/dz_les_8.ipynb
new file mode 100644
index 0000000..2449ef4
--- /dev/null
+++ b/dz_les_8.ipynb
@@ -0,0 +1,3230 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "49b023d6",
+ "metadata": {},
+ "source": [
+ "# Тема “Обучение без учителя”\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5498618d",
+ "metadata": {},
+ "source": [
+ "## Задание 1\n",
+ "Импортируйте библиотеки pandas, numpy и matplotlib.\n",
+ "\n",
+ "Загрузите \"Boston House Prices dataset\" из встроенных наборов данных библиотеки sklearn.\n",
+ "\n",
+ "Создайте датафреймы X и y из этих данных.\n",
+ "\n",
+ "Разбейте эти датафреймы на тренировочные (X_train, y_train) и тестовые (X_test, y_test) с помощью функции train_test_split так, чтобы размер тестовой выборки составлял 20% от всех данных, при этом аргумент random_state должен быть равен 42.\n",
+ "\n",
+ "Масштабируйте данные с помощью StandardScaler.\n",
+ "\n",
+ "Постройте модель TSNE на тренировочных данных с параметрами: n_components=2, learning_rate=250, random_state=42.\n",
+ "\n",
+ "Постройте диаграмму рассеяния на этих данных."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "75198cc0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from matplotlib import pyplot as plt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "002a6a82",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " CRIM | \n",
+ " ZN | \n",
+ " INDUS | \n",
+ " CHAS | \n",
+ " NOX | \n",
+ " RM | \n",
+ " AGE | \n",
+ " DIS | \n",
+ " RAD | \n",
+ " TAX | \n",
+ " PTRATIO | \n",
+ " B | \n",
+ " LSTAT | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0.00632 | \n",
+ " 18.0 | \n",
+ " 2.31 | \n",
+ " 0.0 | \n",
+ " 0.538 | \n",
+ " 6.575 | \n",
+ " 65.2 | \n",
+ " 4.0900 | \n",
+ " 1.0 | \n",
+ " 296.0 | \n",
+ " 15.3 | \n",
+ " 396.90 | \n",
+ " 4.98 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 0.02731 | \n",
+ " 0.0 | \n",
+ " 7.07 | \n",
+ " 0.0 | \n",
+ " 0.469 | \n",
+ " 6.421 | \n",
+ " 78.9 | \n",
+ " 4.9671 | \n",
+ " 2.0 | \n",
+ " 242.0 | \n",
+ " 17.8 | \n",
+ " 396.90 | \n",
+ " 9.14 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 0.02729 | \n",
+ " 0.0 | \n",
+ " 7.07 | \n",
+ " 0.0 | \n",
+ " 0.469 | \n",
+ " 7.185 | \n",
+ " 61.1 | \n",
+ " 4.9671 | \n",
+ " 2.0 | \n",
+ " 242.0 | \n",
+ " 17.8 | \n",
+ " 392.83 | \n",
+ " 4.03 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 0.03237 | \n",
+ " 0.0 | \n",
+ " 2.18 | \n",
+ " 0.0 | \n",
+ " 0.458 | \n",
+ " 6.998 | \n",
+ " 45.8 | \n",
+ " 6.0622 | \n",
+ " 3.0 | \n",
+ " 222.0 | \n",
+ " 18.7 | \n",
+ " 394.63 | \n",
+ " 2.94 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 0.06905 | \n",
+ " 0.0 | \n",
+ " 2.18 | \n",
+ " 0.0 | \n",
+ " 0.458 | \n",
+ " 7.147 | \n",
+ " 54.2 | \n",
+ " 6.0622 | \n",
+ " 3.0 | \n",
+ " 222.0 | \n",
+ " 18.7 | \n",
+ " 396.90 | \n",
+ " 5.33 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX \\\n",
+ "0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 \n",
+ "1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 \n",
+ "2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 \n",
+ "3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 \n",
+ "4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 \n",
+ "\n",
+ " PTRATIO B LSTAT \n",
+ "0 15.3 396.90 4.98 \n",
+ "1 17.8 396.90 9.14 \n",
+ "2 17.8 392.83 4.03 \n",
+ "3 18.7 394.63 2.94 \n",
+ "4 18.7 396.90 5.33 "
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import warnings\n",
+ "from sklearn.datasets import load_boston\n",
+ "\n",
+ "# load_boston is deprecated (removed in scikit-learn 1.2) and emits a FutureWarning\n",
+ "# on every call. Silence only that category, and only while loading, so that\n",
+ "# unrelated warnings are still reported.\n",
+ "with warnings.catch_warnings():\n",
+ "    warnings.filterwarnings(\"ignore\", category=FutureWarning)\n",
+ "    boston = load_boston()\n",
+ "\n",
+ "# Keep the raw pieces under their own names: `target` is used later to build y.\n",
+ "data = boston[\"data\"]\n",
+ "feature_names = boston.feature_names\n",
+ "target = boston.target\n",
+ "\n",
+ "# Feature matrix as a DataFrame, one column per feature name.\n",
+ "X = pd.DataFrame(data, columns=feature_names)\n",
+ "X.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "cc335a2e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " price | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 24.0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 21.6 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 34.7 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 33.4 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 36.2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " price\n",
+ "0 24.0\n",
+ "1 21.6\n",
+ "2 34.7\n",
+ "3 33.4\n",
+ "4 36.2"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Target values as a single-column DataFrame named 'price'.\n",
+ "y = pd.DataFrame({'price': target})\n",
+ "y.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "721f2c87",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "((404, 13), (102, 13), (404, 1), (102, 1))"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.model_selection import train_test_split\n",
+ "\n",
+ "# Hold out 20% of the rows for testing; the fixed seed makes the split reproducible.\n",
+ "X_train, X_test, y_train, y_test = train_test_split(\n",
+ "    X, y,\n",
+ "    test_size=0.2,\n",
+ "    random_state=42,\n",
+ ")\n",
+ "\n",
+ "# Shapes of the four resulting frames, in the same order as above.\n",
+ "tuple(part.shape for part in (X_train, X_test, y_train, y_test))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "fd05580e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " CRIM | \n",
+ " ZN | \n",
+ " INDUS | \n",
+ " CHAS | \n",
+ " NOX | \n",
+ " RM | \n",
+ " AGE | \n",
+ " DIS | \n",
+ " RAD | \n",
+ " TAX | \n",
+ " PTRATIO | \n",
+ " B | \n",
+ " LSTAT | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1.287702 | \n",
+ " -0.500320 | \n",
+ " 1.033237 | \n",
+ " -0.278089 | \n",
+ " 0.489252 | \n",
+ " -1.428069 | \n",
+ " 1.028015 | \n",
+ " -0.802173 | \n",
+ " 1.706891 | \n",
+ " 1.578434 | \n",
+ " 0.845343 | \n",
+ " -0.074337 | \n",
+ " 1.753505 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " -0.336384 | \n",
+ " -0.500320 | \n",
+ " -0.413160 | \n",
+ " -0.278089 | \n",
+ " -0.157233 | \n",
+ " -0.680087 | \n",
+ " -0.431199 | \n",
+ " 0.324349 | \n",
+ " -0.624360 | \n",
+ " -0.584648 | \n",
+ " 1.204741 | \n",
+ " 0.430184 | \n",
+ " -0.561474 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " -0.403253 | \n",
+ " 1.013271 | \n",
+ " -0.715218 | \n",
+ " -0.278089 | \n",
+ " -1.008723 | \n",
+ " -0.402063 | \n",
+ " -1.618599 | \n",
+ " 1.330697 | \n",
+ " -0.974048 | \n",
+ " -0.602724 | \n",
+ " -0.637176 | \n",
+ " 0.065297 | \n",
+ " -0.651595 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 0.388230 | \n",
+ " -0.500320 | \n",
+ " 1.033237 | \n",
+ " -0.278089 | \n",
+ " 0.489252 | \n",
+ " -0.300450 | \n",
+ " 0.591681 | \n",
+ " -0.839240 | \n",
+ " 1.706891 | \n",
+ " 1.578434 | \n",
+ " 0.845343 | \n",
+ " -3.868193 | \n",
+ " 1.525387 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " -0.325282 | \n",
+ " -0.500320 | \n",
+ " -0.413160 | \n",
+ " -0.278089 | \n",
+ " -0.157233 | \n",
+ " -0.831094 | \n",
+ " 0.033747 | \n",
+ " -0.005494 | \n",
+ " -0.624360 | \n",
+ " -0.584648 | \n",
+ " 1.204741 | \n",
+ " 0.379119 | \n",
+ " -0.165787 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " CRIM ZN INDUS CHAS NOX RM AGE \\\n",
+ "0 1.287702 -0.500320 1.033237 -0.278089 0.489252 -1.428069 1.028015 \n",
+ "1 -0.336384 -0.500320 -0.413160 -0.278089 -0.157233 -0.680087 -0.431199 \n",
+ "2 -0.403253 1.013271 -0.715218 -0.278089 -1.008723 -0.402063 -1.618599 \n",
+ "3 0.388230 -0.500320 1.033237 -0.278089 0.489252 -0.300450 0.591681 \n",
+ "4 -0.325282 -0.500320 -0.413160 -0.278089 -0.157233 -0.831094 0.033747 \n",
+ "\n",
+ " DIS RAD TAX PTRATIO B LSTAT \n",
+ "0 -0.802173 1.706891 1.578434 0.845343 -0.074337 1.753505 \n",
+ "1 0.324349 -0.624360 -0.584648 1.204741 0.430184 -0.561474 \n",
+ "2 1.330697 -0.974048 -0.602724 -0.637176 0.065297 -0.651595 \n",
+ "3 -0.839240 1.706891 1.578434 0.845343 -3.868193 1.525387 \n",
+ "4 -0.005494 -0.624360 -0.584648 1.204741 0.379119 -0.165787 "
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.preprocessing import StandardScaler\n",
+ "\n",
+ "# Fit the scaler on the training features only; the same fitted scaler is reused for the test set.\n",
+ "scaler = StandardScaler().fit(X_train)\n",
+ "\n",
+ "# transform() returns a plain array, so the column names are restored explicitly\n",
+ "# (the resulting frame gets a fresh 0..n-1 index).\n",
+ "X_train_scaled = pd.DataFrame(scaler.transform(X_train), columns=X_train.columns)\n",
+ "X_train_scaled.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "28d28399",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " CRIM | \n",
+ " ZN | \n",
+ " INDUS | \n",
+ " CHAS | \n",
+ " NOX | \n",
+ " RM | \n",
+ " AGE | \n",
+ " DIS | \n",
+ " RAD | \n",
+ " TAX | \n",
+ " PTRATIO | \n",
+ " B | \n",
+ " LSTAT | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " -0.396809 | \n",
+ " -0.500320 | \n",
+ " -1.007111 | \n",
+ " -0.278089 | \n",
+ " -0.395412 | \n",
+ " 0.141282 | \n",
+ " 0.555916 | \n",
+ " -0.545853 | \n",
+ " -0.507797 | \n",
+ " -0.650926 | \n",
+ " -0.771951 | \n",
+ " 0.428872 | \n",
+ " -0.481210 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " -0.400796 | \n",
+ " 1.229499 | \n",
+ " -0.664391 | \n",
+ " 3.595975 | \n",
+ " -0.931315 | \n",
+ " 0.623942 | \n",
+ " -1.275254 | \n",
+ " 0.126565 | \n",
+ " -0.624360 | \n",
+ " -0.903989 | \n",
+ " -0.322703 | \n",
+ " 0.444180 | \n",
+ " -1.257094 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " -0.395234 | \n",
+ " -0.500320 | \n",
+ " 2.433163 | \n",
+ " -0.278089 | \n",
+ " 0.446720 | \n",
+ " -0.469805 | \n",
+ " 1.081663 | \n",
+ " -0.911447 | \n",
+ " -0.624360 | \n",
+ " 1.849573 | \n",
+ " 0.800418 | \n",
+ " 0.369934 | \n",
+ " 0.790338 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " -0.396825 | \n",
+ " -0.500320 | \n",
+ " -0.025421 | \n",
+ " -0.278089 | \n",
+ " -1.220532 | \n",
+ " -0.354079 | \n",
+ " -2.172957 | \n",
+ " 0.694876 | \n",
+ " -0.624360 | \n",
+ " -0.596698 | \n",
+ " 0.396095 | \n",
+ " 0.378682 | \n",
+ " -0.976875 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 0.167084 | \n",
+ " -0.500320 | \n",
+ " 1.033237 | \n",
+ " -0.278089 | \n",
+ " 1.331384 | \n",
+ " -0.026661 | \n",
+ " 0.831307 | \n",
+ " -0.676503 | \n",
+ " 1.706891 | \n",
+ " 1.578434 | \n",
+ " 0.845343 | \n",
+ " 0.315043 | \n",
+ " 0.677687 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " CRIM ZN INDUS CHAS NOX RM AGE \\\n",
+ "0 -0.396809 -0.500320 -1.007111 -0.278089 -0.395412 0.141282 0.555916 \n",
+ "1 -0.400796 1.229499 -0.664391 3.595975 -0.931315 0.623942 -1.275254 \n",
+ "2 -0.395234 -0.500320 2.433163 -0.278089 0.446720 -0.469805 1.081663 \n",
+ "3 -0.396825 -0.500320 -0.025421 -0.278089 -1.220532 -0.354079 -2.172957 \n",
+ "4 0.167084 -0.500320 1.033237 -0.278089 1.331384 -0.026661 0.831307 \n",
+ "\n",
+ " DIS RAD TAX PTRATIO B LSTAT \n",
+ "0 -0.545853 -0.507797 -0.650926 -0.771951 0.428872 -0.481210 \n",
+ "1 0.126565 -0.624360 -0.903989 -0.322703 0.444180 -1.257094 \n",
+ "2 -0.911447 -0.624360 1.849573 0.800418 0.369934 0.790338 \n",
+ "3 0.694876 -0.624360 -0.596698 0.396095 0.378682 -0.976875 \n",
+ "4 -0.676503 1.706891 1.578434 0.845343 0.315043 0.677687 "
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Scale the test features with the scaler fitted on the training set (no re-fitting).\n",
+ "X_test_scaled = pd.DataFrame(data=scaler.transform(X_test), columns=X_test.columns)\n",
+ "X_test_scaled.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "f66354e7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.manifold import TSNE\n",
+ "\n",
+ "# Project the scaled training features onto 2 dimensions for plotting.\n",
+ "tsne = TSNE(\n",
+ "    n_components=2,\n",
+ "    learning_rate=250,\n",
+ "    init='random',\n",
+ "    random_state=42,\n",
+ ")\n",
+ "X_train_tsne = tsne.fit_transform(X_train_scaled)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "61ac5f4f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "До:\t(404, 13)\n",
+ "После:\t(404, 2)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Report the dimensionality before and after the t-SNE projection.\n",
+ "print(\n",
+ "    'До:\\t{}'.format(X_train_scaled.shape),\n",
+ "    'После:\\t{}'.format(X_train_tsne.shape),\n",
+ "    sep='\\n',\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "cf8cca07",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/svg+xml": [
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "%matplotlib inline\n",
+ "%config InlineBackend.figure_format = 'svg'\n",
+ "\n",
+ "# Scatter plot of the two t-SNE components of the training set.\n",
+ "plt.scatter(x=X_train_tsne[:, 0], y=X_train_tsne[:, 1])\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7da6e6ac",
+ "metadata": {},
+ "source": [
+ "## Задание 2\n",
+ "\n",
+ "С помощью KMeans разбейте данные из тренировочного набора на 3 кластера, используйте все признаки из датафрейма X_train.\n",
+ "\n",
+ "Параметр max_iter должен быть равен 100, random_state сделайте равным 42.\n",
+ "\n",
+ "Постройте еще раз диаграмму рассеяния на данных, полученных с помощью TSNE, и раскрасьте точки из разных кластеров разными цветами.\n",
+ "\n",
+ "Вычислите средние значения price и CRIM в разных кластерах.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "745ac1c3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/svg+xml": [
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "from sklearn.cluster import KMeans\n",
+ "\n",
+ "# Partition the scaled training set into 3 clusters.\n",
+ "model = KMeans(n_clusters=3, max_iter=100, random_state=42)\n",
+ "model.fit(X_train_scaled)\n",
+ "labels_train = model.labels_\n",
+ "\n",
+ "# Same t-SNE scatter as before, with each point coloured by its cluster label.\n",
+ "plt.scatter(X_train_tsne[:, 0], X_train_tsne[:, 1], c=labels_train)\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "d093c22e",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Среднее значение \"price\" в кластере № 1: 27.788372093023252\n",
+ "Среднее значение \"price\" в кластере № 2: 16.165354330708666\n",
+ "Среднее значение \"price\" в кластере № 3: 24.958115183246072\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Mean target price per KMeans cluster on the training set.\n",
+ "# Cluster labels are 0-based; they are printed 1-based.\n",
+ "for cluster in range(model.n_clusters):\n",
+ "    print('Среднее значение \"price\" в кластере № {}: '.format(cluster + 1), y_train[labels_train == cluster].mean().values[0])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "1b0551b1",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Среднее значение \"CRIM\" в кластере № 1: 0.07356558139534886\n",
+ "Среднее значение \"CRIM\" в кластере № 2: 10.797028425196853\n",
+ "Среднее значение \"CRIM\" в кластере № 3: 0.42166020942408367\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Mean CRIM per KMeans cluster on the training set, taken from the unscaled features.\n",
+ "# Cluster labels are 0-based; they are printed 1-based.\n",
+ "for cluster in range(model.n_clusters):\n",
+ "    print('Среднее значение \"CRIM\" в кластере № {}: '.format(cluster + 1), X_train.loc[labels_train == cluster, 'CRIM'].mean())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "eb3d3c49",
+ "metadata": {},
+ "source": [
+ "## * Задание 3\n",
+ "\n",
+ "Примените модель KMeans, построенную в предыдущем задании, к данным из тестового набора.\n",
+ "\n",
+ "Вычислите средние значения price и CRIM в разных кластерах на тестовых данных.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "9421450d",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Среднее значение \"price\" в кластере № 1: 33.871428571428574\n",
+ "Среднее значение \"price\" в кластере № 2: 21.552830188679245\n",
+ "Среднее значение \"price\" в кластере № 3: 16.437142857142856\n",
+ "Среднее значение \"CRIM\" в кластере № 1: 0.08618571428571427\n",
+ "Среднее значение \"CRIM\" в кластере № 2: 0.25200716981132076\n",
+ "Среднее значение \"CRIM\" в кластере № 3: 10.165531142857143\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Apply the KMeans model fitted on the training set to the test set.\n",
+ "# predict() only assigns each test row to the nearest existing centroid; fit_predict()\n",
+ "# would re-fit the model on the test data and produce unrelated cluster numbering.\n",
+ "labels_test = model.predict(X_test_scaled)\n",
+ "\n",
+ "# Mean price per cluster on the test set (cluster labels are 0-based, printed 1-based).\n",
+ "for cluster in range(model.n_clusters):\n",
+ "    print('Среднее значение \"price\" в кластере № {}: '.format(cluster + 1), y_test[labels_test == cluster].mean().values[0])\n",
+ "\n",
+ "# Mean CRIM per cluster on the test set, taken from the unscaled features.\n",
+ "for cluster in range(model.n_clusters):\n",
+ "    print('Среднее значение \"CRIM\" в кластере № {}: '.format(cluster + 1), X_test.loc[labels_test == cluster, 'CRIM'].mean())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ad4afe18",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}