{ "cells": [ { "cell_type": "markdown", "id": "3f54f1bd-881b-426b-bdf4-54c0f65214a0", "metadata": {}, "source": [ "# Unsupervised Machine Learning: Clustering and Dimensionality Reduction\n", "\n", "CSC/DSC 340 Week 6 Slides\n", "\n", "Author: [Dr. Julie Butler](https://www.juliebutler.org)\n", "\n", "Date Created: August 24, 2023\n", "\n", "Last Modified: August 29, 2023" ] }, { "cell_type": "code", "execution_count": 2, "id": "7bebc6e1-0398-4179-9dbd-5ce35c7fb376", "metadata": {}, "outputs": [], "source": [ "##############################\n", "## IMPORTS ##\n", "##############################\n", "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "import matplotlib.cm as cm\n", "from sklearn.svm import SVC\n", "import matplotlib.pyplot as plt\n", "from scipy.stats import uniform\n", "from sklearn.cluster import KMeans\n", "from sklearn.decomposition import PCA\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.datasets import load_wine\n", "from sklearn.metrics import confusion_matrix\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import mean_squared_error as mse\n", "from sklearn.model_selection import RandomizedSearchCV\n", "from sklearn.metrics import silhouette_samples, silhouette_score\n", "from sklearn.metrics import accuracy_score, classification_report" ] }, { "cell_type": "markdown", "id": "0e31af59-09dc-4447-bc5b-ad52db45e78c", "metadata": {}, "source": [ "## Wine Data Set (Classification)\n", "* 178 data points with 13 features and 3 different target values\n", "* Goal: given information about a wine, predict which of three locations it originated from\n", "* Data set challenges: varying scales and class imbalance" ] }, { "cell_type": "code", "execution_count": 3, "id": "c7908628-af53-4d69-83cd-4e6b2def7d9c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | alcohol | \n", "malic_acid | \n", "ash | \n", "alcalinity_of_ash | \n", "magnesium | \n", "total_phenols | \n", "flavanoids | \n", "nonflavanoid_phenols | \n", "proanthocyanins | \n", "color_intensity | \n", "hue | \n", "od280/od315_of_diluted_wines | \n", "proline | \n", "labels | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "14.23 | \n", "1.71 | \n", "2.43 | \n", "15.6 | \n", "127.0 | \n", "2.80 | \n", "3.06 | \n", "0.28 | \n", "2.29 | \n", "5.64 | \n", "1.04 | \n", "3.92 | \n", "1065.0 | \n", "0 | \n", "
1 | \n", "13.20 | \n", "1.78 | \n", "2.14 | \n", "11.2 | \n", "100.0 | \n", "2.65 | \n", "2.76 | \n", "0.26 | \n", "1.28 | \n", "4.38 | \n", "1.05 | \n", "3.40 | \n", "1050.0 | \n", "0 | \n", "
2 | \n", "13.16 | \n", "2.36 | \n", "2.67 | \n", "18.6 | \n", "101.0 | \n", "2.80 | \n", "3.24 | \n", "0.30 | \n", "2.81 | \n", "5.68 | \n", "1.03 | \n", "3.17 | \n", "1185.0 | \n", "0 | \n", "
3 | \n", "14.37 | \n", "1.95 | \n", "2.50 | \n", "16.8 | \n", "113.0 | \n", "3.85 | \n", "3.49 | \n", "0.24 | \n", "2.18 | \n", "7.80 | \n", "0.86 | \n", "3.45 | \n", "1480.0 | \n", "0 | \n", "
4 | \n", "13.24 | \n", "2.59 | \n", "2.87 | \n", "21.0 | \n", "118.0 | \n", "2.80 | \n", "2.69 | \n", "0.39 | \n", "1.82 | \n", "4.32 | \n", "1.04 | \n", "2.93 | \n", "735.0 | \n", "0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
173 | \n", "13.71 | \n", "5.65 | \n", "2.45 | \n", "20.5 | \n", "95.0 | \n", "1.68 | \n", "0.61 | \n", "0.52 | \n", "1.06 | \n", "7.70 | \n", "0.64 | \n", "1.74 | \n", "740.0 | \n", "2 | \n", "
174 | \n", "13.40 | \n", "3.91 | \n", "2.48 | \n", "23.0 | \n", "102.0 | \n", "1.80 | \n", "0.75 | \n", "0.43 | \n", "1.41 | \n", "7.30 | \n", "0.70 | \n", "1.56 | \n", "750.0 | \n", "2 | \n", "
175 | \n", "13.27 | \n", "4.28 | \n", "2.26 | \n", "20.0 | \n", "120.0 | \n", "1.59 | \n", "0.69 | \n", "0.43 | \n", "1.35 | \n", "10.20 | \n", "0.59 | \n", "1.56 | \n", "835.0 | \n", "2 | \n", "
176 | \n", "13.17 | \n", "2.59 | \n", "2.37 | \n", "20.0 | \n", "120.0 | \n", "1.65 | \n", "0.68 | \n", "0.53 | \n", "1.46 | \n", "9.30 | \n", "0.60 | \n", "1.62 | \n", "840.0 | \n", "2 | \n", "
177 | \n", "14.13 | \n", "4.10 | \n", "2.74 | \n", "24.5 | \n", "96.0 | \n", "2.05 | \n", "0.76 | \n", "0.56 | \n", "1.35 | \n", "9.20 | \n", "0.61 | \n", "1.60 | \n", "560.0 | \n", "2 | \n", "
178 rows × 14 columns
\n", "