pip install scikit-learn matplotlib scikit-learn-extra
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans, MiniBatchKMeans
from sklearn_extra.cluster import KMedoids
# Load the Iris dataset bundled with scikit-learn and split it into the
# feature matrix X and the class targets y (y is not used by the visible
# clustering code; it is kept for reference).
iris = load_iris()
X, y = iris.data, iris.target
# Each example fits one clusterer on X with 3 clusters and a fixed
# random_state, then keeps the per-sample cluster labels.
# (Example numbering is kept from the original; there is no example 2 here.)

# Example 1: KMedoids (from scikit-learn-extra)
kmedoids = KMedoids(n_clusters=3, random_state=0).fit(X)
labels_kmedoids = kmedoids.labels_

# Example 3: KMeans with k-means++ initialization
kmeans_plus = KMeans(n_clusters=3, init='k-means++', random_state=0).fit(X)
labels_kmeans_plus = kmeans_plus.labels_

# Example 4: KMeans using the elkan algorithm variant
kmeans_elkan = KMeans(n_clusters=3, algorithm='elkan', random_state=0).fit(X)
labels_kmeans_elkan = kmeans_elkan.labels_

# Example 5: Mini Batch KMeans
minibatch_kmeans = MiniBatchKMeans(n_clusters=3, random_state=0).fit(X)
labels_minibatch = minibatch_kmeans.labels_
# Visualize the clustering results: one scatter plot per algorithm on a
# 2x2 grid, plotting the first two columns of X and coloring each point by
# its assigned cluster label.
fig, axs = plt.subplots(2, 2, figsize=(15, 10))

# (title, labels) pairs in row-major order, matching the traversal order of
# axs.flat: [0, 0], [0, 1], [1, 0], [1, 1].
cluster_panels = [
    ('KMedoids', labels_kmedoids),
    ('KMeans++', labels_kmeans_plus),
    ('elkan KMeans', labels_kmeans_elkan),
    ('Mini Batch KMeans', labels_minibatch),
]
for panel_ax, (panel_title, panel_labels) in zip(axs.flat, cluster_panels):
    panel_ax.scatter(X[:, 0], X[:, 1], c=panel_labels)
    panel_ax.set_title(panel_title)

plt.show()
说明
KMedoids: 使用 scikit-learn-extra 库中的 KMedoids 类进行实现，适合处理包含异常值的数据集。
KMeans++: 通过设置 init='k-means++' 参数，使用 KMeans++ 初始化方法，提高聚类效果（这也是 scikit-learn 中 KMeans 的默认初始化方式）。
elkan KMeans: 通过设置 algorithm='elkan' 参数，使用 elkan 优化算法，利用三角不等式减少距离计算。
Mini Batch KMeans: 使用 MiniBatchKMeans 类，适合处理大规模数据集，提高计算效率。