pip install scikit-learn matplotlib scikit-learn-extra
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans, MiniBatchKMeans
from sklearn_extra.cluster import KMedoids

# Demo script: fit four k-means-family clusterers on the Iris data and
# draw their cluster assignments side by side in a 2x2 grid.

# Load the dataset. `y` (the true species labels) is kept for reference
# only; none of the clusterers below use it.
iris = load_iris()
X = iris.data
y = iris.target

# KMedoids (from scikit-learn-extra).
kmedoids = KMedoids(n_clusters=3, random_state=0)
kmedoids.fit(X)
labels_kmedoids = kmedoids.labels_

# KMeans with explicit k-means++ initialisation.
kmeans_plus = KMeans(n_clusters=3, init='k-means++', random_state=0)
kmeans_plus.fit(X)
labels_kmeans_plus = kmeans_plus.labels_

# KMeans using the Elkan variant of the algorithm.
kmeans_elkan = KMeans(n_clusters=3, algorithm='elkan', random_state=0)
kmeans_elkan.fit(X)
labels_kmeans_elkan = kmeans_elkan.labels_

# Mini-batch KMeans.
minibatch_kmeans = MiniBatchKMeans(n_clusters=3, random_state=0)
minibatch_kmeans.fit(X)
labels_minibatch = minibatch_kmeans.labels_

# Visualise the clusterings. All four models were fitted on every feature
# column, but only the first two columns are used as plot coordinates.
fig, axs = plt.subplots(2, 2, figsize=(15, 10))

panels = [
    (axs[0, 0], labels_kmedoids, 'KMedoids'),
    (axs[0, 1], labels_kmeans_plus, 'KMeans++'),
    (axs[1, 0], labels_kmeans_elkan, 'elkan KMeans'),
    (axs[1, 1], labels_minibatch, 'Mini Batch KMeans'),
]
for ax, labels, title in panels:
    # Colour each sample by the cluster index assigned by that model.
    # Cluster indices are arbitrary per model, so the same colour in two
    # panels does not necessarily denote the same group.
    ax.scatter(X[:, 0], X[:, 1], c=labels)
    ax.set_title(title)
    ax.set_xlabel(iris.feature_names[0])
    ax.set_ylabel(iris.feature_names[1])

plt.show()
KMedoids: 使用 `scikit-learn-extra` 库中的 `KMedoids` 类进行实现。由于簇中心取自真实样本点(medoid),对异常值更稳健,适合处理包含异常值的数据集。
KMeans++: 通过设置 `init='k-means++'` 参数,使用 KMeans++ 初始化方法(这也是 scikit-learn 中 `KMeans` 的默认初始化方式),使初始簇中心彼此更分散,从而提高聚类效果。
elkan KMeans: 通过设置 `algorithm='elkan'` 参数,使用 elkan 优化算法,利用三角不等式跳过不必要的计算,减少距离计算。
Mini Batch KMeans: 使用 `MiniBatchKMeans` 类,每次迭代只用一小批样本更新簇中心,适合处理大规模数据集,提高计算效率。