用TSNE进行数据降维并展示聚类结果

t-SNE(t 分布随机邻域嵌入,scikit-learn 中的 TSNE 类)提供了一种有效的数据降维方式,让我们可以在 2 维或 3 维的空间中展示聚类结果。

# -*- coding: utf-8 -*-from __future__ import unicode_literalsfrom sklearn.manifold import TSNEimport pandas as pdimport matplotlib.pyplot as mpinputfile = 'data/consumption_data.xls'outputfile = 'tmp/data_type3.xls'data = pd.read_excel(inputfile, index_col='Id')data_zs = 1.0 * (data - data.mean()) / data.std()from sklearn.cluster import KMeansk = 3iteration = 800model = KMeans(n_clusters=k, n_jobs =4, max_iter=iteration)  # n_jobs 并发数设置为4model.fit(data_zs)  # 开始聚类r1 = pd.Series(model.labels_).value_counts()  # 统计各个类别的数目r2 = pd.DataFrame(model.cluster_centers_)  # 找出聚类中心r = pd.concat([data,pd.Series(model.labels_,index = data.index)],axis=1)r.columns = list(data.columns) + [r'聚类类别'] tsne = TSNE()tsne.fit_transform(data_zs)  # 进行数据降维tsne = pd.DataFrame(tsne.embedding_, index=data_zs.index)mp.rcParams['font.sans-serif'] = ['SimHei']mp.rcParams['axes.unicode_minus'] = Falsed = tsne[r[r'聚类类别'] == 0]mp.plot(d[0], d[1], 'r.')d = tsne[r[r'聚类类别'] == 1]mp.plot(d[0], d[1], 'go')d = tsne[r[r'聚类类别'] == 2]mp.plot(d[0], d[1], 'b*')mp.show()

运行结果:

(0)

相关推荐