import warnings; warnings.simplefilter('ignore')
import sklearn.cluster as scl
import numpy as np,pandas as pd,pylab as pl
from IPython.core.magic import register_line_magic
from scipy.spatial.distance import pdist,squareform
from scipy.cluster.hierarchy import linkage,dendrogram
# Row identifiers 'id1' .. 'id9' for the nine random samples.
labels = ['id' + str(i) for i in range(1, 10)]
# NOTE(review): `variables` was referenced but never defined in this file,
# which raised NameError. X has three columns, so three names are required;
# the actual names are an assumption -- confirm against the original post.
variables = ['x', 'y', 'z']
# Nine samples with three features each, uniformly drawn from [0, 10).
X = np.random.random_sample([9, 3]) * 10
df = pd.DataFrame(X, columns=variables, index=labels)
# Complete-linkage agglomerative clustering into three groups.
# The distance keyword (`affinity`, renamed `metric` in scikit-learn 1.2 and
# removed under the old name in 1.4) is omitted on purpose: Euclidean is the
# default, so this runs unchanged on both old and new releases.
ac = scl.AgglomerativeClustering(n_clusters=3, linkage='complete')
ac_labels = ac.fit_predict(X)
print('Cluster labels: %s' % ac_labels)
def dist_clusters(method):
    """Draw a complete-linkage dendrogram of ``df`` next to a row heat map.

    Reads the module-level ``df``, ``labels`` and ``ac_labels``.

    Parameters
    ----------
    method
        The string ``'False'`` selects the variant titled 'incorrect';
        any other value selects the variant titled 'correct'.

    Returns
    -------
    pandas.DataFrame
        The linkage matrix with one row per merge ('cl1', 'cl2', ...).
        NOTE(review): the statement that consumed this table is missing
        from the source; returning it is an addition so it is not lost.
    """
    condensed = pdist(df, metric='euclidean')
    # NOTE(review): this branch is reconstructed. The source only showed
    # pdist -> squareform -> a truncated linkage(...) call plus the
    # 'incorrect'/'correct' titles. Presumably the 'incorrect' variant feeds
    # the square distance matrix to linkage (which then treats each row as
    # an observation) and the 'correct' one feeds the condensed vector.
    if method == 'False':
        linkage_input = squareform(condensed)
    else:
        linkage_input = condensed
    row_clusters = linkage(linkage_input, method='complete',
                           metric='euclidean')
    n_merges = row_clusters.shape[0]
    dfc = pd.DataFrame(
        row_clusters,
        columns=['row label 1', 'row label 2', 'distance', 'n_items'],
        index=['cl%d' % (i + 1) for i in range(n_merges)])

    fig = pl.figure(figsize=(6.5, 5))
    # Left panel: the dendrogram, drawn without ticks or frame.
    axd = fig.add_axes([.1, .1, .31, .6])
    d = dendrogram(row_clusters, orientation='left', labels=labels, ax=axd)
    # Reorder the rows to follow the dendrogram leaves, top to bottom.
    df_cl = df.iloc[d['leaves'][::-1]]
    axd.set_xticks([])
    axd.set_yticks([])
    for spine in axd.spines.values():
        spine.set_visible(False)

    # Right panel: heat map of the reordered rows.
    axm = fig.add_axes([.2, .1, .6, .6])
    axm.matshow(df_cl, interpolation='nearest', cmap='cool')
    # Prefix each row id with its AgglomerativeClustering label; 'idK' maps
    # to position K-1. Slicing with [2:] keeps multi-digit ids working.
    row_names = [str(ac_labels[int(el[2:]) - 1]) + '-' + el
                 for el in df_cl.index]
    # Pin one tick per cell instead of relying on the auto-locator's tick
    # count, which the previous padded label lists silently depended on.
    axm.set_xticks(range(df_cl.shape[1]))
    axm.set_yticks(range(df_cl.shape[0]))
    axm.set_xticklabels(list(df_cl.columns))
    axm.set_yticklabels(row_names)

    pl.title('incorrect' if method == 'False' else 'correct')
    return dfc
No comments:
Post a Comment