# Bag-of-words demo: count how often each vocabulary word occurs in every
# sentence of a small corpus, then draw the counts as one scatter row per
# sentence.
import pylab as pl
from sklearn.feature_extraction.text import CountVectorizer

# One string per sentence.  Long sentences are split across lines using
# parenthesized adjacent literals, which concatenate at compile time.
corpus = [
    'Have you already set your goals for the New Year?',
    ('Do you want to lose ten kilos, '
     'run a marathon or speak fluent English?'),
    'Some experts believe that you need systems, not goals.',
    'A system is something you do on a regular basis. ',
    ('This means focusing on what you can control '
     '(your actions) rather than what you can’t.'),
    'For example, do not focus on losing ten kilos.',
    ('Focus on shopping for healthy food and '
     'cooking something light every day.'),
    'Do not focus on the marathon.',
    'Focus on the training schedule.',
    # TODO(review): the remainder of this sentence was cut off in the
    # original listing (the line ended with a dangling continuation);
    # restore the missing text if it is available.
    'Invent a system to improve your English, ',
]

# Learn the vocabulary and build the document-term count matrix.
c_vectorizer = CountVectorizer(min_df=1)
corpus_features = c_vectorizer.fit_transform(corpus)
# Dense copy of the counts as 16-bit integers: rows are iterated per
# sentence below.
corpus_array = corpus_features.toarray().astype('int16')
# Callable that applies the vectorizer's preprocessing and tokenization.
c_analyzer = c_vectorizer.build_analyzer()

pl.figure(figsize=(6, 5))
pl.title('Word Occurrences in Sentences', fontsize=12)
for i, counts in enumerate(corpus_array):
    # x is the column (vocabulary) index; y is the sentence index i plus
    # 0.5 per occurrence, so words that are absent sit on the baseline y=i
    # and words that are present are lifted above it.
    pl.scatter(range(len(counts)), counts * .5 + i, marker='*')
pl.tight_layout()
pl.grid()
pl.show()
# (Blog page footer, not part of the script: "No comments: / Post a Comment")