# Source article: http://blog.christianperone.com/2013/09/machine-learning-cosine-similarity-for-vector-space-models-part-iii/
import math

# Tiny example corpus; the first document is the query that every document
# (including itself) is compared against.
documents = (
    "The sky is blue",
    "The sun is bright",
    "The sun in the sky is bright",
    "We can see the shining sun, the bright sun",
)


def angle_in_degrees(cos_sim, tolerance=1e-9):
    """Return the angle, in degrees, whose cosine is *cos_sim*.

    Args:
        cos_sim: Cosine of the angle, nominally within [-1, 1].
        tolerance: How far outside [-1, 1] the value may fall and still be
            treated as rounding noise and clamped to the nearest bound.

    Returns:
        The angle in degrees, between 0 and 180.

    Raises:
        ValueError: If *cos_sim* lies outside [-1, 1] by more than
            *tolerance* (raised by ``math.acos``).
    """
    # Floating-point rounding can produce values such as 1.0000000000000002
    # for identical vectors, which math.acos would reject; clamp only those
    # near-boundary values and let genuinely invalid input raise.
    if abs(cos_sim) <= 1.0 + tolerance:
        cos_sim = max(-1.0, min(1.0, cos_sim))
    return math.degrees(math.acos(cos_sim))


def main():
    """Print the query's cosine similarities and one of them as an angle.

    Builds a TF-IDF matrix for ``documents``, prints the cosine similarity of
    the first document against every document, then prints the angle (in
    degrees) between the first and third documents.
    """
    # Imported here so that the pure-math helper above stays importable on
    # machines where scikit-learn is not installed.
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity

    tfidf_vectorizer = TfidfVectorizer()
    tfidf_matrix = tfidf_vectorizer.fit_transform(documents)
    # print(tfidf_matrix)  # uncomment to inspect the sparse TF-IDF matrix

    # Slice (rather than index) the first row so the operand is guaranteed to
    # stay two-dimensional regardless of the sparse container type.
    similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix)
    print(similarities)

    # Reuse the value computed above instead of a hand-copied constant.
    # NOTE(review): the original listing hard-coded 0.52305744 here, which is
    # presumably the first-vs-third document similarity -- confirm against
    # the matrix printed above.
    cos_sim = similarities[0, 2]
    print(angle_in_degrees(cos_sim))


if __name__ == "__main__":
    main()
# Original article: http://www.cnblogs.com/gwnbu/p/7222942.html