推荐系统

时间：2019-05-16 18:29:05 阅读：139 评论：0 收藏：0 [点我收藏+]
# A dictionary of movie critics and their ratings of a small
# set of movies.
# Layout: {critic name: {movie title: rating}}. A movie a critic has not
# rated is simply absent from that critic's inner dictionary.
critics = {
    'Lisa Rose': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'Superman Returns': 3.5,
        'You, Me and Dupree': 2.5,
        'The Night Listener': 3.0,
    },
    'Gene Seymour': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 1.5,
        'Superman Returns': 5.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 3.5,
    },
    'Michael Phillips': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.0,
        'Superman Returns': 3.5,
        'The Night Listener': 4.0,
    },
    'Claudia Puig': {
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'The Night Listener': 4.5,
        'Superman Returns': 4.0,
        'You, Me and Dupree': 2.5,
    },
    'Mick LaSalle': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'Just My Luck': 2.0,
        'Superman Returns': 3.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 2.0,
    },
    'Jack Matthews': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'The Night Listener': 3.0,
        'Superman Returns': 5.0,
        'You, Me and Dupree': 3.5,
    },
    'Toby': {
        'Snakes on a Plane': 4.5,
        'You, Me and Dupree': 1.0,
        'Superman Returns': 4.0,
    },
}


from math import sqrt
from pandas import DataFrame
import numpy as np
import operator
import scipy.stats.stats as scss
import pandas as pd
#
#import pydelicious

def sim_distance(prefs, person1, person2):
    """Return a Euclidean-distance-based similarity for two columns of prefs.

    The two columns are subtracted element-wise; rows where the difference
    is NaN (at least one side has no value) are ignored.

    Args:
        prefs: table indexed by column label whose columns support
            element-wise subtraction and boolean masking (the callers in
            this file pass a pandas DataFrame).
        person1: label of the first column.
        person2: label of the second column.

    Returns:
        ``1 / (1 + d)`` where ``d`` is the Euclidean distance over the rows
        both columns have a value for, or ``0`` when there is no such row.
    """
    diff = prefs[person1] - prefs[person2]
    shared = diff[~np.isnan(diff)]
    if len(shared) == 0:
        # Nothing rated in common: report "no similarity" instead of 1.0.
        return 0
    distance = sqrt(sum(shared ** 2))
    return 1 / (1 + distance)

def sim_pearson(prefs, person1, person2):
    """Return the Pearson correlation between two columns of prefs.

    Only rows in which both columns hold a value take part in the
    computation.

    Args:
        prefs: pandas DataFrame; it is indexed with a list of two column
            labels and ``dropna`` is called on the result.
        person1: label of the first column.
        person2: label of the second column.

    Returns:
        The Pearson correlation coefficient of the shared rows, or ``0``
        when the correlation is undefined: fewer than two shared rows, or
        one of the columns is constant over the shared rows.
    """
    # Function-scope import of the public entry point; the module-level
    # ``scipy.stats.stats`` alias points at a deprecated private namespace.
    from scipy.stats import pearsonr

    shared = prefs[[person1, person2]].dropna(axis=0, how='any')
    # pearsonr needs at least two observations; with zero or one shared row
    # there is nothing to correlate.
    if len(shared) < 2:
        return 0
    first = shared[person1]
    second = shared[person2]
    # Zero variance on either side makes the coefficient NaN, which would
    # break any later sorting of similarity scores.
    if first.nunique() < 2 or second.nunique() < 2:
        return 0
    return pearsonr(first, second)[0]

def topMatches(prefs, person, n=3, similarity=sim_distance):
    """Return the n columns of prefs most similar to the column ``person``.

    Args:
        prefs: table exposing ``.columns`` (the callers in this file pass a
            pandas DataFrame); it is forwarded unchanged to ``similarity``.
        person: label of the reference column; it is never compared with
            itself.
        n: maximum number of matches to return.
        similarity: callable ``(prefs, label_a, label_b) -> score``.

    Returns:
        A list of at most n ``(label, score)`` tuples ordered by descending
        score.
    """
    scores = {
        candidate: similarity(prefs, person, candidate)
        for candidate in prefs.columns
        if candidate != person
    }
    ranked = sorted(scores.items(), key=operator.itemgetter(1), reverse=True)
    return ranked[:n]

def getRecommendations(prefs, person, similarity=sim_pearson):
    """Score the rows ``person`` has not rated, weighted by column similarity.

    ``prefs`` is wrapped in a DataFrame and transposed first, so AFTER the
    transpose the columns are the entities being compared (``person`` must
    be one of the column labels) and the index holds the items being scored.

    Args:
        prefs: anything ``DataFrame(...)`` accepts.
        person: column label (after transposition) to recommend for.
        similarity: callable ``(prefs, label_a, label_b) -> score``; columns
            scoring ``<= 0`` against ``person`` are ignored.

    Returns:
        A list of ``(predicted_score, item)`` tuples in descending order,
        where the predicted score is the similarity-weighted average of the
        other columns' values for that item.
    """
    prefs = DataFrame(prefs).T
    weighted_totals = {}
    weight_sums = {}
    for other in prefs.columns:
        if other == person:
            continue
        sim = similarity(prefs, other, person)
        if sim <= 0:
            continue
        for item in prefs[other].index:
            own_rating = prefs[person][item]
            # Only items the target has no rating for (NaN or 0) are scored.
            if not (np.isnan(own_rating) or own_rating == 0):
                continue
            contribution = prefs[other][item] * sim
            if np.isnan(contribution):
                # The other column has no value for this item either.
                continue
            weighted_totals[item] = weighted_totals.get(item, 0) + contribution
            weight_sums[item] = weight_sums.get(item, 0) + sim
    return sorted(
        ((total / weight_sums[item], item)
         for item, total in weighted_totals.items()),
        reverse=True,
    )

def caculateSimilarItems(prefs, n=10):
    """Build a lookup of the n most similar columns for every column of prefs.

    Args:
        prefs: table exposing ``.columns``; passed unchanged to
            ``topMatches``.
        n: number of neighbours kept per column.

    Returns:
        A dict mapping each column label to the list returned by
        ``topMatches`` for it, computed with ``sim_distance``.
    """
    return {
        column: topMatches(prefs, column, n=n, similarity=sim_distance)
        for column in prefs.columns
    }


def getRecommandationItems(prefs, simularItems, person):
    """Recommend unrated items to ``person`` from precomputed item neighbours.

    Args:
        prefs: pandas DataFrame; the row labelled ``person`` is read with
            ``.loc`` and its non-NaN entries are taken as the items that
            person has already rated.
        simularItems: dict mapping an item label to an iterable of
            ``(other_item, similarity)`` pairs.
        person: row label in ``prefs``.

    Returns:
        A list of ``(predicted_rating, item)`` tuples in descending order.
        The predicted rating is the similarity-weighted average of the
        person's own ratings of the neighbouring items. Candidates whose
        similarities sum to zero are left out because no weighted average
        can be formed for them.
    """
    totals = {}
    sumSims = {}
    alreadyWatched = prefs.loc[person, :].dropna()
    # Walk every item the current user has already rated.
    for watched, rating in alreadyWatched.items():
        # No neighbour list exists for this item, so it cannot contribute.
        if watched not in simularItems:
            continue
        for item, sim in simularItems[watched]:
            if item in alreadyWatched.index:
                continue
            totals[item] = totals.get(item, 0) + rating * sim
            sumSims[item] = sumSims.get(item, 0) + sim
    # Dividing by a zero similarity sum would yield NaN scores, and NaN
    # breaks the ordering of the sort below.
    ranking = [
        (score / sumSims[item], item)
        for item, score in totals.items()
        if sumSims[item] != 0
    ]
    ranking.sort(reverse=True)
    return ranking

if __name__ == "__main__":
    # Demo: item-based recommendations for one user, built from the
    # ratings.csv / movies.csv files found under homePath.

    # Raw string so the backslashes of the Windows path are taken literally
    # rather than parsed as (invalid) escape sequences.
    homePath = r'P:\Department\Celer\個人資料夾\F3234506_麗娟\Extra\Python\资源\9780596529321-master\PCI_Code Folder\chapter2\DataSet\ml-latest-small'

    with open(homePath + '\\ratings.csv', 'rb') as ratings_f:
        ratings = pd.read_csv(ratings_f)
    with open(homePath + '\\movies.csv', 'rb') as movies_f:
        movies = pd.read_csv(movies_f)
    # tags.csv is loaded but not merged into the data set below.
    with open(homePath + '\\tags.csv', 'rb') as tags_f:
        tags = pd.read_csv(tags_f)

    # Keep a single row per title so that titles can serve as column labels.
    movies = movies.drop_duplicates(['title'])
    data = pd.merge(ratings, movies, on='movieId', how='outer')
    # Reshape to one row per userId and one column per title, holding ratings.
    data = data.pivot(index='userId', columns='title', values='rating')

    # Neighbour lists are computed for the first 500 title columns only.
    similarity = caculateSimilarItems(data.iloc[:, 0:500])

    # Recommend for the row labelled 6 (a userId).
    recommItems = getRecommandationItems(data, similarity, 6)
    print(recommItems)
推荐系统
原文：https://www.cnblogs.com/Colleen-Blog/p/10865201.html
踩
(0)
评论一句话评论（0）
分享档案
更多>
2021年09月23日 (328)
2021年09月24日 (313)
2021年09月17日 (191)
2021年09月15日 (369)
2021年09月16日 (411)
2021年09月13日 (439)
2021年09月11日 (398)
2021年09月12日 (393)
2021年09月10日 (160)
2021年09月08日 (222)