首页 > 其他 > 详细

数据挖掘之Slope One

时间:2016-04-13 02:07:30      阅读:192      评论:0      收藏:0      [点我收藏+]

 计算偏差:

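$$\mathrm{dev}_{i,j} = \sum_{u \in S_{i,j}(X)} \frac{u_i - u_j}{\mathrm{card}(S_{i,j}(X))}$$

其中 $S_{i,j}(X)$ 表示同时对样本 $i$ 和样本 $j$ 评过分的用户集合,$u_i$ 表示用户 $u$ 对样本 $i$ 的评分。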

card() 表示集合包含的元素数量。

加权Slope One算法

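$$P^{wS1}(u)_j = \frac{\sum_{i \in S(u)-\{j\}} (\mathrm{dev}_{j,i} + u_i)\, c_{j,i}}{\sum_{i \in S(u)-\{j\}} c_{j,i}}$$

其中 $S(u)$ 表示用户 $u$ 评过分的样本集合,$c_{j,i} = \mathrm{card}(S_{j,i}(X))$ 表示同时对 $j$ 和 $i$ 评过分的用户数目。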

 

 

 

# coding:utf-8
__author__ = 'similarface'
import codecs, os, sys
from math import sqrt

# Sample rating data, shaped as {"user": {"band": rating}}.
users2 = {
    "Amy": {"Taylor Swift": 4, "PSY": 3, "Whitney Houston": 4},
    "Ben": {"Taylor Swift": 5, "PSY": 2},
    "Clara": {"PSY": 3.5, "Whitney Houston": 4},
    "Daisy": {"Taylor Swift": 5, "Whitney Houston": 3},
}

class recommender:
    """Weighted Slope One recommender over a ``{user: {item: rating}}`` dict.

    Call ``computerDeviation()`` to build the item-to-item deviation and
    co-rating frequency tables, then ``slopeOneRecommendations(ratings)`` to
    predict ratings for the items a user has not rated yet.
    """

    def __init__(self, data, k=1, metric='pearson', n=5):
        """Store the rating data and initialise empty Slope One tables.

        :param data: ``{user: {item: rating}}``; any non-dict value is
            treated as an empty data set.
        :param k: stored as ``self.k``; not read by the methods of this class.
        :param metric: metric name, stored as ``self.metric``.
        :param n: stored as ``self.n``; not read by the methods of this class.
        """
        self.k = k
        self.n = n
        self.username2id = {}
        self.userid2name = {}
        self.productid2name = {}
        self.metric = metric
        # This class defines no ``pearson`` method, so binding it
        # unconditionally would raise AttributeError for the default metric.
        # Look it up defensively; a subclass that provides ``pearson`` still
        # gets it bound here.
        self.fn = None
        if self.metric == 'pearson':
            self.fn = getattr(self, 'pearson', None)
        # Always define ``self.data`` so later method calls cannot fail with
        # AttributeError when a non-dict was passed in.
        self.data = data if isinstance(data, dict) else {}
        # frequencies[a][b]: number of users who rated both a and b.
        self.frequencies = {}
        # deviations[a][b]: average of (rating of a - rating of b) over
        # those users.
        self.deviations = {}

    def computerDeviation(self):
        """Compute the pairwise item deviations and co-rating frequencies.

        Rebuilds ``self.frequencies`` and ``self.deviations`` from
        ``self.data``. The tables are recomputed from scratch on every call,
        so calling this method repeatedly gives the same result.

        :return: None
        """
        frequencies = {}
        deviations = {}
        # Each ``ratings`` is one user's {item: rating} mapping.
        for ratings in self.data.values():
            for item, rating in ratings.items():
                item_freq = frequencies.setdefault(item, {})
                item_dev = deviations.setdefault(item, {})
                for item2, rating2 in ratings.items():
                    if item != item2:
                        item_freq[item2] = item_freq.get(item2, 0) + 1
                        item_dev[item2] = (
                            item_dev.get(item2, 0.0) + (rating - rating2)
                        )
        # Turn the summed differences into averages: dev(i, j).
        for item, devs in deviations.items():
            for item2 in devs:
                devs[item2] /= frequencies[item][item2]
        self.frequencies = frequencies
        self.deviations = deviations

    def convertProductID2name(self, id):
        """Return the product name for ``id``, or ``id`` itself if unknown."""
        return self.productid2name.get(id, id)

    def slopeOneRecommendations(self, userRatings):
        """Predict ratings for items the user has not rated (weighted Slope One).

        For every item ``j`` that is absent from ``userRatings`` and every
        item ``i`` the user rated that has a deviation entry with ``j``::

            prediction[j] = SUM((dev[j][i] + u[i]) * c[j][i]) / SUM(c[j][i])

        where ``c[j][i]`` is ``self.frequencies[j][i]``.

        :param userRatings: ``{item: rating}`` for a single user.
        :return: list of ``(item name, predicted rating)`` tuples sorted by
            predicted rating, highest first.
        """
        weighted_sums = {}
        weights = {}
        for useritem, userRating in userRatings.items():
            for diffItem, diffRatings in self.deviations.items():
                if diffItem not in userRatings and useritem in diffRatings:
                    freq = self.frequencies[diffItem][useritem]
                    weighted_sums[diffItem] = (
                        weighted_sums.get(diffItem, 0.0)
                        + (diffRatings[useritem] + userRating) * freq
                    )
                    weights[diffItem] = weights.get(diffItem, 0) + freq
        recommendations = [
            (self.convertProductID2name(item), total / weights[item])
            for item, total in weighted_sums.items()
        ]
        recommendations.sort(key=lambda pair: pair[1], reverse=True)
        return recommendations

if __name__ == '__main__':
    # Demo: build the deviation tables from the sample data, then predict
    # ratings for the bands Ben has not rated yet.
    r = recommender(users2)
    r.computerDeviation()
    g = users2['Ben']
    result = r.slopeOneRecommendations(g)
    print(result)

运行结果:

  [('Whitney Houston', 3.375)]

数据挖掘之Slope One

原文:http://www.cnblogs.com/similarface/p/5385176.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!