标签:
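计算偏差:

$$dev_{i,j} = \sum_{u \in S_{i,j}(X)} \frac{u_i - u_j}{\mathrm{card}(S_{i,j}(X))}$$

其中 $S_{i,j}(X)$ 表示同时对物品 i 和物品 j 都评过分的用户集合,$u_i$ 表示用户 u 对物品 i 的评分,card() 表示集合包含的元素数量。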
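加权Slope One算法:

$$P^{wS1}(u)_j = \frac{\sum_{i \in S(u)-\{j\}} (dev_{j,i} + u_i)\, c_{j,i}}{\sum_{i \in S(u)-\{j\}} c_{j,i}}, \qquad c_{j,i} = \mathrm{card}(S_{j,i}(X))$$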

# coding:utf-8
# Module author metadata (typographic quotes from the web scrape replaced with ASCII quotes).
__author__ = 'similarface'
import codecs, os, sys
from math import sqrt
# Sample ratings used by the demo, shaped as {user: {band: rating}}.
users2 = {
    "Amy": {"Taylor Swift": 4, "PSY": 3, "Whitney Houston": 4},
    "Ben": {"Taylor Swift": 5, "PSY": 2},
    "Clara": {"PSY": 3.5, "Whitney Houston": 4},
    "Daisy": {"Taylor Swift": 5, "Whitney Houston": 3},
}
class recommender:
    """Weighted Slope One recommender over a {user: {item: rating}} mapping.

    Typical use: construct with the ratings, call ``computerDeviation()``
    once to build the item-to-item tables, then call
    ``slopeOneRecommendations()`` with one user's ratings.
    """

    def __init__(self, data, k=1, metric='pearson', n=5):
        """Store the configuration and the ratings.

        :param data: ratings as ``{user: {item: rating}}``. Anything that is
            not a dict is ignored and an empty rating set is used instead.
        :param k: stored on the instance; not used by the Slope One methods.
        :param metric: name of a similarity metric; only ``'pearson'`` is
            recognised.
        :param n: stored on the instance; not used by the Slope One methods.
        """
        self.k = k
        self.n = n
        self.username2id = {}
        self.userid2name = {}
        self.productid2name = {}
        self.metric = metric
        # This class does not define a ``pearson`` method itself, so look it
        # up defensively: a subclass may supply one, otherwise ``fn`` is None
        # instead of the constructor raising AttributeError.
        self.fn = None
        if self.metric == 'pearson':
            self.fn = getattr(self, 'pearson', None)
        # Always define ``data`` so later methods never hit a missing
        # attribute when a non-dict was passed in.
        self.data = data if isinstance(data, dict) else {}
        # frequencies[a][b]: number of users who rated both a and b.
        self.frequencies = {}
        # deviations[a][b]: average of (rating of a - rating of b) over
        # those users.
        self.deviations = {}

    def computerDeviation(self):
        """Build the pairwise frequency and average-deviation tables.

        The tables are rebuilt from scratch on every call, so calling this
        more than once does not accumulate on top of previous results.

        :return: None; results are stored in ``self.frequencies`` and
            ``self.deviations``.
        """
        frequencies = {}
        deviations = {}
        # Each ``ratings`` is one user's {item: rating} mapping.
        for ratings in self.data.values():
            for item, rating in ratings.items():
                item_freq = frequencies.setdefault(item, {})
                item_dev = deviations.setdefault(item, {})
                for item2, rating2 in ratings.items():
                    if item == item2:
                        continue
                    item_freq[item2] = item_freq.get(item2, 0) + 1
                    item_dev[item2] = item_dev.get(item2, 0.0) + (rating - rating2)
        # Turn the summed differences into averages: dev(i, j).
        for item, sums in deviations.items():
            for item2 in sums:
                sums[item2] /= frequencies[item][item2]
        self.frequencies = frequencies
        self.deviations = deviations

    def convertProductID2name(self, id):
        """Return the product name for ``id``, or ``id`` itself if unknown."""
        return self.productid2name.get(id, id)

    def slopeOneRecommendations(self, userRatings):
        """Predict ratings for items the user has not rated yet.

        For every unrated item j and every item i the user rated, the term
        ``(dev[j][i] + u[i]) * c[j][i]`` is accumulated, where
        ``c[j][i] = frequencies[j][i]``; the sum is then divided by the
        total of the weights ``c[j][i]``.

        :param userRatings: the user's ratings as ``{item: rating}``.
        :return: list of ``(item name, predicted rating)`` tuples sorted by
            predicted rating, highest first.
        """
        weighted_sums = {}
        weight_totals = {}
        for useritem, userRating in userRatings.items():
            for diffItem, diffRatings in self.deviations.items():
                # Only predict items the user has not rated, and only from
                # item pairs that have at least one co-rating.
                if diffItem in userRatings or useritem not in diffRatings:
                    continue
                freq = self.frequencies[diffItem][useritem]
                weighted_sums[diffItem] = (
                    weighted_sums.get(diffItem, 0.0)
                    + (diffRatings[useritem] + userRating) * freq
                )
                weight_totals[diffItem] = weight_totals.get(diffItem, 0) + freq
        recommendations = [
            (self.convertProductID2name(item), total / weight_totals[item])
            for item, total in weighted_sums.items()
        ]
        recommendations.sort(key=lambda artistTuple: artistTuple[1], reverse=True)
        return recommendations
if __name__ == '__main__':
    # Demo: build the deviation tables from the sample ratings, then print
    # the Slope One predictions for the items Ben has not rated.
    r = recommender(users2)
    r.computerDeviation()
    g = users2['Ben']
    result = r.slopeOneRecommendations(g)
    print(result)
运行结果: [('Whitney Houston', 3.375)]
标签:
原文地址:http://www.cnblogs.com/similarface/p/5385176.html