标签:
#!/usr/bin/python278
# _*_ coding: utf-8 _*_
# Snippet 1: plain scatter plot of columns 1 and 2 of the dating data matrix
# returned by kNN.file2matrix, with no per-class styling.
import kNN
reload(kNN)  # Python 2 builtin; re-executes kNN so edits to it are picked up
import matplotlib
# Imported explicitly instead of relying on pyplot pulling it in as a side
# effect of its own imports.
import matplotlib.font_manager
import matplotlib.pyplot as plt
from numpy import *

datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')

# Raw string: the path contains backslashes. In a non-raw literal "\u" starts
# a Unicode escape under Python 3 (and in Python 2 u'' literals), which makes
# this path a syntax error; "\W" and "\F" only survive because they happen to
# be unrecognised escapes.
zhfont = matplotlib.font_manager.FontProperties(
    fname=r'C:\Windows\Fonts\ukai.ttc')

fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2])
# x label: "percentage of time spent playing games"
plt.xlabel(u'玩游戏所耗时间百分比', fontproperties=zhfont)
# y label: "liters of ice cream consumed per week"
plt.ylabel(u'每周消费的冰淇淋公升数', fontproperties=zhfont)
plt.show()
#!/usr/bin/python278
# _*_ coding: utf-8 _*_
# Snippet 2: same scatter plot as before, but marker size and colour are both
# driven by the class label of each sample.
import kNN
reload(kNN)  # Python 2 builtin; re-executes kNN so edits to it are picked up
import matplotlib
# Imported explicitly instead of relying on pyplot pulling it in as a side
# effect of its own imports.
import matplotlib.font_manager
import matplotlib.pyplot as plt
from numpy import *

datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')

# Raw string: the path contains backslashes. In a non-raw literal "\u" starts
# a Unicode escape under Python 3 (and in Python 2 u'' literals), which makes
# this path a syntax error.
zhfont = matplotlib.font_manager.FontProperties(
    fname=r'C:\Windows\Fonts\ukai.ttc')

fig = plt.figure()
ax = fig.add_subplot(111)

# The same scaled label array is used for both the marker size (s) and the
# colour value (c); compute it once and pass both by keyword so the meaning
# of each argument is visible at the call site.
labelScale = 15.0 * array(datingLabels)
ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2],
           s=labelScale, c=labelScale)
# x label: "percentage of time spent playing games"
plt.xlabel(u'玩游戏所耗时间百分比', fontproperties=zhfont)
# y label: "liters of ice cream consumed per week"
plt.ylabel(u'每周消费的冰淇淋公升数', fontproperties=zhfont)
plt.show()
#!/usr/bin/env python
# _*_ coding: utf-8 _*_
# Snippet 3: scatter plot of columns 0 and 1 of the dating data, drawn as one
# series per class label so that each class gets its own legend entry.
import kNN
reload(kNN)  # Python 2 builtin; re-executes kNN so edits to it are picked up
import matplotlib
# Imported explicitly instead of relying on pyplot pulling it in as a side
# effect of its own imports.
import matplotlib.font_manager
import matplotlib.pyplot as plt

matrix, labels = kNN.file2matrix('datingTestSet2.txt')
# Single-argument print(...) produces the same output under Python 2's print
# statement and Python 3's print function.
print(matrix)
print(labels)

# Raw string: the path contains backslashes. In a non-raw literal "\u" starts
# a Unicode escape under Python 3 (and in Python 2 u'' literals), which makes
# this path a syntax error.
zhfont = matplotlib.font_manager.FontProperties(
    fname=r'C:\Windows\Fonts\ukai.ttc')
plt.figure(figsize=(8, 5), dpi=80)
axes = plt.subplot(111)

# Pull the three classes out into separate coordinate lists.
# x axis: frequent flier miles (column 0)
# y axis: percentage of time spent playing video games (column 1)
type1_x = []
type1_y = []
type2_x = []
type2_y = []
type3_x = []
type3_y = []
print('range(len(labels)):')
print(range(len(labels)))
for i in range(len(labels)):
    if labels[i] == 1:  # did not like
        type1_x.append(matrix[i][0])
        type1_y.append(matrix[i][1])
    elif labels[i] == 2:  # average charm
        type2_x.append(matrix[i][0])
        type2_y.append(matrix[i][1])
    elif labels[i] == 3:  # very charming
        # Debug trace of every class-3 sample: index, label, label type.
        print('%s : %s : %s' % (i, labels[i], type(labels[i])))
        type3_x.append(matrix[i][0])
        type3_y.append(matrix[i][1])

type1 = axes.scatter(type1_x, type1_y, s=20, c='red')
type2 = axes.scatter(type2_x, type2_y, s=40, c='green')
type3 = axes.scatter(type3_x, type3_y, s=50, c='blue')
# A single scatter call with s=20*array(labels), c=50*array(labels) would
# draw the same points, but three separate calls give one handle per class
# for the legend below.

# x label: "frequent flier miles earned per year"
plt.xlabel(u'每年获取的飞行里程数', fontproperties=zhfont)
# y label: "percentage of time spent playing video games"
# (the original label read 事件 "event" where 时间 "time" was meant)
plt.ylabel(u'玩视频游戏所消耗的时间百分比', fontproperties=zhfont)
# Legend entries: "did not like", "average charm", "very charming".
axes.legend((type1, type2, type3), (u'不喜欢', u'魅力一般', u'极具魅力'),
            loc=2, prop=zhfont)
plt.show()
def autoNorm(dataSet):
    """Min-max scale every column of ``dataSet`` into the range [0, 1].

    Each value becomes ``(value - column_min) / (column_max - column_min)``.

    Args:
        dataSet: 2-D NumPy array with one sample per row; minima and maxima
            are taken along axis 0, i.e. per column.

    Returns:
        A tuple ``(normDataSet, ranges, minVals)``:
        ``normDataSet`` is the scaled array with the same shape as
        ``dataSet``; ``ranges`` is the per-column ``max - min``; ``minVals``
        is the per-column minimum. ``dataSet`` itself is not modified.

    Note:
        A column whose values are all equal has a range of 0. It is scaled to
        0 instead of NaN. ``ranges`` still reports the true 0 for such a
        column, so a caller that divides by ``ranges`` needs its own guard.
    """
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals

    # Divide constant columns by 1 rather than 0; their numerator is already
    # 0, so they come out as 0. Work on a copy so the returned ranges stay
    # the real max - min.
    divisors = ranges.copy()
    divisors[divisors == 0] = 1

    # Broadcasting applies the per-column vectors to every row, so the
    # vectors do not have to be tiled to the shape of dataSet first.
    normDataSet = (dataSet - minVals) / divisors
    return normDataSet, ranges, minVals
# >>> import kNN
>>> reload(kNN)
<module 'kNN' from 'kNN.pyc'>
>>> datingDataMat,datingLabels=kNN.file2matrix('datingTestSet2.txt')
>>> normMat,ranges,minVals=kNN.autoNorm(datingDataMat)
>>> normMat
array([[ 0.44832535, 0.39805139, 0.56233353],
[ 0.15873259, 0.34195467, 0.98724416],
[ 0.28542943, 0.06892523, 0.47449629],
...,
[ 0.29115949, 0.50910294, 0.51079493],
[ 0.52711097, 0.43665451, 0.4290048 ],
[ 0.47940793, 0.3768091 , 0.78571804]])
>>> ranges
array([ 9.12730000e+04, 2.09193490e+01, 1.69436100e+00])
>>> minVals
# array([ 0. , 0. , 0.001156])
def datingClassTest(hoRatio=0.50, k=3):
    """Run a hold-out evaluation of the classifier on the dating data set.

    The data is loaded from 'datingTestSet2.txt' and normalised with
    autoNorm. The first ``int(m * hoRatio)`` rows are then classified one by
    one against the remaining rows, printing each prediction next to the true
    label, followed by the overall error rate and the raw error count.

    Args:
        hoRatio: fraction of the rows held out as test vectors. The default
            0.50 holds out half of the data.
        k: value passed as the last argument of classify0 (presumably the
            number of neighbours that vote; confirm against classify0).

    Returns:
        None. All results are printed.
    """
    # Load the data set from file.
    datingDataMat, datingLabels = file2matrix('datingTestSet2.txt')
    normMat, ranges, minVals = autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)
    errorCount = 0.0
    for i in range(numTestVecs):
        # Rows [0, numTestVecs) are the test vectors; rows [numTestVecs, m)
        # and their labels are the reference set handed to the classifier.
        classifierResult = classify0(normMat[i, :],
                                     normMat[numTestVecs:m, :],
                                     datingLabels[numTestVecs:m], k)
        print("the classifier came back with: %d, the real answer is: %d"
              % (classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1.0
    # With no test vectors the rate is undefined; report 0.0 instead of
    # failing with a ZeroDivisionError.
    if numTestVecs:
        errorRate = errorCount / float(numTestVecs)
    else:
        errorRate = 0.0
    print("the total error rate is: %f" % errorRate)
    print(errorCount)
# >>> kNN.datingClassTest()
# the classifier came back with: 3, the real answer is: 3
# the classifier came back with: 2, the real answer is: 2
# the classifier came back with: 1, the real answer is: 1
# the classifier came back with: 1, the real answer is: 1
# the classifier came back with: 1, the real answer is: 1
# the classifier came back with: 1, the real answer is: 1
# the classifier came back with: 3, the real answer is: 3
# the classifier came back with: 3, the real answer is: 3
# the classifier came back with: 1, the real answer is: 1
# the classifier came back with: 3, the real answer is: 3
# the classifier came back with: 1, the real answer is: 1
# the classifier came back with: 1, the real answer is: 1.
the classifier came back with: 1, the real answer is: 1
the classifier came back with: 1, the real answer is: 1
the classifier came back with: 1, the real answer is: 1
the classifier came back with: 3, the real answer is: 3
the classifier came back with: 1, the real answer is: 1
the classifier came back with: 2, the real answer is: 1
the classifier came back with: 2, the real answer is: 2
the classifier came back with: 1, the real answer is: 1
the classifier came back with: 1, the real answer is: 1
the classifier came back with: 2, the real answer is: 2
the total error rate is: 0.064000
def classifyPerson():
    """Ask for one person's three feature values and print the prediction.

    Reads three numbers from standard input, scales them with the minimum
    values and ranges that autoNorm reports for 'datingTestSet2.txt',
    classifies the scaled vector with classify0 against the normalised data
    set, and prints the matching description.

    Returns:
        None. The verdict is printed.

    Raises:
        ValueError: if an answer cannot be converted with float().
    """
    def ask(question):
        # raw_input is the Python 2 line reader; it returns the typed text.
        return float(raw_input(question))

    gameShare = ask("percentage of time spent playing video games?")
    flierMiles = ask("frequent flier miles earned per year?")
    iceCreamLiters = ask("liters of ice cream consumed per year?")

    samples, sampleLabels = file2matrix('datingTestSet2.txt')
    scaledSamples, spans, floors = autoNorm(samples)

    # The feature order differs from the order of the questions: miles
    # first, then game time, then ice cream.
    person = array([flierMiles, gameShare, iceCreamLiters])
    predicted = classify0((person - floors) / spans,
                          scaledSamples, sampleLabels, 3)

    # The predicted label is used 1-based to pick the description.
    verdicts = ('not at all', 'in small doses', 'in large doses')
    print(" ".join(("You will probably like this person:",
                    verdicts[predicted - 1])))
# Code note: Python's raw_input() lets the user type a line of text and
# returns what the user entered.
# >>> import kNN
# >>> reload(kNN)
# <module 'kNN' from 'kNN.py'>
# >>> kNN.classifyPerson()
# percentage of time spent playing video games?10
# frequent flier miles earned per year?10000
# liters of ice cream consumed per year?0.5
# You will probably like this person: in small doses
标签:
原文地址:http://blog.csdn.net/geekmanong/article/details/50523331