标签:lsh pie upd gif gbk tor jre idg fpm
import sys


def scoreAUC(labels, probs):
    """Return the area under the ROC curve for binary labels and scores.

    The examples are visited in order of decreasing score and the area is
    accumulated with the trapezoid rule, one trapezoid per distinct score,
    so examples that share a score contribute half credit for each
    positive/negative pair among them.

    Args:
        labels: sequence indexable like ``probs``; an entry equal to ``1``
            counts as a positive, every other value counts as a negative.
        probs: sequence of comparable scores, one per example.

    Returns:
        The AUC as a float in [0.0, 1.0].

    Raises:
        ZeroDivisionError: if there is not at least one positive and one
            negative example (this includes empty input); the AUC is
            undefined in that case.
    """
    # Indices of the examples, highest score first.
    order = sorted(range(len(probs)), key=lambda i: probs[i], reverse=True)

    area = 0.0
    tp = fp = 0.0            # running positives / negatives seen so far
    tp_prev = fp_prev = 0.0  # totals at the previous distinct score
    last_prob = None         # sentinel: never equal to a real score

    for idx in order:
        prob = probs[idx]
        if prob != last_prob:
            # A new distinct score closes the previous tie group: add its
            # trapezoid before counting the current example.
            area += (tp + tp_prev) * (fp - fp_prev) / 2.0
            tp_prev, fp_prev = tp, fp
            last_prob = prob
        if labels[idx] == 1:
            tp += 1
        else:
            fp += 1

    # Close the final tie group.
    area += (tp + tp_prev) * (fp - fp_prev) / 2.0

    if tp == 0 or fp == 0:
        raise ZeroDivisionError(
            "AUC is undefined: need at least one positive and one negative example"
        )
    return area / (tp * fp)


def read_file(f_name):
    """Read ``label,prob`` rows from a comma-separated text file.

    Rows whose first column is not an integer, whose second column is not a
    float, or that have fewer than two columns (header, blank or truncated
    lines) are skipped.

    Args:
        f_name: path of the file to read.

    Returns:
        A ``(labels, probs)`` tuple of two equally long lists.
    """
    labels = []
    probs = []
    # The context manager guarantees the handle is closed, even on error.
    with open(f_name) as f:
        for line in f:
            fields = line.strip().split(',')
            try:
                label = int(fields[0])
                prob = float(fields[1])
            except (ValueError, IndexError):
                # Header, blank line or malformed row: skip it.
                continue
            labels.append(label)
            probs.append(prob)
    return (labels, probs)


def main():
    """Command-line entry point: print the AUC of the file given in argv[1]."""
    if len(sys.argv) != 2:
        print("Usage: python scoreKDD.py file")
        sys.exit(2)
    labels, probs = read_file(sys.argv[1])
    auc = scoreAUC(labels, probs)
    print("%f" % auc)


if __name__ == "__main__":
    main()
import sys

# Number of equal-width pCTR buckets covering [0, 1); one extra slot at the
# end of the histogram collects records whose pCTR is exactly 1.0.
q_bucket = 1000


def stream_auc(lines, buckets=q_bucket):
    """Accumulate AUC and a pCTR histogram from ``pctr<TAB>show<TAB>click`` lines.

    Each record contributes ``click`` positives and ``show - click``
    negatives at score ``pctr``. The area is accumulated with the trapezoid
    rule, closing one trapezoid every time the pCTR value changes.

    NOTE(review): the input is not sorted here; the accumulation only yields
    the AUC if records arrive ordered by pCTR (presumably descending, as the
    original sentinel of 1.1 suggests) -- confirm against the upstream job.

    Args:
        lines: iterable of text lines; lines with fewer than three
            tab-separated fields are ignored.
        buckets: number of histogram buckets across [0, 1).

    Returns:
        ``(auc, show_sum, click_sum, count_list)`` where ``auc`` is ``0``
        when no area was accumulated, the sums are integer totals over all
        records, and ``count_list`` has ``buckets + 1`` show counts.

    Raises:
        ValueError: if one of the first three fields is not numeric.
    """
    count_list = [0] * (buckets + 1)
    show_sum = 0
    click_sum = 0

    area = 0.0
    clicks = 0.0           # positives seen so far
    clicks_prev = 0.0      # positives seen before the current pCTR group
    group_no_click = 0.0   # negatives inside the current pCTR group
    no_click_sum = 0.0     # negatives seen so far
    last_ctr = None        # sentinel: never equal to a parsed pCTR

    for line in lines:
        fields = line.strip().split('\t')
        if len(fields) < 3:
            continue
        pctr = float(fields[0])
        # Counts may be written as floats ("3.0"), hence int(float(...)).
        show = int(float(fields[1]))
        click = int(float(fields[2]))

        show_sum += show
        click_sum += click

        if pctr != last_ctr:
            # A new pCTR value closes the previous group's trapezoid.
            area += (clicks + clicks_prev) * group_no_click / 2.0
            clicks_prev = clicks
            group_no_click = 0.0
            last_ctr = pctr
        group_no_click += show - click
        no_click_sum += show - click
        clicks += click

        # Multiplying avoids the extra rounding of dividing by 1.0/buckets;
        # clamping keeps out-of-range pCTRs from wrapping around (negative
        # index) or running off the end of the histogram.
        index = min(max(int(pctr * buckets), 0), buckets)
        count_list[index] += show

    # Close the last pCTR group.
    area += (clicks + clicks_prev) * group_no_click / 2.0

    if area > 0:
        # area > 0 implies both clicks and no_click_sum are non-zero.
        auc = area / (clicks * no_click_sum)
    else:
        auc = 0
    return auc, show_sum, click_sum, count_list


def format_report(auc, show_sum, click_sum, count_list, buckets=q_bucket):
    """Return the report lines: AUC summary, total shows, then one line per bucket.

    Every bucket line is ``upper_bound<TAB>shows``. The final line, labelled
    ``1.0``, merges the last regular bucket with the extra slot for
    ``pctr == 1.0``.
    """
    report = [
        "AUC:%s\tshow_sum:%s\tclk_sum:%s" % (auc, show_sum, click_sum),
        "Max bucket num: %s" % (sum(count_list)),
    ]
    for i in range(buckets - 1):
        # Dividing (rather than multiplying by 1.0/buckets) keeps the label
        # free of float noise such as 0.007000000000000001.
        report.append(str((i + 1) / float(buckets)) + '\t' + str(count_list[i]))
    report.append('1.0\t' + str(count_list[buckets - 1] + count_list[buckets]))
    return report


def _main():
    """Read records from stdin and print the AUC and histogram report."""
    auc, show_sum, click_sum, count_list = stream_auc(sys.stdin)
    for text in format_report(auc, show_sum, click_sum, count_list):
        # Single-argument print() behaves the same on Python 2 and 3.
        print(text)


if __name__ == "__main__":
    _main()
标签:lsh pie upd gif gbk tor jre idg fpm
原文地址:http://www.cnblogs.com/rongyux/p/7104232.html