接上篇,下面是几个匹配算法的详情:
1.完全匹配
完全匹配很简单了,只要判断string是否相等就行了。这里要判断所有拼音和所有号码。如果拼音已经符合,就不再判断号码。反正是一个人……
private ScoreAndHits completeMatch(String reg) {
ScoreAndHits scoreAndHits = new ScoreAndHits(-1, 0f,
new ArrayList<PointPair>());
for (int i = 0; i < fullNameNumberWithoutSpace.size(); i++) {
String str = fullNameNumberWithoutSpace.get(i);
if (reg.equals(str)) {
scoreAndHits.nameIndex = i;
scoreAndHits.score = Match_Level_Complete;
scoreAndHits.pairs.add(new PointPair(i, -1));
scoreAndHits.matchLevel = Level_Complete;
return scoreAndHits;
}
}
for (int i = 0; i < phones.size(); i++) {
PhoneStruct phone = phones.get(i);
if (reg.equals(phone.phoneNumber)) {
scoreAndHits.nameIndex = i;
scoreAndHits.score = Match_Level_Complete;
scoreAndHits.pairs.add(new PointPair(i, -1));
scoreAndHits.matchType = Match_Type_Phone;
scoreAndHits.matchLevel = Level_Complete;
return scoreAndHits;
}
}
// 走到这里说明没有匹配
return new ScoreAndHits(-1, 0f, new ArrayList<PointPair>());
}private ScoreAndHits foreAcronymOverFlowMatch(String reg) {
// 因为有可能是多音字,所以这个方法用来对比不同拼音的匹配度,并取最大的那个
ScoreAndHits scoreAndHits = new ScoreAndHits(-1, 0f,
new ArrayList<PointPair>());
for (int i = 0; i < fullNameNumber.size(); i++) {
ArrayList<String> names = fullNameNumber.get(i);
ScoreAndHits tmpscore = foreAcronymOverFlowMatch(names, reg);
if (tmpscore.score > scoreAndHits.score) {
scoreAndHits = tmpscore;
scoreAndHits.nameIndex = i;
}
}
scoreAndHits.matchLevel = Level_Fore_Acronym_Overflow;
return scoreAndHits;
}
// 在第一个字母确定的情况下,第二个字母有可能有三种情况
// 一、在第一个字母所在单词的邻居位置charAt(x+1);
// 二、在第二个单词的首字母处
// 三、以上两种情况皆不符合,不匹配,出局
private ScoreAndHits foreAcronymOverFlowMatch(ArrayList<String> names,
String reg) {
// 用来得出某一个拼音的匹配值。
ScoreAndHits scoreAndHits = new ScoreAndHits(-1, 0f,
new ArrayList<PointPair>());
if (names.get(0).charAt(0) == reg.charAt(0)) {
//其实crossWords()方法才是求匹配值的方法,lol
OverflowMatchValue value = crossWords(names, reg, 0, 0, 0);
int cross = crossWords(names, reg, 0, 0, 0).crossed;
if (cross > 0) {
scoreAndHits.score = Match_Level_Fore_Acronym_Overflow + cross
* Match_Score_Reward - (names.size() - cross)
* Match_Miss_Punish;
scoreAndHits.pairs = value.pairs;
}
}
return scoreAndHits;
}
/**
* 返回一串字符能跨越另一串字符的长度,根据上面的匹配规则,要尽可能的多匹配单词。若要保证
* 能匹配最长的长度,只要保证下一个字符开始的一段字符能匹配
* 最长的长度即可,换名话说,如果想要让96758匹配最长的字符串,那么只要保证6758能匹配最长
* 的字符串即可,然后758,再然后58……。例如,名字叫PanAnNing,输入pan,那么应该匹配三个
* 首字母,PAN,而不是第一姓的拼音Pan.
* 这是一个递归。
*
* @param names
* @param regString
* 匹配字符串
* @param listIndex
* 匹配到的list的第listIndex个单词
* @param strIndex
* 匹配到第listIndex个单词中的第strIndex个字母
* @param regIndex
* regchar的匹配位置,比如匹配到了96758的7上,也就是regIndex==2.
* @return
*/
private OverflowMatchValue crossWords(ArrayList<String> names,
String regString, int listIndex, int strIndex, int regIndex) {
OverflowMatchValue result = new OverflowMatchValue(0, false);
OverflowMatchValue reser = new OverflowMatchValue(0, false);
OverflowMatchValue impul = new OverflowMatchValue(0, false);
if (regIndex < regString.length() - 1) {
char nextChar = regString.charAt(regIndex + 1);
if (listIndex < names.size() - 1
&& nextChar == names.get(listIndex + 1).charAt(0)) {
impul = crossWords(names, regString, listIndex + 1, 0,
regIndex + 1);
}
if (strIndex < names.get(listIndex).length() - 1
&& nextChar == names.get(listIndex).charAt(strIndex + 1)) {
reser = crossWords(names, regString, listIndex, strIndex + 1,
regIndex + 1);
}
} else {
result = new OverflowMatchValue((strIndex == 0) ? 1 : 0, true);
result.pairs.add(0, new PointPair(listIndex, strIndex));
}
if (reser.matched || impul.matched) {
if (impul.crossed > reser.crossed) {
result = impul;
} else {
result = reser;
}
result.matched = true;
result.crossed = ((strIndex == 0) ? 1 : 0)
+ Math.max(result.crossed, result.crossed);
result.pairs.add(0, new PointPair(listIndex, strIndex));
}
return result;
}
static class OverflowMatchValue {
public int crossed = 0;
public boolean matched = false;
public ArrayList<PointPair> pairs = new ArrayList<PointPair>();
public OverflowMatchValue(int c, boolean m) {
this.crossed = c;
this.matched = m;
}
}跟前置首字母溢出匹配基本一样,只不过匹配的第一个字母不再是姓的首字母。
private ScoreAndHits backAcronymOverFlowMatch(String reg) {
//跟上面差不多
ScoreAndHits scoreAndHits = new ScoreAndHits(-1, 0f,
new ArrayList<PointPair>());
for (int i = 0; i < fullNameNumber.size(); i++) {
ArrayList<String> names = fullNameNumber.get(i);
ScoreAndHits tmp = backAcronymOverFlowMatch(names, reg);
if (tmp.score > scoreAndHits.score) {
scoreAndHits = tmp;
scoreAndHits.nameIndex = i;
}
}
scoreAndHits.matchLevel = Level_Back_Acronym_Overflow;
return scoreAndHits;
}
private ScoreAndHits backAcronymOverFlowMatch(ArrayList<String> names,
String reg) {
int score = 0;
int punish = 0;
ScoreAndHits scoreAndHits = new ScoreAndHits(-1, 0f,
new ArrayList<PointPair>());
// 有可能会调用多次crossWords,取决于名字的长度。这是跟前面的不同
for (int i = 0; i < names.size(); i++) {
String string = (String) names.get(i);
if (string.charAt(0) == reg.charAt(0)) {
OverflowMatchValue value = crossWords(names, reg, i, 0, 0);
int cross = value.crossed;
int lost = names.size() - cross;
if (cross > score || cross == score && punish > lost) {
scoreAndHits.pairs = value.pairs;
score = cross;
punish = lost;
}
}
}
if (score > 0) {
scoreAndHits.score = Match_Level_Back_Acronym_Overflow + score
* Match_Score_Reward - punish * Match_Miss_Punish;
return scoreAndHits;
} else {
return new ScoreAndHits(-1, 0f, new ArrayList<PointPair>());
}
}4.后置无头匹配。(难听就难听了,反正就那个意思)
private ScoreAndHits backHeadlessParagraphMatch(String reg) {
int punish = 0;
ScoreAndHits scoreAndHits = new ScoreAndHits(-1, -1f,
new ArrayList<PointPair>());
scoreAndHits.matchLevel = Level_Headless;
scoreAndHits.matchType = Match_Type_Phone;
// 算了不匹配姓名了,假设没人会得这么离谱
for (int i = 0; i < phones.size(); i++) {
PhoneStruct phone = phones.get(i);
int sco = phone.phoneNumber.indexOf(reg);
if (sco >= 0) {
int lost = phone.phoneNumber.length() - reg.length();
if (scoreAndHits.score < sco || sco == scoreAndHits.score
&& punish > lost) {
scoreAndHits.score = sco;
scoreAndHits.nameIndex = i;
// 尽管有可能匹配的字符串很长,但是因为是连贯的,所以只要
// add一个就够啦
scoreAndHits.pairs.add(new PointPair(i, sco));
punish = lost;
}
}
}
if (scoreAndHits.score >= 0) {
scoreAndHits.score = Match_Level_Headless - scoreAndHits.score
* Match_Score_Reward - punish * Match_Miss_Punish;
}
return scoreAndHits;
}到这里已经可以得出输入字符串与联系人的匹配度了,剩下的事情就是调用和显示了,但是这不是本文的重点
本文出自 “NashLegend” 博客,请务必保留此出处http://nashlegend.blog.51cto.com/5635342/1566111
原文地址:http://nashlegend.blog.51cto.com/5635342/1566111