标签:blog ar io os sp for java on div
package MyCluster;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Vector;
/**
 * Single-pass, threshold-based clustering of feature vectors using cosine similarity.
 *
 * <p>Seed clusters are read from {@code ori/original.txt}. Every line of the input file is then
 * either merged into the most similar existing cluster (when that similarity exceeds the
 * threshold) or used to start a new cluster. The index of the chosen cluster is written to the
 * output file, one index per input line.
 *
 * <p>Input lines are split on a single space; the first token is skipped and the following
 * {@code Featurelenth} tokens are parsed as doubles.
 * NOTE(review): the first token is presumably an id or label - confirm against the data files.
 * The original author's note describes the intended use as high-dimensional data with more
 * than k classes.
 */
public class SimpleCluster {

    /**
     * Runs the clustering with the hard-coded paths and threshold.
     *
     * @param args unused
     * @throws Exception if a data file cannot be read or the result file cannot be written
     */
    public static void main(String[] args) throws Exception {
        int Featurelenth = GetLenth();
        double Theta = 0.1;
        String Addr = "raw/data.txt";
        String OutPutAddr = "raw/clusterresult.txt";
        ArrayList<ClusterObject> ResultSet = Ini_ClusterLIST(Featurelenth);
        Get_Cluster_Result(Theta, ResultSet, Addr, OutPutAddr, Featurelenth);
    }

    /**
     * Assigns every line of {@code Addr} to a cluster and writes the chosen cluster index for
     * each line to {@code OutPutAddr}.
     *
     * @param Theta        similarity threshold; a vector joins a cluster only above this value
     * @param ResultSet    current clusters; modified in place and returned
     * @param Addr         path of the UTF-8 input file
     * @param OutPutAddr   path of the result file (overwritten)
     * @param Featurelenth number of features parsed from each line
     * @return the same {@code ResultSet} instance after all lines were processed
     * @throws Exception if the input cannot be read or the output cannot be created
     */
    public static ArrayList<ClusterObject> Get_Cluster_Result(double Theta, ArrayList<ClusterObject> ResultSet, String Addr, String OutPutAddr, int Featurelenth) throws Exception {
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(Addr), "utf-8"));
        try {
            PrintWriter pw = new PrintWriter(OutPutAddr);
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    Vector<Double> Line_Vector = StringToVector(line, Featurelenth);
                    Distribute_OneVecter_to_Cluster(Theta, ResultSet, Line_Vector, Featurelenth, pw, line);
                }
            } finally {
                // Close (and thereby flush) the writer even when a line fails to parse.
                pw.close();
            }
        } finally {
            // The reader was previously never closed.
            reader.close();
        }
        return ResultSet;
    }

    /**
     * Places one vector into the most similar cluster, or opens a new cluster for it, and
     * writes the resulting cluster index followed by {@code "\r\n"} to {@code pw}.
     *
     * @param Theta        similarity threshold
     * @param ResultSet    current clusters; modified in place and returned
     * @param ToBeComfined the vector to assign
     * @param Featurelenth number of features to compare
     * @param pw           writer receiving the cluster index
     * @param line         the raw input line (not used by this method)
     * @return the same {@code ResultSet} instance
     */
    public static ArrayList<ClusterObject> Distribute_OneVecter_to_Cluster(double Theta, ArrayList<ClusterObject> ResultSet, Vector<Double> ToBeComfined, int Featurelenth, PrintWriter pw, String line) {
        double similaritymax = 0;
        int NO_max = 0;
        for (int i = 0; i < ResultSet.size(); i++) {
            double Similarity_cur = Similarity(ToBeComfined, ResultSet.get(i).center, Featurelenth);
            if (Similarity_cur > similaritymax) {
                NO_max = i;
                similaritymax = Similarity_cur;
            }
        }
        if (similaritymax > Theta) {
            ClusterObject best = ResultSet.get(NO_max);
            // The new center must be computed before the member is appended, because
            // GetNewCenter uses the current member count as the weight of the old center.
            best.setcenter(GetNewCenter(best, Featurelenth, ToBeComfined));
            best.LIST.add(ToBeComfined);
            // NOTE(review): best.totalsum is not updated here - confirm whether it is meant
            // to track the member count.
            pw.write(NO_max + "\r\n");
        } else {
            // No cluster is similar enough (or there are none yet): start a new one whose
            // number equals its index in the list.
            ResultSet.add(IniClusterOj(1, ResultSet.size(), ToBeComfined));
            pw.write(ResultSet.size() - 1 + "\r\n");
        }
        return ResultSet;
    }

    /**
     * Computes the running-mean center of a cluster after adding one more member.
     *
     * @param Cluster      the cluster about to receive the member (not modified)
     * @param Featurelenth number of features
     * @param NewMenber    the vector being added
     * @return a new vector holding the updated center
     */
    public static Vector<Double> GetNewCenter(ClusterObject Cluster, int Featurelenth, Vector<Double> NewMenber) {
        Vector<Double> Pre = Cluster.getcenter();
        int count = Cluster.LIST.size();
        Vector<Double> Fresh = new Vector<Double>();
        for (int i = 0; i < Featurelenth; i++) {
            Fresh.add((Pre.get(i) * count + NewMenber.get(i)) / (count + 1));
        }
        return Fresh;
    }

    /**
     * Cosine similarity of two vectors over their first {@code Featurelenth} components.
     *
     * @param A            first vector
     * @param B            second vector
     * @param Featurelenth number of components used for the dot product
     * @return the cosine similarity, or 0 when either vector has zero length (the unguarded
     *         division would otherwise yield NaN)
     */
    public static double Similarity(Vector<Double> A, Vector<Double> B, int Featurelenth) {
        double Vec_Multi_Sum = 0;
        for (int i = 0; i < Featurelenth; i++) {
            Vec_Multi_Sum = Vec_Multi_Sum + A.get(i) * B.get(i);
        }
        double denominator = VectorLength(A) * VectorLength(B);
        if (denominator == 0) {
            return 0;
        }
        return Vec_Multi_Sum / denominator;
    }

    /**
     * Euclidean length of a vector.
     *
     * @param A the vector
     * @return the square root of the sum of squared components
     */
    public static double VectorLength(Vector<Double> A) {
        double sum = 0;
        for (int i = 0; i < A.size(); i++) {
            sum = sum + A.get(i) * A.get(i);
        }
        return Math.sqrt(sum);
    }

    /**
     * Builds the initial cluster list from {@code ori/original.txt}, one single-member cluster
     * per line, numbered in file order starting at 0.
     *
     * @param Featurelenth number of features parsed from each line
     * @return the seed clusters
     * @throws IOException if the seed file cannot be opened or read
     */
    public static ArrayList<ClusterObject> Ini_ClusterLIST(int Featurelenth) throws IOException {
        ArrayList<ClusterObject> ori = new ArrayList<ClusterObject>();
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream("ori/original.txt"), "utf-8"));
        try {
            String line;
            int i = 0;
            while ((line = reader.readLine()) != null) {
                Vector<Double> v = StringToVector(line, Featurelenth);
                ori.add(IniClusterOj(1, i++, v));
            }
        } finally {
            reader.close();
        }
        return ori;
    }

    /**
     * Creates a cluster whose only member is {@code center}. The same vector instance is
     * stored both as the center and as the first entry of the member list.
     *
     * @param totalsum value stored in the cluster's {@code totalsum} field
     * @param NO       cluster number
     * @param center   initial center and first member
     * @return the new cluster
     */
    public static ClusterObject IniClusterOj(int totalsum, int NO, Vector<Double> center) {
        ArrayList<Vector<Double>> LIST = new ArrayList<Vector<Double>>();
        LIST.add(center);
        return new ClusterObject(totalsum, NO, LIST, center);
    }

    /**
     * Determines the feature count from the first line of {@code raw/data.txt}: the number of
     * space-separated tokens minus one (the leading token is not a feature).
     *
     * <p>The previous implementation read a line in the loop condition and again in the loop
     * body, so it skipped the first line, threw a NullPointerException on a one-line file and
     * never closed the reader on the return path.
     *
     * @return the feature count, or 0 when the file is empty
     * @throws IOException if the file cannot be opened or read
     */
    public static int GetLenth() throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream("raw/data.txt"), "utf-8"));
        try {
            String line = reader.readLine();
            if (line == null) {
                return 0;
            }
            return line.split(" ").length - 1;
        } finally {
            reader.close();
        }
    }

    /**
     * Parses tokens 1..{@code Featurelenth} of a space-separated line as doubles; token 0 is
     * skipped.
     *
     * @param line         the input line
     * @param Featurelenth number of features to parse
     * @return the parsed feature vector
     * @throws ArrayIndexOutOfBoundsException if the line has fewer than
     *         {@code Featurelenth + 1} tokens
     * @throws NumberFormatException if a token is not a valid double
     */
    public static Vector<Double> StringToVector(String line, int Featurelenth) {
        Vector<Double> res = new Vector<Double>();
        String[] value = line.split(" ");
        for (int i = 1; i <= Featurelenth; i++) {
            res.add(Double.valueOf(value[i]));
        }
        return res;
    }
}
package MyCluster;
import java.util.ArrayList;
import java.util.Vector;
/**
 * One cluster: its number, its member vectors and its current center.
 *
 * <p>Fields are public and mutable; callers read and modify {@code LIST} and {@code center}
 * directly.
 */
public class ClusterObject {
    // NOTE(review): presumably the number of members; nothing in this class updates it - verify.
    public int totalsum;
    // Cluster number.
    public int NO;
    // Member vectors of this cluster; never null after construction.
    public ArrayList<Vector<Double>> LIST = new ArrayList<Vector<Double>>();
    // Current center; initialised to an empty vector (previously created as a raw Vector).
    public Vector<Double> center = new Vector<Double>();

    /**
     * Creates a cluster.
     *
     * @param totalsum value stored in {@link #totalsum}
     * @param NO       cluster number
     * @param LIST     member list, stored by reference; when {@code null} the empty default
     *                 list is kept instead of being overwritten with {@code null}
     * @param center   center vector, stored by reference; when {@code null} the empty default
     *                 vector is kept
     */
    public ClusterObject(int totalsum, int NO, ArrayList<Vector<Double>> LIST, Vector<Double> center) {
        this.totalsum = totalsum;
        this.NO = NO;
        if (LIST != null) {
            this.LIST = LIST;
        }
        if (center != null) {
            this.center = center;
        }
    }

    /** @return the cluster number */
    public int getNO() {
        return NO;
    }

    /** @return the current center vector (the stored instance, not a copy) */
    public Vector<Double> getcenter() {
        return center;
    }

    /** @param X the new cluster number */
    public void setNO(int X) {
        this.NO = X;
    }

    /** @param X the new center vector, stored by reference */
    public void setcenter(Vector<Double> X) {
        this.center = X;
    }
}
标签:blog ar io os sp for java on div
原文地址:http://www.cnblogs.com/limpek/p/4172077.html