`
daoger
  • 浏览: 529714 次
  • 性别: Icon_minigender_1
  • 来自: 山东济南
社区版块
存档分类
最新评论

An Application Example of the K-Means Algorithm

阅读更多

ClusterMain.java

package eu.eodigos.kmean;

import java.util.Iterator;
import java.util.List;
import java.util.Vector;

import eu.eodigos.hibernate.bean.ClusterInput;
import eu.eodigos.hibernate.server.AccessDBServer;
import eu.eodigos.hibernate.server.AccessDBServerImp;

/**
 * Command-line entry point for the k-means clustering example.
 *
 * @author daoger
 * @version 1.0
 */

public class ClusterMain
{
	/**
	 * Runs the database-backed clustering.
	 *
	 * @param args
	 *            command-line arguments (not used)
	 */
	public static void main(String[] args)
	{
		ClusterMain clusterMain = new ClusterMain();
		clusterMain.clusterByDatabase();
	}

	/**
	 * Clusters a small hard-coded data set and prints the members of every
	 * resulting cluster to standard output.
	 */
	public void test()
	{
		Vector<DataPoint> dataPoints = new Vector<DataPoint>();
		dataPoints.add(new DataPoint(0.12, 0.21, 0.26, 0.45, 0.67, 0.23, 0.11, Integer.valueOf(1)));
		dataPoints.add(new DataPoint(0.22, 0.23, 0.46, 0.11, 0.63, 0.11, 0.12, Integer.valueOf(2)));
		dataPoints.add(new DataPoint(0.32, 0.34, 0.78, 0.17, 0.68, 0.67, 0.13, Integer.valueOf(3)));
		dataPoints.add(new DataPoint(0.42, 0.45, 0.26, 0.42, 0.48, 0.39, 0.14, Integer.valueOf(4)));
		dataPoints.add(new DataPoint(0.52, 0.29, 0.65, 0.59, 0.16, 0.74, 0.15, Integer.valueOf(5)));
		dataPoints.add(new DataPoint(0.62, 0.25, 0.48, 0.61, 0.27, 0.16, 0.67, Integer.valueOf(6)));
		dataPoints.add(new DataPoint(0.72, 0.35, 0.39, 0.20, 0.65, 0.26, 0.17, Integer.valueOf(7)));
		dataPoints.add(new DataPoint(0.82, 0.20, 0.16, 0.29, 0.32, 0.61, 0.18, Integer.valueOf(8)));
		dataPoints.add(new DataPoint(0.92, 0.71, 0.26, 0.37, 0.17, 0.81, 0.19, Integer.valueOf(9)));
		dataPoints.add(new DataPoint(0.13, 0.39, 0.17, 0.41, 0.47, 0.37, 0.10, Integer.valueOf(10)));
		dataPoints.add(new DataPoint(0.14, 0.23, 0.47, 0.93, 0.68, 0.28, 0.29, Integer.valueOf(11)));
		dataPoints.add(new DataPoint(0.15, 0.57, 0.84, 0.19, 0.15, 0.39, 0.39, Integer.valueOf(12)));
		dataPoints.add(new DataPoint(0.16, 0.19, 0.45, 0.38, 0.36, 0.82, 0.49, Integer.valueOf(13)));
		dataPoints.add(new DataPoint(0.17, 0.89, 0.29, 0.39, 0.82, 0.58, 0.59, Integer.valueOf(14)));
		// Divide the sample users into 3 clusters; 10000 is the iteration
		// limit handed to the clustering loop.
		ClusterAssistant clusterAssistant = new ClusterAssistant(3, 10000, dataPoints);
		clusterAssistant.startAnalysis();

		Vector[] v = clusterAssistant.getClusterOutput();
		for (int i = 0; i < v.length; i++)
		{
			System.out.println("-----------Cluster" + i + "---------");
			for (Object member : v[i])
			{
				DataPoint dpTemp = (DataPoint) member;
				// Every value is comma-separated (the separator between
				// avg2 and avg3 used to be missing).
				String dps = "userid_" + dpTemp.getUserid() + "[" + dpTemp.getAvg1() + "," + dpTemp.getAvg2() + ","
						+ dpTemp.getAvg3() + "," + dpTemp.getAvg4() + "," + dpTemp.getAvg5() + "," + dpTemp.getAvg6()
						+ "," + dpTemp.getAvg7() + "]";
				System.out.println(dps);
			}
		}
	}

	/**
	 * Loads the cluster input rows from the database, groups them into 7
	 * clusters and writes the 1-based cluster number of every point back
	 * through the database access object.
	 */
	public void clusterByDatabase()
	{
		AccessDBServer access = new AccessDBServerImp();
		Vector<DataPoint> dataPoints = new Vector<DataPoint>();
		List clusterList = access.getAllClusterInputData();
		for (Object row : clusterList)
		{
			ClusterInput clusterInput = (ClusterInput) row;
			if (clusterInput != null)
			{
				// NOTE(review): getClusterId() is stored as the point's
				// userid and later passed to updateClusterCateOfUsers as
				// the user key - confirm that this is the intended id.
				dataPoints.add(new DataPoint(clusterInput.getAvgArch(), clusterInput.getAvgMon(), clusterInput
						.getAvgMus(), clusterInput.getAvgBuil(), clusterInput.getAvgChap(), clusterInput.getAvgBeach(),
						clusterInput.getAvgWalk(), clusterInput.getClusterId()));
			}
		}
		if (dataPoints.isEmpty())
		{
			// Nothing to cluster; the analysis reads the first data point
			// unconditionally and would fail on an empty input.
			return;
		}
		// Divide all users into 7 clusters; 10000 is the iteration limit
		// handed to the clustering loop.
		ClusterAssistant clusterAssistant = new ClusterAssistant(7, 10000, dataPoints);
		clusterAssistant.startAnalysis();

		Vector[] v = clusterAssistant.getClusterOutput();
		for (int i = 0; i < v.length; i++)
		{
			// Cluster categories are stored 1-based.
			Integer category = Integer.valueOf(i + 1);
			for (Object member : v[i])
			{
				DataPoint dpTemp = (DataPoint) member;
				access.updateClusterCateOfUsers(dpTemp.getUserid(), category);
			}
		}
	}
}

 ClusterAssistant.java

package eu.eodigos.kmean;

import java.util.Vector;

/**
 * Drives the k-means clustering of a set of data points into k clusters.
 *
 * @author daoger
 * @version 1.0
 */


public class ClusterAssistant
{
	/** Number of feature values (avg1..avg7) read from every DataPoint. */
	private static final int DIMENSIONS = 7;

	private Cluster[] clusters;

	private int miter;

	private Vector mDataPoints = new Vector();

	private double mSWCSS;

	/**
	 * Creates the k (still empty) clusters, named "Cluster0".."Cluster(k-1)".
	 *
	 * @param k
	 *            number of clusters to build
	 * @param iter
	 *            maximum number of reassignment passes
	 * @param dataPoints
	 *            the DataPoint objects to cluster; {@code null} is treated
	 *            as an empty input
	 */
	public ClusterAssistant(int k, int iter, Vector dataPoints)
	{
		clusters = new Cluster[k];
		for (int i = 0; i < k; i++)
		{
			clusters[i] = new Cluster("Cluster" + i);
		}
		this.miter = iter;
		this.mDataPoints = (dataPoints != null) ? dataPoints : new Vector();
	}

	/** Sums the per-cluster values of getSumSqr() into mSWCSS. */
	private void calcSWCSS()
	{
		double temp = 0;
		for (int i = 0; i < clusters.length; i++)
		{
			temp = temp + clusters[i].getSumSqr();
		}
		mSWCSS = temp;
	}

	/**
	 * Runs the clustering: seeds the centroids, deals the points to the
	 * clusters in round-robin order and then repeatedly moves every point
	 * to a strictly closer centroid. Stops after {@code miter} passes or as
	 * soon as a complete pass moves no point. Does nothing when there are
	 * no clusters or no data points.
	 */
	public void startAnalysis()
	{
		if (clusters.length == 0 || mDataPoints.isEmpty())
		{
			// Seeding reads the first data point and the distribution below
			// needs at least one cluster.
			return;
		}
		setInitialCentroids();
		for (int n = 0; n < mDataPoints.size(); n++)
		{
			clusters[n % clusters.length].addDataPoint((DataPoint) mDataPoints.elementAt(n));
		}
		recalculateCentroids();

		for (int i = 0; i < miter; i++)
		{
			boolean moved = false;
			for (int j = 0; j < clusters.length; j++)
			{
				int k = 0;
				while (k < clusters[j].getNumDataPoints())
				{
					DataPoint dp = clusters[j].getDataPoint(k);
					Cluster target = findCloserCluster(dp, j);
					if (target == null)
					{
						k++;
						continue;
					}
					target.addDataPoint(dp);
					clusters[j].removeDataPoint(dp);
					recalculateCentroids();
					moved = true;
					// k is not advanced here: the removal shifted the next
					// point into slot k, and it must be examined as well.
				}
			}
			if (!moved)
			{
				// No point changed cluster, so no centroid changed; every
				// further pass would repeat this one.
				break;
			}
		}
	}

	/**
	 * Looks for the cluster whose centroid is strictly closer to the point
	 * than the distance the point currently stores.
	 *
	 * @param dp
	 *            the point to test
	 * @param currentIndex
	 *            index of the cluster that holds the point; it is skipped
	 * @return the closest such cluster, or {@code null} when none is closer
	 */
	private Cluster findCloserCluster(DataPoint dp, int currentIndex)
	{
		double best = dp.getCurrentEuDt();
		Cluster target = null;
		for (int l = 0; l < clusters.length; l++)
		{
			if (l == currentIndex)
			{
				continue;
			}
			double distance = dp.testEuclideanDistance(clusters[l].getCentroid());
			if (best > distance)
			{
				best = distance;
				target = clusters[l];
			}
		}
		return target;
	}

	/**
	 * Recomputes the centroid of every non-empty cluster and refreshes
	 * mSWCSS. Empty clusters keep their current centroid because averaging
	 * zero points would divide by zero.
	 */
	private void recalculateCentroids()
	{
		for (int m = 0; m < clusters.length; m++)
		{
			if (clusters[m].getNumDataPoints() > 0)
			{
				clusters[m].getCentroid().calcCentroid();
			}
		}
		calcSWCSS();
	}

	/**
	 * Returns the members of every cluster.
	 *
	 * @return one vector of DataPoint objects per cluster, in cluster order
	 */
	public Vector[] getClusterOutput()
	{
		Vector v[] = new Vector[clusters.length];
		for (int i = 0; i < clusters.length; i++)
		{
			v[i] = clusters[i].getDataPoints();
		}
		return v;
	}

	/**
	 * Seeds the centroids evenly between the minimum and maximum of each
	 * feature: centroid n (n = 1..k) is placed at
	 * ((max - min) / (k + 1)) * n + min in every dimension.
	 */
	private void setInitialCentroids()
	{
		// The extremes depend only on the data, so compute them once.
		double[] min = new double[DIMENSIONS];
		double[] max = new double[DIMENSIONS];
		for (int i = 1; i <= DIMENSIONS; i++)
		{
			min[i - 1] = getMinXValue(i);
			max[i - 1] = getMaxXValue(i);
		}
		for (int n = 1; n <= clusters.length; n++)
		{
			double[] c = new double[DIMENSIONS];
			for (int i = 0; i < DIMENSIONS; i++)
			{
				c[i] = (((max[i] - min[i]) / (clusters.length + 1)) * n) + min[i];
			}
			Centroid ce = new Centroid(c[0], c[1], c[2], c[3], c[4], c[5], c[6]);
			clusters[n - 1].setCentroid(ce);
			ce.setCluster(clusters[n - 1]);
		}
	}

	/**
	 * Reads one feature of a data point.
	 *
	 * @param dp
	 *            the point to read
	 * @param avgnumber
	 *            1-based feature number (1..7)
	 * @return the feature value, or 0.0 for any other feature number
	 */
	private static double featureOf(DataPoint dp, int avgnumber)
	{
		switch (avgnumber)
		{
		case 1:// Archeological
			return dp.getAvg1();
		case 2:// Monuments
			return dp.getAvg2();
		case 3:// Museums
			return dp.getAvg3();
		case 4:// Buildings
			return dp.getAvg4();
		case 5:// Chapels
			return dp.getAvg5();
		case 6:// Beaches
			return dp.getAvg6();
		case 7:// Walking
			return dp.getAvg7();
		default:
			return 0.0;
		}
	}

	/**
	 * @param avgnumber
	 *            1-based feature number (1..7)
	 * @return the largest value of that feature over all data points
	 */
	private double getMaxXValue(int avgnumber)
	{
		double temp = featureOf((DataPoint) mDataPoints.elementAt(0), avgnumber);
		for (int i = 1; i < mDataPoints.size(); i++)
		{
			double value = featureOf((DataPoint) mDataPoints.elementAt(i), avgnumber);
			temp = (value > temp) ? value : temp;
		}
		return temp;
	}

	/**
	 * @param avgnumber
	 *            1-based feature number (1..7)
	 * @return the smallest value of that feature over all data points
	 */
	private double getMinXValue(int avgnumber)
	{
		double temp = featureOf((DataPoint) mDataPoints.elementAt(0), avgnumber);
		for (int i = 1; i < mDataPoints.size(); i++)
		{
			double value = featureOf((DataPoint) mDataPoints.elementAt(i), avgnumber);
			temp = (value < temp) ? value : temp;
		}
		return temp;
	}

	/** @return the number of clusters (k) */
	public int getKValue()
	{
		return clusters.length;
	}

	/** @return the configured maximum number of reassignment passes */
	public int getIterations()
	{
		return miter;
	}

	/** @return the number of data points handed to this instance */
	public int getTotalDataPoints()
	{
		return mDataPoints.size();
	}

	/** @return the sum of getSumSqr() over all clusters, as last computed */
	public double getSWCSS()
	{
		return mSWCSS;
	}

	/**
	 * @param pos
	 *            zero-based cluster index
	 * @return the cluster at that index
	 */
	public Cluster getCluster(int pos)
	{
		return clusters[pos];
	}
}

 

 

Centroid.java

package eu.eodigos.kmean;

/**
 * Centroid of a k-means cluster, described by seven averaged coordinates.
 *
 * @author daoger
 * @version 1.0
 */

class Centroid
{
	private double avgC1, avgC2, avgC3, avgC4, avgC5, avgC6, avgC7;

	private Cluster mCluster;

	/**
	 * Creates a centroid at the given coordinates. The owning cluster must
	 * be attached with {@link #setCluster(Cluster)} before
	 * {@link #calcCentroid()} is called.
	 */
	public Centroid(double ac1, double ac2, double ac3, double ac4, double ac5, double ac6, double ac7)
	{
		this.avgC1 = ac1;
		this.avgC2 = ac2;
		this.avgC3 = ac3;
		this.avgC4 = ac4;
		this.avgC5 = ac5;
		this.avgC6 = ac6;
		this.avgC7 = ac7;
	}

	/**
	 * Moves this centroid to the mean of the data points of its cluster,
	 * then asks every one of those points to recompute its stored distance
	 * and the cluster to recompute its sum of squares.
	 * <p>
	 * When the cluster holds no points the coordinates are left unchanged:
	 * averaging zero points would divide by zero and turn every coordinate
	 * into NaN.
	 */
	public void calcCentroid()
	{ // only called by CAInstance
		int numDP = mCluster.getNumDataPoints();
		if (numDP == 0)
		{
			mCluster.calcSumOfSquares();
			return;
		}
		// per-coordinate totals over all points of the cluster
		double sum1 = 0, sum2 = 0, sum3 = 0, sum4 = 0, sum5 = 0, sum6 = 0, sum7 = 0;
		for (int i = 0; i < numDP; i++)
		{
			DataPoint dp = mCluster.getDataPoint(i);
			sum1 = sum1 + dp.getAvg1();
			sum2 = sum2 + dp.getAvg2();
			sum3 = sum3 + dp.getAvg3();
			sum4 = sum4 + dp.getAvg4();
			sum5 = sum5 + dp.getAvg5();
			sum6 = sum6 + dp.getAvg6();
			sum7 = sum7 + dp.getAvg7();
		}
		this.avgC1 = sum1 / numDP;
		this.avgC2 = sum2 / numDP;
		this.avgC3 = sum3 / numDP;
		this.avgC4 = sum4 / numDP;
		this.avgC5 = sum5 / numDP;
		this.avgC6 = sum6 / numDP;
		this.avgC7 = sum7 / numDP;
		// refresh the stored Euclidean distance of each data point
		for (int i = 0; i < numDP; i++)
		{
			mCluster.getDataPoint(i).calcEuclideanDistance();
		}
		// calculate the new Sum of Squares for the Cluster
		mCluster.calcSumOfSquares();
	}

	/**
	 * @param c
	 *            the cluster this centroid belongs to
	 */
	public void setCluster(Cluster c)
	{
		this.mCluster = c;
	}

	public double getAvgC1()
	{
		return avgC1;
	}

	public void setAvgC1(double avgC1)
	{
		this.avgC1 = avgC1;
	}

	public double getAvgC2()
	{
		return avgC2;
	}

	public void setAvgC2(double avgC2)
	{
		this.avgC2 = avgC2;
	}

	public double getAvgC3()
	{
		return avgC3;
	}

	public void setAvgC3(double avgC3)
	{
		this.avgC3 = avgC3;
	}

	public double getAvgC4()
	{
		return avgC4;
	}

	public void setAvgC4(double avgC4)
	{
		this.avgC4 = avgC4;
	}

	public double getAvgC5()
	{
		return avgC5;
	}

	public void setAvgC5(double avgC5)
	{
		this.avgC5 = avgC5;
	}

	public double getAvgC6()
	{
		return avgC6;
	}

	public void setAvgC6(double avgC6)
	{
		this.avgC6 = avgC6;
	}

	public double getAvgC7()
	{
		return avgC7;
	}

	public void setAvgC7(double avgC7)
	{
		this.avgC7 = avgC7;
	}

	/** @return the cluster this centroid belongs to, or null if none was set */
	public Cluster getCluster()
	{
		return mCluster;
	}

}

 

Cluster.java

package eu.eodigos.kmean;

import java.util.Vector;

/**
 * A named k-means cluster: its centroid plus the data points assigned to it.
 *
 * @author daoger
 * @version 1.0
 */


class Cluster
{
	private String mName;

	private Centroid mCentroid;

	private double mSumSqr;

	private Vector<DataPoint> mDataPoints;

	/**
	 * Creates an empty cluster without a centroid.
	 *
	 * @param name
	 *            display name of the cluster
	 */
	public Cluster(String name)
	{
		this.mName = name;
		this.mCentroid = null; // will be set by calling setCentroid()
		mDataPoints = new Vector<DataPoint>();
	}

	public void setCentroid(Centroid c)
	{
		mCentroid = c;
	}

	public Centroid getCentroid()
	{
		return mCentroid;
	}

	/**
	 * Adds a point to this cluster and refreshes the sum of squares.
	 * Attaching the point makes it recompute its distance to this
	 * cluster's centroid, so a centroid must already be set.
	 *
	 * @param dp
	 *            the point to add
	 */
	public void addDataPoint(DataPoint dp)
	{ // called from CAInstance
		dp.setCluster(this); // initiates a inner call to calcEuclideanDistance() in DP.
		this.mDataPoints.addElement(dp);
		calcSumOfSquares();
	}

	/**
	 * Removes the first occurrence of a point and refreshes the sum of
	 * squares.
	 *
	 * @param dp
	 *            the point to remove
	 */
	public void removeDataPoint(DataPoint dp)
	{
		this.mDataPoints.removeElement(dp);
		calcSumOfSquares();
	}

	public int getNumDataPoints()
	{
		return this.mDataPoints.size();
	}

	/**
	 * @param pos
	 *            zero-based position within this cluster
	 * @return the point stored at that position
	 */
	public DataPoint getDataPoint(int pos)
	{
		return this.mDataPoints.elementAt(pos);
	}

	/**
	 * Recomputes the sum, over all points of this cluster, of the squared
	 * distance each point currently stores.
	 */
	public void calcSumOfSquares()
	{ // called from Centroid
		double total = 0;
		for (DataPoint dp : this.mDataPoints)
		{
			// getCurrentEuDt() is a plain Euclidean distance, so it has to
			// be squared to contribute to a sum of squares.
			double distance = dp.getCurrentEuDt();
			total = total + distance * distance;
		}
		this.mSumSqr = total;
	}

	/** @return the sum of squares as last computed by calcSumOfSquares() */
	public double getSumSqr()
	{
		return this.mSumSqr;
	}

	public String getName()
	{
		return this.mName;
	}

	/** @return the live vector of points held by this cluster (not a copy) */
	public Vector<DataPoint> getDataPoints()
	{
		return this.mDataPoints;
	}

}

 

DataPoint.java

package eu.eodigos.kmean;

/**
 * A data point to be clustered: seven averaged values tagged with a user id.
 *
 * @author daoger
 * @version 1.0
 */

public class DataPoint
{
	private double avg1, avg2, avg3, avg4, avg5, avg6, avg7;

	private Integer userid;

	private Cluster mCluster;

	private double mEuDt;

	/**
	 * Creates a point that does not yet belong to any cluster.
	 *
	 * @param avg1
	 *            first coordinate
	 * @param avg2
	 *            second coordinate
	 * @param avg3
	 *            third coordinate
	 * @param avg4
	 *            fourth coordinate
	 * @param avg5
	 *            fifth coordinate
	 * @param avg6
	 *            sixth coordinate
	 * @param avg7
	 *            seventh coordinate
	 * @param userid
	 *            identifier carried along with the point
	 */
	public DataPoint(double avg1, double avg2, double avg3, double avg4, double avg5, double avg6, double avg7,
			Integer userid)
	{
		this.mCluster = null;
		this.userid = userid;
		this.avg1 = avg1;
		this.avg2 = avg2;
		this.avg3 = avg3;
		this.avg4 = avg4;
		this.avg5 = avg5;
		this.avg6 = avg6;
		this.avg7 = avg7;
	}

	/**
	 * Attaches the point to a cluster and immediately recomputes the stored
	 * distance to that cluster's centroid.
	 *
	 * @param cluster
	 *            the new owning cluster
	 */
	public void setCluster(Cluster cluster)
	{
		this.mCluster = cluster;
		calcEuclideanDistance();
	}

	/**
	 * Stores the Euclidean distance between this point and the centroid of
	 * its current cluster. Called when the point is added to a cluster or
	 * when a centroid is recalculated.
	 */
	public void calcEuclideanDistance()
	{
		mEuDt = testEuclideanDistance(mCluster.getCentroid());
	}

	/**
	 * Computes the Euclidean distance to an arbitrary centroid without
	 * changing the stored distance.
	 *
	 * @param c
	 *            the centroid to measure against
	 * @return the distance between this point and {@code c}
	 */
	public double testEuclideanDistance(Centroid c)
	{
		double total = squaredGap(avg1, c.getAvgC1());
		total = total + squaredGap(avg2, c.getAvgC2());
		total = total + squaredGap(avg3, c.getAvgC3());
		total = total + squaredGap(avg4, c.getAvgC4());
		total = total + squaredGap(avg5, c.getAvgC5());
		total = total + squaredGap(avg6, c.getAvgC6());
		total = total + squaredGap(avg7, c.getAvgC7());
		return Math.sqrt(total);
	}

	/** Returns the square of the difference of two coordinates. */
	private static double squaredGap(double own, double other)
	{
		return Math.pow((own - other), 2);
	}

	/** @return the first coordinate */
	public double getAvg1()
	{
		return this.avg1;
	}

	/** @param avg1 the first coordinate to set */
	public void setAvg1(double avg1)
	{
		this.avg1 = avg1;
	}

	/** @return the second coordinate */
	public double getAvg2()
	{
		return this.avg2;
	}

	/** @param avg2 the second coordinate to set */
	public void setAvg2(double avg2)
	{
		this.avg2 = avg2;
	}

	/** @return the third coordinate */
	public double getAvg3()
	{
		return this.avg3;
	}

	/** @param avg3 the third coordinate to set */
	public void setAvg3(double avg3)
	{
		this.avg3 = avg3;
	}

	/** @return the fourth coordinate */
	public double getAvg4()
	{
		return this.avg4;
	}

	/** @param avg4 the fourth coordinate to set */
	public void setAvg4(double avg4)
	{
		this.avg4 = avg4;
	}

	/** @return the fifth coordinate */
	public double getAvg5()
	{
		return this.avg5;
	}

	/** @param avg5 the fifth coordinate to set */
	public void setAvg5(double avg5)
	{
		this.avg5 = avg5;
	}

	/** @return the sixth coordinate */
	public double getAvg6()
	{
		return this.avg6;
	}

	/** @param avg6 the sixth coordinate to set */
	public void setAvg6(double avg6)
	{
		this.avg6 = avg6;
	}

	/** @return the seventh coordinate */
	public double getAvg7()
	{
		return this.avg7;
	}

	/** @param avg7 the seventh coordinate to set */
	public void setAvg7(double avg7)
	{
		this.avg7 = avg7;
	}

	/** @return the cluster this point belongs to, or null if unassigned */
	public Cluster getCluster()
	{
		return this.mCluster;
	}

	/** @return the distance stored by the last calcEuclideanDistance() call */
	public double getCurrentEuDt()
	{
		return this.mEuDt;
	}

	/**
	 * @return the userid
	 */
	public Integer getUserid()
	{
		return this.userid;
	}

	/**
	 * @param userid
	 *            the userid to set
	 */
	public void setUserid(Integer userid)
	{
		this.userid = userid;
	}

}

 

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics