当前位置:文档之家› k-means算法C语言实现

k-means算法C语言实现

#define SUCCESS 1
#define FAILURE 0
#define TRUE 1
#define FALSE 0
#define MAXVECTDIM 20
#define MAXPATTERN 20
#define MAXCLUSTER 10

char *f2a(double x, int width)
{ //transform double data into string
char cbuf[255];
char *cp;
int i,k;
int d,s;
cp=fcvt(x,width,&d,&s); //把一个浮点数转换为字符串
if (s) {
strcpy(cbuf,"-");
}
else {
strcpy(cbuf," ");
} /* endif */
if (d>0) {
for (i=0; icbuf[i+1]=cp[i];
} /* endfor */
cbuf[d+1]=0;
cp+=d;
strcat(cbuf,".");
strcat(cbuf,cp);
} else {
if (d==0) {
strcat(cbuf,".");
strcat(cbuf,cp);
}
else {
k=-d;
strcat(cbuf,".");
for (i=0; istrcat(cbuf,"0");
} /* endfor */
strcat(cbuf,cp);
} /* endif */
} /* endif */
cp=&cbuf[0];
return cp;
}

// ***** Defined structures & classes *****
struct aCluster {// cluster structures
double Center[MAXVECTDIM];
int Member[MAXPATTERN];
int NumMembers;
};

struct aVector {//
double Center[MAXVECTDIM];
int Size;
};

class System {
private:
double Pattern[MAXPATTERN][MAXVECTDIM+1];
aCluster Cluster[MAXCLUSTER];
int NumPatterns; // Number of patterns
int SizeVector; // Number of dimensions in vector
int NumClusters; // Number of clusters
void DistributeSamples(); // Step 3 of K-means algorithm
int CalcNewClustCenters();// Step 4 of K-means algorithm
double EucNorm(int, int); // Calc Euclidean norm vector
int FindClosestCluster(int); //ret index of clust closest to pattern
//whose index is arg
public:
system();
int LoadPatterns(char *fname); // Get pattern data to be clustered
void InitClusters(); // Step 1 of K-means algorithm
void RunKMeans(); // Overall control K-means process
void ShowClusters(); // Show results on screen
void SaveClusters(char *fname); // Save results to file
void ShowCenters();
};

void System::ShowCenters(){
int i,j;
printf("Cluster centers:\n");
for (i=0; iCluster[i].Member[0]=i;
printf("ClusterCenter[%d]=(%f,%f)\n",i,Cluster[i].Center[0],Cluster[i].Center[1]);
} /* endfor */
printf("\n");
}

int System::LoadPatterns(char *fname){
FILE *InFilePtr;
int i,j;
double x;
if((InFilePtr = fopen(fname, "r")) == NULL)
return FAILURE;
fscanf(InFilePtr, "%d", &NumPatterns); // Read # of patterns
fscanf(InFilePtr, "%d", &SizeVector); // Read dimension of vector
fscanf(InFilePtr, "%d", &NumClusters); // Read # of

clusters for K-Means
for (i=0; ifor (j=0; jfscanf(InFilePtr,"%lg",&x); // consisting of all elements
Pattern[i][j]=x;
} /* endfor */
} /* endfor */
printf("Input patterns:\n");
for (i=0; iprintf("Pattern[%d]=(%2.3f,%2.3f)\n",i,Pattern[i][0],Pattern[i][1]);
} /* endfor */
printf("\n--------------------\n");
return SUCCESS;
}
//**************************************************************************
// InitClusters *
// Arbitrarily assign a vector to each of the K clusters *
// We choose the first K vectors to do this *
//**************************************************************************
void System::InitClusters(){
int i,j;
printf("Initial cluster centers:\n");
for (i=0; iCluster[i].Member[0]=i;
for (j=0; jCluster[i].Center[j]=Pattern[i][j];
} /* endfor */
} /* endfor */
for (i=0; iprintf("ClusterCenter[%d]=(%f,%f)\n",i,Cluster[i].Center[0],Cluster[i].Center[1]);
} /* endfor */
printf("\n");
}

void System::RunKMeans(){
int converged;
int pass;
pass=1;
converged=FALSE;
while (converged==FALSE) {
printf("PASS=%d\n",pass++);
DistributeSamples();
converged=CalcNewClustCenters();
ShowCenters();
} /* endwhile */
}

double System::EucNorm(int p, int c){ // Calc Euclidean norm of vector difference
double dist,x; // between pattern vector, p, and cluster
int i; // center, c.
char zout[128];
char znum[40];
char *pnum;

pnum=&znum[0];
strcpy(zout,"d=sqrt(");
printf("The distance from pattern %d to cluster %d is calculated as:\n",c,p);
dist=0;
for (i=0; ix=(Cluster[c].Center[i]-Pattern[p][i])*(Cluster[c].Center[i]-Pattern[p][i]);
strcat(zout,f2a(x,4));
if (i==0)
strcat(zout,"+");
dist += (Cluster[c].Center[i]-Pattern[p][i])*(Cluster[c].Center[i]-Pattern[p][i]);
} /* endfor */
printf("%s)\n",zout);
return dist;
}

int System::FindClosestCluster(int pat){
int i, ClustID;
double MinDist, d;
MinDist =9.9e+99;
ClustID=-1;
for (i=0; id=EucNorm(pat,i);
printf("Distance from pattern %d to cluster %d is %f\n\n",pat,i,sqrt(d));
if (dMinDist=d;
ClustID=i;
} /* endif */
} /* endfor */
if (ClustID<0) {
printf("Aaargh");
exit(0);
} /* endif */
return ClustID;
}

void System::DistributeSamples(){
int i,pat,Clustid,MemberIndex;
//Clear membership list for all current clusters
for (i=0; iCluster[i].NumMembers=0;
}
for (pat=0; pat//Find cluster center to which the pattern is closest

Clustid= FindClosestCluster(pat);
printf("patern %d assigned to cluster %d\n\n",pat,Clustid);
//post this pattern to the cluster
MemberIndex=Cluster[Clustid].NumMembers;
Cluster[Clustid].Member[MemberIndex]=pat;
Cluster[Clustid].NumMembers++;
} /* endfor */
}

int System::CalcNewClustCenters(){
int ConvFlag,VectID,i,j,k;
double tmp[MAXVECTDIM];
char xs[255];
char ys[255];
char nc1[20];
char nc2[20];
char *pnc1;
char *pnc2;
char *fpv;

pnc1=&nc1[0];
pnc2=&nc2[0];
ConvFlag=TRUE;
printf("The new cluster centers are now calculated as:\n");
for (i=0; ipnc1=itoa(Cluster[i].NumMembers,nc1,10);
pnc2=itoa(i,nc2,10);
strcpy(xs,"Cluster Center");
strcat(xs,nc2);
strcat(xs,"(1/");
strcpy(ys,"(1/");
strcat(xs,nc1);
strcat(ys,nc1);
strcat(xs,")(");
strcat(ys,")(");
for (j=0; jtmp[j]=0.0;
} /* endfor */
for (j=0; jVectID=Cluster[i].Member[j];
for (k=0; ktmp[k] += Pattern[VectID][k]; // add (member) pattern elmnt into temp
if (k==0) {
strcat(xs,f2a(Pattern[VectID][k],3));
} else {
strcat(ys,f2a(Pattern[VectID][k],3));
} /* endif */
} /* endfor */
if(jstrcat(xs,"+");
strcat(ys,"+");
}
else {
strcat(xs,")");
strcat(ys,")");
}
} /* endfor */
for (k=0; ktmp[k]=tmp[k]/Cluster[i].NumMembers;
if (tmp[k] != Cluster[i].Center[k])
ConvFlag=FALSE;
Cluster[i].Center[k]=tmp[k];
} /* endfor */
printf("%s,\n",xs);
printf("%s\n",ys);
} /* endfor */
return ConvFlag;
}

void System::ShowClusters(){
int cl;
for (cl=0; clprintf("\nCLUSTER %d ==>[%f,%f]\n", cl,Cluster[cl].Center[0],Cluster[cl].Center[1]);
} /* endfor */
}

void System::SaveClusters(char *fname){
}

main(int argc, char *argv[]) {//main procedure
System kmeans;
if (argc<2) {
printf("USAGE: KMEANS PATTERN_FILE\n");
exit(0);
}
if (kmeans.LoadPatterns(argv[1])==FAILURE ){
printf("UNABLE TO READ PATTERN_FILE:%s\n",argv[1]);
exit(0);
}
kmeans.InitClusters();
kmeans.RunKMeans();
kmeans.ShowClusters();
}



k- medoids算法
class medoids
{
public:
void initial(string fileName,int k); //initialize variable
void distribute();//redistribute data into cluster
void print();//print result
private:
void getData(string fileName);//read data from file
int closet(int num); //calculate cluster id which data belongs to
double findNewCenter (set&amt; s);// find new clus

ter center
vector data;//dataset vector
int clusterNum;//record current cluster Number
vector center;//cluster center vector
vector > cluster;//current cluster vector
};

void medoids::initial(string fileName,int k)
{
getData(fileName);
if(data.size()clusterNum = data.size();
else
clusterNum = k;
for(int i=0;icenter.push_back(data[i]);
cluster.resize(clusterNum);
int s = cluster.size();
}

void medoids::getData(string file)//read data from file
{
ifstream in(file.c_str(),ios::in);
int size;
in>>size;
for(int i=0;i{
double temp;
in>>temp;
data.push_back(temp);
}
in.close();
}

void medoids::distribute()
{
cluster.clear();
cluster.resize(clusterNum);
int s = cluster.size();
for(int i=0;i{
int pos = closet(data[i]);
cluster[pos].insert(data[i]);
}
}

int medoids::closet(int num)
{
int pos = -1;
for(int i=0;i{
double newCenter = calCost(cluster[i]);
if(newCenter!=center[i])
{
center[i]=newCenter;
pos=i; break;
}
}
return pos;
}

double medoids::calCost(set&amt; s)
{
set::iterator ite = s.begin();
int minCost = 9.9e+20;
double newCenter = -1;
while(ite!=s.end())
{
set::iterator ite1 = s.begin();
double total=0;
while(ite1!=s.end())
{
double delta = (*ite1)-(*ite);
total = total + delta*delta;
ite1++;
}
if(total{
minCost = total;
newCenter = (*ite);
}
ite++;
}
return newCenter;
}

void medoids::print()
{
ofstream cout("medoids.txt",ios::out);
set::iterator ite;
for(int i=0;i{
cout<<"cluster "<cout<<"medoids "<ite = cluster[i].begin();
while(ite!=cluster[i].end())
{
cout<<(*ite)<< ' ';
ite++;
}
cout<}
}


相关主题
文本预览
相关文档 最新文档