//Converts a comma-separated dataset of doubles into an integer dataset by standardizing each dimension (column) with its mean and standard deviation
//by Hiroki Kusumoto and Yoshiyasu Takefuji, Keio University
//kusu@sfc.keio.ac.jp, hiroki.kusumoto@gmail.com, takefuji@sfc.keio.ac.jp
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Returns nonzero when the line contains nothing but spaces, tabs and line terminators.
static int is_blank_line(const char *line){
  return line[strspn(line, " \t\r\n")] == '\0';
}

// Returns the number of comma-separated fields on one line (commas + 1).
// The scan stops at the newline or at the string terminator, whichever comes first,
// so a final line without '\n' cannot run the scan past the end of the buffer.
static size_t count_fields(const char *line){
  size_t fields = 1;
  for( ; *line != '\0' && *line != '\n'; line++ ){
    if( *line == ',' ){
      fields++;
    }
  }
  return fields;
}

// Reads the comma-separated file named by argv[1], standardizes every column
// (subtract the column mean, divide by the column standard deviation, multiply
// by 10000, truncate to int) and writes the result to "int_data.txt".
//
// The number of columns is taken from the first non-blank line. Blank lines are
// skipped. Fields that do not parse as numbers are read as 0 (atof semantics).
// A column whose standard deviation is zero (or not a finite positive number)
// is written as all zeros.
//
// Returns 0 on success, 1 on usage/input/memory errors, -1 when the output
// file cannot be opened.
int main(int argc, char* argv[]){
  enum { LINEMAX = 100000 };      // size passed to fgets; longest accepted line is LINEMAX-1 bytes
  char *line = NULL, *tok;
  double *raw = NULL;             // row-major data: raw[d_n * dimension + dim]
  double *mean = NULL, *sigma = NULL;   // per-column statistics
  size_t data_num = 0, dimension = 0, d_n, dim;
  FILE *fp = NULL;
  int status = 1;

  if (argc < 2) {
    fprintf(stderr, "usage: %s input_file\n", argc > 0 ? argv[0] : "program");
    return 1;
  }
  // Heap buffer instead of a 100 KB variable-length array on the stack.
  line = malloc(LINEMAX + 1);
  if (line == NULL) {
    fprintf(stderr, "memory allocation error\n");
    return 1;
  }
  ///////////////read file/////////////////
  // The path is used directly; no copy into a fixed-size buffer that could overflow.
  if ((fp = fopen(argv[1],"r")) == NULL) {
    printf("file open error\n");
    goto cleanup;
  }
  // Pass 1: count the dimensions (from the first non-blank line) and the data rows.
  while (fgets(line, LINEMAX, fp) != NULL) {
    if (strchr(line, '\n') == NULL && fgetc(fp) != EOF) {
      // No newline and more input follows: fgets cut the line, so the row
      // would otherwise be silently split into several bogus rows.
      fprintf(stderr, "input line is too long (limit is %d bytes)\n", LINEMAX - 1);
      goto cleanup;
    }
    if (is_blank_line(line)) {
      continue;
    }
    if (data_num == 0) {
      dimension = count_fields(line);
    }
    data_num++;
  }
  if (ferror(fp)) {
    fprintf(stderr, "file read error\n");
    goto cleanup;
  }
  if (data_num == 0) {
    fprintf(stderr, "no data in input file\n");
    goto cleanup;
  }
  //fprintf(stderr, "%lu data and %lu dimension\n", (unsigned long)data_num, (unsigned long)dimension);
  // Memory allocation for the data_num * dimension double array and the column statistics
  if (data_num > (size_t)-1 / sizeof *raw / dimension) {
    fprintf(stderr, "input is too large\n");
    goto cleanup;
  }
  raw = malloc(data_num * dimension * sizeof *raw);
  mean = malloc(dimension * sizeof *mean);
  sigma = malloc(dimension * sizeof *sigma);
  if (raw == NULL || mean == NULL || sigma == NULL) {
    fprintf(stderr, "memory allocation error\n");
    goto cleanup;
  }
  // Pass 2: store the data
  rewind(fp);
  d_n = 0;
  while (d_n < data_num && fgets(line, LINEMAX, fp) != NULL) {
    if (is_blank_line(line)) {
      continue;
    }
    tok = strtok(line, ",");
    for( dim = 0; dim < dimension; dim++){
      if (tok == NULL) {
        // strtok ran out of tokens (short row or empty field); atof(NULL) would crash.
        fprintf(stderr, "data row %lu has fewer than %lu fields\n",
                (unsigned long)(d_n + 1), (unsigned long)dimension);
        goto cleanup;
      }
      raw[d_n * dimension + dim] = atof(tok);
      tok = strtok(NULL, ",");
    }
    d_n++;
  }
  if (d_n != data_num) {
    fprintf(stderr, "file read error\n");
    goto cleanup;
  }
  fclose(fp);   //Close file
  fp = NULL;
  /////////////////read file  End///////////////////
  ///////////compute mean and standard deviation (sigma) by dimension///////////////////////////
  for(dim = 0; dim < dimension; dim++){
    double sum = 0, sum_sq = 0, average, variance;
    for(d_n = 0; d_n < data_num; d_n++){
      double v = raw[d_n * dimension + dim];
      sum += v;
      sum_sq += v * v;
    }
    average = sum / (double)data_num;
    variance = sum_sq / (double)data_num - average * average;
    mean[dim] = average;
    // Rounding can make the variance of a (nearly) constant column slightly
    // negative, and non-finite input makes it NaN/inf; taking the root would
    // then give NaN and converting NaN to int is undefined. Treat all of these
    // as "no spread" so the column is written as zeros.
    sigma[dim] = (variance > 0 && isfinite(variance)) ? sqrt(variance) : 0;
  }
  ///////////write "int data" scaled by standard deviation///////////////////////////
  if ((fp = fopen("int_data.txt","w")) == NULL) {
    printf("file open error for output\n");
    status = -1;
    goto cleanup;
  }
  for(d_n = 0; d_n < data_num; d_n++){
    for(dim = 0; dim < dimension; dim++){
      int scaled = 0;
      if (sigma[dim] != 0) {
        scaled = (int)(10000*(raw[d_n * dimension + dim]-mean[dim])/sigma[dim]);
      }
      if (dim > 0) {
        fputc(',', fp);
      }
      fprintf(fp,"%d",scaled);
    }
    fprintf(fp,"\n");
  }
  if (ferror(fp)) {
    fprintf(stderr, "file write error\n");
    goto cleanup;
  }
  // fclose flushes buffered output, so its result matters for a written file.
  if (fclose(fp) != 0) {
    fp = NULL;
    fprintf(stderr, "file write error\n");
    goto cleanup;
  }
  fp = NULL;
  status = 0;

cleanup:
  if (fp != NULL) {
    fclose(fp);
  }
  free(sigma);
  free(mean);
  free(raw);
  free(line);
  return status;
}

