Optimizing Memory Usage Cache

1

I got a code implementing Dijkstra in C, and my mission is to optimize this code. The code is this:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NUM_NODES                          1000
#define NONE                               9999

struct _NODE
{
  int iDist;
  int iPrev;
};
typedef struct _NODE NODE;

struct _QITEM
{
  int iNode;
  int iDist;
  int iPrev;
  struct _QITEM *qNext;
};
typedef struct _QITEM QITEM;

QITEM *qHead = NULL;




short int AdjMatrix[NUM_NODES][NUM_NODES];

int g_qCount = 0;
NODE rgnNodes[NUM_NODES];
int ch;
int iPrev, iNode;
int i, iCost, iDist;


void print_path (NODE *rgnNodes, int chNode)
{
  if (rgnNodes[chNode].iPrev != NONE)
    {
      print_path(rgnNodes, rgnNodes[chNode].iPrev);
    }
  printf (" %d", chNode);
  fflush(stdout);
}


void enqueue (int iNode, int iDist, int iPrev)
{
  QITEM *qNew = (QITEM *) malloc(sizeof(QITEM));
  QITEM *qLast = qHead;

  if (!qNew) 
    {
      fprintf(stderr, "Out of memory.\n");
      exit(1);
    }
  qNew->iNode = iNode;
  qNew->iDist = iDist;
  qNew->iPrev = iPrev;
  qNew->qNext = NULL;

  if (!qLast) 
    {
      qHead = qNew;
    }
  else
    {
      while (qLast->qNext) qLast = qLast->qNext;
      qLast->qNext = qNew;
    }
  g_qCount++;
  //               ASSERT(g_qCount);
}


void dequeue (int *piNode, int *piDist, int *piPrev)
{
  QITEM *qKill = qHead;

  if (qHead)
    {
      //                 ASSERT(g_qCount);
      *piNode = qHead->iNode;
      *piDist = qHead->iDist;
      *piPrev = qHead->iPrev;
      qHead = qHead->qNext;
      free(qKill);
      g_qCount--;
    }
}


int qcount (void)
{
  return(g_qCount);
}

int dijkstra(int chStart, int chEnd) 
{



  for (ch = 0; ch < NUM_NODES; ch++)
    {
      rgnNodes[ch].iDist = NONE;
      rgnNodes[ch].iPrev = NONE;
    }

  if (chStart == chEnd) 
    {
      printf("Shortest path is 0 in cost. Just stay where you are.\n");
    }
  else
    {
      rgnNodes[chStart].iDist = 0;
      rgnNodes[chStart].iPrev = NONE;

      enqueue (chStart, 0, NONE);

     while (qcount() > 0)
 {
      dequeue (&iNode, &iDist, &iPrev);
      for (i = 0; i < NUM_NODES; i++)
        {
          if ((iCost = AdjMatrix[iNode][i]) != NONE)
        {
          if ((NONE == rgnNodes[i].iDist) || 
              (rgnNodes[i].iDist > (iCost + iDist)))
            {
              rgnNodes[i].iDist = iDist + iCost;
              rgnNodes[i].iPrev = iNode;
              enqueue (i, iDist + iCost, iNode);
            }
        }
        }
    }

      printf("Shortest path is %d in cost. ", rgnNodes[chEnd].iDist);
      printf("Path is: ");
      print_path(rgnNodes, chEnd);
      printf("\n");

    }
}

  int main(int argc, char *argv[]) {
  int i,j,k;
  FILE *fp;
  char line[2048];
  char* token;

  if (argc<2) {
    fprintf(stderr, "Usage: dijkstra <filename>\n");
    fprintf(stderr, "Only supports matrix size is #define'd.\n");
  }

  /* open the adjacency matrix file */
  fp = fopen (argv[1],"r");

   /*
  // make a fully connected matrix 
  for (i=0;i<NUM_NODES;i++) {
    for (j=0;j<NUM_NODES;j++) {
      // make it more sparce 
       fscanf(fp,"%d",&k);
            AdjMatrix[i][j]= k;
    }
 }
  */

  for (i=0;i<1000;i++) {
      for (j=0;j<1000;j++) {
        AdjMatrix[i][j] = NONE;
      }
  }  

   while (fgets(line, sizeof(line), fp)) {
       token = strtok(line," ");
       i = atoi(token);
       token = strtok(NULL, " \n");
        while (token) {
            j = atoi(token);
            AdjMatrix[i][j]= 1;
            token = strtok(NULL, " \n");
        }
    }  


  // nao mudar esta lista de caminhos a serem pesquisados
  for (i=0,j=50;i<50;i++,j--) {
      printf("Path %d => %d\n",i,j);
     dijkstra(i,j);

  }

  for (i=50,j=100;i<100;i++,j--) {
      printf("Path %d => %d\n",i,j);
      dijkstra(i,j);

  }  

/*
  for (i=0;i<300;i++) {
      for (j=0;j<300;j++) {
        printf("%d ",AdjMatrix[i][j]);
      }
      printf("\n");
  }  
*/

/*
  printf("Path %d => %d\n",0,13);
  dijkstra(0,13);
  printf("Path %d => %d\n",1,77);
  dijkstra(1,77);
*/
  printf("Path %d => %d\n",51,99);
  dijkstra(51,99);
  exit(0);


}

Well, I did some optimizations, which are nothing to do, swap int for short int, change NUM_NODES to 300 because I know that the test files have 300 nodes, decrease the size of the char array that reads the rows because I know a row does not have 2048 characters, add a pointer to the last element of the queue, and change the order of struct attributes, but the main optimization, and which I thought would drastically change the number of misses from the cache, just got worse. In the given code the array is declared like this:     int AdjMatrix [NUM_NODES] [NUM_NODES]; I've been researching, and allocating in memory this array would be much more cache-friendly due to the contiguity of the addresses, and the same goes for the rgnNodes Array, my code looks like this:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NUM_NODES                          300
#define NONE                               9999

struct _NODE
{
  short int iDist;
  short int iPrev;
};
typedef struct _NODE NODE;

struct _QITEM
{
  struct _QITEM *qNext;
  short int iNode;
  short int iDist;
  short int iPrev;
};
typedef struct _QITEM QITEM;

short int *mat;
QITEM *qHead = NULL;
QITEM *qTail = NULL;


short int g_qCount = 0;
NODE *rgnNodes;
short int ch;
short int iPrev, iNode;
short int i, iDist;
int offset;

void print_path (NODE *rgnNodes, short int chNode)
{
  if (rgnNodes[chNode].iPrev != NONE)
    {
      print_path(rgnNodes, rgnNodes[chNode].iPrev);
    }
  printf (" %d", chNode);
  fflush(stdout);
}


void enqueue (short int iNode, short int iDist, short int iPrev)
{
  QITEM *qNew = (QITEM *) malloc(sizeof(QITEM));

  if (!qNew) 
    {
      fprintf(stderr, "Out of memory.\n");
      exit(1);
    }
  qNew->iNode = iNode;
  qNew->iDist = iDist;
  qNew->iPrev = iPrev;
  qNew->qNext = NULL; //qNew recebe os parametros

  if (!qHead)
    {
      qHead = qTail = qNew;

    }
  else
    {
      qTail->qNext = qNew;
      qTail=qNew;
    }
  g_qCount++;
  //               ASSERT(g_qCount);
}


void dequeue (short int *piNode, short int *piDist, short int *piPrev)
{
  QITEM *qKill = qHead;

  if (qHead)
    {
      //                 ASSERT(g_qCount);
      *piNode = qHead->iNode;
      *piDist = qHead->iDist;
      *piPrev = qHead->iPrev;
      qHead = qHead->qNext;
      free(qKill);
      g_qCount--;
    }
}


short int qcount (void)
{
  return(g_qCount);
}

int dijkstra(short int chStart, short int chEnd) 
{



  for (ch = 0; ch < NUM_NODES; ch++)
    {
      rgnNodes[ch].iDist = NONE;
      rgnNodes[ch].iPrev = NONE;
    }

  if (chStart == chEnd) 
    {
      printf("Shortest path is 0 in cost. Just stay where you are.\n");
    }
  else
    {
      rgnNodes[chStart].iDist = 0;
      rgnNodes[chStart].iPrev = NONE;

      enqueue (chStart, 0, NONE);

     while (qcount() > 0)
    {
      dequeue (&iNode, &iDist, &iPrev);
      for (i = 0; i < NUM_NODES; i++)
        {
          offset= iNode * NUM_NODES + i;
          if (mat[offset] != NONE && ((NONE == rgnNodes[i].iDist) || (rgnNodes[i].iDist > (1 + iDist))))  
        {
              rgnNodes[i].iDist = iDist + 1;
              rgnNodes[i].iPrev = iNode; 
              enqueue (i, iDist + 1, iNode);
            }

        }
    }

      printf("Shortest path is %d in cost. ", rgnNodes[chEnd].iDist);
      printf("Path is: ");
      print_path(rgnNodes, chEnd);
      printf("\n");

    }
}

int main(short int argc, char *argv[]) {
  short int i,j,k;
  FILE *fp;
  char line[512];
  char* token;
  mat=(short int *)malloc(NUM_NODES * NUM_NODES * sizeof(short int));
  rgnNodes = (NODE *)malloc(NUM_NODES * sizeof(NODE));
  if (argc<2) {
    fprintf(stderr, "Usage: dijkstra <filename>\n");
    fprintf(stderr, "Only supports matrix size is #define'd.\n");
  }

  /* open the adjacency matrix file */
  fp = fopen (argv[1],"r");

/*
  // make a fully connected matrix 
  for (i=0;i<NUM_NODES;i++) {
    for (j=0;j<NUM_NODES;j++) {
      // make it more sparce 
      fscanf(fp,"%d",&k);
            mat[i][j]= k;
    }
  }
  */

  for (i=0;i<300;i++) {
      for (j=0;j<300;j++) {
        offset= i * NUM_NODES + j;
        mat[offset]=NONE;
      }
  }  

   while (fgets(line, sizeof(line), fp)) { //completa a matriz com as devidas adjacencias
       token = strtok(line," ");
       i = atoi(token);
       token = strtok(NULL, " \n");
        while (token) {
            j = atoi(token);
            offset=i*NUM_NODES+j;
            mat[offset]= 1;
            token = strtok(NULL, " \n");
        }
    }  


  // nao mudar esta lista de caminhos a serem pesquisados
  for (i=0,j=50;i<50;i++,j--) {
      printf("Path %d => %d\n",i,j);
      dijkstra(i,j);

  }

  for (i=50,j=100;i<100;i++,j--) {
      printf("Path %d => %d\n",i,j);
      dijkstra(i,j);

  }  

/*
  for (i=0;i<300;i++) {
      for (j=0;j<300;j++) {
        printf("%d ",mat[i][j]);
      }
      printf("\n");
  }  
*/

/*
  printf("Path %d => %d\n",0,13);
  dijkstra(0,13);
  printf("Path %d => %d\n",1,77);
  dijkstra(1,77);
*/
  printf("Path %d => %d\n",51,99);
  dijkstra(51,99);


  exit(0);


}

It is important to mention that the tests are being run in the valgrind with the following configuration:

valgrind --tool = cachegrind --I1 = 16384,1,32 --D1 = 16384,1,32 --LL = 16384,1,32 ./dijkstra_large test300.adjlist

ie L1 data and instructions = 16KB, associativity 1, line size 32bytes L2 = 16KB, associativity 1, line size 32bytes

Of course, the code I've optimized is better than the original, but the array optimization and array made the code worse, and I'd like to know why. Also, if anyone has any optimization tips I would be very grateful.

I can not change cache settings

    
asked by anonymous 19.10.2015 / 03:08

1 answer

1

TLDR the code ... int is faster than short int ; the elements of an array are contiguous (do not need malloc() / free() ).

    
19.10.2015 / 13:04