Given a list of words, return the size of each group of anagrams decreasing [closed]

1

I have a file with a list of words, each in a line of .txt. I need to group them that are anagrams and return the size of the group, in descending order. Here is an example:

  • cat, bed, aeds, drop, saed, toga, maca

The grouping would be:

  • (cat, drop, toga), (bed, litter), (aeds, saed)

And your program should return the size of each group in descending order:

  

3, 2, 2.

I need to code in c. I read about and saw some anagrams problems and solutions using hashMap.

My code looks like this:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <time.h>

char *pegaPalavra(const char *palavra, char *wbuf)
{
    char *p1, *p2, *fimPalavra;
    char t;
    int trocas;

    strcpy(wbuf, palavra);
    fimPalavra = wbuf+strlen(wbuf);
    do {
       trocas = 0;
       p1 = wbuf; p2 = fimPalavra-1;
       while (p1<p2) {
          if (*p2 > *p1) {
             t = *p2; *p2 = *p1; *p1 = t;
             trocas = 1;
          }
          p1++; p2--;
       }
       p1 = wbuf; p2 = p1+1;
       while(p2 < fimPalavra) {
           if (*p2 > *p1) {
             t = *p2; *p2 = *p1; *p1 = t;
             trocas = 1;
           }
           p1++; p2++;
       }
    } while (trocas);
    return wbuf;
}

static
short cxmap[] = {
    0x06, 0x1f, 0x4d, 0x0c, 0x5c, 0x28, 0x5d, 0x0e, 0x09, 0x33, 0x31, 0x56,
    0x52, 0x19, 0x29, 0x53, 0x32, 0x48, 0x35, 0x55, 0x5e, 0x14, 0x27, 0x24,
    0x02, 0x3e, 0x18, 0x4a, 0x3f, 0x4c, 0x45, 0x30, 0x08, 0x2c, 0x1a, 0x03,
    0x0b, 0x0d, 0x4f, 0x07, 0x20, 0x1d, 0x51, 0x3b, 0x11, 0x58, 0x00, 0x49,
    0x15, 0x2d, 0x41, 0x17, 0x5f, 0x39, 0x16, 0x42, 0x37, 0x22, 0x1c, 0x0f,
    0x43, 0x5b, 0x46, 0x4b, 0x0a, 0x26, 0x2e, 0x40, 0x12, 0x21, 0x3c, 0x36,
    0x38, 0x1e, 0x01, 0x1b, 0x05, 0x4e, 0x44, 0x3d, 0x04, 0x10, 0x5a, 0x2a,
    0x23, 0x34, 0x25, 0x2f, 0x2b, 0x50, 0x3a, 0x54, 0x47, 0x59, 0x13, 0x57,
   };
#define CXMAP_SIZE (sizeof(cxmap)/sizeof(short))


int Str_Hash( const char *key, int ix_max )
{
   const char *cp;
   short mash;
   int  hash = 33501551;
   for (cp = key; *cp; cp++) {
      mash = cxmap[*cp % CXMAP_SIZE];
      hash = (hash >>4) ^ 0x5C5CF5C ^ ((hash<<1) + (mash<<5));
      hash &= 0x3FFFFFFF;
      }
   return  hash % ix_max;
}

typedef struct sDictpalavra  *Dictpalavra;
struct sDictpalavra {
    const char *palavra;
    Dictpalavra next;
};

typedef struct sHashEntry *HashEntry;
struct sHashEntry {
    const char *key;
    HashEntry next;
    Dictpalavra  palavras;
    HashEntry link;
    short palavraCount;
};

#define HT_SIZE 8192

HashEntry hashTable[HT_SIZE];

HashEntry mostPerms = NULL;

int buildAnagrams( FILE *fin )
{
    char buffer[40];
    char bufr2[40];
    char *hkey;
    int hix;
    HashEntry he, *hep;
    Dictpalavra  we;
    int  maxPC = 2;
    int numpalavras = 0;

    while ( fgets(buffer, 40, fin)) {
        for(hkey = buffer; *hkey && (*hkey!='\n'); hkey++);
        *hkey = 0;
        hkey = pegaPalavra(buffer, bufr2);
        hix = Str_Hash(hkey, HT_SIZE);
        he = hashTable[hix]; hep = &hashTable[hix];
        while( he && strcmp(he->key , hkey) ) {
            hep = &he->next;
            he = he->next;
        }
        if ( ! he ) {
            he = malloc(sizeof(struct sHashEntry));
            he->next = NULL;
            he->key = strdup(hkey);
            he->palavraCount = 0;
            he->palavras = NULL;
            he->link = NULL;
            *hep = he;
        }
        we = malloc(sizeof(struct sDictpalavra));
        we->palavra = strdup(buffer);
        we->next = he->palavras;
        he->palavras = we;
        he->palavraCount++;
        if ( maxPC < he->palavraCount) {
            maxPC = he->palavraCount;
            mostPerms = he;
            he->link = NULL;
        }
        else if (maxPC == he->palavraCount) {
            he->link = mostPerms;
            mostPerms = he;
        }

        numpalavras++;
    }
    printf("%d palavras in dictionary max ana=%d\n", numpalavras, maxPC);
    return maxPC;
}


int main( )
{
    HashEntry he;
    Dictpalavra  we;
    FILE *f1;

    f1 = fopen("entrada.txt","r");
    buildAnagrams(f1);
    fclose(f1);

    f1 = fopen("anaout.txt","w");
    //f1 = stdout;

    for (he = mostPerms; he; he = he->link) {
        fprintf(f1,"%d:", he->palavraCount);
        for(we = he->palavras; we; we = we->next) {
            fprintf(f1,"%s, ", we->palavra);
        }
        fprintf(f1, "\n");
    }

    fclose(f1);
    return 0;
}

It just prints the biggest anagram. Where specifically would I have to change so that it would print all in descending order?

    
asked by anonymous 19.09.2015 / 05:00

1 answer

1

As I remember very little C, I'll leave the code in C ++ for now and I'll update: P

Well, that's a lot! Do you know what anagrams you have in common? They have the exact same letters. So how do we get if two words are anagrams?

std::sort(palavra) == std::sort(possivel_anagrama)

We check the ordered strings. If they hit, they are anagrams.

Then we ordered each of them:

for(std::string& palavra : lista_de_palavras)
{
     std::sort(palavra)
}

Soon we will have several words repeated. Now just see how many repetitions of each different word occur.

    
12.01.2016 / 17:18