Error Segmentation Fault (Core Dumped) Binary Tree Search

0

I'm doing a job that performs a search for a key (an int value) in a binary tree populated from a txt file. The file txt has the following registry in each row: a key (int), a given1 (long int) and a given2 (char [1000]). For example, the first line could be: "5 13734842 paper house" .

I made a simple program to randomly generate the data in the txt file, formatted and with the desired number of lines. In the main function I populated my tree by reading the created txt file and inserting each record in the tree created with the insere_ArvBin(ArvBin *raiz, Registro valor) function. Then calling the function consulta_ArvBin(ArvBin *raiz, int valor) passes an integer value to perform the search in the tree. This function returns 0 when the element is not found and 1 when it is found in the tree.

The program usually works for a small record in the txt file (with 5 lines, for example, and with few characters worked normally), the problem happens when you generate a large record with the < in> txt containing several lines and many characters, then accusing the error with the Segmentation fault; dump the core

I think the error is related to some bad memory allocation, I tried to use a libera_ArvBin(ArvBin *raiz) function that frees memory from the tree at the end of the program but nothing has changed. I do not know where this error could be and how it would modify some routine to solve this problem, since it works normally for small records. This is the code with the functions I used for creating, inserting, and querying the search tree:

#include <iostream>
#include <cstdlib>
#include <stdio.h>
#include <stdlib.h>
#include "ArvoreBinaria.h"

#define TAM_REG 100000

using namespace std;

struct registro{
    int chave;
    int dado1;
    char dado2[1000];
};

struct NO{
    Registro info;
    struct NO *esq;
    struct NO *dir;
};

ArvBin* cria_ArvBin(){
    ArvBin* raiz =  (ArvBin*) malloc(sizeof(ArvBin));
    if(raiz != NULL){
        *raiz = NULL;
    }
    return raiz;
}

void libera_NO(struct NO* no){
    if(no == NULL)
        return;
    libera_NO(no->esq);
    libera_NO(no->dir);
    free(no);
    no = NULL;    
}

void libera_ArvBin(ArvBin *raiz){
    if(raiz == NULL)
        return;
    libera_NO(*raiz);//libera cada nó
    free(raiz);//libera a raiz

}

int insere_ArvBin(ArvBin *raiz, Registro valor){
    if(raiz==NULL)
        return 0;
    struct NO* novo;
    novo = (struct NO*) malloc(sizeof(struct NO));
    if(novo == NULL)//se deu erro na alocação
        return 0;
    novo->info = valor;
    novo->dir = NULL;
    novo->esq = NULL;
    //procura onde inserir!
    if(*raiz==NULL)//se minha arvore é uma arvore vazia. se for só inserir
        *raiz = novo;
    else{
        struct NO* atual = *raiz;
        struct NO* ant = NULL;
        //navega nos nós da arvore até chegar em um nó folha
        while(atual!=NULL){
            ant = atual;
            if(valor.chave == atual->info.chave){
                free(novo);
                return 0;//elemento já existe
            }
            if(valor.chave > atual->info.chave)
                atual = atual->dir;
            else
                atual = atual->esq;
        }
        //insere como filho desse nó folha
        if(valor.chave > ant->info.chave)
            ant->dir= novo;
        else
            ant->esq=novo;     
    }
    return 1;
}

//busca
int consulta_ArvBin(ArvBin *raiz, int valor){
    if(raiz==NULL)
        return 0;
    struct NO* atual = *raiz;
    while(atual != NULL){
        if(valor == atual->info.chave){
            return 1;
        }
        if(valor > atual->info.chave)
            atual = atual->dir;
        else
            atual = atual->esq;
    }
    return 0;
}

int main(int argc, char** argv) {

    ArvBin* raiz = cria_ArvBin();
    int i = 0;
    Registro valor[TAM_REG];    

    //carrega arquivo, lê dados e insere na árvore criada 
    FILE* arq = fopen("arquivo.txt", "r");
    if(arq!=NULL){
        while(fscanf(arq, "%d %d %[^\n]s", &valor[i].chave, 
                     &valor[i].dado1, valor[i].dado2) != EOF) {
            insere_ArvBin(raiz, valor[i]);
            ++i;
        }
    }

    int busca = consulta_ArvBin(raiz, 5);
    if(busca == 0)
        printf("Não encontrado!\n");
    else
        printf("Encontrado!\n");   

    return 0;
}

.h with all the functions of the TAD:

#ifndef ARVOREBINARIA_H
#define ARVOREBINARIA_H

typedef struct registro Registro;
typedef struct NO* ArvBin;
ArvBin* cria_ArvBin();
void libera_ArvBin(ArvBin *raiz);
int estaVazia_ArvBin(ArvBin *razi);
int altura_ArvBin(ArvBin *raiz);
int totalNO_ArvBin(ArvBin *raiz);
void preOrdem_ArvBin(ArvBin* raiz);
void emOrdem_ArvBin(ArvBin* raiz);
void posOrdem_ArvBin(ArvBin *raiz);
int insere_ArvBin(ArvBin *raiz, Registro valor);
int remove_ArvBin(ArvBin *raiz, int valor);
int consulta_ArvBin(ArvBin *raiz, int valor);

#endif /* ARVOREBINARIA_H */

I noticed that in tests of up to 8000 the program works normally, from 9000 the Segmentation fault; dump of the core .

The code to generate random txt files for testing:

#include <iostream>
#include <cstdlib>
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include <unistd.h>

#define TAM_TXT 100000
#define MAX_STR_SIZE    50
#define MIN_STR_SIZE    5

using namespace std;

char* geraStringAleatoria(){

    char *validchars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
    char *novastr;
    register int i;
    int str_len;

    // inicia o contador aleatório
    //srand ( time(NULL ));

    // novo tamanho
    str_len = (rand() % MAX_STR_SIZE );

    // checa tamanho
    str_len += ( str_len < MIN_STR_SIZE ) ? MIN_STR_SIZE : 0;

    // aloca memoria
    novastr = ( char * ) malloc ( (str_len + 1) * sizeof(char));
    if ( !novastr ){
        printf("[*] Erro ao alocar memoria.\n" );
        exit ( EXIT_FAILURE );
    }

    // gera string aleatória
    for ( i = 0; i < str_len; i++ ) {
        novastr[i] = validchars[ rand() % strlen(validchars) ];
        novastr[i + 1] = 0x0;
    }

        return novastr;

}

int escreverArquivo(){
  //srand( (unsigned)time(NULL) );

  int chave = rand() % INT_MAX;
  long int dado1 = rand() % LONG_MAX;

  FILE* info = fopen("arquivo[100000L].txt", "w");

  int i=0;
  while(i<TAM_TXT){
    fprintf(info,"%d %ld %s\n", (rand() % INT_MAX), (rand() % LONG_MAX), geraStringAleatoria());
    ++i;
  } 
}

int main() {

    escreverArquivo();

  return 0;
}
    
asked by anonymous 19.01.2018 / 06:30

4 answers

2

Notice that your libera_NO() function seems to try to compare if the last node points to null in a strange way, please note:

if(no = NULL)
    return;

libera_NO(no->esq)

This is not a comparison! This will make the last node, even if valid, start to point to null! Then the Boolean expression would be interpreted as if(0) , making the stream not pass through return .

In attempting to access the null pointer no->esq on the next line, your program will be terminated by the operating system with a segmentation error.

    
19.01.2018 / 13:44
1

Your header ArvoreBinaria.h , has a type definition:

typedef struct NO* ArvBin;

This means that ArvBin is a ponteiro para um nó , your code does a tremendous mess with it, for example:

ArvBin* cria_ArvBin(){
    ArvBin* raiz =  (ArvBin*) malloc(sizeof(ArvBin));
    if(raiz != NULL){
        *raiz = NULL;
    }
    return raiz;
}

When you do something like ArvBin * raiz; you are declaring a ponteiro para um ponteiro de um nó , that is, struct NO** !

Your code repeats the same error multiple times, and if it's working, even with few records, it's a miracle!

Your code could be simply like this:

typedef struct NO ArvBin;

...

ArvBin* cria_ArvBin(){
    ArvBin* raiz =  (ArvBin*) malloc(sizeof(ArvBin));
    return raiz;
}

Or even:

typedef struct NO ArvBin;

...

ArvBin* cria_ArvBin(){
    return (ArvBin*) malloc(sizeof(ArvBin));
}
    
21.01.2018 / 01:54
1

Following its rationale, it follows a solution with% (tested) able to solve its problem, with a more granular abstraction of the entities: C , árvore binária and . Notice that this is a truly dynamic solution, that is, it accepts files with any number of records:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* ************************************************************************** */
/* *                           TIPOS E ESTRUTURAS                           * */
/* ************************************************************************** */

/* Tipos */
typedef struct no_s no_t;
typedef struct arvore_binaria_s arvore_binaria_t;
typedef struct registro_s registro_t;


/* Representa um registro */
struct registro_s
{
    int chave;
    int dado1;
    char dado2[1000];
};

/* Representa um noh */
struct no_s
{
    registro_t reg;        /* Registro */
    no_t * esq;            /* Ponteiro para o noh da esquerda */
    no_t * dir;            /* Ponteiro para o noh da direita */
};

/* Representa uma arvore binaria */
struct arvore_binaria_s
{
    no_t * raiz;    /* Ponteiro para o noh raiz */
    int qtd;        /* Quantidade de nos contidos na arvore */
};


/* ************************************************************************** */
/* *                          PROTOTIPOS DAS FUNCOES                        * */
/* ************************************************************************** */

/* Cria uma arvore binaria vazia em memoria */
arvore_binaria_t * arvore_criar( void );

/* Libera memoria ocupada por uma arvore binaria */
void arvore_liberar( arvore_binaria_t * arv );

/* Cria um nó em memoria */
no_t * no_criar( registro_t * reg );

/* Libera memoria ocupara por um nó */
void no_liberar( no_t * no );

/* Insere registro na Arvore de acordo com a "chave" do registro */
int arvore_inserir_registro( arvore_binaria_t * arv, registro_t * registro );

/* Pesquisa um registro na arvore usando a "chave" como argumento */
registro_t * arvore_buscar_registro( arvore_binaria_t * arv, int chave );

/* Cria uma arvore binaria em memória a partir de um arquivo TXT */
arvore_binaria_t * arvore_criar_do_arquivo( const char * arq );

/* ************************************************************************** */
/* *                              IMPLEMENTACAO                             * */
/* ************************************************************************** */

arvore_binaria_t * arvore_criar( void )
{
    arvore_binaria_t * arv = (arvore_binaria_t*) malloc(sizeof(arvore_binaria_t));

    arv->raiz = NULL;
    arv->qtd = 0;
    return arv;
}


void arvore_liberar( arvore_binaria_t * arv )
{
    if(arv == NULL)
        return;

    no_liberar(arv->raiz);
    free(arv);
}


no_t * no_criar( registro_t * reg )
{
    no_t * no = (no_t*) malloc(sizeof(no_t));

    no->esq = NULL;
    no->dir = NULL;

    no->reg.chave = reg->chave;
    no->reg.dado1 = reg->dado1;
    strcpy( no->reg.dado2, reg->dado2 );

    return no;
}


void no_liberar( no_t * no )
{
    if(no == NULL)
        return;

    no_liberar(no->esq);
    no_liberar(no->dir);
    free(no);
}


int arvore_inserir_registro( arvore_binaria_t * arv, registro_t * registro )
{
    if( arv == NULL )
        return -1;

    if( arv->raiz == NULL )
    {
        no_t * novo = no_criar( registro );

        arv->raiz = novo;
        arv->qtd = 1;
    }
    else
    {
        no_t * atual = arv->raiz;
        no_t * ant = NULL;

        while( atual != NULL )
        {
            ant = atual;

            if( registro->chave == atual->reg.chave )
                return -1; /* Registro jah esxistente! */

            if( registro->chave > atual->reg.chave )
                atual = atual->dir;
            else
                atual = atual->esq;
        }

        no_t * novo = no_criar( registro );

        if( registro->chave > ant->reg.chave )
            ant->dir = novo;
        else
            ant->esq = novo;

        /* Incrementa contador de nós na arvore binária */
        arv->qtd++;
    }

    /* Em caso de sucesso, retorna quantidade de nós na arvore */
    return arv->qtd;
}


registro_t * arvore_buscar_registro( arvore_binaria_t * arv, int chave )
{
    if( arv == NULL )
        return NULL;

    no_t * atual = arv->raiz;

    while( atual != NULL )
    {
        if( chave == atual->reg.chave )
            return &atual->reg;

        if( chave > atual->reg.chave )
            atual = atual->dir;
        else
            atual = atual->esq;
    }

    return NULL;
}


arvore_binaria_t * arvore_criar_do_arquivo( const char * arq )
{
    arvore_binaria_t * arv = NULL;
    registro_t aux;

    FILE * fp = fopen( arq, "r");

    if(!arq)
        return NULL;

    arv = arvore_criar();

    while( fscanf( fp, "%d %d %[^\n]s", &aux.chave, &aux.dado1, aux.dado2 ) != EOF )
    {
        /* Insere registro na arvore */
        int n = arvore_inserir_registro( arv, &aux );

        /* Exibe mensagem de avido em caso de erro de insercao em um dos registros */
        if( n < 0 )
            fprintf( stderr, "Erro Inserindo Registro: chave=%d, dado1=%d, dado2='%s'\n", aux.chave, aux.dado1, aux.dado2 );
    }

    fclose(fp);

    return arv;
}

/* ************************************************************************** */
/* *                                    MAIN()                              * */
/* ************************************************************************** */

int main( void )
{
    /* Cria arvore binaria a partir do arquivo especificado */
    arvore_binaria_t * arv = arvore_criar_do_arquivo( "arquivo[100000L].txt" );

    if(!arv)
    {
        fprintf( stderr, "Erro Carregado Arvore Binaria a partir do Arquivo.");
        return 1;
    }

    /* Pesquisa registro com a chave especificada (no caso: 1773986255) */
    registro_t * registro = arvore_buscar_registro( arv, 1773986255 );

    if( registro == NULL )
    {
        printf("Registro nao encontrado!\n");
    }
    else
    {
        printf("Registro Encontrado: chave=%d, dado1=%d, dado2='%s'\n", registro->chave, registro->dado1, registro->dado2 );
    }

    arvore_liberar( arv );

    return 0;
}

/* eof */

Testing with a file of registro records using 100.000 :

$ valgrind --leak-check=full --tool=memcheck --show-reachable=yes ./arvore_binaria 
==4657== Memcheck, a memory error detector
==4657== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.
==4657== Using Valgrind-3.11.0 and LibVEX; rerun with -h for copyright info
==4657== Command: ./arvore_binaria
==4657== 
Erro Inserindo Registro: chave=972927972, dado1=1173501325, dado2='DswGY'
Erro Inserindo Registro: chave=1942444306, dado1=291188190, dado2='QNQBgfoNflfwYwmXwgwZPAg'
Registro Encontrado: chave=1773986255, dado1=1078115008, dado2='tkxXNlSxbKnJpvXfCfaLSESAweeHdrdwBB'
==4657== 
==4657== HEAP SUMMARY:
==4657==     in use at exit: 0 bytes in 0 blocks
==4657==   total heap usage: 100,000 allocs, 100,000 frees, 102,398,536 bytes allocated
==4657== 
==4657== All heap blocks were freed -- no leaks are possible
==4657== 
==4657== For counts of detected and suppressed errors, rerun with: -v
==4657== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
    
22.01.2018 / 14:39
0

I have found a solution! When dynamically allocating my struct register it works normally for large records. I tested up to 1,000,000 records and the search ran normally, of course a bit slower as the number of records increased:

#define TAM_REG 1000000

int main(int argc, char** argv) {

    Registro *valor = (Registro *)malloc(TAM_REG * sizeof(Registro));

    ArvBin* raiz = cria_ArvBin();
    int i = 0;   
    FILE* arq = fopen("arquivo[1000000L].txt", "r");
    if(arq!=NULL){
        while(fscanf(arq, "%d %d %[^\n]s", &valor[i].chave, &valor[i].dado1, valor[i].dado2) != EOF) {
            insere_ArvBin(raiz, valor[i]);
            ++i;
        }
    }

    int busca = consulta_ArvBin(raiz, 1398063906);
    if(busca == 0)
        printf("Não encontrado!\n");
    else
        printf("Encontrado!\n");   

    libera_ArvBin(raiz);

    return 0;
}

As suspected the error is linked to memory allocation as the Resgit vector is large. I just can not understand why. My pc has 8gb of ram and I do not think there would be any problems in allocating memory at compile time (as I was doing before, changing the size of the registry vector with the TAM_REG variable) or at runtime, dynamically as I did now . If anyone could have a theoretical explanation for the issue I think it would be cool! Vlw!

    
22.01.2018 / 01:52