Separate subsets of a base in R

1

I am having difficulty classifying get subsets of a given database. On a given line it has the experiment that indicates in which step those data belong, for example, the first line has the expression 1.2. DADOS DOS ÓRGÃOS/ENTIDADES (ETAPA 1) and I need to create a column called ETAPA that all the data below it is written Etapa 1 until the line 2. BASES E REMUNERAÇÕES (ETAPA 2) which is written Etapa 2 in the column. The database follows below. The excess of NA is because the original base in excel has several columns merged and each step has a number of different columns.

EDIT: The number of lines in each Step is different, and below is only a sample of the original base. As an analysis of the dads of each step will be done monthly I would like to create a routine to separate the bases from stage step!

    structure(list(X__1 = c("1.2. DADOS DOS ÓRGÃOS/ENTIDADES (ETAPA 1)", 
"CNPJ", "03.066.219/0001-81", "03.066.219/0001-81", "04.809.688/0001-06", 
"2. BASES E REMUNERAÇÕES (ETAPA 2)", "Competência", "Março", 
"Março", "Março", "Março", "3. CONTRIBUIÇÕES, APORTES E OUTROS VALORES (ETAPA 3)", 
"Competência", "Março", "Março", "Março", "Março"), X__2 = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), X__3 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA), X__4 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA), X__5 = c(NA, NA, NA, NA, NA, 
NA, "Órgão/Entidade", "AGENERSA - Agência Reguladora de Energia e Saneamento Básico do Estado do Rio de Janeiro", 
"AGENERSA - Agência Reguladora de Energia e Saneamento Básico do Estado do Rio de Janeiro", 
"AGETRANSP - Agência Reguladora de Serv.Público de Transportes do Estado do Rio de Janeiro", 
"AGETRANSP - Agência Reguladora de Serv.Público de Transportes do Estado do Rio de Janeiro", 
NA, "Órgão/Entidade", "AGENERSA - Agência Reguladora de Energia e Saneamento Básico do Estado do Rio de Janeiro", 
"AGENERSA - Agência Reguladora de Energia e Saneamento Básico do Estado do Rio de Janeiro", 
"AGETRANSP - Agência Reguladora de Serv.Público de Transportes do Estado do Rio de Janeiro", 
"AGETRANSP - Agência Reguladora de Serv.Público de Transportes do Estado do Rio de Janeiro"
), X__6 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA), X__7 = c(NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_), 
    X__8 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA), X__9 = c(NA, "Nome", "Fundo Único de Previdência Social do Estado do Rio de Janeiro", 
    "Militares", "UENF - Universidade Estadual do Norte Fluminense Darcy Ribeiro", 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__10 = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA), X__11 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA), X__12 = c(NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__13 = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA), X__14 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA), X__15 = c(NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__16 = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA), X__17 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA), X__18 = c(NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__19 = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA), X__20 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA), X__21 = c(NA, NA, NA, NA, NA, NA, 
    "Plano", "Financeiro", "Financeiro", "Financeiro", "Financeiro", 
    NA, "Plano", "Financeiro", "Financeiro", "Financeiro", "Financeiro"
    ), X__22 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA), X__23 = c(NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__24 = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA), X__25 = c(NA, NA, NA, NA, NA, NA, "Referência", "PAT-SEG", 
    "SEG", "PAT-SEG", "SEG", NA, "Referência", "PAT-SEG", "SEG", 
    "PAT-SEG", "SEG"), X__26 = c(NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__27 = c(NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
    ), X__28 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA), X__29 = c(NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__30 = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA), X__31 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA), X__32 = c(NA, NA, NA, NA, NA, NA, 
    "Remun. Bruta", "68881.39", "68881.39", "37259.550000000003", 
    "37259.550000000003", NA, "Data", "42089", "42094", "42090", 
    "42095"), X__33 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA), X__34 = c(NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__35 = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA), X__36 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, "Valor Original", "14727.4", "7363.7", "8197.9599999999991", 
    "4098.9799999999996"), X__37 = c(NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), X__38 = c(NA, 
    NA, NA, NA, NA, NA, "Base de Cálculo", "61355.47", "61355.47", 
    "33024.04", "33024.04", NA, NA, NA, NA, NA, NA), X__39 = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA), X__40 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA), X__41 = c(NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, "Dedução de Benefícios", NA, NA, 
    NA, NA), X__42 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA), X__43 = c(NA, NA, NA, NA, NA, 
    NA, "Servidores", NA, "7", NA, "5", NA, NA, NA, NA, NA, NA
    ), X__44 = c(NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_), X__45 = c(NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__46 = c(NA, 
    "Tipo", "Unidade Gestora", "Militares", "Administração Indireta (Autarquias e Fundações)", 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__47 = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA), X__48 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, "Valores Compensados", NA, NA, NA, NA), X__49 = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA), X__50 = c(NA, NA, NA, NA, NA, NA, "Aposentados", NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA), X__51 = c(NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), 
    X__52 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    "Acréscimos Legais", NA, NA, NA, NA), X__53 = c(NA, NA, NA, 
    NA, NA, NA, "Pensionistas", NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA), X__54 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA), X__55 = c(NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, "Total com Acréscimos", "14727.4", 
    "7363.7", "8197.9599999999991", "4098.9799999999996"), X__56 = c(NA, 
    NA, NA, NA, NA, NA, "Dependentes", NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA), X__57 = c(NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA), X__58 = c(NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), 
    X__59 = c(NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_)), .Names = c("X__1", "X__2", "X__3", "X__4", 
"X__5", "X__6", "X__7", "X__8", "X__9", "X__10", "X__11", "X__12", 
"X__13", "X__14", "X__15", "X__16", "X__17", "X__18", "X__19", 
"X__20", "X__21", "X__22", "X__23", "X__24", "X__25", "X__26", 
"X__27", "X__28", "X__29", "X__30", "X__31", "X__32", "X__33", 
"X__34", "X__35", "X__36", "X__37", "X__38", "X__39", "X__40", 
"X__41", "X__42", "X__43", "X__44", "X__45", "X__46", "X__47", 
"X__48", "X__49", "X__50", "X__51", "X__52", "X__53", "X__54", 
"X__55", "X__56", "X__57", "X__58", "X__59"), row.names = c(NA, 
-17L), class = c("tbl_df", "tbl", "data.frame"))
    
asked by anonymous 25.07.2018 / 17:08

2 answers

4
fEtapas <- function(x) {
  linha <- c(grep('\(ETAPA [0-9]*)$', x[,1]), nrow(x))
  etapa <- rep(NA, nrow(x))
  for(i in 1:(length(linha)-1)) i -> etapa[linha[i]:linha[i+1]]
  x$etapa <- etapa
  return(x)
}

planilha <- fEtapas(planilha.original)

> planilha[c(1:2,59:60)]
                                                   X__1 X__2 X__59 etapa
1             1.2. DADOS DOS ÓRGÃOS/ENTIDADES (ETAPA 1)   NA  <NA>     1
2                                                  CNPJ   NA  <NA>     1
3                                    03.066.219/0001-81   NA  <NA>     1
4                                    03.066.219/0001-81   NA  <NA>     1
5                                    04.809.688/0001-06   NA  <NA>     1
6                     2. BASES E REMUNERAÇÕES (ETAPA 2)   NA  <NA>     2
7                                           Competência   NA  <NA>     2
8                                                 Março   NA  <NA>     2
9                                                 Março   NA  <NA>     2
10                                                Março   NA  <NA>     2
11                                                Março   NA  <NA>     2
12 3. CONTRIBUIÇÕES, APORTES E OUTROS VALORES (ETAPA 3)   NA  <NA>     3
13                                          Competência   NA  <NA>     3
14                                                Março   NA  <NA>     3
15                                                Março   NA  <NA>     3
16                                                Março   NA  <NA>     3
17                                                Março   NA  <NA>     3

It will work with any number of steps, assuming the steps are in sequence and always identified with the same pattern ("(STAGE XX)" at the end of the line). Putting the code inside a function lets you apply to any data.frame without having to copy and paste.

    
26.07.2018 / 04:39
1

The last column of the dataframe is the one of steps.

dados =  structure(list(X__1 = c("1.2. DADOS DOS ÓRGÃOS/ENTIDADES (ETAPA 1)", 
    "CNPJ", "03.066.219/0001-81", "03.066.219/0001-81", "04.809.688/0001-06", 
    "2. BASES E REMUNERAÇÕES (ETAPA 2)", "Competência", "Março", 
    "Março", "Março", "Março", "3. CONTRIBUIÇÕES, APORTES E OUTROS VALORES (ETAPA 3)", 
    "Competência", "Março", "Março", "Março", "Março"), X__2 = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
    ), X__3 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA), X__4 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA), X__5 = c(NA, NA, NA, NA, NA, 
    NA, "Órgão/Entidade", "AGENERSA - Agência Reguladora de Energia e Saneamento Básico do Estado do Rio de Janeiro", 
    "AGENERSA - Agência Reguladora de Energia e Saneamento Básico do Estado do Rio de Janeiro", 
    "AGETRANSP - Agência Reguladora de Serv.Público de Transportes do Estado do Rio de Janeiro", 
    "AGETRANSP - Agência Reguladora de Serv.Público de Transportes do Estado do Rio de Janeiro", 
    NA, "Órgão/Entidade", "AGENERSA - Agência Reguladora de Energia e Saneamento Básico do Estado do Rio de Janeiro", 
    "AGENERSA - Agência Reguladora de Energia e Saneamento Básico do Estado do Rio de Janeiro", 
    "AGETRANSP - Agência Reguladora de Serv.Público de Transportes do Estado do Rio de Janeiro", 
    "AGETRANSP - Agência Reguladora de Serv.Público de Transportes do Estado do Rio de Janeiro"
    ), X__6 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA), X__7 = c(NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_), 
        X__8 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA), X__9 = c(NA, "Nome", "Fundo Único de Previdência Social do Estado do Rio de Janeiro", 
        "Militares", "UENF - Universidade Estadual do Norte Fluminense Darcy Ribeiro", 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__10 = c(NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA), X__11 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA), X__12 = c(NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__13 = c(NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA), X__14 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA), X__15 = c(NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__16 = c(NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA), X__17 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA), X__18 = c(NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__19 = c(NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA), X__20 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA), X__21 = c(NA, NA, NA, NA, NA, NA, 
        "Plano", "Financeiro", "Financeiro", "Financeiro", "Financeiro", 
        NA, "Plano", "Financeiro", "Financeiro", "Financeiro", "Financeiro"
        ), X__22 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA), X__23 = c(NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__24 = c(NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA), X__25 = c(NA, NA, NA, NA, NA, NA, "Referência", "PAT-SEG", 
        "SEG", "PAT-SEG", "SEG", NA, "Referência", "PAT-SEG", "SEG", 
        "PAT-SEG", "SEG"), X__26 = c(NA, NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__27 = c(NA, NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
        ), X__28 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA), X__29 = c(NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__30 = c(NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA), X__31 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA), X__32 = c(NA, NA, NA, NA, NA, NA, 
        "Remun. Bruta", "68881.39", "68881.39", "37259.550000000003", 
        "37259.550000000003", NA, "Data", "42089", "42094", "42090", 
        "42095"), X__33 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, NA), X__34 = c(NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__35 = c(NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA), X__36 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, "Valor Original", "14727.4", "7363.7", "8197.9599999999991", 
        "4098.9799999999996"), X__37 = c(NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_), X__38 = c(NA, 
        NA, NA, NA, NA, NA, "Base de Cálculo", "61355.47", "61355.47", 
        "33024.04", "33024.04", NA, NA, NA, NA, NA, NA), X__39 = c(NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA), X__40 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA), X__41 = c(NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, "Dedução de Benefícios", NA, NA, 
        NA, NA), X__42 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, NA), X__43 = c(NA, NA, NA, NA, NA, 
        NA, "Servidores", NA, "7", NA, "5", NA, NA, NA, NA, NA, NA
        ), X__44 = c(NA_character_, NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_, NA_character_, 
        NA_character_, NA_character_), X__45 = c(NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__46 = c(NA, 
        "Tipo", "Unidade Gestora", "Militares", "Administração Indireta (Autarquias e Fundações)", 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X__47 = c(NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA), X__48 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, "Valores Compensados", NA, NA, NA, NA), X__49 = c(NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA), X__50 = c(NA, NA, NA, NA, NA, NA, "Aposentados", NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA), X__51 = c(NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), 
        X__52 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        "Acréscimos Legais", NA, NA, NA, NA), X__53 = c(NA, NA, NA, 
        NA, NA, NA, "Pensionistas", NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, NA), X__54 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, NA), X__55 = c(NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, NA, "Total com Acréscimos", "14727.4", 
        "7363.7", "8197.9599999999991", "4098.9799999999996"), X__56 = c(NA, 
        NA, NA, NA, NA, NA, "Dependentes", NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA), X__57 = c(NA, NA, NA, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA), X__58 = c(NA, NA, NA, 
        NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), 
        X__59 = c(NA_character_, NA_character_, NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_, NA_character_, 
        NA_character_)), .Names = c("X__1", "X__2", "X__3", "X__4", 
    "X__5", "X__6", "X__7", "X__8", "X__9", "X__10", "X__11", "X__12", 
    "X__13", "X__14", "X__15", "X__16", "X__17", "X__18", "X__19", 
    "X__20", "X__21", "X__22", "X__23", "X__24", "X__25", "X__26", 
    "X__27", "X__28", "X__29", "X__30", "X__31", "X__32", "X__33", 
    "X__34", "X__35", "X__36", "X__37", "X__38", "X__39", "X__40", 
    "X__41", "X__42", "X__43", "X__44", "X__45", "X__46", "X__47", 
    "X__48", "X__49", "X__50", "X__51", "X__52", "X__53", "X__54", 
    "X__55", "X__56", "X__57", "X__58", "X__59"), row.names = c(NA, 
    -17L), class = c("tbl_df", "tbl", "data.frame"))

x = dados$X__1 == "1.2. DADOS DOS ÓRGÃOS/ENTIDADES (ETAPA 1)"
y = dados$X__1 == "2. BASES E REMUNERAÇÕES (ETAPA 2)"
z = dados$X__1 == "3. CONTRIBUIÇÕES, APORTES E OUTROS VALORES (ETAPA 3)"

t = nrow(dados)
n = rep(1:t)

dados$etapa = c(rep('Etapa 1',n[y]-n[x]), rep('Etapa 2',n[z]-n[y]), rep('Etapa 3',(t-n[z])+1))
    
25.07.2018 / 19:24