R: How to count and sum the amount of a given "factor" in the observations (rows) of a data.frame?

3

Dear all, I would like to count how many times the factor level "Sim" (yes) occurs in each row of a data.frame like the one below. Does anyone know which arguments I would have to pass to `mutate` to do this? I tried several different ways and could not get it to work. I tried with:

  

Base = Base %>% mutate(Total_Sim = )

If anyone can help, I'll be very grateful!

    > dput(Base)
structure(list(ID = structure(1:100, .Label = c("110001", "110002", 
"110003", "110004", "110005", "110006", "110007", "110008", "110009", 
"110010", "110011", "110012", "110013", "110014", "110015", "110018", 
"110020", "110025", "110026", "110028", "110029", "110030", "110032", 
"110033", "110034", "110037", "110040", "110045", "110050", "110060", 
"110070", "110080", "110090", "110092", "110094", "110100", "110110", 
"110120", "110130", "110140", "110143", "110145", "110146", "110147", 
"110148", "110149", "110150", "110155", "110160", "110170", "110175", 
"110180", "120001", "120005", "120010", "120013", "120017", "120020", 
"120025", "120030", "120032", "120033", "120034", "120035", "120038", 
"120039", "120040", "120042", "120043", "120045", "120050", "120060", 
"120070", "120080", "130002", "130006", "130008", "130010", "130014", 
"130020", "130030", "130040", "130050", "130060", "130063", "130068", 
"130070", "130080", "130083", "130090", "130100", "130110", "130115", 
"130120", "130130", "130140", "130150", "130160", "130165", "130170"
), class = "factor"), Col_1 = structure(c(1L, 4L, 4L, 3L, 2L, 
1L, 4L, 4L, 3L, 2L, 1L, 2L, 4L, 3L, 4L, 4L, 4L, 4L, 2L, 4L, 4L, 
4L, 4L, 4L, 3L, 3L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 3L, 4L, 4L, 1L, 
4L, 4L, 4L, 4L, 4L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 
4L, 4L, 4L, 4L, 4L, 1L, 4L, 4L, 2L, 4L, 4L, 2L, 4L, 3L, 4L, 4L, 
4L, 4L, 2L, 4L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 2L, 4L), .Label = c("NA", 
"Não", "Não disponível", "Sim"), class = "factor"), Col_2 = structure(c(4L, 
4L, 4L, 3L, 4L, 2L, 2L, 2L, 3L, 4L, 4L, 2L, 2L, 3L, 2L, 2L, 4L, 
4L, 2L, 4L, 2L, 4L, 2L, 4L, 3L, 3L, 2L, 4L, 4L, 4L, 3L, 4L, 4L, 
3L, 4L, 4L, 2L, 4L, 2L, 4L, 4L, 2L, 2L, 4L, 4L, 2L, 4L, 2L, 4L, 
4L, 2L, 2L, 4L, 2L, 4L, 4L, 4L, 4L, 2L, 4L, 4L, 2L, 4L, 4L, 4L, 
4L, 4L, 3L, 4L, 2L, 4L, 4L, 2L, 4L, 2L, 2L, 2L, 4L, 4L, 4L, 1L, 
4L, 3L, 2L, 4L, 1L, 4L, 2L, 2L, 1L, 4L, 4L, 2L, 1L, 2L, 2L, 1L, 
2L, 4L, 4L), .Label = c("NA", "Não", "Não disponível", "Sim"), class = "factor"), 
    Col_3 = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L
    ), .Label = c("NA", "Não", "Sim"), class = "factor"), Col_4 = structure(c(3L, 
    3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 1L, 3L, 
    3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 1L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 1L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 1L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L), .Label = c("Não", "Não disponível", 
    "Sim"), class = "factor"), Col_5 = structure(c(4L, 4L, 4L, 
    2L, 4L, 2L, 2L, 4L, 2L, 4L, 1L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 
    2L, 4L, 2L, 2L, 2L, 4L, 2L, 4L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 
    3L, 4L, 4L, 2L, 4L, 4L, 4L, 4L, 4L, 2L, 4L, 4L, 2L, 4L, 2L, 
    4L, 4L, 2L, 4L, 4L, 2L, 4L, 2L, 4L, 4L, 2L, 4L, 2L, 2L, 4L, 
    4L, 4L, 4L, 4L, 2L, 3L, 2L, 4L, 4L, 4L, 4L, 2L, 4L, 2L, 2L, 
    2L, 4L, 4L, 4L, 2L, 4L, 4L, 2L, 4L, 2L, 4L, 2L, 4L, 4L, 2L, 
    4L, 2L, 4L, 4L, 2L, 4L, 4L), .Label = c("NA", "Não", "Não disponível", 
    "Sim"), class = "factor"), Col_6 = structure(c(2L, 2L, 2L, 
    2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 
    2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 
    1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Não", "Sim"), class = "factor"), 
    Col_7 = structure(c(4L, 4L, 2L, 4L, 2L, 4L, 3L, 4L, 4L, 4L, 
    4L, 2L, 4L, 3L, 2L, 4L, 4L, 4L, 2L, 4L, 4L, 4L, 4L, 1L, 1L, 
    1L, 4L, 2L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 
    3L, 4L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 4L, 3L, 4L, 4L, 
    4L, 4L, 4L, 3L, 4L, 3L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 
    4L, 4L, 4L, 3L, 4L, 4L, 4L, 3L, 3L, 3L, 2L, 2L, 4L, 4L, 4L, 
    4L, 4L, 2L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 4L
    ), .Label = c("NA", "Não", "Não disponível", "Sim"), class = "factor"), 
    Col_8 = structure(c(3L, 3L, 1L, 1L, 3L, 3L, 2L, 3L, 1L, 3L, 
    3L, 3L, 1L, 2L, 1L, 3L, 3L, 3L, 1L, 3L, 1L, 1L, 1L, 3L, 1L, 
    3L, 1L, 1L, 3L, 3L, 1L, 1L, 3L, 2L, 1L, 3L, 1L, 3L, 1L, 3L, 
    2L, 3L, 1L, 1L, 3L, 1L, 3L, 1L, 3L, 3L, 1L, 3L, 2L, 1L, 1L, 
    3L, 1L, 3L, 2L, 3L, 2L, 1L, 3L, 3L, 3L, 2L, 3L, 1L, 1L, 1L, 
    1L, 3L, 3L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 
    1L, 3L, 1L, 3L, 2L, 3L, 3L, 1L, 3L, 1L, 1L, 3L, 1L, 1L, 3L
    ), .Label = c("Não", "Não disponível", "Sim"), class = "factor"), 
    Col_9 = structure(c(2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 
    2L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 3L, 
    2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 
    3L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 
    3L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 
    2L, 2L, 3L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 
    2L, 2L, 1L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L
    ), .Label = c("Ignorado", "Não", "Sim"), class = "factor"), 
    Col_10 = structure(c(3L, 3L, 3L, 2L, 3L, 1L, 1L, 1L, 2L, 
    3L, 3L, 1L, 1L, 2L, 1L, 1L, 3L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 
    2L, 2L, 1L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 3L, 3L, 1L, 3L, 3L, 
    2L, 2L, 1L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 3L, 3L, 1L, 3L, 1L, 
    3L, 2L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 2L, 3L, 3L, 3L, 3L, 2L, 
    3L, 3L, 3L, 3L, 3L, 1L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 
    2L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 1L, 1L, 
    3L), .Label = c("Não", "Não disponível", "Sim"), class = "factor"), 
    Total_Sim = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), .Names = c("ID", 
"Col_1", "Col_2", "Col_3", "Col_4", "Col_5", "Col_6", "Col_7", 
"Col_8", "Col_9", "Col_10", "Total_Sim"), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -100L))
    
asked by anonymous 22.03.2018 / 22:20

3 answers

7

Using the dplyr package:

library(dplyr)

# Count, for each row, how many of the answer columns (Col_1 ... Col_10) hold
# the level "Sim". Only the Col_* columns are compared, so ID and any existing
# Total_Sim column are left out of the comparison, and na.rm = TRUE keeps a
# missing answer from turning the whole row's count into NA.
Base <- Base %>%
  mutate(
    Total_Sim = rowSums(
      select(., starts_with("Col_")) == "Sim",
      na.rm = TRUE
    )
  )
    
23.03.2018 / 13:12
4

Using base R:

# Count how many of the answer columns in row `x` hold the level "Sim".
#   x    - index of the row to inspect
#   data - data frame to read from (defaults to `Base`, the data in question)
#   cols - positions of the answer columns (defaults to 2:11, Col_1 ... Col_10)
# Returns a single integer; cells that are NA are not counted.
func <- function(x, data = Base, cols = 2:11) {
  sum(data[x, cols] == "Sim", na.rm = TRUE)
}

# Apply to every row. seq_len() yields an empty sequence for a zero-row data
# frame (1:0 would visit rows 1 and 0), and vapply() pins the result to exactly
# one integer per row.
Base$Total_Sim <- vapply(seq_len(nrow(Base)), func, integer(1))
    
23.03.2018 / 01:45
3

I think the simplest way is with `rowSums`. Since `==` comparisons result in logical values `FALSE`/`TRUE`, which R encodes as `0`/`1`, just add up the values in each row.

# Flag every "Sim" cell in columns 2 to 11 (Col_1 ... Col_10), then add up the
# TRUE values in each row, giving one count per row of Base.
rowSums(Base[2:11] == "Sim")
 #[1]  8  9  7  4  6  6  3  7  4  8  7  4  6  2  4  7  9  9  2 10  5  7  5  8  4
 #[26]  5  5  7  9 10  5  8  9  0  9  9  4  9  7  8  5  7  3  8  9  6  9  6  8  9
 #[51]  4  7  6  5  8  8  8  9  4 10  5  5  9  8 10  6 10  5  4  6  8  9  9  6  4
 #[76]  7  6  3  5  6  5  7  4  7  8  6  9  3  9  1  9  9  6  8  6  6  8  4  3  9

In addition to its simplicity, it has the advantage of being very fast. The `rowSums` function is implemented in C and avoids R-level loops.

    
23.03.2018 / 12:39