dataset <- structure(list(PLACA = structure(c(5L, 5L, 5L, 4L, 1L, 2L, 3L,
7L, 6L, 8L), .Label = c("DSF9652", "EFR9618", "EQW6597", "ERB1522",
"EWM3539", "LOC1949", "LQQ5554", "OQT5917"), class = "factor"),
COD_REV = c(113195L, 113196L, 113197L, 113303L, 80719L, 80720L,
80722L, 113318L, 80788L, 113386L), DATA = structure(1:10, .Label = c("2016-01-14 12:13:00.000",
"2016-01-18 18:48:00.000", "2016-01-18 19:00:00.000", "2016-01-25 11:46:00.000",
"2016-01-25 19:20:00.000", "2016-01-25 19:28:00.000", "2016-01-25 19:33:00.000",
"2016-01-25 20:56:00.000", "2016-01-26 21:28:00.000", "2016-01-27 13:50:00.000"
), class = "factor"), KM_ATUAL = c(52100L, 52100L, 52100L,
110676L, 62300L, 31144L, 165022L, 41021L, 155646L, 55030L
), KM_MEDIA = c(0L, 42L, 40L, 20L, 17L, 18L, 120L, 100L,
10L, 38L)), .Names = c("PLACA", "COD_REV", "DATA", "KM_ATUAL",
"KM_MEDIA"), row.names = c(NA, -10L), class = "data.frame")
I have the above dataset and would like to group the boards to see how many visits the same customer did. So I need to calculate the difference between the dates and km_atual of visits, to compare with the field KM_media_dia and see the difference between these values. I can not figure out the difference between the dates. This was my attempt so far:
library(tidyverse)
# Carregando os datasets
dataset <- read_csv2("dados_atuais.csv")
dataset_revisao_km <- dataset %>%
# selecionar apenas colunas importantes
select(CPF, PLACA, COD_REV, DATA, KM_ATUAL) %>%
arrange(DATA) %>%
group_by(PLACA) %>%
mutate(ORDEM_REVISAO = row_number()) %>%
# manter apenas placas com mais de uma revisao
filter(n() > 1) %>%
mutate(DIFERENCA_KM = KM_ATUAL - lag(KM_ATUAL)) %>%
# filtrar fora a primeira revisao da placa
filter(ORDEM_REVISAO > 1)