I am again having date problems in R. I have the dataset below and would like to calculate the date difference between the revisions. It turns out I'm not getting it, sometimes it returns some values I do not know where it comes from, sometimes the dataset appears empty. Can anyone help me?
structure(list(CPF = c(4.71e+13, 4.71e+13, 222146826, 10491445890,
40557199816, 10799846821, 5702868838, 47011947868, 8.77e+13,
3.9e+11, 2511422050, 45250618049, 26690683087, 50286986949, 83178880020,
460414003, 52466078015, 2.31e+12, 10702601004, 18057438053),
PLACA = c("GJD-6942", "GJD-6942", "EVI-7442", "FSI-2944",
"FHY-4766", "FGK-0375", "EAA-3772", "FLE-1840", "EYH-8581",
"IVX2436", "IQL1145", "IIR3216", "INX8756", "ITI6522", "IRR7035",
"ISA8749", "IQB5774", "IUS4704", "ITL2283", "IOG3399"), COD_REV = c(563558L,
563559L, 563562L, 563567L, 563745L, 563961L, 564074L, 564184L,
564185L, 20313L, 20333L, 20338L, 20623L, 20627L, 20631L,
21107L, 21118L, 21121L, 21275L, 21284L), DATA = c("07/02/2018 15:32",
"07/02/2018 15:32", "07/02/2018 15:33", "07/02/2018 15:35",
"07/02/2018 16:56", "08/02/2018 08:21", "08/02/2018 09:26",
"08/02/2018 10:32", "08/02/2018 10:34", "11/04/2016 20:20",
"11/04/2016 20:55", "11/04/2016 21:05", "13/04/2016 20:41",
"13/04/2016 20:47", "13/04/2016 20:55", "16/04/2016 15:11",
"16/04/2016 15:39", "16/04/2016 15:45", "18/04/2016 20:57",
"18/04/2016 21:04"), KM_ATUAL = c(46250L, 46250L, 75425L,
70632L, 78257L, 176305L, 293761L, 54996L, 175200L, 32925L,
73251L, 151000L, 144500L, 57700L, 158000L, 52000L, 97200L,
47350L, 108000L, 129324L), KM_MEDIA = c(15000L, 15000L, 14000L,
23000L, 18500L, 35000L, 30L, 13000L, 32000L, 80L, 60L, 60L,
40L, 80L, 100L, 60L, 60L, 200L, 60L, 60L), TIPO_MEDIA = c("anl",
"anl", "anl", "anl", "anl", "anl", "dia", "dia", "anl", "NI",
"NI", "NI", "NI", "NI", "NI", "NI", "NI", "NI", "NI", "NI"
), KM_MED_DIA = c("41,0958904", "41,0958904", "38,3561644",
"63,0136986", "50,6849315", "95,890411", "30", "13000", "87,6712329",
"NI", "NI", "NI", "NI", "NI", "NI", "NI", "NI", "NI", "NI",
"NI"), ESTILO_DIR = c("etd", "etd", "cdd", "etd", "cdd",
"etd", "cdd", "cdd", "etd", "NI", "NI", "NI", "NI", "NI",
"NI", "NI", "NI", "NI", "NI", "NI"), CAMBIO = c("atc", "atc",
"mnl", "atc", "mnl", "mnl", "mnl", "mnl", "atc", "NI", "NI",
"NI", "NI", "NI", "NI", "NI", "NI", "NI", "NI", "NI"), LOJA = c(9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 144L, 144L, 144L, 144L, 144L,
144L, 144L, 144L, 144L, 144L, 144L)), .Names = c("CPF", "PLACA",
"COD_REV", "DATA", "KM_ATUAL", "KM_MEDIA", "TIPO_MEDIA", "KM_MED_DIA",
"ESTILO_DIR", "CAMBIO", "LOJA"), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame"))
Code used:
# Carregando os pacotes
library(tidyverse)
library(taRifx)
library(lubridate)
# Carregando os datasets
revisao <- read_csv2("rev_seg.csv")
colnames(revisao)
#revisao$DATA <- dmy_hms(revisao$DATA)
dif_km <-
revisao %>%
select(CPF, PLACA, COD_REV, DATA, KM_ATUAL, KM_MEDIA, TIPO_MEDIA, KM_MED_DIA) %>%
arrange(DATA) %>%
mutate(KM_MED_DIA = as.numeric(KM_MED_DIA), CPF = as.character(CPF)) %>%
filter(KM_MED_DIA != "NULL") %>%
group_by(CPF,PLACA) %>%
mutate(ORDEM_REV = row_number()) %>%
filter(n() > 1) %>%
mutate(DIF_KM = KM_ATUAL - lag(KM_ATUAL)) %>%
mutate(DIF_DATA_DIAS = difftime(DATA, lag(DATA) , units = c("days"))) %>%
mutate(DIF_DATA_MIN = DIF_DATA_DIAS * 1440) %>%
mutate(KM_RODADA_ESPERADA = KM_MED_DIA * DIF_DATA_DIAS) %>%
na.omit() %>%
filter(ORDEM_REV > 1)