Problems with dates in R

0

I am again having date problems in R. I have the dataset below and would like to calculate the date difference between the revisions. It turns out I'm not getting it, sometimes it returns some values I do not know where it comes from, sometimes the dataset appears empty. Can anyone help me?

structure(list(CPF = c(4.71e+13, 4.71e+13, 222146826, 10491445890, 
40557199816, 10799846821, 5702868838, 47011947868, 8.77e+13, 
3.9e+11, 2511422050, 45250618049, 26690683087, 50286986949, 83178880020, 
460414003, 52466078015, 2.31e+12, 10702601004, 18057438053), 
    PLACA = c("GJD-6942", "GJD-6942", "EVI-7442", "FSI-2944", 
    "FHY-4766", "FGK-0375", "EAA-3772", "FLE-1840", "EYH-8581", 
    "IVX2436", "IQL1145", "IIR3216", "INX8756", "ITI6522", "IRR7035", 
    "ISA8749", "IQB5774", "IUS4704", "ITL2283", "IOG3399"), COD_REV = c(563558L, 
    563559L, 563562L, 563567L, 563745L, 563961L, 564074L, 564184L, 
    564185L, 20313L, 20333L, 20338L, 20623L, 20627L, 20631L, 
    21107L, 21118L, 21121L, 21275L, 21284L), DATA = c("07/02/2018 15:32", 
    "07/02/2018 15:32", "07/02/2018 15:33", "07/02/2018 15:35", 
    "07/02/2018 16:56", "08/02/2018 08:21", "08/02/2018 09:26", 
    "08/02/2018 10:32", "08/02/2018 10:34", "11/04/2016 20:20", 
    "11/04/2016 20:55", "11/04/2016 21:05", "13/04/2016 20:41", 
    "13/04/2016 20:47", "13/04/2016 20:55", "16/04/2016 15:11", 
    "16/04/2016 15:39", "16/04/2016 15:45", "18/04/2016 20:57", 
    "18/04/2016 21:04"), KM_ATUAL = c(46250L, 46250L, 75425L, 
    70632L, 78257L, 176305L, 293761L, 54996L, 175200L, 32925L, 
    73251L, 151000L, 144500L, 57700L, 158000L, 52000L, 97200L, 
    47350L, 108000L, 129324L), KM_MEDIA = c(15000L, 15000L, 14000L, 
    23000L, 18500L, 35000L, 30L, 13000L, 32000L, 80L, 60L, 60L, 
    40L, 80L, 100L, 60L, 60L, 200L, 60L, 60L), TIPO_MEDIA = c("anl", 
    "anl", "anl", "anl", "anl", "anl", "dia", "dia", "anl", "NI", 
    "NI", "NI", "NI", "NI", "NI", "NI", "NI", "NI", "NI", "NI"
    ), KM_MED_DIA = c("41,0958904", "41,0958904", "38,3561644", 
    "63,0136986", "50,6849315", "95,890411", "30", "13000", "87,6712329", 
    "NI", "NI", "NI", "NI", "NI", "NI", "NI", "NI", "NI", "NI", 
    "NI"), ESTILO_DIR = c("etd", "etd", "cdd", "etd", "cdd", 
    "etd", "cdd", "cdd", "etd", "NI", "NI", "NI", "NI", "NI", 
    "NI", "NI", "NI", "NI", "NI", "NI"), CAMBIO = c("atc", "atc", 
    "mnl", "atc", "mnl", "mnl", "mnl", "mnl", "atc", "NI", "NI", 
    "NI", "NI", "NI", "NI", "NI", "NI", "NI", "NI", "NI"), LOJA = c(9L, 
    9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 144L, 144L, 144L, 144L, 144L, 
    144L, 144L, 144L, 144L, 144L, 144L)), .Names = c("CPF", "PLACA", 
"COD_REV", "DATA", "KM_ATUAL", "KM_MEDIA", "TIPO_MEDIA", "KM_MED_DIA", 
"ESTILO_DIR", "CAMBIO", "LOJA"), row.names = c(NA, -20L), class = c("tbl_df", 
"tbl", "data.frame"))

Code used:

# Carregando os pacotes
library(tidyverse)
library(taRifx)
library(lubridate)

# Carregando os datasets
revisao <- read_csv2("rev_seg.csv")
colnames(revisao)
#revisao$DATA <- dmy_hms(revisao$DATA)

dif_km <- 
  revisao %>%
  select(CPF, PLACA, COD_REV, DATA, KM_ATUAL, KM_MEDIA, TIPO_MEDIA, KM_MED_DIA) %>% 
  arrange(DATA) %>% 
  mutate(KM_MED_DIA = as.numeric(KM_MED_DIA), CPF = as.character(CPF)) %>% 
  filter(KM_MED_DIA != "NULL") %>% 

  group_by(CPF,PLACA) %>%
  mutate(ORDEM_REV = row_number()) %>%
  filter(n() > 1) %>%
  mutate(DIF_KM = KM_ATUAL - lag(KM_ATUAL)) %>%
  mutate(DIF_DATA_DIAS = difftime(DATA, lag(DATA) , units = c("days"))) %>% 
  mutate(DIF_DATA_MIN = DIF_DATA_DIAS * 1440) %>%
  mutate(KM_RODADA_ESPERADA = KM_MED_DIA * DIF_DATA_DIAS) %>% 
  na.omit() %>%
  filter(ORDEM_REV > 1)
    
asked by anonymous 23.04.2018 / 16:10

1 answer

1

Diego, I believe the code below does what you asked for. I added a date conversion that you were not doing mutate(DATA = dmy_hm(DATA)) and also replacing the commas with mutate(KM_MED_DIA = gsub("\,","\.",KM_MED_DIA)) points. With the database sent by you only one car appears more than once and the two revisions are with the same date and time. If your test to filter% s of% s was incorrect ( NA ), to test cells with the correct NA is to use the filter(KM_MED_DIA != "NULL" function

dif_km <- 
  revisao %>%
  select(CPF, PLACA, COD_REV, DATA, KM_ATUAL, KM_MEDIA, TIPO_MEDIA, KM_MED_DIA) %>% 
  arrange(DATA) %>% 
  mutate(KM_MED_DIA = gsub("\,","\.",KM_MED_DIA)) %>%
  mutate(KM_MED_DIA = as.numeric(KM_MED_DIA), CPF = as.character(CPF)) %>%
  filter(is.na(KM_MED_DIA)==F) %>%
  group_by(CPF, PLACA) %>%
  mutate(ORDEM_REV = row_number()) %>%
  filter(n()>1) %>%
  mutate(DATA = dmy_hm(DATA)) %>%
  mutate(DIF_KM = KM_ATUAL - lag(KM_ATUAL)) %>%
  mutate(DIF_DATA_DIAS = difftime(DATA, lag(DATA) , units = c("days"))) %>% 
  mutate(DIF_DATA_MIN = DIF_DATA_DIAS * 1440) %>%
  mutate(KM_RODADA_ESPERADA = KM_MED_DIA * DIF_DATA_DIAS) %>% 
  na.omit() %>%
  filter(ORDEM_REV > 1)
    
27.04.2018 / 14:43