# Illustrative production titles. Items 5 and 6 differ only by the trailing
# " TESTE" on item 5, so they are near-duplicates rather than exact duplicates.
dados1 <- c(
  "10 ANOS DA POLÍTICA NACIONAL DE PROMOÇÃO DA SAÚDE: TRAJETÓRIAS E DESAFIOS",
  "4-CYCLOPROPYL-1-(1-METHYL-4-NITRO-1H-IMIDAZOL-5-YL)-1H-1,2,3-TRIAZOLE AND ETHYL 1-(1-METHYL-4-NITRO-1H-IMIDAZOL-5-YL)-1H-1,2,3-TRIAZOLE-4-CARBOXYLATE",
  "7,7-DIMETHYLAPORPHINE AND OTHER ALKALOIDS FROM THE BARK OF",
  "ABSCESSO DO MÚSCULO PSOAS ASSOCIADO À INFECÇÃO POR MYCOBACTERIUM TUBERCULOSIS EM PACIENTE COM AIDS",
  "ABUNDANCE OF LUTZOMYIA LONGIPALPIS TESTE",
  "ABUNDANCE OF LUTZOMYIA LONGIPALPIS",
  "ABUSO E DEPENDÊNCIA DE DROGAS NA PERSPECTIVA DA SAÚDE PÚBLICA (EDITORIAL)"
)

# Values for the "Qualis" column, in the same order as `dados1`.
qualis <- c("A2", "B3", "A1", "B2", "A2", "A2", "A1")

# Assemble the example data frame: one row per title, with years 2010-2016.
# The first column name is not a syntactic R name; with data.frame()'s default
# `check.names = TRUE` it is adjusted (spaces become dots) in the result.
m <- data.frame(
  "Título da Produção" = dados1,
  Qualis = qualis,
  Ano = 2010:2016
)
The data frame above is only illustrative. Note that the fifth and sixth elements of `dados1` are essentially the same title (the fifth only has an extra trailing "TESTE"), but because they are not written identically I cannot use `duplicated()` or `unique()` to remove one of them.
Is there another way to clean up these rows by filtering on the title, so that such near-duplicates are dropped?