I have code that goes into a website, fills in a form and pulls a table, however, I want to delete some rows from this table that I do not need.
Let's go to the code:
#library's
require(RCurl)
require(XML)
require(stringr)
require(plyr)
require(rvest)
require(xlsx)
#previa
info <- debugGatherer()
handle <- getCurlHandle(cookiejar = "",
followlocation = TRUE,
autoreferer = TRUE,
debugfunc = info$update,
verbose = TRUE,
httpheader = list(
from = "[email protected]",
'user-agent' = str_c(R.version$version.string,
", ", R.version$platform)
))
xmlAttrsToDF <- function(parsedHTML, xpath) {
x <- xpathApply(parsedHTML, xpath, xmlAttrs)
x <- lapply(x, function(x) as.data.frame(t(x)))
do.call(rbind.fill, x)
}
#website
url <- "http://www.ceagesp.gov.br/entrepostos/servicos/cotacoes/"
html_form <- getURL(url, curl = handle)
parsed_form <- htmlParse(html_form)
pgsession <-html_session(url)
pgform <-html_form(pgsession)[[3]]
xmlAttrsToDF(parsed_form, "//form")
######LEGUMES######
res2 <- postForm(uri = str_c(url, "#cotacao"),
curl = handle,
style = "POST",
"cot_grupo" = "legumes",
"cot_data" = "06/03/2017")
legumes <- readHTMLTable(res2)
legumes <- data.frame(legumes)
legumes <- subset(legumes, select = c(NULL.V1, NULL.V2, NULL.V5, NULL.V7))
It generates this table when I type legumes
:
NULL.V1 NULL.V2 NULL.V5 NULL.V7
1 Produto Classificação Comun Quilo
2 ABOBORA JAPONESA - 1,54 1
3 ABOBORA SECA - 1,46 1
4 ABOBORA MORANGA - 1,21 1
5 ABOBORA PAULISTA - 1,49 1
6 ABOBRINHA BRASILEIRA EXTRA AA 2,25 1
7 ABOBRINHA BRASILEIRA EXTRA A 1,97 1
8 ABOBRINHA BRASILEIRA EXTRA 1,5 1
9 ABOBRINHA ITALIANA EXTRA AA 2,8 1
10 ABOBRINHA ITALIANA EXTRA A 2,39 1
11 ABOBRINHA ITALIANA EXTRA 1,82 1
12 BATATA DOCE AMARELA EXTRA AA 3,11 1
13 BATATA DOCE AMARELA EXTRA A 2,66 1
14 BATATA DOCE AMARELA EXTRA 2,17 1
15 BATATA DOCE ROSADA EXTRA AA 1,86 1
16 BATATA DOCE ROSADA EXTRA A 1,66 1
17 BATATA DOCE ROSADA EXTRA 1,4 1
18 BERINJELA CONSERVA EXTRA 4,5 1
19 BERINJELA CONSERVA ESPECIAL 3,5 1
20 BERINJELA CONSERVA PRIMEIRA 2,5 1
21 BERINJELA JAPONESA EXTRA 3,64 1
22 BERINJELA JAPONESA ESPECIAL 3,03 1
23 BERINJELA JAPONESA PRIMEIRA 2,34 1
24 BERINJELA EXTRA AA 1,88 1
25 BERINJELA EXTRA A 1,55 1
26 BERINJELA EXTRA 1,2 1
27 BETERRABA EXTRA AA 1,82 1
28 BETERRABA EXTRA A 1,6 1
29 BETERRABA EXTRA 1,33 1
30 CARA EXTRA A 3,21 1
31 CARA EXTRA 2,63 1
32 CENOURA EXTRA AA 2,11 1
33 CENOURA EXTRA A 1,81 1
34 CENOURA EXTRA 1,55 1
35 CHUCHU EXTRA AA 1,78 1
36 CHUCHU EXTRA A 1,55 1
37 CHUCHU EXTRA 1,23 1
38 ERVILHA TORTA EXTRA AA 20,49 1
39 ERVILHA TORTA EXTRA A 15,24 1
40 ERVILHA TORTA EXTRA 11,55 1
41 TOMATE SWEET GRAPE EXTRA AA 9,16 1
42 GENGIBRE - 3,26 1
43 INHAME EXTRA A 3,4 1
44 INHAME EXTRA 2,86 1
45 INHAME ESPECIAL 2,33 1
46 JILO REDONDO EXTRA AA 2,27 1
47 JILO REDONDO EXTRA A 1,85 1
48 JILO REDONDO EXTRA 1,46 1
49 MANDIOCA GRAUDA 2,11 1
50 MANDIOCA MEDIA 1,83 1
51 MANDIOCA MIUDA 1,54 1
52 MANDIOQUINHA EXTRA AAA 4,21 1
53 MANDIOQUINHA EXTRA AA 3,49 1
54 MANDIOQUINHA EXTRA A 2,81 1
55 MAXIXE - 3,06 1
56 PEPINO CAIPIRA EXTRA AA 1,86 1
57 PEPINO CAIPIRA EXTRA A 1,57 1
58 PEPINO CAIPIRA EXTRA 1,28 1
59 PEPINO COMUM EXTRA AA 1,9 1
60 PEPINO COMUM EXTRA A 1,54 1
61 PEPINO COMUM EXTRA 1,24 1
62 PEPINO JAPONES EXTRA AA 2,69 1
63 PEPINO JAPONES EXTRA A 2,23 1
64 PEPINO JAPONES EXTRA 1,82 1
65 PIMENTA * VERMELHA EXTRA AA 3,8 1
66 PIMENTA * VERMELHA EXTRA A 3,2 1
67 PIMENTA CAMBUCI EXTRA AA 3,48 1
68 PIMENTA CAMBUCI EXTRA A 2,83 1
69 PIMENTA VERDE AMERICANA EXTRA AA 3,27 1
70 PIMENTA VERDE AMERICANA EXTRA A 2,82 1
71 PIMENTA VERDE AMERICANA EXTRA 2,34 1
72 PIMENTAO VERDE EXTRA AA 2,71 1
73 PIMENTAO VERDE EXTRA A 2,17 1
74 PIMENTAO VERDE EXTRA 1,73 1
75 PIMENTAO VERMELHO EXTRA AA 3,33 1
76 PIMENTAO VERMELHO EXTRA A 2,81 1
77 PIMENTAO VERMELHO EXTRA 2,27 1
78 PIMENTAO AMARELO EXTRA AA 2,65 1
79 QUIABO LISO EXTRA AA 4,25 1
80 QUIABO LISO EXTRA A 3,7 1
81 QUIABO LISO EXTRA 3,17 1
82 TOMATE ITALIANO EXTRA AAA 3,2 1
83 TOMATE ITALIANO EXTRA AA 2,57 1
84 TOMATE ITALIANO EXTRA A 2,01 1
85 TOMATE ACHATADO-REDOND EXTRA AAA 3,05 1
86 TOMATE ACHATADO-REDOND EXTRA AA 2,5 1
87 TOMATE ACHATADO-REDOND EXTRA A 2 1
88 TOMATE OBLONGO EXTRA AAA 2,99 1
89 TOMATE OBLONGO EXTRA AA 2,45 1
90 TOMATE OBLONGO EXTRA A 1,82 1
91 TOMATE CAQUI EXTRA AA 8,09 1
92 TOMATE CEREJA EXTRA AA 5,71 1
93 VAGEM MACARRAO CURTA EXTRA AA 5,07 1
94 VAGEM MACARRAO CURTA EXTRA A 3,97 1
95 VAGEM MACARRAO CURTA EXTRA 3,13 1
96 VAGEM MANTEIGA EXTRA AA 4,4 1
97 VAGEM MANTEIGA EXTRA A 3,8 1
98 VAGEM MANTEIGA EXTRA 3,2 1
However, I want only 2 items of this table (excluding the rest, because I want to pass Excel), and I want to select them by name, because sometimes this table is messed up.
I would like to select the products MANDIOCA and TOMATO ACHATADO-REDOND, with MEDIA and EXTRA A ratings, respectively.
That's it, if somebody has an idea how to make my code cleaner, I appreciate it!