Hi!
I'm a beginner in R programming. I've read quite a lot but I'm still confused about libraries and what their limitations are, so I'd like to ask whether it's possible to do the following with R:
I put a list of YouTube video links in a *.txt file, import the list into R, put it in a data.frame, transform each link into a download link ("link ..."), and then download the entire list automatically.
The problem is that on the download site you can only download one video at a time, and you have to click a button for each one.
Can you automate all this with R, or do I need to learn another programming language?
I'm back, but my code is still very clumsy, as I am a beginner :(
# import_lists
# Gather every *.txt file in the "youtube" directory.
# FIX: the original list.files() call picked up temporary files and
# subdirectories (the problem described above); restricting with
# pattern = "\\.txt$" keeps only real .txt files, and
# full.names = TRUE builds the "youtube/<name>" paths directly,
# replacing the manual paste() step.
ficheiro_t <- list.files(path = "youtube", pattern = "\\.txt$",
                         full.names = TRUE)
# Bare file names (without the directory prefix), kept for reference.
nome_ficheiros <- basename(ficheiro_t)
[1] "motivacao_desportiva.txt" "natureza_linda.txt"
[3] "pegadinhas_divertidas.txt"
# Read each *.txt into its own data.frame of links (one list
# element per file).
lista_ficheiros <- lapply(ficheiro_t, function(f) {
  read.table(f, stringsAsFactors = FALSE)
})
[1] "youtube/motivacao_desportiva.txt" "youtube/natureza_linda.txt"
[3] "youtube/pegadinhas_divertidas.txt"
# Stack all the per-file tables into one data.frame of links,
# one link per row.
db_yt <- do.call(what = rbind, args = lista_ficheiros)
[[1]]
V1
1 https://www.youtube.com/watch?v=C-Lq6FTwRWI
2 https://www.youtube.com/watch?v=0fVdAsTp0EU
3 https://www.youtube.com/watch?v=kYSdIQ16UxY
4 https://www.youtube.com/watch?v=gHaP_oJa_l0
[[2]] ...
This is where the confusion began: since the download didn't work, I created several candidate "urls"
# Build every URL variation I found, in one vectorized pass over the
# whole column (the original row-by-row for loop was unnecessary —
# gsub() and paste0() already work on whole vectors).
# FIX: the dots in "www." are escaped ("www\\.") so the regex matches
# the literal string instead of "www" followed by any character.
# V2: turn "https://www.youtube.com/..." into "http://ssyoutube.com/..."
db_yt$V2 <- gsub(".*www\\.", "http://ss", db_yt$V1)
# V3: prefix with the savefrom.net URL (strips everything up to "www.")
db_yt$V3 <- gsub(".*www\\.", "http://en.savefrom.net/#url=", db_yt$V1)
# V4: full download-site link wrapping the original URL unchanged
db_yt$V4 <- paste0("http://en.savefrom.net/#url=", db_yt$V1,
                   "&utm_source=youtube.com&utm_medium=short_domains&utm_campaign=ssyoutube.com")
V1
1 https://www.youtube.com/watch?v=C-Lq6FTwRWI
2 https://www.youtube.com/watch?v=0fVdAsTp0EU
...
V2
1 http://ssyoutube.com/watch?v=C-Lq6FTwRWI
2 http://ssyoutube.com/watch?v=0fVdAsTp0EU
...
V3
1 http://en.savefrom.net/#url=youtube.com/watch?v=C-Lq6FTwRWI
2 http://en.savefrom.net/#url=youtube.com/watch?v=0fVdAsTp0EU
V4
1 http://en.savefrom.net/#url=https://www.youtube.com/watch?v=C-Lq6FTwRWI&utm_source=youtube.com&utm_medium=short_domains&utm_campaign=ssyoutube.com
2 http://en.savefrom.net/#url=https://www.youtube.com/watch?v=0fVdAsTp0EU&utm_source=youtube.com&utm_medium=short_domains&utm_campaign=ssyoutube.com
...
# Prefix/suffix pair for wrapping an original link from the *.txt
# in a savefrom.net download URL (element 1 = prefix, 2 = suffix).
save_url <- c(
  "http://en.savefrom.net/#url=",
  "&utm_source=youtube.com&utm_medium=short_domains&utm_campaign=ssyoutube.com"
)
# Load the scraping packages.
library(xml2)
library(rvest)
library(httr)
# Pretend to be a desktop Chrome browser.
uastring <- "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
# Download site (first "full" link).
url <- "http://en.savefrom.net/#url=https://www.youtube.com/watch?v=PWkIoMOBiKE&utm_source=youtube.com&utm_medium=short_domains&utm_campaign=ssyoutube.com"
# Open a browsing session with the fake user agent.
site <- html_session(url, user_agent(uastring))
# The page has exactly one form — grab it directly.
form <- html_form(site)[[1]]
Here is the big problem: the download button seems to live inside an iframe, and I cannot reach it.
# Try to name the output file from the video title shown on the page.
# NOTE(review): the title is injected by JavaScript (inside an iframe),
# so this xpath likely returns character(0) from a static session —
# that matches the problem described above.
nome <- site %>%
  html_nodes(xpath = '//*[@id="answer-28575576"]/table/tbody/tr[1]/td[2]/div/pre') %>%
  html_text()
# Fill the form's sf_url field with the video link and submit it.
dados <- set_values(form = form,
                    sf_url = "http://youtube.com/watch?v=Q84boOmiPW8")
download <- submit_form(site, dados)
# BUG FIX: the original passed destfile/method/mode to paste0(), which
# just concatenates them as strings, so download.file() never received
# them. Pass them directly, and download from the URL the submitted
# session landed on (not the original form page).
download.file(download$url, destfile = "video.mp4",
              method = "wget", mode = "wb")
# If the single-page flow above worked, loop it over every link in db_yt.
for (i in seq_len(nrow(db_yt))) {
  # Download page pre-filled with this row's full savefrom link (col 4).
  url <- db_yt[i, 4]
  site <- html_session(url, user_agent(uastring))
  # Give the page some time to load.
  Sys.sleep(20)
  # FIX: re-read the form from THIS page's session — the original reused
  # a stale `form` object scraped from an earlier page.
  form_i <- html_form(site)[[1]]
  # Fill in the row's original YouTube link and submit.
  dados <- set_values(form = form_i, sf_url = db_yt[i, 1])
  download <- submit_form(site, dados)
  # BUG FIX: destfile/method/mode must be arguments of download.file(),
  # not concatenated by paste0(); also number the output file per row so
  # each video gets its own name instead of overwriting the previous one.
  download.file(download$url,
                destfile = sprintf("video_%03d.mp4", i),
                method = "wget", mode = "wb")
  # Pause again before the next request.
  Sys.sleep(40)
}