In situations like this one, you can study the page's code a bit and make the same calls that the JavaScript would make.
Here's a class I've implemented that mimics this, written in Python 2.7. If you step through each function in a debugger, the flow is easy to follow.
# anime.py
from bs4 import BeautifulSoup
import urllib2
import re


class Anime2MP4(object):
    # Page of episode 00, used as the entry point to list every episode.
    anime_zero_url = 'http://www.animesproject.com.br/serie/885/2162/Death-Parade-Episodio-00'  # noqa
    # URL the page's JavaScript builds for the video player, filled in with two path segments.
    anime_url_format = 'http://www.animesproject.com.br/playerv52/player.php?a=0&0={0}&1={1}'  # noqa

    def build_episode_url(self, url_parameters):
        return self.anime_url_format.format(*url_parameters)

    def get_episodes_url(self):
        # Scrape the episode list and return every episode href.
        webpage = urllib2.urlopen(self.anime_zero_url)
        soup = BeautifulSoup(webpage, 'html.parser')
        id_tag = 'serie_lista_episodios'
        episodes = soup.find(id=id_tag).find_all('a', href=True)
        return [ep['href'] for ep in episodes]

    def get_parameters(self):
        # Each href looks like /serie/<x>/<y>/<title>; the two numeric
        # segments are what the player URL is built from.
        pars = []
        for ep in self.episodes:
            ep_split = ep.split('/')
            pars.append((ep_split[2], ep_split[3]))
        return pars

    def get_mp4_episode(self, url, quality='MQ'):
        """
        quality: can be HD or MQ
        """
        webpage = urllib2.urlopen(url)
        html_content = webpage.read()
        pattern = 'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'  # noqa
        urls = set(re.findall(pattern, html_content))  # unique urls
        urls = filter(lambda s: s.endswith('.mp4'), urls)  # only .mp4
        return next((url for url in urls if quality in url), None)

    def run(self):
        self.episodes = self.get_episodes_url()
        # Build the player URL for every episode, then pull its .mp4 link.
        list_episode = map(self.build_episode_url, self.get_parameters())
        mp4_links = map(self.get_mp4_episode, list_episode)
        for num, ep in enumerate(mp4_links):
            print num, ep


if __name__ == '__main__':
    anime = Anime2MP4()
    anime.run()
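If you also want to save one of the links the class prints, a minimal sketch could look like the helper below. It is not part of the original class; it assumes the .mp4 URL is directly reachable without extra headers, and the function and file names are just illustrative.

# download.py (illustrative helper, not part of Anime2MP4)
import urllib2


def download_episode(mp4_url, filename):
    # Stream the response to disk in chunks so the whole file
    # is never held in memory at once.
    response = urllib2.urlopen(mp4_url)
    with open(filename, 'wb') as output:
        while True:
            chunk = response.read(1024 * 1024)  # 1 MiB at a time
            if not chunk:
                break
            output.write(chunk)


# Example: grab the first episode found by Anime2MP4
# anime = Anime2MP4()
# anime.episodes = anime.get_episodes_url()
# player_url = anime.build_episode_url(anime.get_parameters()[0])
# mp4 = anime.get_mp4_episode(player_url)
# if mp4:
#     download_episode(mp4, 'episode_00.mp4')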