Good afternoon everyone!
Here is my situation: I found a script written for Python 2.7, but I have version 3.6. Since I am new to this area, I wanted to convert the script to Python 3 by hand. Here is the code:
### Modified for naming pics by tag
import os
import os.path
import re
import shutil
import time
import urllib.request  # Python 3: urlopen lives in urllib.request, not urllib

import requests
from bs4 import BeautifulSoup
def getnopages(noofpages, links=None):
    """Return the highest page number found among the listing's links.

    Scans each link's href for the substring 'page' and extracts the last
    run of digits as a candidate page number, keeping the maximum seen.

    Args:
        noofpages: Seed/current best value (the script passes -1); returned
            unchanged if no link yields a larger number.
        links: Optional iterable of objects supporting ``.get('href')``
            (BeautifulSoup tags or plain dicts). Defaults to the
            module-level ``alla`` list for backward compatibility with the
            original script, which read that global directly.

    Returns:
        The largest page number found, or ``noofpages`` if none was larger.
    """
    if links is None:
        links = alla  # original script kept the <a> tags in a global
    for link in links:
        href = str(link.get('href'))
        if href.find('page') >= 0:
            # Raw string: '\d' in a plain literal is a DeprecationWarning
            # on Python 3.6+ and an error in newer versions.
            currpages = int(re.findall(r'\d+', href)[-1])
            if currpages > noofpages:
                noofpages = currpages
    return noofpages
def getlink(url):
    """Fetch one listing page and queue full-size picture downloads.

    Parses *url*, finds thumbnail ``<img>`` tags hosted on the image CDN,
    rebuilds each full-size image URL from its thumbnail URL, looks up the
    picture's display name from the caption span, and appends
    ``[href, pic_name, pic_ex]`` entries to the module-level
    ``downloadable`` list.

    Python 3 fixes vs. the 2.7 original:
      * ``urllib2.urlopen`` -> ``urllib.request.urlopen`` (the posted line
        ``BeautifulSoup(urllib.request import urlopen(url))`` was a syntax
        error).
      * ``print href`` -> ``print(href)`` (the quoted ``print("href")``
        printed the literal word instead of the URL).
    """
    soup = BeautifulSoup(urllib.request.urlopen(url), "html.parser")
    alla = soup.findAll("img")
    for link in alla:
        href = str(link.get('src'))
        if not href.startswith('http://images'):
            continue
        # Split ".../<digits>/thumb-...-<id>.<ext>" into the directory
        # prefix and the bare "<id>.<ext>" filename, then recombine them
        # to get the full-size image URL (drop the "thumb-..." part).
        parts = re.search(r'(.*\/\d*\/)(thumb.*?(\d+\.\w+))', href).groups()
        href = parts[0] + parts[-1]
        item_name = "item_" + parts[-1].split(".")[0]
        # The caption span's class varies with the thumbnail size on the
        # site; try each known variant until one matches.
        atmp = None
        for cls in ('thumb-info-big', 'thumb-info-small', 'thumb-info-medium'):
            atmp = soup.find("div", {'id': item_name}).findNext('span', {'class': cls})
            if atmp is not None:
                break
        pic_ex = "." + parts[-1].split(".")[1]
        pic_name = atmp.contents[-2].string
        print(href)
        downloadable.append([href, pic_name, pic_ex])
def downloadfiles(downloadable):
    """Download every queued picture into ``./pics/<picture name>/``.

    Args:
        downloadable: List of ``[url, name, extension]`` triples, as
            produced by :func:`getlink`.

    Each file is streamed to ``pics/<name>/<name><timestamp><ext>``; the
    timestamp suffix keeps repeated runs from overwriting earlier files.

    Fixes vs. the original:
      * Creates the ``pics`` root first — the original ``os.chdir`` crashed
        when it did not exist.
      * Builds absolute paths with ``os.path.join`` instead of juggling
        ``os.chdir`` back and forth (same file locations, no cwd races).
      * ``makedirs(..., exist_ok=True)`` replaces the exists()/makedirs
        check-then-act pair.
      * Explicitly closes the streamed response instead of ``del`` so the
        connection is returned to the pool even if the write fails.
    """
    no = 0
    base_dir = os.path.join(os.getcwd(), "pics")
    os.makedirs(base_dir, exist_ok=True)
    total = len(downloadable)
    for item in downloadable:
        pic_url, pic_name, pic_ex = item[0], item[1], item[2]
        print(item)
        print(str(no) + " from " + str(total) + " pictures")
        response = requests.get(pic_url, stream=True)
        try:
            out_dir = os.path.join(base_dir, pic_name)
            os.makedirs(out_dir, exist_ok=True)
            out_path = os.path.join(out_dir, pic_name + str(time.time()) + pic_ex)
            with open(out_path, 'wb') as out_file:
                shutil.copyfileobj(response.raw, out_file)
        finally:
            response.close()
        no += 1
# --- Script entry point ---------------------------------------------------
# One listing URL is active; the commented ones show other listing types.
#url = 'http://wall.alphacoders.com/search.php?search=avril&page=1'
#url = 'http://wall.alphacoders.com/by_collection.php?id=565&page=1'
url = 'http://wall.alphacoders.com/by_category.php?id=7&name=Celebrity+Wallpapers&page=1'
# Everything up to and including 'page=' so page numbers can be appended.
baseurl = url[0:url.find('page=') + 5]
print(baseurl)
r = requests.get(url)
# Explicit parser: BeautifulSoup(r.content) without one emits a warning on
# Python 3 and may pick different parsers on different machines.
soup = BeautifulSoup(r.content, "html.parser")
alla = soup.find_all("a")
noofpages = -1
noofpages = getnopages(noofpages)
print(noofpages)
downloadable = []
# range() excludes its stop value; +1 so the last page is not skipped
# (the original range(1, noofpages) dropped the final page).
for each in range(1, noofpages + 1):
    getlink(baseurl + str(each))
    print(len(downloadable))
    print(str(each) + " from " + str(noofpages) + " pages")
downloadfiles(downloadable)
Does anyone know of a reference that lists the equivalent commands between Python 2 and Python 3?
If you have experience with this kind of conversion, please share it!
Thanks, guys! :)
Update 1:
I've reviewed the code and have corrected most of the code syntax errors, but I'm still experiencing problems in the last few lines:
for each in range(1,noofpages):
getlink (baseurl+str(each))
print(len(downloadable))
print (str(each) + " from " + (str(noofpages) + " pages"))
It has a syntax error in the getlink, so I made the following change:
for each in range(1,noofpages):
print(getlink(baseurl+str(each)))
print(len(downloadable)
print(str(each) + "from" + (str(noofpages) + "pages"))
It still reports a syntax error on that line.