Problem converting procedural Python code into a class

1

I'm having trouble converting a procedural script into a class in Python. This is the original procedural version:

from bs4 import BeautifulSoup
from selenium import webdriver
import html2text



# driver.page_source = driver.get())#
def getPEP(strg):
    """Load a page in Firefox and return its content converted to text.

    Args:
        strg: URL of the page to load.

    Returns:
        The page source after conversion by ``html2text.html2text``.
    """
    driver = webdriver.Firefox()
    try:
        driver.get(strg)
        html = driver.page_source
    finally:
        # Always end the WebDriver session, even when loading the page
        # fails; otherwise every failed request leaves a browser running.
        driver.quit()
    return html2text.html2text(html)

# txt=getPEP('http://www.mtsamples.com/site/pages/sample.asp?type=3-Allergy%20/%20Immunology&sample=386-Allergic%20Rhinitis, Allergic Rhinitis')
# print(txt)


# Read every link up front so the input file is closed before crawling starts.
with open('PEP.txt', 'r') as peps:
    lines = tuple(peps)
print(lines)

# Index of the first link to process. Leave at 0 for a full run; raise it
# (e.g. to 1798) to resume after a dropped connection. Assigning the index
# inside the loop, as before, re-fetched the same link on every iteration.
first_index = 0

# Markers delimiting the wanted section in the converted page text: it starts
# after the first '#  ' heading and stops before the "Join us on" link.
start = '#  '
end = ', \n\n[ ![Join us on'

for i in range(first_index, len(lines)):
    strg = lines[i].replace('\n', '')
    text = getPEP(strg)
    cleaned = text.split(start)[1].split(end)[0]
    # One output file per link, named after the link's position in PEP.txt;
    # the bold markers ('**') are stripped before writing.
    with open(str(i) + '.txt', 'w') as file:
        file.write(cleaned.replace(' ** ', '').replace('**', ''))

I then rewrote it as a class:

from bs4 import BeautifulSoup
from selenium import webdriver
import html2text

class ClassCrawler:  # Fetches sample pages with Firefox and saves their cleaned text.

    def __init__(self):  # Constructing an instance immediately runs the crawl.
        self.test = self.getPepFromInternt("http://www.mtsamples.com/site/pages/sample.asp?type=98-General%20Medicine&sample=487-Request%20For%20Consultation,")
        # Process every link listed in the given text file.
        self.getAllPep('/home/angelica/Documents/gitbucket/mscangelica/dataset/LinksTomtsamples.txt')


    # NOTE(review): declared without a `self` parameter. Calling it as
    # `self.getPepFromInternt(url)` passes the instance plus the URL, which
    # raises "takes 1 positional argument but 2 were given".
    def getPepFromInternt(strg):
        """Load the page at ``strg`` in Firefox and return it converted to text."""
        driver = webdriver.Firefox()
        driver.get(strg)
        html = driver.page_source
        driver.close()
        # Convert the HTML page source to text.
        text=html2text.html2text(html)
        return(text)

    # NOTE(review): also declared without `self`, although the body below
    # refers to `self`; it fails the same way when called on an instance.
    def getAllPep(linksList):
        """Fetch every link listed in the file ``linksList`` and save each result.

        The file is read line by line; the cleaned text for the line at
        index ``i`` is written to ``<i>.txt`` in the current directory.
        """
        peps = open(linksList, 'r')
        lines = tuple(peps)
        print(lines)
        peps.close()
        # Example entry (index 1798): http://www.mtsamples.com/site/pages/sample.asp?type=98-General%20Medicine&sample=487-Request%20For%20Consultation, Request For Consultation
        for i in range(len(lines)):
            # Debug aid: set i=1798 here to retry that link when the connection is broken.
            # Drop the newline kept from reading the file.
            strg=lines[i].replace('\n','')
            text= self.getPepFromInternt(strg)
            # Keep only the text between the first '#  ' heading and the
            # "Join us on" link that follows the sample.
            start = '#  '
            end = ', \n\n[ ![Join us on'
            cleaned=(text.split(start))[1].split(end)[0]
            file = open(str(i)+'.txt', 'w')
            # Strip the '**' bold markers before writing.
            file.write(cleaned.replace(' ** ','').replace('**',''))
            file.close()

But when I instantiate the class

>>> from ClassCrawlerPEP import ClassCrawler
>>> c = ClassCrawler()

generates the following error:

Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "~/PEPS/ClassCrawlerPEP.py", line 8, in __init__
    self.test = self.getPepFromInternt("http://www.mtsamples.com/site/pages/sample.asp?type=98-General%20Medicine&sample=487-Request%20For%20Consultation,")
TypeError: getPepFromInternt() takes 1 positional argument but 2 were given
    
asked by anonymous 12.11.2016 / 18:17

1 answer

1

In Python, when a method is called on an instance, the instance itself is always passed implicitly as the first argument. That is why the interpreter complained that two arguments were given (the instance and the string) while the method definition declares only one parameter.

What you need to do is declare that argument explicitly (`self` by convention, although any name works) as the first parameter of each method of the class:

def getPepFromInternt(self, strg):

...

def getAllPep(self, linksList):
    
12.11.2016 / 19:09