빅데이터/Scrapy

BeautifulSoup 크롤링

Petabyte 2020. 1. 5. 15:19

 

 

from urllib.request import urlopen
from urllib.error import HTTPError
from bs4 import BeautifulSoup

def getTitle(url):
    """Download ``url`` and print every element whose ``id`` is ``text``.

    Despite its name, the function does not extract a page title: it parses
    the downloaded HTML with BeautifulSoup and prints the list of elements
    matching ``id='text'``.

    Args:
        url: Address of the HTML page to fetch.

    Returns:
        None. Both the result and any handled error are reported via
        ``print``. An ``HTTPError`` from the server is printed and ends the
        call early; other exceptions from ``urlopen`` propagate unchanged.
    """
    try:
        # The context manager closes the HTTP response once it has been read.
        with urlopen(url) as response:
            html = response.read()
    except HTTPError as e:
        print(e)
        # Nothing was downloaded, so there is nothing to parse. Falling
        # through would hit an unbound ``html`` and crash.
        return

    try:
        bsObj = BeautifulSoup(html, "html.parser")
        # Alternative: spans whose class is either "red" or "green".
        #nameList = bsObj.find_all("span", {"class": {"green", "red"}})
        #for name in nameList:
        #    content = name.get_text()
        #    print(content)

        # Alternative: count how often the text "the prince" occurs by
        # taking len() of the matches.
        #nameList = bsObj.find_all(text="the prince")
        #print(len(nameList))

        # Find every element whose id is "text" and print them. The result
        # is a list, so index into it to print a single element.
        allText = bsObj.find_all(id='text')
        print(allText)
        #print(allText[0].get_text())
    except AttributeError as e:
        print(e)


# Demo run: scrape the sample "War and Peace" page, framed by banner lines
# so the scraper's output is easy to spot on the console.
print("Search Start=================")
url = "http://www.pythonscraping.com/pages/warandpeace.html"

getTitle(url)

print("Search End=================")