빅데이터/Scrapy
BeautifulSoup 크롤링
Petabyte
2020. 1. 5. 15:19
from urllib.error import HTTPError, URLError
from urllib.request import urlopen

from bs4 import BeautifulSoup
def getTitle(url):
    """Download ``url`` and print every element whose ``id`` is ``text``.

    Args:
        url: Address of the HTML page to fetch.

    Returns:
        The matches produced by ``find_all(id='text')``, or ``None`` when
        the page could not be fetched or parsed (the error is printed).
    """
    try:
        # The context manager closes the response once the body is read.
        with urlopen(url) as response:
            markup = response.read()
    except (HTTPError, URLError) as e:
        # Nothing to parse without a response, so stop here. Falling
        # through would fail on the never-assigned response object.
        # URLError also covers unreachable hosts and missing files.
        print(e)
        return None

    try:
        soup = BeautifulSoup(markup, "html.parser")
        # Alternative lookups kept for reference:
        #   spans whose class is "green" or "red":
        #     names = soup.find_all("span", {"class": {"green", "red"}})
        #     for name in names:
        #         print(name.get_text())
        #   count how many times the text "the prince" occurs:
        #     print(len(soup.find_all(text="the prince")))
        # Find everything with id "text". The result is a list, so index
        # it (e.g. matches[0].get_text()) to print a single entry.
        matches = soup.find_all(id='text')
        print(matches)
    except AttributeError as e:
        print(e)
        return None
    return matches
# Demo run: print a start banner, scrape the sample page, print an end banner.
print("Search Start=================")
url = "http://www.pythonscraping.com/pages/warandpeace.html"
getTitle(url)
print("Search End=================")