# Python 爬蟲範例(以 Google 新聞 2019 為例)
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Fetch the Google News international-news topic page (hl=zh-TW, gl=TW).
res = requests.get(
    "https://news.google.com/topics/CAAqKggKIiRDQkFTRlFvSUwyMHZNRGx1YlY4U0JYcG9MVlJYR2dKVVZ5Z0FQAQ?hl=zh-TW&gl=TW&ceid=TW%3Azh-Hant",
    # Without a timeout, requests may wait indefinitely on a stalled connection.
    timeout=10,
)
# Fail loudly on an HTTP error instead of silently parsing an error page.
res.raise_for_status()
soup = BeautifulSoup(res.text, "html.parser")

# Google occasionally changes the heading level used for headlines (h3 became
# the current h4), so this selector may need updating.
# Each link is read together with its own parent heading so a title can never
# be paired with another headline's URL; zipping two independent selections
# shifts the pairs whenever an <h4> has no direct <a href> child.
for link in soup.select("h4 > a[href]"):
    print('')
    print(link.parent.text)
    # The hrefs are expected to be relative ("./..."); urljoin resolves them
    # against the site root and leaves an absolute href intact.
    print(urljoin("https://news.google.com/", link["href"]))
from urllib.parse import urljoin

from bs4 import BeautifulSoup

# NOTE(review): this section repeats the fetch-and-print pass that appears
# earlier in the file and relies on the `import requests` at the top of the
# file; it looks like a paste artifact -- confirm before removing it.

# Fetch the Google News international-news topic page (hl=zh-TW, gl=TW).
res = requests.get(
    "https://news.google.com/topics/CAAqKggKIiRDQkFTRlFvSUwyMHZNRGx1YlY4U0JYcG9MVlJYR2dKVVZ5Z0FQAQ?hl=zh-TW&gl=TW&ceid=TW%3Azh-Hant",
    # Bound the wait so a stalled connection cannot hang the script.
    timeout=10,
)
# Raise on 4xx/5xx responses rather than scraping an error page.
res.raise_for_status()
soup = BeautifulSoup(res.text, "html.parser")

# Google occasionally changes the heading level used for headlines (h3 became
# the current h4), so this selector may need updating.
# Walking the links and taking each one's parent heading keeps title and URL
# aligned even when some <h4> elements carry no direct <a href> child.
for link in soup.select("h4 > a[href]"):
    headline = link.parent.text
    # Resolve the (expected relative, "./...") href against the site root.
    article_url = urljoin("https://news.google.com/", link["href"])
    print('')
    print(headline)
    print(article_url)
留言
張貼留言