發表文章

目前顯示的是有「爬蟲」標籤的文章

Python 爬蟲範例(以 Google 新聞 2019 為例)

"""Print the headline and absolute link of every article on a Google News topic page."""
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Base against which the page's relative links ("./articles/...") are resolved.
GOOGLE_NEWS_BASE = "https://news.google.com/"
# URL of the Google News international-news topic (zh-TW / Taiwan edition).
TOPIC_URL = (
    "https://news.google.com/topics/"
    "CAAqKggKIiRDQkFTRlFvSUwyMHZNRGx1YlY4U0JYcG9MVlJYR2dKVVZ5Z0FQAQ"
    "?hl=zh-TW&gl=TW&ceid=TW%3Azh-Hant"
)

# A timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status() surfaces HTTP errors instead of parsing an error page.
res = requests.get(TOPIC_URL, timeout=10)
res.raise_for_status()
soup = BeautifulSoup(res.text, "html.parser")

# Google occasionally changes the heading level used for headlines
# (e.g. the former h3 became the current h4).
for title in soup.select("h4"):
    # Look up the link inside its own heading rather than zipping two
    # independent selections: a heading without a direct link would otherwise
    # shift every following title onto the wrong URL.
    link = title.find("a", href=True, recursive=False)
    if link is None:
        continue
    print('')
    print(title.text)
    # urljoin resolves "./articles/..." against the site root and leaves
    # already-absolute links untouched.
    print(urljoin(GOOGLE_NEWS_BASE, link['href']))

Python 爬蟲範例(以 Mobile01 電腦版為例)

"""Print topic titles and links from the first N pages of a Mobile01 forum category."""
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Site root; also used to resolve the relative topic links found on each page.
MOBILE01_BASE = "https://www.mobile01.com/"

# Number of listing pages to fetch, read from the user (pages are 1-based).
page = int(input("請輸入要擷取的頁數"))
for i in range(1, page + 1):
    # Let requests build the query string (?c=17&p=<i>) instead of
    # concatenating it by hand; the timeout avoids hanging on a stalled
    # connection and raise_for_status() surfaces HTTP errors.
    res = requests.get(
        MOBILE01_BASE + "forumtopic.php",
        params={"c": 17, "p": i},
        timeout=10,
    )
    res.raise_for_status()
    soup = BeautifulSoup(res.text, "html.parser")
    for title in soup.select(".topic_gen"):
        print("==============")
        href = title.get('href')
        if href is None:
            # No link on this element: print the title alone rather than a
            # bogus ".../None" URL.
            print("[標題]:" + title.text)
            continue
        print("[標題]:" + title.text, "\n" + urljoin(MOBILE01_BASE, href))