# Print the whitespace-stripped text of every element whose class is "title".
with open(r"C:\Users\ouoholly\Downloads\ACS.htm", encoding="utf-8") as f:
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed (and emits GuessedAtParserWarning), so
    # the same file can parse differently on different machines.
    soup = BeautifulSoup(f, "html.parser")

# The document is fully parsed once BeautifulSoup() returns, so the file
# does not need to stay open while the tree is searched.
for title in soup.find_all(class_="title"):
    print(title.text.strip())
抓取 URLs
抓取 h4 tag 下的連結
python
from bs4 import BeautifulSoup
# Print the href of every <a> that is a direct child of an <h4>.
with open(r"C:\Users\ouoholly\Downloads\ACS.htm", encoding="utf-8") as f:
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed (and emits GuessedAtParserWarning), so
    # the same file can parse differently on different machines.
    soup = BeautifulSoup(f, "html.parser")

# "a[href]" restricts the match to anchors that actually carry an href, so
# the output never contains the literal text "None" for link-less anchors.
for link in soup.select("h4 > a[href]"):
    print(link["href"])
抓取某 class 下的連結
python
from bs4 import BeautifulSoup
# Print the href of every <a> that is a direct child of an element whose
# class is "title".
with open(r"C:\Users\ouoholly\Downloads\ACS.htm", encoding="utf-8") as f:
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed (and emits GuessedAtParserWarning), so
    # the same file can parse differently on different machines.
    soup = BeautifulSoup(f, "html.parser")

# "a[href]" restricts the match to anchors that actually carry an href, so
# the output never contains the literal text "None" for link-less anchors.
for link in soup.select(".title > a[href]"):
    print(link["href"])