Implement structural updates and optimizations across multiple modules

This commit is contained in:
吳元皓 2025-05-10 23:21:36 +08:00
parent 5d58016b1d
commit bf38065e52
4 changed files with 19 additions and 4 deletions

View File

@ -1,5 +1,7 @@
# 新聞解析 / News Analyze
![](https://hackatime-badge.hackclub.com/U087ATD163V/news-analyize)
## Why?
我們使用這個新聞來舉例:

View File

@ -1,7 +1,18 @@
from urllib.request import urlopen
import time
import re # Regular expressions
from urllib.request import urlopen # URL request lib.
from bs4 import BeautifulSoup # BeautifulSoup lib.
from urllib.request import Request  # Needed to attach custom headers to the request.

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10

# Identify this scraper to the remote server.
headers = {
    'User-Agent': 'NewsSceraperBot/1.0 (news.yuanhau.com)'
}
# Previous target, kept for reference: https://tw.news.yahoo.com/
url = "https://news.google.com/home?hl=zh-TW&gl=TW&ceid=TW:zh-Hant"

# Wrap the URL in a Request so the headers above are actually sent;
# calling urlopen(url) directly would ignore them.
request = Request(url, headers=headers)

# The context manager closes the HTTP response once the body has been read.
with urlopen(request, timeout=REQUEST_TIMEOUT) as page:
    html_bytes = page.read()

html = html_bytes.decode("utf-8")
soup = BeautifulSoup(html, "html.parser")

# Print every <article> element found in the fetched page.
print(soup.find_all("article"))

1
scraping/news.json Normal file

File diff suppressed because one or more lines are too long

View File

@ -1 +1,2 @@
urlopen
urlopen
beautifulsoup4