Implement structural updates and optimizations across multiple modules

2025-06-23 15:51:01 +08:00 · 2025-05-10 23:21:36 +08:00 · 2025-05-10 23:21:36 +08:00 · bf38065e52
commit bf38065e52
parent 5d58016b1d
4 changed files with 19 additions and 4 deletions
--- a/README.md
+++ b/README.md
@@ -1,5 +1,7 @@
 # 新聞解析 / News Analyze

+![](https://hackatime-badge.hackclub.com/U087ATD163V/news-analyize)
+
 ## Why?

 我們使用這個新聞來舉例：
--- a/scraping/main.py
+++ b/scraping/main.py
@@ -1,7 +1,18 @@
-from urllib.request import urlopen
+import time
+import re # Regular expressions
+from urllib.request import urlopen # URL request lib.
+from bs4 import BeautifulSoup # BeautifulSoup lib.

-url = "https://tw.news.yahoo.com/"
+headers = {
+    'User-Agent': 'NewsSceraperBot/1.0 (news.yuanhau.com)'
+}
+
+#url = "https://tw.news.yahoo.com/"
+url = "https://news.google.com/home?hl=zh-TW&gl=TW&ceid=TW:zh-Hant"

 page = urlopen(url)
+html_bytes = page.read()
+html = html_bytes.decode("utf-8")
+soup = BeautifulSoup(html, "html.parser")

-page
+print(soup.find_all("article"))
--- a/scraping/news.json
+++ b/scraping/news.json
--- a/scraping/requirements.txt
+++ b/scraping/requirements.txt
@@ -1 +1,2 @@
 urlopen
+beautifulsoup4