mirror of
https://github.com/hpware/news-analyze.git
synced 2025-06-23 15:51:01 +08:00
Implement structural updates and optimizations across multiple modules
This commit is contained in:
parent
5d58016b1d
commit
bf38065e52
@ -1,5 +1,7 @@
|
||||
# 新聞解析 / News Analyze
|
||||
|
||||

|
||||
|
||||
## Why?
|
||||
|
||||
我們使用這個新聞來舉例:
|
||||
|
@ -1,7 +1,18 @@
|
||||
from urllib.request import urlopen
|
||||
import time
|
||||
import re # Regular expressions
|
||||
from urllib.request import urlopen # URL request lib.
|
||||
from bs4 import BeautifulSoup # BeautifulSoup lib.
|
||||
|
||||
url = "https://tw.news.yahoo.com/"
|
||||
headers = {
|
||||
'User-Agent': 'NewsSceraperBot/1.0 (news.yuanhau.com)'
|
||||
}
|
||||
|
||||
#url = "https://tw.news.yahoo.com/"
|
||||
url = "https://news.google.com/home?hl=zh-TW&gl=TW&ceid=TW:zh-Hant"
|
||||
|
||||
page = urlopen(url)
|
||||
html_bytes = page.read()
|
||||
html = html_bytes.decode("utf-8")
|
||||
soup = BeautifulSoup(html, "html.parser")
|
||||
|
||||
page
|
||||
print(soup.find_all("article"))
|
||||
|
1
scraping/news.json
Normal file
1
scraping/news.json
Normal file
File diff suppressed because one or more lines are too long
@ -1 +1,2 @@
|
||||
urlopen
|
||||
beautifulsoup4
|
Loading…
x
Reference in New Issue
Block a user