mirror of
https://github.com/hpware/news-analyze.git
synced 2025-06-23 15:51:01 +08:00
THE LINE TODAY ONE WORKS, WOOOOOOOO
This commit is contained in:
parent
e4d2b4840a
commit
cbec267f7b
@ -119,7 +119,7 @@ onMounted(() => {
|
|||||||
></span
|
></span
|
||||||
></span>
|
></span>
|
||||||
<div class="flex flex-row justify-center align-center gap-0s">
|
<div class="flex flex-row justify-center align-center gap-0s">
|
||||||
<NuxtLink :to="localePath('/app/desktop/?')">
|
<NuxtLink :to="localePath('/app/desktop')">
|
||||||
<button
|
<button
|
||||||
class="m-4 mr-1 ml-1 bg-[#8C9393] text-white p-3 rounded-[10px] bg-gradient-to-l from-sky-500 to-purple-600 transition-all duration-150 hover:transform hover:scale-105 hover:shadow-lg"
|
class="m-4 mr-1 ml-1 bg-[#8C9393] text-white p-3 rounded-[10px] bg-gradient-to-l from-sky-500 to-purple-600 transition-all duration-150 hover:transform hover:scale-105 hover:shadow-lg"
|
||||||
>
|
>
|
||||||
|
File diff suppressed because it is too large
Load Diff
56
scraping/platforms/line_today.py
Normal file
56
scraping/platforms/line_today.py
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
import re
|
||||||
|
from urllib.request import urlopen, Request
|
||||||
|
import chardet
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import json
|
||||||
|
import psycopg2
|
||||||
|
import pandas as pd
|
||||||
|
import dotenv
|
||||||
|
import os
|
||||||
|
import gzip
|
||||||
|
import io
|
||||||
|
|
||||||
|
# Load environment variables from .env file
dotenv.load_dotenv()

# Request headers imitating a desktop Chrome browser.
headers = {
    # Alternative self-identifying bot User-Agent, currently disabled:
    #'User-Agent': 'NewsSceraperBot/1.0 (https://github.com/hpware/news-analyze)',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    # NOTE(review): a bare '*' is not a valid media range; '*/*' is the
    # conventional wildcard. Left unchanged because this exact header set is
    # the one reported to work against the site -- confirm before changing.
    'Accept': '*',
    'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7',
    # Only advertise encodings this script can actually decode. Offering
    # 'deflate' or 'br' invites a response body that the gzip-only handling
    # below would feed straight into .decode() and fail on.
    'Accept-Encoding': 'gzip',
    'Connection': 'keep-alive',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Cache-Control': 'max-age=0',
}

# Article page to fetch.
url = "https://today.line.me/tw/v2/article/oqmazXP"

try:
    req = Request(url, headers=headers)

    # The context manager closes the connection even if reading fails; the
    # timeout keeps a stalled server from hanging the script forever.
    with urlopen(req, timeout=30) as response:
        raw_body = response.read()
        content_encoding = response.info().get('Content-Encoding') or ''
        # Honour the charset declared in Content-Type, defaulting to UTF-8.
        charset = response.info().get_content_charset() or 'utf-8'

    # Header values are case-insensitive, so normalise before comparing.
    if content_encoding.strip().lower() == 'gzip':
        raw_body = gzip.decompress(raw_body)
    html = raw_body.decode(charset)

    soup = BeautifulSoup(html, "html.parser")

    # Extract content
    title = soup.find('h1', class_="entityTitle")
    title_text = title.text.strip() if title else "No title found"

    article = soup.find('article', class_="news-content")
    paragraph = article.text.strip() if article else ""

    # Print results
    print(f"Title: {title_text}")
    print(f"Content: {paragraph}")

except Exception as e:
    # Top-level boundary of the script: report any failure and exit normally.
    print(f"Error: {str(e)}")
|
Loading…
x
Reference in New Issue
Block a user