From b62a3cda3d7c10d74bce7ec0549e218d33403f99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=90=B3=E5=85=83=E7=9A=93?= Date: Sun, 18 May 2025 11:59:58 +0800 Subject: [PATCH] Add a few more database tables & made the api pull from the database when then article exists & not ddos line today, Also made a basic newsView with the api & the scraping script now scraps more stuff! :D --- components/app/windows/newsView.vue | 11 +++++++++++ createDatabase.ts | 24 +++++++++++++++++------- server/api/fetcharticle/[slug].ts | 28 ---------------------------- server/api/news/get/lt/[slug].ts | 26 ++++++++++++++++++++++++-- server/scrape/line_today.ts | 20 +++++++++++++++++--- server/scrape/save_scrape_data.ts | 15 +++++++++++++++ 6 files changed, 84 insertions(+), 40 deletions(-) create mode 100644 components/app/windows/newsView.vue delete mode 100644 server/api/fetcharticle/[slug].ts create mode 100644 server/scrape/save_scrape_data.ts diff --git a/components/app/windows/newsView.vue b/components/app/windows/newsView.vue new file mode 100644 index 0000000..f0ca405 --- /dev/null +++ b/components/app/windows/newsView.vue @@ -0,0 +1,11 @@ + + diff --git a/createDatabase.ts b/createDatabase.ts index 0d0b673..adb0857 100644 --- a/createDatabase.ts +++ b/createDatabase.ts @@ -38,13 +38,13 @@ CREATE TABLE IF NOT EXISTS chat_history ( const newsArticles = await sql` create table if not exists news_articles ( - uuid text primary key, - title text not null, - content text not null, - news_org text not null, - origin_link text not null, - author text, - related_uuid text not null +uuid text primary key, +title text not null, +content text not null, +news_org text not null, +origin_link text not null, +author text, +related_uuid text not null ) `; @@ -59,6 +59,16 @@ create table if not exists hot_news ( ) `; +const articlesLt = await sql` +create table if not exists articles_lt ( +uuid text primary key, +title text not null, +content text not null, +origin text not null, +author text, +) +`; + console.log("Creation Complete"); await sql.end(); diff --git a/server/api/fetcharticle/[slug].ts b/server/api/fetcharticle/[slug].ts deleted file mode 100644 index 1bfe5b9..0000000 --- a/server/api/fetcharticle/[slug].ts +++ /dev/null @@ -1,28 +0,0 @@ -import sql from "~/server/components/postgres"; -export default defineEventHandler(async (event) => { - const slug = getRouterParam(event, "slug"); - - // Validate and sanitize the slug - if (!slug || typeof slug !== "string") { - throw createError({ - statusCode: 400, - message: "Invalid slug parameter", - }); - } - const cleanSlug = slug.replace(/[^a-zA-Z0-9-_]/g, ""); - - try { - const result = await sql` - select * from articles - where slug = ${cleanSlug} - `; - - return result.rows[0] || null; - } catch (error) { - console.error("Database error:", error); - throw createError({ - statusCode: 500, - message: "Internal server error", - }); - } -}); diff --git a/server/api/news/get/lt/[slug].ts b/server/api/news/get/lt/[slug].ts index 329c8be..614430b 100644 --- a/server/api/news/get/lt/[slug].ts +++ b/server/api/news/get/lt/[slug].ts @@ -1,6 +1,28 @@ import lineToday from "~/server/scrape/line_today"; +import sql from "~/server/components/postgres"; +import saveDataToSql from "~/server/scrape/save_scrape_data"; + +function cleanUpSlug(orgslug: string) { + let slug = dirtySlug.trim(); + const validSlugRegex = /^[a-zA-Z0-9-]+$/; + if (!validSlugRegex.test(slug)) { + throw new Error("Invalid slug format"); + } + return slug; +} + export default defineEventHandler(async (event) => { const slug = getRouterParam(event, "slug"); - const data = await lineToday(slug); - return data; + const cleanSlug = await cleanUpSlug(slug); + const result = await sql` + select * from articles_lt + where slug = ${cleanSlug} + `; + if (result) { + return result; + } else { + const data = await lineToday(slug); + saveDataToSql(data, slug); + return data; + } }); diff --git a/server/scrape/line_today.ts b/server/scrape/line_today.ts index f10bdd9..b88d2cb 100644 --- a/server/scrape/line_today.ts +++ b/server/scrape/line_today.ts @@ -21,16 +21,30 @@ async function lineToday(slug: string) { // 加 await? no. // AHHH I NEED TO CHANGE TO SOMETHING ELSE. const html = cheerio.load(data); - const title = html("h1.entityTitle").text().replaceAll("\n", ""); - + const title = html("h1.entityTitle") + .text() + .replaceAll("\n", "") + .replace(" ", ""); const paragraph = html("article.news-content").text(); + const newsOrgdir = html("h4.entityPublishInfo-publisher") + .text() + .replaceAll("\n", "") + .replaceAll(" ", ""); + const author = html("span.entityPublishInfo-meta-info") + .text() + .replace(/更新於.*發布於.*•/g, "") + .replaceAll("\n", "") + .replaceAll(" ", ""); + return { title: title, paragraph: paragraph, + origin: newsOrgdir, + author: author, }; } // Texting on console only! -//console.log(await lineToday("oqmazXP")); +//console.log(await lineToday("kEJjxKw")); export default lineToday; diff --git a/server/scrape/save_scrape_data.ts b/server/scrape/save_scrape_data.ts new file mode 100644 index 0000000..c876945 --- /dev/null +++ b/server/scrape/save_scrape_data.ts @@ -0,0 +1,15 @@ +import postgres from "~/server/components/postgres"; +import { v4 as uuidv4 } from "uuid"; + +async function saveDataToSql( + data: { title: string; paragraph: string; author: string; origin: string }, + slug: string, +) { + const sql = postgres; + await sql` + INSERT INTO articles_lt (uuid, slug, title, content, author, origin) + VALUES (${uuidv4()}, ${slug}, ${data.title}, ${data.paragraph}, ${data.author}, ${data.origin}) + ON CONFLICT (slug) DO NOTHING + `; +} +export default saveDataToSql;