From cb34764c275a21e08ef0adc1deb27919bb5d329a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=90=B3=E5=85=83=E7=9A=93?= Date: Tue, 27 May 2025 22:06:42 +0800 Subject: [PATCH] Get UpdateAt & PublishedAt and remove testing comment. --- server/scrape/line_today.ts | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/server/scrape/line_today.ts b/server/scrape/line_today.ts index d93039c..e723719 100644 --- a/server/scrape/line_today.ts +++ b/server/scrape/line_today.ts @@ -1,5 +1,25 @@ import * as cheerio from "cheerio"; +/** Converts a relative-time label into an absolute Date by subtracting the stated amount from the current time. Recognizes a number followed by 小時前 (hours), 天前 (days) or 分鐘前 (minutes); the units are tested in that order and the first one found wins. @param timeText text that may contain one of the relative-time labels. @returns the computed Date, or null when none of the three patterns is found. */ function findTime(timeText: string) { + const now = new Date(); + + const hourMatch = timeText.match(/(\d+)小時前/); + const dayMatch = timeText.match(/(\d+)天前/); + const minuteMatch = timeText.match(/(\d+)分鐘前/); + /* offsets below are expressed in milliseconds and subtracted from now */ if (hourMatch) { + const hoursAgo = parseInt(hourMatch[1]); + return new Date(now.getTime() - hoursAgo * 60 * 60 * 1000); + } else if (dayMatch) { + const daysAgo = parseInt(dayMatch[1]); + return new Date(now.getTime() - daysAgo * 24 * 60 * 60 * 1000); + } else if (minuteMatch) { + const minutesAgo = parseInt(minuteMatch[1]); + return new Date(now.getTime() - minutesAgo * 60 * 1000); + } + + /* no hour/day/minute label was present in timeText */ return null; +} + async function lineToday(slug: string) { const url = "https://today.line.me/tw/v2/article/" + slug; const fetchPageCode = await fetch(url, { @@ -46,6 +66,7 @@ async function lineToday(slug: string) { .text() .replaceAll("\n", "") .replaceAll(" ", ""); + let author = ""; const authorInfo = html("span.entityPublishInfo-meta-info") .text() @@ -57,16 +78,26 @@ async function lineToday(slug: string) { } else { author = authorInfo; } + /* raw text of the same span.entityPublishInfo-meta-info selector that authorInfo is read from */ const orgAuthorDateData = html("span.entityPublishInfo-meta-info").text() + /* captures the text that follows 更新於, up to the next • or the end of the text */ const updateMatch = orgAuthorDateData.match(/更新於\s*([^•]+)/); + /* captures everything that follows 發布於 to the end of the text. NOTE(review): without the s flag . does not match line breaks and $ only matches at the very end, so this cannot match if a line break follows the label in the raw text; confirm against real page markup */ const publishMatch = orgAuthorDateData.match(/發布於\s*(.+)$/); +/* stays null when the 更新於 label is absent or findTime cannot parse the captured text */ let updatedAt: Date | null = null; + if (updateMatch) { + updatedAt = findTime(updateMatch[1].trim()); + } +/* stays null when the 發布於 label is absent or findTime cannot parse the captured text */ let publishedAt: Date | null = null; + if 
(publishMatch) { + publishedAt = findTime(publishMatch[1].trim()); + } return { title: title, paragraph: paragraph, origin: newsOrgdir, author: author, images: images, + /* NOTE(review): the returned keys updateat / publishedat differ in spelling and casing from the locals updatedAt / publishedAt; confirm what callers expect */ updateat: updatedAt, + publishedat: publishedAt }; } -// Texting on console only! -//console.log(await lineToday("wJyR8Nw")); - export default lineToday;