mirror of
https://github.com/hpware/news-analyze.git
synced 2025-06-23 15:51:01 +08:00
feat: implement DraggableWindow component and update index.vue layout; enhance scraping scripts for better error handling and content extraction
This commit is contained in:
parent
8b07d4b3be
commit
b461e81360
87
components/DraggableWindow.vue
Normal file
87
components/DraggableWindow.vue
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
<script setup lang="ts">
/**
 * DraggableWindow
 *
 * A fixed-position panel with a title bar that can be dragged with the mouse.
 * The window body is supplied through the default slot.
 *
 * Emits:
 *  - `close`    when the ✕ button is clicked
 *  - `minimize` when the ━ button is clicked
 */
import { ref, onMounted, onUnmounted } from 'vue'

const props = defineProps<{
  /** Text shown in the title bar. */
  title: string
  /** Initial left offset in pixels (defaults to 100). */
  initialX?: number
  /** Initial top offset in pixels (defaults to 100). */
  initialY?: number
  /** CSS width of the window (defaults to '400px'). */
  width?: string
  /** CSS height of the window (defaults to '300px'). */
  height?: string
}>()

const emit = defineEmits(['close', 'minimize'])

// True while the primary mouse button is held down on the title bar.
const isDragging = ref(false)

// Current top-left corner of the window in viewport pixels.
// `??` (not `||`) so that an explicit coordinate of 0 is respected.
const position = ref({
  x: props.initialX ?? 100,
  y: props.initialY ?? 100
})

// Distance between the pointer and the window's top-left corner at drag start,
// kept so the window does not jump under the cursor while moving.
const offset = ref({ x: 0, y: 0 })

/** Begin a drag: remember where inside the title bar the pointer grabbed. */
const startDrag = (e: MouseEvent) => {
  // Only the primary button drags; other buttons would otherwise leave the
  // window stuck in the dragging state (e.g. after a context menu opens).
  if (e.button !== 0) return
  isDragging.value = true
  offset.value = {
    x: e.clientX - position.value.x,
    y: e.clientY - position.value.y
  }
}

/** Move the window with the pointer while a drag is in progress. */
const doDrag = (e: MouseEvent) => {
  if (!isDragging.value) return
  position.value = {
    x: e.clientX - offset.value.x,
    y: e.clientY - offset.value.y
  }
}

/** End the current drag, if any. */
const stopDrag = () => {
  isDragging.value = false
}

// Listen on the document so the drag keeps tracking when the pointer leaves
// the title bar; registered on mount so `document` is only touched client-side.
onMounted(() => {
  document.addEventListener('mousemove', doDrag)
  document.addEventListener('mouseup', stopDrag)
})

onUnmounted(() => {
  document.removeEventListener('mousemove', doDrag)
  document.removeEventListener('mouseup', stopDrag)
})
</script>

<template>
  <div
    :style="{
      left: `${position.x}px`,
      top: `${position.y}px`,
      width: props.width || '400px',
      height: props.height || '300px'
    }"
    class="fixed bg-white dark:bg-gray-800 rounded-lg shadow-lg overflow-hidden"
  >
    <!-- Title bar: drag handle plus window controls -->
    <div
      @mousedown="startDrag"
      class="bg-gray-700 p-2 cursor-move flex justify-between items-center"
    >
      <h3 class="font-semibold">{{ title }}</h3>
      <!-- .stop keeps a press on a control from starting a drag -->
      <div class="flex flex-row gap-1" @mousedown.stop>
        <button
          type="button"
          aria-label="Minimize"
          @click="emit('minimize')"
          class="p-1 hover:bg-gray-300 dark:hover:bg-gray-600 rounded"
        >
          ━
        </button>
        <button
          type="button"
          aria-label="Close"
          @click="emit('close')"
          class="p-1 rounded bg-red-500 text-white hover:bg-red-600 transition duration-200"
        >
          ✕
        </button>
      </div>
    </div>
    <!-- Window body -->
    <div class="p-4 text-black">
      <slot></slot>
    </div>
  </div>
</template>
|
178
layouts/macui.vue
Normal file
178
layouts/macui.vue
Normal file
@ -0,0 +1,178 @@
|
|||||||
|
|
||||||
|
<script setup lang="ts">
|
||||||
|
// No layout
|
||||||
|
|
||||||
|
// interfaces
|
||||||
|
interface currentNavBarInterface {
|
||||||
|
name: string;
|
||||||
|
icon: string;
|
||||||
|
action: any;
|
||||||
|
flash: boolean;
|
||||||
|
windowAssociated: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Import plugins
|
||||||
|
import { gsap } from "gsap";
|
||||||
|
import { TextPlugin } from "gsap/TextPlugin";
|
||||||
|
import { createApp } from "vue";
|
||||||
|
gsap.registerPlugin(TextPlugin);
|
||||||
|
|
||||||
|
// Import Windows
|
||||||
|
import SignIn from "~/components/app/windows/login.vue";
|
||||||
|
|
||||||
|
// Import Shadcn/UI components
|
||||||
|
import AlertComponent from "~/components/ui/alert/Alert.vue";
|
||||||
|
import ButtonComponent from "~/components/ui/button/Button.vue";
|
||||||
|
import DialogComponent from "~/components/ui/dialog/Dialog.vue";
|
||||||
|
import ProgressComponent from "~/components/ui/progress/Progress.vue";
|
||||||
|
import HoverCardComponent from "~/components/ui/hover-card/HoverCard.vue";
|
||||||
|
|
||||||
|
// Icons
|
||||||
|
import { ComputerDesktopIcon, UserIcon, LanguageIcon, ChevronRightIcon } from "@heroicons/vue/24/outline";
|
||||||
|
|
||||||
|
// i18n
|
||||||
|
const { t, locale, locales } = useI18n();
|
||||||
|
const switchLocalePath = useSwitchLocalePath();
|
||||||
|
const localePath = useLocalePath();
|
||||||
|
|
||||||
|
// Router
|
||||||
|
const router = useRouter();
|
||||||
|
const route = useRoute();
|
||||||
|
|
||||||
|
// values
|
||||||
|
const popMessage = ref(null);
|
||||||
|
const menuOpen = ref(false);
|
||||||
|
const langMenuOpen = ref(false);
|
||||||
|
const lang = ref(locale.value);
|
||||||
|
const alertOpen = ref(false);
|
||||||
|
const currentNavBar = ref<currentNavBarInterface[]>([]);
|
||||||
|
|
||||||
|
// Date
|
||||||
|
// Formatting options for the clock shown in the top bar (24-hour, zh-TW).
const clockFormatOptions: Intl.DateTimeFormatOptions = {
  month: "2-digit",
  day: "2-digit",
  year: "numeric",
  hour: "2-digit",
  minute: "2-digit",
  second: "2-digit",
  hour12: false,
};

/** Returns the current date and time formatted for the top-bar clock. */
const formatNow = (): string =>
  new Date().toLocaleDateString("zh-TW", clockFormatOptions);

// Reactive clock text; refreshed once per second while the layout is mounted.
const currentDate = ref(formatNow());

// Handle of the running clock timer so it can be stopped on unmount.
let clockTimer: ReturnType<typeof setInterval> | undefined;

onMounted(() => {
  clockTimer = setInterval(() => {
    currentDate.value = formatNow();
  }, 1000);
});

// Without this the interval keeps firing after the layout is torn down.
onUnmounted(() => {
  if (clockTimer !== undefined) {
    clearInterval(clockTimer);
    clockTimer = undefined;
  }
});
|
||||||
|
|
||||||
|
// functions
|
||||||
|
/**
 * Handles a menu selection.
 *
 * The special name "leave" navigates to the localized /home route; every
 * selection is logged and closes the menu.
 */
const openWindow = (windowName?: string) => {
  const wantsToLeave = windowName === "leave";
  if (wantsToLeave) router.push(localePath("/home"));

  console.log(windowName);
  menuOpen.value = false;
}
|
||||||
|
|
||||||
|
/** Placeholder for restoring a minimized window; currently only logs the name. */
const unMinWindow = (windowName?: string) => { console.log(windowName); }
|
||||||
|
|
||||||
|
// menus
|
||||||
|
// Entries of the system menu as [label, window name] pairs, in display order.
const menuEntryPairs: [string, string][] = [
  ["Hot News", "hotnews"],
  ["News", "news"],
  ["Sources", "sources"],
  ["About This Website", "about"],
  ["Settings", "settings"],
  ["Leave", "leave"],
];

// Menu model consumed by the template: one { name, windowName } object per entry.
const menuItems = menuEntryPairs.map(([name, windowName]) => ({ name, windowName }));
|
||||||
|
/** Flips the system menu between open and closed. */
const toggleMenu = () => {
  const wasOpen = menuOpen.value;
  menuOpen.value = !wasOpen;
}
|
||||||
|
// Lang Menu
|
||||||
|
/** Flips the language menu flag between open and closed. */
const toggleLangMenu = () => {
  const wasOpen = langMenuOpen.value;
  langMenuOpen.value = !wasOpen;
}
|
||||||
|
</script>
|
||||||
|
<template>
  <!-- Top bar: system menu button, minimized-window shortcuts, and clock -->
  <div
    class="absolute inset-x-0 flex flex-row px-2 py-1 bg-[#7D7C7C]/70 text-white justify-between align-center text-center z-50"
  >
    <!--Menu container-->
    <div class="flex flex-row gap-2 text-gray-400 text-white z-[999]">
      <button @click="toggleMenu" aria-label="Toggle menu" class="w-8 h-8 text-white hover:text-blue-500 transition-all duration-100 flex flex-row">
        <ComputerDesktopIcon/>
      </button>
      <span class="ml-1 mr-2 text-[20px]">|</span>
      <!--navbar icons for min and max application window-->
      <!-- NOTE(review): this button is empty and has no handler; looks like a placeholder -->
      <button class="flex flex-row items-center gap-x-2 text-gray-400 hover:text-gray-600 transition-all duration-100">
      </button>
      <div v-for="item in currentNavBar" :key="item.name" class="flex flex-row items-center gap-x-2 hover:bg-gray-100 transition-all duration-100 px-4 py-2 cursor-pointer">
        <button @click="unMinWindow(item.windowAssociated)" class="flex flex-row items-center gap-x-2 text-gray-400 hover:text-gray-600 transition-all duration-100">
          <span>{{ item.name }}</span>
          <span v-if="item.flash" class="animate-ping absolute inline-flex h-3 w-3 rounded-full bg-red-400 opacity-75"></span>
          <span v-if="item.icon" :class="item.icon">
          </span>
        </button>
      </div>
    </div>
    <div class="text-center align-middle justify-center text-white">{{ currentDate }}</div>
  </div>
  <!-- Spacer that keeps in-flow content below the absolutely positioned top bar -->
  <div class="w-full h-[2.5em]"></div>
  <!--Menu-->
  <Transition
    enter-active-class="animate__animated animate__fadeInDown animate_fast03"
    leave-active-class="animate__animated animate__fadeOutUp animate_fast03"
  >
    <!-- `relative` is required for the z-index to take effect on this element -->
    <div class="relative m-2 p-2 bg-gray-800 shadow-lg w-fit rounded-[10px] z-[998]" v-if="menuOpen">
      <div v-for="item in menuItems" :key="item.name" class="">
        <button @click="openWindow(item.windowName)" class="flex flex-row items-center gap-x-2 text-gray-400 hover:text-gray-600 transition-all duration-100">
          <span>{{ item.name }}</span>
          <ChevronRightIcon class="w-4 h-4 justify-center align-center" />
        </button>
      </div>
    </div>
  </Transition>
  <!--Main desktop contents-->
  <div
    class="flex flex-col justify-center align-center text-center absolute w-full h-screen inset-x-0 inset-y-0 z-[-1]"
    id="desktop"
  >
  </div>
  <slot/>
  <!--Footer-->
  <!-- Underscores stand in for spaces inside the arbitrary calc() value; literal
       spaces would split the class name into three invalid classes. -->
  <div
    class="absolute w-[calc(100%_-_5px)] inset-x-0 bottom-0 mx-[1.5px] p-3 justify-between align-center flex flex-row"
  >
    <div class="">
      <!--Lang-->
      <span>Lang: </span>
      <span class="text-lg">{{ t("localeflag") }}</span>
      <button aria-label="Change language" class="w-4 h-4 hover:text-blue-200 transition-all duration-100" @click="toggleLangMenu">
        <LanguageIcon />
      </button>
    </div>
    <div class="gap-2 flex flex-row">
      <!-- Copyright information -->
      <span class="text-sm">1.0.0</span>
      <span class="text-sm">|</span>
      <span class="text-sm">MIT License</span>
      <span class="text-sm">|</span>
      <span class="text-sm">{{ new Date().getFullYear() }} © yh</span>
    </div>
    <div class="">
      <button @click="openWindow('login')" aria-label="Sign in" class="w-8 h-8 text-gray-400 flex flex-row">
        <UserIcon class="w-8 h-8 text-gray-400 hover:text-blue-500 transition-all duration-100" />
      </button>
    </div>
  </div>
</template>
|
@ -1,42 +1,8 @@
|
|||||||
<script setup lang="ts">
|
<script setup lang="ts">
|
||||||
// No layout
|
import DraggableWindow from "~/components/DraggableWindow.vue";
|
||||||
definePageMeta({
|
definePageMeta({
|
||||||
layout: false,
|
layout: "macui",
|
||||||
});
|
});
|
||||||
|
|
||||||
// interfaces
|
|
||||||
interface currentNavBarInterface {
|
|
||||||
name: string;
|
|
||||||
icon: string;
|
|
||||||
action: any;
|
|
||||||
flash: boolean;
|
|
||||||
windowAssociated: string;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Import plugins
|
|
||||||
import { gsap } from "gsap";
|
|
||||||
import { TextPlugin } from "gsap/TextPlugin";
|
|
||||||
import { createApp } from "vue";
|
|
||||||
gsap.registerPlugin(TextPlugin);
|
|
||||||
|
|
||||||
// Import Windows
|
|
||||||
import SignIn from "~/components/app/windows/login.vue";
|
|
||||||
|
|
||||||
// Import Shadcn/UI components
|
|
||||||
import AlertComponent from "~/components/ui/alert/Alert.vue";
|
|
||||||
import ButtonComponent from "~/components/ui/button/Button.vue";
|
|
||||||
import DialogComponent from "~/components/ui/dialog/Dialog.vue";
|
|
||||||
import ProgressComponent from "~/components/ui/progress/Progress.vue";
|
|
||||||
import HoverCardComponent from "~/components/ui/hover-card/HoverCard.vue";
|
|
||||||
|
|
||||||
// Icons
|
|
||||||
import { ComputerDesktopIcon, UserIcon, LanguageIcon, ChevronRightIcon } from "@heroicons/vue/24/outline";
|
|
||||||
|
|
||||||
// i18n
|
|
||||||
const { t, locale, locales } = useI18n();
|
|
||||||
const switchLocalePath = useSwitchLocalePath();
|
|
||||||
const localePath = useLocalePath();
|
|
||||||
|
|
||||||
// Router
|
// Router
|
||||||
const router = useRouter();
|
const router = useRouter();
|
||||||
const route = useRoute();
|
const route = useRoute();
|
||||||
@ -55,139 +21,7 @@ watch(() => route.query.openapp, (newVal) => {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
// values
|
|
||||||
const popMessage = ref(null);
|
|
||||||
const menuOpen = ref(false);
|
|
||||||
const langMenuOpen = ref(false);
|
|
||||||
const lang = ref(locale.value);
|
|
||||||
const alertOpen = ref(false);
|
|
||||||
const currentNavBar = ref<currentNavBarInterface[]>([]);
|
|
||||||
|
|
||||||
// Date
|
|
||||||
const currentDate = ref(
|
|
||||||
new Date().toLocaleDateString("zh-TW", {
|
|
||||||
month: "2-digit",
|
|
||||||
day: "2-digit",
|
|
||||||
year: "numeric",
|
|
||||||
hour: "2-digit",
|
|
||||||
minute: "2-digit",
|
|
||||||
second: "2-digit",
|
|
||||||
hour12: false,
|
|
||||||
}),
|
|
||||||
);
|
|
||||||
onMounted(() => {
|
|
||||||
setInterval(() => {
|
|
||||||
currentDate.value = new Date().toLocaleDateString("zh-TW", {
|
|
||||||
month: "2-digit",
|
|
||||||
day: "2-digit",
|
|
||||||
year: "numeric",
|
|
||||||
hour: "2-digit",
|
|
||||||
minute: "2-digit",
|
|
||||||
second: "2-digit",
|
|
||||||
hour12: false,
|
|
||||||
});
|
|
||||||
}, 1000);
|
|
||||||
});
|
|
||||||
|
|
||||||
// functions
|
|
||||||
const openWindow = (windowName?: string) => {
|
|
||||||
if (windowName === "leave") {
|
|
||||||
router.push(localePath("/home"));
|
|
||||||
}
|
|
||||||
console.log(windowName);
|
|
||||||
menuOpen.value = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
const unMinWindow = (windowName?: string) => {
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
// menus
|
|
||||||
const menuItems = [
|
|
||||||
{ name: "Hot News", windowName: "hotnews"} ,
|
|
||||||
{ name: "News", windowName: "news"},
|
|
||||||
{ name: "Sources", windowName: "sources"},
|
|
||||||
{ name: 'About This Website', windowName: "about"},
|
|
||||||
{ name: 'Settings', windowName: "settings"},
|
|
||||||
{ name: 'Leave', windowName: "leave"},
|
|
||||||
]
|
|
||||||
const toggleMenu = () => {
|
|
||||||
menuOpen.value = !menuOpen.value
|
|
||||||
}
|
|
||||||
// Lang Menu
|
|
||||||
const toggleLangMenu = () => {
|
|
||||||
langMenuOpen.value = !langMenuOpen.value
|
|
||||||
}
|
|
||||||
</script>
|
</script>
|
||||||
<template>
|
<template>
|
||||||
<div
|
<DraggableWindow title="Title">Hi This is a window</DraggableWindow>
|
||||||
class="absolute inset-x-0 flex flex-row px-2 py-1 bg-[#7D7C7C]/70 text-white justify-between align-center text-center z-50"
|
</template>
|
||||||
>
|
|
||||||
<!--Menu container-->
|
|
||||||
<div class="flex flex-row g-2 text-gray-400 text-white ">
|
|
||||||
<button @click="toggleMenu" class="w-8 h-8 text-white hover:text-blue-500 transition-all duration-100 flex flex-row">
|
|
||||||
<ComputerDesktopIcon/>
|
|
||||||
</button>
|
|
||||||
<span class="ml-1 mr-2 text-[20px]">|</span>
|
|
||||||
<!--navbar icons for min and max application window-->
|
|
||||||
<div v-for="item in currentNavBar" :key="item.name" class="flex flex-row items-center gap-x-2 hover:bg-gray-100 transition-all duration-100 px-4 py-2 cursor-pointer">
|
|
||||||
<button @click="unMinWindow(item.windowAssociated)" class="flex flex-row items-center gap-x-2 text-gray-400 hover:text-gray-600 transition-all duration-100">
|
|
||||||
<span>{{ item.name }}</span>
|
|
||||||
<span v-if="item.flash" class="animate-ping absolute inline-flex h-3 w-3 rounded-full bg-red-400 opacity-75"></span>
|
|
||||||
<span v-if="item.icon" :class="item.icon">
|
|
||||||
</span>
|
|
||||||
</button>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<div class="text-center align-middle justify-center text-white">{{ currentDate }}</div>
|
|
||||||
</div>
|
|
||||||
<div class="w-full h-[2.5em]"></div>
|
|
||||||
<!--Menu-->
|
|
||||||
<Transition
|
|
||||||
enter-active-class="animate__animated animate__fadeInDown animate_fast03"
|
|
||||||
leave-active-class="animate__animated animate__fadeOutUp animate_fast03"
|
|
||||||
>
|
|
||||||
<div class="m-2 p-2 bg-gray-800 shadow-lg w-fit rounded-[10px]" v-if="menuOpen">
|
|
||||||
<div v-for="item in menuItems" :key="item.name" class="">
|
|
||||||
<button @click="openWindow(item.windowName)" class="flex flex-row items-center gap-x-2 text-gray-400 hover:text-gray-600 transition-all duration-100">
|
|
||||||
<span>{{ item.name }}</span>
|
|
||||||
<ChevronRightIcon class="w-4 h-4 justify-center align-center" />
|
|
||||||
</button>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</Transition>
|
|
||||||
<!--Main desktop contents-->
|
|
||||||
<div
|
|
||||||
class="flex flex-col justify-center align-center text-center absolute w-full h-screen inset-x-0 inset-y-0 z-[-1]"
|
|
||||||
id="desktop"
|
|
||||||
>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
<!--Footer-->
|
|
||||||
<div
|
|
||||||
class="absolute w-[calc(100% - 5px)] inset-x-0 bottom-0 mx-[1.5px] p-3 justify-between align-center flex flex-row"
|
|
||||||
>
|
|
||||||
<div class="">
|
|
||||||
<!--Lang-->
|
|
||||||
<span>Lang: </span>
|
|
||||||
<span class="text-lg">{{ t("localeflag") }}</span>
|
|
||||||
<button class="w-4 h-4 hover:text-blue-200 transition-all duration-100" @click="toggleLangMenu">
|
|
||||||
<LanguageIcon />
|
|
||||||
</button>
|
|
||||||
</div>
|
|
||||||
<div class="gap-2 flex flex-row">
|
|
||||||
<!--版權資訊-->
|
|
||||||
<span class="text-sm">1.0.0</span>
|
|
||||||
<span class="text-sm">|</span>
|
|
||||||
<span class="text-sm">MIT License</span>
|
|
||||||
<span class="text-sm">|</span>
|
|
||||||
<span class="text-sm">{{ new Date().getFullYear() }} © yh</span>
|
|
||||||
</div>
|
|
||||||
<div class="">
|
|
||||||
<button @click="openWindow('login')" class="w-8 h-8 text-gray-400 flex flex-row">
|
|
||||||
<UserIcon class="w-8 h-8 text-gray-400 hover:text-blue-500 transition-all duration-100" />
|
|
||||||
</button>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</template>
|
|
@ -1,7 +1,25 @@
|
|||||||
# Status
|
# Status
|
||||||
|
|
||||||
## cna.py
|
|
||||||
Not working
|
|
||||||
|
|
||||||
## setn.py
|
## setn.py
|
||||||
Working
|
Working
|
||||||
|
|
||||||
|
## tvbs.py
|
||||||
|
Working
|
||||||
|
|
||||||
|
## taisounds.py
|
||||||
|
Working
|
||||||
|
|
||||||
|
## cna.py
|
||||||
|
Broken
|
||||||
|
|
||||||
|
Error: `Error: 'utf-8' codec can't decode byte 0x83 in position 0: invalid start byte`
|
||||||
|
|
||||||
|
## chinatimes.py
|
||||||
|
Broken
|
||||||
|
|
||||||
|
Error: `Error: 'utf-8' codec can't decode byte 0xa3 in position 0: invalid start byte`
|
||||||
|
|
||||||
|
## twreporter.py
|
||||||
|
Broken
|
||||||
|
|
||||||
|
Error: `Error: 'utf-8' codec can't decode byte 0xc0 in position 2: invalid start byte`
|
56
scraping/findText/chinatimes.py
Normal file
56
scraping/findText/chinatimes.py
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
import re
|
||||||
|
from urllib.request import urlopen, Request
|
||||||
|
import chardet
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import json
|
||||||
|
import psycopg2
|
||||||
|
import pandas as pd
|
||||||
|
import dotenv
|
||||||
|
import os
|
||||||
|
import gzip
|
||||||
|
import io
|
||||||
|
|
||||||
|
dotenv.load_dotenv()
|
||||||
|
|
||||||
|
# Browser-like request headers sent with the article request.
headers = {
    # 'User-Agent': 'NewsScraperBot/1.0 (https://github.com/hpware/news-analyze)',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    # '*/*' is the wildcard media range; a bare '*' is not a valid Accept value.
    'Accept': '*/*',
    'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7',
    # Advertise only gzip: it is the only encoding this script decompresses.
    # Offering 'deflate' or 'br' lets the server answer with a body the script
    # then tries to read as UTF-8, which fails with a codec error.
    'Accept-Encoding': 'gzip',
    'Connection': 'keep-alive',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Cache-Control': 'max-age=0',
}
|
||||||
|
|
||||||
|
url = "https://www.chinatimes.com/realtimenews/20250511002798-260407?chdtv"
|
||||||
|
|
||||||
|
# Fetch the article page, decode it, and print its title and body text.
try:
    req = Request(url, headers=headers)
    # The context manager closes the connection even if reading fails.
    with urlopen(req) as response:
        content_encoding = (response.info().get('Content-Encoding') or '').strip().lower()
        raw = response.read()

    if content_encoding == 'gzip':
        raw = gzip.decompress(raw)
    elif content_encoding not in ('', 'identity'):
        # Only gzip can be decoded here; fail with a clear message instead of
        # a misleading UTF-8 decode error on compressed bytes.
        raise ValueError(f"Unsupported Content-Encoding: {content_encoding}")
    html = raw.decode('utf-8')

    soup = BeautifulSoup(html, "html.parser")

    # Headline: <h1 class="article-title">
    title = soup.find('h1', class_='article-title')
    title_text = title.text.strip() if title else "No title found"

    # Body: <div class="article-body">
    article = soup.find('div', class_="article-body")
    content = article.text.strip() if article else "No content found"

    # Print results
    print(f"Title: {title_text}")
    print(f"Content: {content}")

except Exception as e:
    print(f"Error: {str(e)}")
    # Debug aid: if parsing got far enough, list every tag/class pair so the
    # selectors above can be adjusted.
    if 'soup' in locals():
        print("\nAvailable classes in HTML:")
        for tag in soup.find_all(class_=True):
            print(f"Tag: {tag.name}, Class: {tag['class']}")
|
61
scraping/findText/taisounds.py
Normal file
61
scraping/findText/taisounds.py
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
import re
|
||||||
|
from urllib.request import urlopen, Request
|
||||||
|
import chardet
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import json
|
||||||
|
import psycopg2
|
||||||
|
import pandas as pd
|
||||||
|
import dotenv
|
||||||
|
import os
|
||||||
|
import gzip
|
||||||
|
import io
|
||||||
|
|
||||||
|
dotenv.load_dotenv()
|
||||||
|
|
||||||
|
# Browser-like request headers sent with the article request.
headers = {
    # 'User-Agent': 'NewsScraperBot/1.0 (https://github.com/hpware/news-analyze)',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    # '*/*' is the wildcard media range; a bare '*' is not a valid Accept value.
    'Accept': '*/*',
    'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7',
    # Advertise only gzip: it is the only encoding this script decompresses.
    # Offering 'deflate' or 'br' lets the server answer with a body the script
    # then tries to read as UTF-8, which fails with a codec error.
    'Accept-Encoding': 'gzip',
    'Connection': 'keep-alive',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Cache-Control': 'max-age=0',
}
|
||||||
|
|
||||||
|
url = "https://www.taisounds.com/news/content/84/189872"
|
||||||
|
|
||||||
|
|
||||||
|
# Fetch the article page, decode it, and print its title and body text.
try:
    req = Request(url, headers=headers)
    # The context manager closes the connection even if reading fails.
    with urlopen(req) as response:
        content_encoding = (response.info().get('Content-Encoding') or '').strip().lower()
        raw = response.read()

    if content_encoding == 'gzip':
        raw = gzip.decompress(raw)
    elif content_encoding not in ('', 'identity'):
        # Only gzip can be decoded here; fail with a clear message instead of
        # a misleading UTF-8 decode error on compressed bytes.
        raise ValueError(f"Unsupported Content-Encoding: {content_encoding}")
    html = raw.decode('utf-8')

    soup = BeautifulSoup(html, "html.parser")

    # Headline: the first <h1> on the page
    title = soup.find('h1')
    title_text = title.text.strip() if title else "No title found"

    #author = soup.find('div', class_='publish')
    #author_text = author.text.strip().soup.find('a').text.strip() if author else "No author found"

    # Body: <div class="news-box-text">
    article = soup.find('div', class_='news-box-text')
    content = article.text.strip() if article else "No content found"

    # Print results
    print(f"Title: {title_text}")
    #print(f"Author: {author_text}")
    print(f"Content: {content}")

except Exception as e:
    print(f"Error: {str(e)}")
    # Debug aid: if parsing got far enough, list every tag/class pair so the
    # selectors above can be adjusted.
    if 'soup' in locals():
        print("\nAvailable classes in HTML:")
        for tag in soup.find_all(class_=True):
            print(f"Tag: {tag.name}, Class: {tag['class']}")
|
57
scraping/findText/tvbs.py
Normal file
57
scraping/findText/tvbs.py
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
# BROKEN
|
||||||
|
import re
|
||||||
|
from urllib.request import urlopen, Request
|
||||||
|
import chardet
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import json
|
||||||
|
import psycopg2
|
||||||
|
import pandas as pd
|
||||||
|
import dotenv
|
||||||
|
import os
|
||||||
|
import gzip
|
||||||
|
import io
|
||||||
|
|
||||||
|
# Load environment variables from .env file
|
||||||
|
dotenv.load_dotenv()
|
||||||
|
|
||||||
|
# Browser-like request headers sent with the article request.
headers = {
    # 'User-Agent': 'NewsScraperBot/1.0 (https://github.com/hpware/news-analyze)',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    # '*/*' is the wildcard media range; a bare '*' is not a valid Accept value.
    'Accept': '*/*',
    'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7',
    # Advertise only gzip: it is the only encoding this script decompresses.
    # Offering 'deflate' or 'br' lets the server answer with a body the script
    # then tries to read as UTF-8, which fails with a codec error.
    'Accept-Encoding': 'gzip',
    'Connection': 'keep-alive',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Cache-Control': 'max-age=0',
}
|
||||||
|
|
||||||
|
|
||||||
|
url = "https://news.tvbs.com.tw/politics/2866915"
|
||||||
|
|
||||||
|
# Fetch the article page, decode it, and print its title and body text.
try:
    req = Request(url, headers=headers)
    # The context manager closes the connection even if reading fails.
    with urlopen(req) as response:
        content_encoding = (response.info().get('Content-Encoding') or '').strip().lower()
        raw = response.read()

    if content_encoding == 'gzip':
        raw = gzip.decompress(raw)
    elif content_encoding not in ('', 'identity'):
        # Only gzip can be decoded here; fail with a clear message instead of
        # a misleading UTF-8 decode error on compressed bytes.
        raise ValueError(f"Unsupported Content-Encoding: {content_encoding}")
    html = raw.decode('utf-8')

    soup = BeautifulSoup(html, "html.parser")

    # Extract content
    # Headline: <h1 class="title">
    title = soup.find('h1', class_='title')
    title_text = title.text.strip() if title else "No title found"

    # Body: <div class="article_content">; empty string when it is missing
    article = soup.find('div', class_="article_content")
    paragraph = article.text.strip() if article else ""

    # Print results
    print(f"Title: {title_text}")
    print(f"Content: {paragraph}")

except Exception as e:
    print(f"Error: {str(e)}")
|
57
scraping/findText/twreporter.py
Normal file
57
scraping/findText/twreporter.py
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
# BROKEN
|
||||||
|
import re
|
||||||
|
from urllib.request import urlopen, Request
|
||||||
|
import chardet
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import json
|
||||||
|
import psycopg2
|
||||||
|
import pandas as pd
|
||||||
|
import dotenv
|
||||||
|
import os
|
||||||
|
import gzip
|
||||||
|
import io
|
||||||
|
|
||||||
|
# Load environment variables from .env file
|
||||||
|
dotenv.load_dotenv()
|
||||||
|
|
||||||
|
# Browser-like request headers sent with the article request.
headers = {
    # 'User-Agent': 'NewsScraperBot/1.0 (https://github.com/hpware/news-analyze)',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    # '*/*' is the wildcard media range; a bare '*' is not a valid Accept value.
    'Accept': '*/*',
    'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7',
    # Advertise only gzip: it is the only encoding this script decompresses.
    # Offering 'deflate' or 'br' lets the server answer with a body the script
    # then tries to read as UTF-8, which fails with a codec error.
    'Accept-Encoding': 'gzip',
    'Connection': 'keep-alive',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Cache-Control': 'max-age=0',
}
|
||||||
|
|
||||||
|
|
||||||
|
url = "https://www.twreporter.org/a/olena-yagupova-kidnapped-by-russian-soldiers"
|
||||||
|
|
||||||
|
# Fetch the article page, decode it, and print its title and body text.
try:
    req = Request(url, headers=headers)
    # The context manager closes the connection even if reading fails.
    with urlopen(req) as response:
        content_encoding = (response.info().get('Content-Encoding') or '').strip().lower()
        raw = response.read()

    if content_encoding == 'gzip':
        raw = gzip.decompress(raw)
    elif content_encoding not in ('', 'identity'):
        # Only gzip can be decoded here; fail with a clear message instead of
        # a misleading UTF-8 decode error on compressed bytes.
        raise ValueError(f"Unsupported Content-Encoding: {content_encoding}")
    html = raw.decode('utf-8')

    soup = BeautifulSoup(html, "html.parser")

    # Extract content
    # The class names carry a generated suffix, so match them by prefix with a
    # compiled regex. A plain string passed to class_ is compared literally and
    # would never match.
    title = soup.find('div', class_=re.compile(r'headline__DefaultContainer'))
    title_text = title.text.strip() if title else "No title found"

    article = soup.find('div', class_=re.compile(r'article-page__ContentBlock'))
    paragraph = article.text.strip() if article else ""

    # Print results
    print(f"Title: {title_text}")
    print(f"Content: {paragraph}")

except Exception as e:
    print(f"Error: {str(e)}")
|
Loading…
x
Reference in New Issue
Block a user