# Scrape the headline news from Yahoo News (Taiwan)
# 2024/10/15
# Imports
from urllib.parse import urljoin  # resolves hrefs against the page URL

import requests  # used to send HTTP requests
from bs4 import BeautifulSoup  # used to parse HTML

# URL of the page to scrape (Yahoo Taiwan news front page).
url = 'https://tw.news.yahoo.com/'
# Browser-like User-Agent header sent with the GET request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
# Seconds to wait for the server before giving up; without a timeout
# requests can block indefinitely on an unresponsive host.
REQUEST_TIMEOUT = 10


def resolve_link(href, base=url):
    """Return *href* as an absolute URL, resolved against *base*.

    Site-relative paths ('/x.html') are joined onto the base URL, while
    hrefs that are already absolute or protocol-relative are returned
    pointing at their own host instead of being blindly prefixed.
    """
    return urljoin(base, href)


def fetch_articles():
    """Download the page at ``url`` and return its article ``<li>`` elements.

    Returns:
        The result of ``soup.find_all`` for ``<li>`` tags carrying the
        ``js-stream-content`` class (empty when nothing matches).

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-success HTTP status (via ``raise_for_status``).
    """
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()
    # Force UTF-8 decoding so Chinese text is handled correctly.
    response.encoding = 'utf-8'

    soup = BeautifulSoup(response.text, 'html.parser')
    return soup.find_all('li', class_='js-stream-content')


def print_article(article):
    """Print the title, link, source/time and summary found in *article*.

    Each field is optional: it is printed only when the corresponding
    element exists. A ``---`` separator line is always printed last.
    """
    # Headline text.
    title = article.find('h3')
    if title:
        print("标题:", title.text.strip())

    # First anchor in the item; its href may be relative or absolute.
    link = article.find('a')
    if link and 'href' in link.attrs:
        print("链接:", resolve_link(link['href']))

    # Source and publication time share one span.
    source_time = article.find('span', class_='C(#959595)')
    if source_time:
        print("来源和时间:", source_time.text.strip())

    # Summary paragraph.
    summary = article.find('p')
    if summary:
        print("摘要:", summary.text.strip())

    print("---")  # separator between articles


def main():
    """Fetch the front page and print every article found on it."""
    try:
        articles = fetch_articles()
    except requests.RequestException as exc:
        # Exit with a short message and a non-zero status rather than a
        # raw traceback when the page cannot be retrieved.
        raise SystemExit("请求失败: {}".format(exc)) from exc

    for article in articles:
        print_article(article)

    # Hint that the selector or page structure may have changed.
    if not articles:
        print("未找到任何新闻文章，可能需要调整选择器或检查网页结构。")


if __name__ == "__main__":
    main()