#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
FreshRSS 文章备份脚本 - 简化版
"""

import argparse
import json
import os
import re
import sys
import time
from datetime import datetime
from pathlib import Path
from urllib.parse import urlparse
import html

try:
    import requests
    from bs4 import BeautifulSoup
except ImportError:
    print("需要安装：pip install requests beautifulsoup4")
    sys.exit(1)


def sanitize_filename(name):
    """Make *name* safe to use as a file/directory name.

    Replaces characters that are invalid on Windows (and the full-width
    question mark) with underscores, strips leading/trailing dots and
    spaces, and caps the result at 200 characters. Returns 'unnamed'
    when nothing usable remains.
    """
    # NOTE: the original pattern listed only the full-width '？'; the
    # ASCII '?' is also forbidden on Windows, so both are replaced.
    name = re.sub(r'[<>:"/\\|?？*]', '_', name)
    name = name.strip('. ')
    return name[:200] if len(name) > 200 else (name or 'unnamed')


def download_image(session, url, save_path, timeout=30):
    """Download one image to *save_path* using the given session.

    Empty and data: URLs are skipped; an already-existing file counts as
    success. Returns True on success, False otherwise. Never raises.
    """
    try:
        if not url or url.startswith('data:'):
            return False
        if os.path.exists(save_path):
            return True
        headers = {'User-Agent': 'Mozilla/5.0'}
        response = session.get(url, headers=headers, timeout=timeout, stream=True)
        if response.status_code == 200:
            with open(save_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
            return True
    except Exception:
        # Remove any half-written file; otherwise the exists() shortcut
        # above would treat a truncated download as complete next time.
        try:
            if os.path.exists(save_path):
                os.remove(save_path)
        except OSError:
            pass
        print(f"  图片下载失败：{url}")
    return False


def extract_images(session, content, article_id, images_dir):
    """Localize the <img> tags of an article's HTML.

    Each image (except obvious icons/logos) is downloaded into
    *images_dir* and its src rewritten to a relative ../images/ path.
    Returns a tuple of (possibly rewritten HTML, number downloaded).
    """
    if not content:
        return content, 0

    soup = BeautifulSoup(content, 'html.parser')
    saved = 0

    for idx, tag in enumerate(soup.find_all('img')):
        src = tag.get('src') or tag.get('data-src')
        if not src:
            continue
        lowered = src.lower()
        # Skip decorative assets that are not worth archiving.
        if 'icon' in lowered or 'logo' in lowered or 'favicon' in lowered:
            continue

        suffix = os.path.splitext(urlparse(src).path)[1] or '.jpg'
        filename = f"{article_id}_img{idx}{suffix}"
        target = images_dir / filename

        if download_image(session, src, str(target)):
            tag['src'] = f"../images/{filename}"
            saved += 1

    # Only re-serialize the soup when something actually changed.
    if saved > 0:
        return str(soup), saved
    return content, saved


def get_auth_token(base_url, username, api_password):
    """Authenticate against the GReader ClientLogin endpoint.

    Returns the Auth token string. Raises Exception on a non-200
    response or when the response contains no Auth line.
    """
    url = f"{base_url}/api/greader.php/accounts/ClientLogin"
    params = {'Email': username, 'Passwd': api_password}
    response = requests.get(url, params=params)
    if response.status_code != 200:
        raise Exception(f"认证失败：{response.status_code}")
    # The body is "SID=...\nLSID=...\nAuth=...". Find the Auth line
    # explicitly instead of assuming it is third, and use partition so
    # any '=' characters inside the token itself are preserved.
    for line in response.text.splitlines():
        if line.startswith('Auth='):
            return line.partition('=')[2]
    raise Exception("认证失败：响应中缺少 Auth 令牌")


def get_subscriptions(base_url, auth_token):
    """Fetch the subscription list via the GReader API.

    Returns a list of dicts with keys id, name, url, rss_url and
    category. Raises requests.HTTPError on a non-2xx response.
    """
    url = f"{base_url}/api/greader.php/reader/api/0/subscription/list"
    headers = {'Authorization': f'GoogleLogin auth={auth_token}'}
    params = {'output': 'json'}
    response = requests.get(url, headers=headers, params=params)
    response.raise_for_status()

    data = response.json()
    subscriptions = []
    for sub in data.get('subscriptions', []):
        # Category ids look like "user/-/label/<name>". Take everything
        # after "label/" so label names containing '/' are not truncated
        # (the old split('/')[-1] kept only the last path segment).
        category_name = '未分类'
        for cat in sub.get('categories', []):
            cat_id = cat.get('id', '')
            if 'label/' in cat_id:
                category_name = cat_id.split('label/', 1)[1]
                break

        subscriptions.append({
            'id': sub.get('id', ''),
            'name': sub.get('title', 'Unknown'),
            'url': sub.get('htmlUrl', ''),
            'rss_url': sub.get('url', ''),
            'category': category_name
        })

    return subscriptions


def get_article_ids(base_url, auth_token, feed_id, date_str):
    """Collect the ids of all items in *feed_id* for one local day.

    date_str is 'YYYY-MM-DD'. Follows GReader continuation tokens until
    the stream is exhausted. Returns a list of id strings.
    """
    from datetime import timedelta
    date_obj = datetime.strptime(date_str, '%Y-%m-%d')
    start_ts = int(date_obj.timestamp())
    end_ts = int((date_obj + timedelta(days=1)).timestamp())
    article_ids = []
    continuation = None

    while True:
        url = f"{base_url}/api/greader.php/reader/api/0/stream/items/ids"
        headers = {
            'Authorization': f'GoogleLogin auth={auth_token}',
            'Content-Type': 'application/x-www-form-urlencoded'
        }

        # GReader API: 'output' selects JSON; 'ot' excludes items older
        # than start_ts and 'nt' excludes items newer than end_ts. The
        # previous code sent 'ot': 'json' (a mangled 'output') plus
        # non-standard 'ck'/'ct' keys, so the requested date window was
        # never actually applied server-side.
        data = {'s': feed_id, 'output': 'json', 'n': 1000,
                'ot': start_ts, 'nt': end_ts}
        if continuation:
            data['c'] = continuation

        response = requests.post(url, headers=headers, data=data)
        if response.status_code != 200:
            break

        result = response.json()
        for item in result.get('itemRefs', []):
            # ids may arrive as bare integers; normalize to str before
            # stripping any stream prefix.
            article_ids.append(str(item.get('id', '')).split('/')[-1])

        continuation = result.get('continuation')
        if not continuation:
            break

        time.sleep(0.3)

    return article_ids


def get_article_content(base_url, auth_token, article_id):
    """Fetch one article's full record from the GReader contents endpoint.

    Returns a dict with id/title/author/link/published/content/tags,
    or None when the request fails or the item is missing.
    """
    endpoint = f"{base_url}/api/greader.php/reader/api/0/stream/items/contents"
    headers = {
        'Authorization': f'GoogleLogin auth={auth_token}',
        'Content-Type': 'application/x-www-form-urlencoded'
    }
    payload = {
        'i': f'tag:google.com,2005:reader/item/{article_id}',
        'output': 'json'
    }

    response = requests.post(endpoint, headers=headers, data=payload)
    if response.status_code != 200:
        return None

    items = response.json().get('items', [])
    if not items:
        return None
    item = items[0]

    author = '未知作者'
    if 'origin' in item:
        author = item['origin'].get('author', author)

    published = item.get('published', 0)
    if published:
        published_str = datetime.fromtimestamp(published).strftime('%Y-%m-%d %H:%M:%S')
    else:
        published_str = ''

    def _first_html(entries):
        # Return the first entry whose type is text/html, else None.
        for entry in entries:
            if entry.get('type') == 'text/html':
                return entry
        return None

    link = ''
    alternate = _first_html(item.get('alternate', []))
    if alternate is not None:
        link = alternate.get('href', '')

    # Prefer the full content block; fall back to the summary whenever
    # the content is missing or empty.
    content = ''
    entry = _first_html(item.get('content', []))
    if entry is not None:
        content = entry.get('content', '')
    if not content:
        entry = _first_html(item.get('summary', []))
        if entry is not None:
            content = entry.get('content', '')

    tags = [c.split('/')[-1] for c in item.get('categories', []) if 'label/' in c]

    return {
        'id': article_id,
        'title': item.get('title', '无标题'),
        'author': author,
        'link': link,
        'published': published_str,
        'content': content,
        'tags': tags
    }


def save_article(article, feed_dir, images_dir, session, download_images):
    """Render one article to <feed_dir>/<article id>.html.

    When download_images is true, referenced images are first localized
    into *images_dir* and the article HTML rewritten to point at them.
    Returns (path of the written file, number of images downloaded).
    """
    article_id = article['id']
    # Display the real title (HTML-escaped). The previous code ran the
    # title through sanitize_filename() first, mangling characters such
    # as ':' and '?' into underscores, even though the title is only
    # shown in the page — the file is named by article id, never title.
    title = article['title'] or '无标题'
    article_path = feed_dir / f"{article_id}.html"
    
    content = article['content']
    img_count = 0
    if download_images and content:
        content, img_count = extract_images(session, content, article_id, images_dir)
    
    tags_html = ''
    if article['tags']:
        tags_html = '<div class="tags">标签：' + ''.join(
            f'<span class="tag">{html.escape(tag)}</span>' for tag in article['tags']
        ) + '</div>'
    
    html_content = f"""<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{html.escape(title)}</title>
    <style>
        body {{ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; line-height: 1.6; max-width: 800px; margin: 0 auto; padding: 20px; background: #f9f9f9; }}
        .article {{ background: white; padding: 30px; border-radius: 8px; box-shadow: 0 2px 8px rgba(0,0,0,0.1); }}
        h1 {{ color: #333; font-size: 1.8em; margin-bottom: 10px; }}
        .meta {{ color: #666; font-size: 0.9em; margin-bottom: 20px; padding-bottom: 15px; border-bottom: 1px solid #eee; }}
        .meta a {{ color: #007bff; text-decoration: none; }}
        .content {{ color: #333; font-size: 1.1em; }}
        .content img {{ max-width: 100%; height: auto; display: block; margin: 15px auto; border-radius: 4px; }}
        .content a {{ color: #007bff; }}
        .tags {{ margin-top: 20px; padding-top: 15px; border-top: 1px solid #eee; }}
        .tag {{ display: inline-block; background: #e7f3ff; color: #007bff; padding: 3px 10px; border-radius: 12px; font-size: 0.85em; margin-right: 8px; margin-bottom: 5px; }}
        .back-link {{ display: inline-block; margin-bottom: 20px; color: #007bff; text-decoration: none; }}
    </style>
</head>
<body>
    <a href="index.html" class="back-link">← 返回列表</a>
    <div class="article">
        <h1>{html.escape(title)}</h1>
        <div class="meta">
            <div>作者：<strong>{html.escape(article['author'])}</strong></div>
            <div>发布时间：<time>{article['published']}</time></div>
            <div>原文：<a href="{html.escape(article['link'])}" target="_blank">查看</a></div>
        </div>
        <div class="content">{content}</div>
        {tags_html}
    </div>
</body>
</html>
"""
    
    with open(article_path, 'w', encoding='utf-8') as f:
        f.write(html_content)
    
    return article_path, img_count


def main():
    """Command-line entry point: back up one day's FreshRSS articles.

    Authenticates via the GReader API, then writes one HTML file per
    article (plus a per-feed index.html and an images/ subfolder) under
    --output-dir, and finally a feeds.json summary of the whole run.
    """
    parser = argparse.ArgumentParser(description='FreshRSS 备份')
    parser.add_argument('--url', required=True)
    parser.add_argument('--user', required=True)
    parser.add_argument('--api-password', required=True)
    parser.add_argument('--date', required=True)
    parser.add_argument('--output-dir', required=True)
    parser.add_argument('--download-images', action='store_true')
    
    args = parser.parse_args()
    
    backup_dir = Path(args.output_dir)
    backup_dir.mkdir(parents=True, exist_ok=True)
    
    print(f"开始备份 {args.date}")
    
    # One shared session so image downloads can reuse connections.
    session = requests.Session()
    session.headers.update({'User-Agent': 'Mozilla/5.0'})
    
    print("认证中...")
    auth_token = get_auth_token(args.url, args.user, args.api_password)
    print("✅ 认证成功")
    
    print("获取订阅列表...")
    subscriptions = get_subscriptions(args.url, auth_token)
    print(f"找到 {len(subscriptions)} 个订阅源")
    
    feeds_data = []
    total_articles = 0
    total_images = 0
    
    # Back up each subscription into its own directory.
    for feed in subscriptions:
        feed_name = sanitize_filename(feed['name'])
        print(f"\n备份：{feed_name}")
        
        feed_dir = backup_dir / feed_name
        feed_dir.mkdir(parents=True, exist_ok=True)
        
        images_dir = feed_dir / 'images'
        images_dir.mkdir(exist_ok=True)
        
        article_ids = get_article_ids(args.url, auth_token, feed['id'], args.date)
        
        if not article_ids:
            print("  无文章")
            feeds_data.append({'name': feed_name, 'article_count': 0, 'articles': []})
            continue
        
        print(f"  找到 {len(article_ids)} 篇文章")
        
        articles_data = []
        for i, aid in enumerate(article_ids, 1):
            article = get_article_content(args.url, auth_token, aid)
            # Skip articles whose content could not be fetched.
            if not article:
                continue
            
            article_path, img_count = save_article(
                article, feed_dir, images_dir, session, args.download_images
            )
            
            articles_data.append({
                'id': article['id'],
                'title': article['title'],
                'author': article['author'],
                'link': article['link'],
                'published': article['published'],
                'file': article_path.name
            })
            
            total_articles += 1
            total_images += img_count
            
            # Report progress every 20 articles.
            if i % 20 == 0:
                print(f"  进度：{i}/{len(article_ids)}")
            
            # Small delay between requests to be gentle on the server.
            time.sleep(0.2)
        
        # Build the per-feed index page listing all saved articles.
        article_links = ''
        for art in articles_data:
            article_links += f'''
        <li class="article-item">
            <a href="{art['file']}">{html.escape(art['title'])}</a>
            <div class="article-meta">
                作者：{html.escape(art['author'])} | 时间：{art['published']} | 
                <a href="{html.escape(art['link'])}" target="_blank">原文</a>
            </div>
        </li>'''
        
        index_html = f"""<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <title>{html.escape(feed_name)} - FreshRSS 备份</title>
    <style>
        body {{ font-family: -apple-system, sans-serif; margin: 20px; background: #f5f5f5; }}
        .container {{ max-width: 1000px; margin: 0 auto; background: white; padding: 20px; border-radius: 8px; }}
        h1 {{ color: #333; border-bottom: 2px solid #007bff; padding-bottom: 10px; }}
        .article-list {{ list-style: none; padding: 0; }}
        .article-item {{ margin: 15px 0; padding: 15px; background: #f8f9fa; border-radius: 4px; border-left: 3px solid #007bff; }}
        .article-item a {{ color: #007bff; text-decoration: none; font-weight: 500; }}
        .article-meta {{ color: #666; font-size: 0.85em; margin-top: 8px; }}
        .back-link {{ display: inline-block; margin-bottom: 20px; color: #007bff; text-decoration: none; }}
    </style>
</head>
<body>
    <div class="container">
        <a href="../index.html" class="back-link">← 返回</a>
        <h1>{html.escape(feed_name)}</h1>
        <p>共 <strong>{len(articles_data)}</strong> 篇文章</p>
        <ul class="article-list">{article_links}</ul>
    </div>
</body>
</html>
"""
        
        with open(feed_dir / 'index.html', 'w', encoding='utf-8') as f:
            f.write(index_html)
        
        feeds_data.append({
            'name': feed_name,
            'article_count': len(articles_data),
            'articles': articles_data
        })
        
        # Pause between feeds to avoid hammering the server.
        time.sleep(0.5)
    
    # Machine-readable summary of everything that was backed up.
    with open(backup_dir / 'feeds.json', 'w', encoding='utf-8') as f:
        json.dump(feeds_data, f, ensure_ascii=False, indent=2)
    
    print(f"\n✅ 完成！")
    print(f"文章：{total_articles}")
    print(f"图片：{total_images}")

# Allow importing this module without triggering a backup run.
if __name__ == '__main__':
    main()
