#!/usr/bin/env python3
"""文件分类器 - 根据规则对文件进行分类"""

from pathlib import Path
from datetime import datetime
from typing import Dict, List, Any, Optional
import logging
import re

logger = logging.getLogger(__name__)


class FileClassifier:
    """Rule-based file classifier supporting several classification strategies.

    The classifier reads up to four optional sections from ``config``:

    * ``by_extension`` -- ``enabled`` flag plus ``categories``, a mapping of
      category name to a list of extensions.
    * ``by_date`` -- ``enabled`` flag plus an optional ``strftime`` ``format``.
    * ``by_size`` -- ``enabled`` flag plus ``categories``, a mapping of
      category name to a dict with optional ``min`` / ``max`` bounds.
    * ``custom`` -- a ``rules`` list; each rule carries ``type``,
      ``category`` and type-specific keys (``pattern`` or
      ``min_size`` / ``max_size``).
    """

    def __init__(self, config: Dict):
        """Store the config sections and precompute the extension lookup table.

        Args:
            config: Classification settings. Missing or empty (``None``)
                sections are treated as empty dicts.
        """
        self.config = config
        # ``or {}`` also covers sections that are present but set to None.
        self.ext_rules = config.get('by_extension') or {}
        self.date_rules = config.get('by_date') or {}
        self.size_rules = config.get('by_size') or {}
        self.custom_rules = config.get('custom') or {}

        # Built once so that each classification is a single dict lookup.
        self.ext_map = self._build_extension_map()

    def _build_extension_map(self) -> Dict[str, str]:
        """Build the mapping from normalized extension to category name.

        Extensions are lower-cased and stripped of surrounding whitespace and
        leading dots so that ``'.JPG'`` and ``'jpg'`` in the config both match
        the dot-less key used by ``_classify_by_extension``.

        Returns:
            Dict mapping an extension (no dot, lower case) to its category.
        """
        ext_map: Dict[str, str] = {}
        categories = self.ext_rules.get('categories') or {}

        for category, extensions in categories.items():
            if not extensions:
                # Categories without extensions (e.g. 'other') have no keys.
                continue
            for ext in extensions:
                key = str(ext).strip().lower().lstrip('.')
                if key:
                    ext_map[key] = category

        return ext_map

    def classify(self, file_info: Dict[str, Any]) -> str:
        """Classify a file and return its target folder path.

        Priority:
            1. Custom rules: the first match wins and is returned on its own.
            2. Otherwise the results of every enabled strategy are joined
               with ``'/'`` in the order extension, date, size.

        Args:
            file_info: Mapping with a required ``'name'`` key and optional
                ``'size'``, ``'modified'`` (a ``datetime``) and ``'path'``.
                A missing or ``None`` size counts as 0; a missing or ``None``
                modification time falls back to ``datetime.now()``.

        Returns:
            The category path, or ``'other'`` when no strategy produced one.

        Raises:
            KeyError: If ``file_info`` has no ``'name'`` entry.
        """
        name = file_info['name']
        size = file_info.get('size') or 0
        modified = file_info.get('modified') or datetime.now()

        # 1. Custom rules take precedence over everything else.
        if self.custom_rules:
            custom_category = self._apply_custom_rules(file_info)
            if custom_category:
                return custom_category

        categories = []

        # 2. Extension
        if self.ext_rules.get('enabled', False):
            ext_category = self._classify_by_extension(name)
            if ext_category:
                categories.append(ext_category)

        # 3. Date
        if self.date_rules.get('enabled', False):
            date_category = self._classify_by_date(modified)
            if date_category:
                categories.append(date_category)

        # 4. Size
        if self.size_rules.get('enabled', False):
            size_category = self._classify_by_size(size)
            if size_category:
                categories.append(size_category)

        if categories:
            return '/'.join(categories)

        return 'other'

    def _classify_by_extension(self, filename: str) -> Optional[str]:
        """Return the category for ``filename`` based on its last suffix.

        Returns:
            ``'hidden'`` for suffix-less names starting with a dot, ``None``
            for other suffix-less names, the mapped category when the
            extension is known, and ``'other'`` for unknown extensions.
        """
        ext = Path(filename).suffix.lower().lstrip('.')

        if not ext:
            # No extension: dot-files are grouped as hidden files.
            if filename.startswith('.'):
                return 'hidden'
            return None

        return self.ext_map.get(ext) or 'other'

    def _classify_by_date(self, modified: datetime) -> str:
        """Format ``modified`` with the configured pattern (default ``'%Y/%m'``)."""
        date_format = self.date_rules.get('format', '%Y/%m')
        return modified.strftime(date_format)

    def _classify_by_size(self, size: int) -> Optional[str]:
        """Return the first size category whose inclusive range contains ``size``.

        Categories are tried in descending order of their ``min`` bound, so
        when ranges overlap the one with the largest lower bound wins.

        Returns:
            The matching category name, or ``None`` when nothing matches.
        """
        categories = self.size_rules.get('categories') or {}

        sorted_cats = sorted(
            categories.items(),
            key=lambda item: item[1].get('min', 0),
            reverse=True,
        )

        for category, rules in sorted_cats:
            min_size = rules.get('min', 0)
            max_size = rules.get('max', float('inf'))

            if min_size <= size <= max_size:
                return category

        return None

    def _apply_custom_rules(self, file_info: Dict[str, Any]) -> Optional[str]:
        """Return the category of the first custom rule matching ``file_info``.

        Supported rule types:
            * ``'filename'`` -- case-insensitive regex search on the name.
            * ``'path'`` -- case-insensitive regex search on the path.
            * ``'size'`` -- inclusive ``min_size`` / ``max_size`` range check.

        Rules without a category, and rules whose pattern is not a valid
        regular expression, are skipped so that later rules are still tried.

        Returns:
            The matching rule's category, or ``None`` when no rule matches.
        """
        name = file_info['name']
        # ``str`` lets callers pass pathlib.Path objects as well as strings.
        path = str(file_info.get('path') or '')
        size = file_info.get('size') or 0

        rules = self.custom_rules.get('rules') or []

        for rule in rules:
            category = rule.get('category')
            if not category:
                # A rule with no target cannot classify anything; returning
                # here would hide every rule that follows it.
                continue

            rule_type = rule.get('type')

            if rule_type in ('filename', 'path'):
                pattern = rule.get('pattern')
                if not pattern:
                    continue
                subject = name if rule_type == 'filename' else path
                try:
                    matched = re.search(pattern, subject, re.IGNORECASE)
                except re.error as exc:
                    logger.warning(
                        "Skipping custom rule with invalid pattern %r: %s",
                        pattern, exc,
                    )
                    continue
                if matched:
                    return category

            elif rule_type == 'size':
                min_size = rule.get('min_size', 0)
                max_size = rule.get('max_size', float('inf'))
                if min_size <= size <= max_size:
                    return category

        return None

    def get_category_stats(self, files: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Aggregate file count and total size per category.

        Args:
            files: File-info mappings accepted by ``classify``.

        Returns:
            Dict mapping each category to ``{'count', 'size', 'size_human'}``,
            where ``size_human`` is the total formatted by ``_format_size``.
        """
        stats: Dict[str, Any] = {}

        for file_info in files:
            category = self.classify(file_info)
            entry = stats.setdefault(category, {'count': 0, 'size': 0})
            entry['count'] += 1
            # A missing or None size contributes nothing to the total.
            entry['size'] += file_info.get('size') or 0

        # Add a human-readable rendering of each total.
        for entry in stats.values():
            entry['size_human'] = self._format_size(entry['size'])

        return stats

    @staticmethod
    def _format_size(size_bytes: int) -> str:
        """Format a byte count with two decimals and a 1024-based unit (B..PB)."""
        value = float(size_bytes)
        for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
            if value < 1024:
                return f"{value:.2f} {unit}"
            value /= 1024
        return f"{value:.2f} PB"


class SmartClassifier(FileClassifier):
    """Classifier that can optionally refine image/video categories by content.

    With ``use_ai`` disabled it behaves exactly like ``FileClassifier``.
    Content recognition itself is not implemented yet:
    ``_classify_by_content`` always returns ``None``.
    """

    # Leading category components that are eligible for content refinement.
    _AI_CATEGORIES = ('images', 'videos')

    def __init__(self, config: Dict, use_ai: bool = False):
        """Initialise the base classifier and, if requested, the AI model.

        Args:
            config: Classification settings passed to ``FileClassifier``.
            use_ai: Whether to attempt content-based refinement.
        """
        super().__init__(config)
        self.use_ai = use_ai
        self.ai_model = None

        if use_ai:
            self._init_ai_model()

    def _init_ai_model(self):
        """Prepare the AI model used for image/video content recognition.

        Currently a placeholder: no model is loaded and ``ai_model`` stays
        ``None``. The ``ImportError`` handler is kept for the optional
        dependency that the commented-out integration would import.
        """
        try:
            # Optional: integrate a model such as CLIP for content recognition
            # import clip
            # self.ai_model, _ = clip.load("ViT-B/32")
            logger.info("AI 内容识别已启用")
        except ImportError:
            logger.warning("AI 模型依赖未安装，降级为规则分类")
            self.use_ai = False

    def classify(self, file_info: Dict[str, Any]) -> str:
        """Classify a file, appending a content category for images/videos.

        The base result may be a combined path such as ``'images/2024/05'``,
        so eligibility is decided from its leading component rather than by
        comparing the whole path.

        Returns:
            The base category, with ``'/<content category>'`` appended when
            AI is enabled and ``_classify_by_content`` yields a value.
        """
        category = super().classify(file_info)

        if not self.use_ai:
            return category

        top_level = category.split('/', 1)[0]
        if top_level in self._AI_CATEGORIES:
            ai_category = self._classify_by_content(file_info)
            if ai_category:
                return f"{category}/{ai_category}"

        return category

    def _classify_by_content(self, file_info: Dict[str, Any]) -> Optional[str]:
        """Return a content-based sub-category, or ``None`` if unavailable.

        Not implemented yet; always returns ``None``.
        """
        # TODO: implement AI content recognition
        # - images: people, landscapes, documents, screenshots, ...
        # - videos: movies, tutorials, surveillance, animation, ...
        return None