// akmon/uni_modules/ak-ai-news/services/AIContentAnalysisService.uts

// AI Content Analysis Service - Content classification, sentiment analysis, and quality assessment

import {
  ContentAnalysisResult,
  EntityResult,
  TopicResult,
  CategoryResult,
  AIProvider,
  AIResponse,
  AIServiceConfig,
  BatchProcessingOptions,
  AIServiceError,
  ContentInfo
} from '../types/ai-types.uts'

// Names of the individual analyses a caller can request via `AnalysisOptions.types`.
// Modeled as a string-literal union rather than an enum; the same names are used as
// keys of the per-type counters in `AnalysisStats.byAnalysisType`.
type AnalysisType = 'sentiment' | 'entities' | 'topics' | 'categories' | 'readability' | 'credibility' | 'toxicity' | 'summary' | 'keywords'

// Options accepted by the content-analysis entry points.
// - types:     analyses to run (required).
// - provider:  AI provider to use; when omitted the service picks one itself.
// - language:  language tag of the content; when omitted the service detects it.
// NOTE(review): `model`, `includeScores`, `detailedResults` and `customCategories`
// are declared here but are not read by any code in this file — confirm whether
// other modules consume them.
type AnalysisOptions = {
  types: AnalysisType[]
  provider?: AIProvider
  model?: string
  includeScores?: boolean
  detailedResults?: boolean
  language?: string
  customCategories?: string[]
}

// Result of a content quality assessment.
// `overallScore` is the unweighted mean of the seven dimension scores below
// (see `assessQuality`). Each dimension score is a number; higher means better.
type QualityAssessment = {
  overallScore: number
  factualAccuracy: number
  sourceReliability: number
  writingQuality: number
  objectivity: number
  completeness: number
  timeliness: number
  relevance: number
}

// Shape of a single extracted keyword with its frequency, importance and word class.
// NOTE(review): this type is not referenced elsewhere in this file (keyword
// extraction here returns plain strings) — confirm whether it is still needed.
type KeywordResult = {
  keyword: string
  frequency: number
  importance: number
  type: 'noun' | 'verb' | 'adjective' | 'entity' | 'concept'
}

// Running usage statistics kept by the analysis service.
// - totalAnalyses / successCount / errorCount: call counters.
// - avgProcessingTimeMs: running average of per-analysis processing time.
// - totalCost: initialized to 0; no code in this file updates it.
// - byProvider / byAnalysisType: per-provider and per-analysis-type counters.
type AnalysisStats = {
  totalAnalyses: number
  successCount: number
  errorCount: number
  avgProcessingTimeMs: number
  totalCost: number
  byProvider: Record<AIProvider, number>
  byAnalysisType: Record<AnalysisType, number>
}

/**
 * AI content analysis service.
 *
 * Provides sentiment analysis, entity recognition, topic extraction, content
 * classification, readability/credibility/toxicity scoring, summarization,
 * keyword extraction and an overall quality assessment for news text.
 *
 * The provider-specific methods in this class are local mocks (keyword and
 * regex heuristics plus simulated latency); no network request is made here.
 */
export class AIContentAnalysisService {
  private config: AIServiceConfig
  private stats: AnalysisStats = {
    totalAnalyses: 0,
    successCount: 0,
    errorCount: 0,
    avgProcessingTimeMs: 0,
    totalCost: 0,
    byProvider: {} as Record<AIProvider, number>,
    byAnalysisType: {} as Record<AnalysisType, number>
  }

  // Predefined news categories with the keywords used for keyword-based matching.
  private readonly NEWS_CATEGORIES = [
    { id: 'politics', name: '政治', keywords: ['政府', '政策', '选举', '法律', '议会', '总统', '部长'] },
    { id: 'economy', name: '经济', keywords: ['经济', '金融', '股市', '投资', '银行', '贸易', 'GDP'] },
    { id: 'technology', name: '科技', keywords: ['科技', '人工智能', '互联网', '软件', '硬件', '创新', '数字化'] },
    { id: 'sports', name: '体育', keywords: ['体育', '足球', '篮球', '奥运', '比赛', '运动员', '锦标赛'] },
    { id: 'entertainment', name: '娱乐', keywords: ['娱乐', '电影', '音乐', '明星', '综艺', '演出', '艺术'] },
    { id: 'health', name: '健康', keywords: ['健康', '医疗', '病毒', '疫苗', '医院', '药物', '疾病'] },
    { id: 'education', name: '教育', keywords: ['教育', '学校', '大学', '学生', '教师', '考试', '学习'] },
    { id: 'environment', name: '环境', keywords: ['环境', '气候', '污染', '环保', '生态', '绿色', '可持续'] },
    { id: 'international', name: '国际', keywords: ['国际', '外交', '战争', '和平', '联合国', '条约', '全球'] },
    { id: 'social', name: '社会', keywords: ['社会', '社区', '公益', '慈善', '志愿者', '文化', '传统'] }
  ]

  // Sentence terminators: ASCII plus the full-width Chinese forms. Without the
  // Chinese forms, Chinese text is never split into sentences.
  private readonly SENTENCE_DELIMITERS = /[.!?。！？]+/

  /**
   * @param config Service configuration; provider API keys are read from it when
   *               choosing a default provider.
   */
  constructor(config: AIServiceConfig) {
    this.config = config
    this.initializeStats()
  }

  /**
   * Analyze a piece of content.
   *
   * Sentiment, entities, topics, categories, summary and keywords are only
   * computed when listed in `options.types` (otherwise their neutral default is
   * returned). Readability, credibility and toxicity are always computed.
   * A failure of one asynchronous analysis degrades that field to its default
   * instead of failing the whole call.
   *
   * @param content Content text
   * @param options Analysis options
   * @returns A response with `success: true` and the merged result, or
   *          `success: false` with an error message and the code 'ANALYSIS_ERROR'.
   */
  async analyzeContent(
    content: string,
    options: AnalysisOptions = {
      types: ['sentiment', 'entities', 'topics', 'categories', 'readability', 'summary', 'keywords']
    }
  ): Promise<AIResponse<ContentAnalysisResult>> {
    try {
      this.stats.totalAnalyses++
      const startTime = Date.now()

      // Choose the provider: explicit option wins, otherwise pick from config.
      const provider = options.provider || this.selectBestProvider()

      // Run all analyses; positions in this array are relied on below.
      const results = await Promise.allSettled([
        this.analyzeSentiment(content, provider, options),
        this.extractEntities(content, provider, options),
        this.extractTopics(content, provider, options),
        this.classifyContent(content, options),
        this.assessReadability(content, options.language),
        this.assessCredibility(content),
        this.assessToxicity(content, provider),
        this.generateSummary(content, provider, options),
        this.extractKeywords(content, options)
      ])

      const processingTime = Date.now() - startTime

      // Extract the sentiment once; it feeds both the score and the label.
      const sentimentScore = this.extractResult(results[0], 0)

      // Merge the individual results.
      const analysisResult: ContentAnalysisResult = {
        contentId: this.generateContentId(content),
        sentimentScore,
        sentimentLabel: this.getSentimentLabel(sentimentScore),
        readabilityScore: this.extractResult(results[4], 0.5),
        credibilityScore: this.extractResult(results[5], 0.5),
        toxicityScore: this.extractResult(results[6], 0),
        keywords: this.extractResult(results[8], []),
        entities: this.extractResult(results[1], []),
        topics: this.extractResult(results[2], []),
        categories: this.extractResult(results[3], []),
        summary: this.extractResult(results[7], ''),
        keyPhrases: this.extractKeyPhrases(content),
        language: options.language || await this.detectLanguage(content),
        processingTimeMs: processingTime,
        provider
      }

      // Update statistics. The success counter is bumped first because the
      // running average in updateStats is taken over successful analyses.
      this.stats.successCount++
      this.updateStats(provider, options.types, processingTime)

      return {
        success: true,
        data: analysisResult,
        processingTimeMs: processingTime,
        provider
      }

    } catch (error) {
      this.stats.errorCount++
      const aiError: AIServiceError = {
        code: 'ANALYSIS_ERROR',
        message: this.getErrorMessage(error, 'Content analysis failed'),
        provider: options.provider,
        retryable: this.isRetryableError(error)
      }

      return {
        success: false,
        error: aiError.message,
        errorCode: aiError.code
      }
    }
  }

  /**
   * Analyze many contents in batches.
   *
   * Contents are split into groups of `batchOptions.batchSize`; every item of a
   * group is analyzed concurrently and groups run one after another, separated
   * by `batchOptions.delayMs`. Items whose analysis fails are reported through
   * `onError` and left out of the returned array, so the result can be shorter
   * than `contents`. `onProgress` receives the number of successful results so
   * far and the total number of contents.
   *
   * NOTE(review): `batchOptions.concurrency` and `batchOptions.retryCount` are
   * accepted but not used by this implementation.
   *
   * @param contents Array of content texts
   * @param options Analysis options
   * @param batchOptions Batch processing options
   */
  async analyzeContentBatch(
    contents: string[],
    options: AnalysisOptions = { types: ['sentiment', 'categories', 'summary'] },
    batchOptions: BatchProcessingOptions = {
      batchSize: 5,
      concurrency: 2,
      retryCount: 2,
      delayMs: 1000
    }
  ): Promise<AIResponse<ContentAnalysisResult[]>> {
    try {
      const results: ContentAnalysisResult[] = []
      const batches = this.createBatches(contents, batchOptions.batchSize)

      for (let i = 0; i < batches.length; i++) {
        const batch = batches[i]
        const batchPromises = batch.map(async (content) => {
          try {
            const response = await this.analyzeContent(content, options)
            if (response.success && response.data) {
              return response.data
            }
            throw new Error(response.error || 'Analysis failed')
          } catch (error) {
            if (batchOptions.onError) {
              batchOptions.onError(error, content)
            }
            throw error
          }
        })

        const batchResults = await Promise.allSettled(batchPromises)

        for (const result of batchResults) {
          if (result.status === 'fulfilled') {
            results.push(result.value)
          }
        }

        // Progress callback.
        if (batchOptions.onProgress) {
          batchOptions.onProgress(results.length, contents.length)
        }

        // Delay between batches (skipped after the last one).
        if (i < batches.length - 1 && batchOptions.delayMs > 0) {
          await this.delay(batchOptions.delayMs)
        }
      }

      return { success: true, data: results }

    } catch (error) {
      return {
        success: false,
        error: this.getErrorMessage(error, 'Batch analysis failed')
      }
    }
  }

  /**
   * Assess content quality along seven dimensions and their unweighted mean.
   *
   * @param content Content text
   * @param metadata Content metadata; `sourceUrl`, `publishedAt` and `categoryId`
   *                 are used when present.
   */
  async assessQuality(content: string, metadata?: Partial<ContentInfo>): Promise<AIResponse<QualityAssessment>> {
    try {
      const [
        factualScore,
        sourceScore,
        writingScore,
        objectivityScore,
        completenessScore,
        timelinessScore,
        relevanceScore
      ] = await Promise.all([
        this.assessFactualAccuracy(content),
        this.assessSourceReliability(metadata?.sourceUrl || ''),
        this.assessWritingQuality(content),
        this.assessObjectivity(content),
        this.assessCompleteness(content),
        this.assessTimeliness(metadata?.publishedAt || Date.now()),
        this.assessRelevance(content, metadata?.categoryId)
      ])

      const overallScore = (
        factualScore + sourceScore + writingScore + objectivityScore +
        completenessScore + timelinessScore + relevanceScore
      ) / 7

      const assessment: QualityAssessment = {
        overallScore,
        factualAccuracy: factualScore,
        sourceReliability: sourceScore,
        writingQuality: writingScore,
        objectivity: objectivityScore,
        completeness: completenessScore,
        timeliness: timelinessScore,
        relevance: relevanceScore
      }

      return { success: true, data: assessment }

    } catch (error) {
      return {
        success: false,
        error: this.getErrorMessage(error, 'Quality assessment failed')
      }
    }
  }

  /**
   * Get a snapshot of the usage statistics.
   *
   * The nested counter maps are copied as well, so mutating the returned object
   * cannot corrupt the service's internal counters.
   */
  getStatistics(): AnalysisStats {
    return {
      ...this.stats,
      byProvider: { ...this.stats.byProvider },
      byAnalysisType: { ...this.stats.byAnalysisType }
    }
  }

  // Private methods

  /** Sentiment score in [-1, 1]; 0 when 'sentiment' was not requested. */
  private async analyzeSentiment(content: string, provider: AIProvider, options: AnalysisOptions): Promise<number> {
    if (!options.types.includes('sentiment')) return 0

    switch (provider) {
      case 'openai':
        return await this.analyzeSentimentWithOpenAI(content)
      case 'google':
        return await this.analyzeSentimentWithGoogle(content)
      case 'baidu':
        return await this.analyzeSentimentWithBaidu(content)
      default:
        return this.analyzeSentimentBasic(content)
    }
  }

  /** Extracted entities; empty when 'entities' was not requested. */
  private async extractEntities(content: string, provider: AIProvider, options: AnalysisOptions): Promise<EntityResult[]> {
    if (!options.types.includes('entities')) return []

    switch (provider) {
      case 'openai':
        return await this.extractEntitiesWithOpenAI(content)
      case 'google':
        return await this.extractEntitiesWithGoogle(content)
      default:
        return this.extractEntitiesBasic(content)
    }
  }

  /** Extracted topics; empty when 'topics' was not requested. */
  private async extractTopics(content: string, provider: AIProvider, options: AnalysisOptions): Promise<TopicResult[]> {
    if (!options.types.includes('topics')) return []

    switch (provider) {
      case 'openai':
        return await this.extractTopicsWithOpenAI(content)
      default:
        return this.extractTopicsBasic(content)
    }
  }

  /**
   * Keyword-based classification against NEWS_CATEGORIES.
   * Confidence is the fraction of a category's keywords found in the content;
   * the three most confident categories are returned.
   */
  private async classifyContent(content: string, options: AnalysisOptions): Promise<CategoryResult[]> {
    if (!options.types.includes('categories')) return []

    const categories: CategoryResult[] = []
    // Lower-case once instead of once per keyword.
    const haystack = content.toLowerCase()

    for (const category of this.NEWS_CATEGORIES) {
      const matches = category.keywords.filter(keyword =>
        haystack.includes(keyword.toLowerCase())
      )

      if (matches.length > 0) {
        const confidence = Math.min(matches.length / category.keywords.length, 1)
        categories.push({
          categoryId: category.id,
          categoryName: category.name,
          confidence,
          level: 1
        })
      }
    }

    // Sort by confidence, highest first.
    return categories.sort((a, b) => b.confidence - a.confidence).slice(0, 3)
  }

  /**
   * Simplified readability score in [0, 1], starting from 1 and penalizing long
   * sentences and long words. Returns 0 for content without sentences or words.
   *
   * NOTE(review): words are split on whitespace, so unsegmented Chinese text is
   * seen as very few, very long "words"; `language` is currently unused.
   */
  private assessReadability(content: string, language?: string): number {
    const sentences = content.split(this.SENTENCE_DELIMITERS).filter(s => s.trim().length > 0)
    const words = content.split(/\s+/).filter(w => w.length > 0)
    const characters = content.replace(/\s/g, '').length

    if (sentences.length === 0 || words.length === 0) return 0

    const avgWordsPerSentence = words.length / sentences.length
    const avgCharsPerWord = characters / words.length

    let score = 1.0

    // Sentence-length penalty (the two steps are cumulative).
    if (avgWordsPerSentence > 20) score -= 0.2
    if (avgWordsPerSentence > 30) score -= 0.3

    // Vocabulary-complexity penalty (the two steps are cumulative).
    if (avgCharsPerWord > 6) score -= 0.1
    if (avgCharsPerWord > 8) score -= 0.2

    return Math.max(0, Math.min(1, score))
  }

  /**
   * Heuristic credibility score in [0, 1]: starts at 0.5, rewards attribution
   * markers and concrete figures, penalizes absolute and emotional wording.
   */
  private assessCredibility(content: string): number {
    let score = 0.5 // base score

    // Contains a citation or source marker.
    if (content.includes('据') || content.includes('根据') || content.includes('来源')) {
      score += 0.2
    }

    // Contains concrete data (percentages, large numbers, years).
    if (/\d+%|\d+万|\d+亿|\d{4}年/.test(content)) {
      score += 0.15
    }

    // Penalize absolute wording.
    const extremeWords = ['绝对', '必然', '完全', '永远', '从来', '所有']
    const extremeCount = extremeWords.filter(word => content.includes(word)).length
    score -= extremeCount * 0.05

    // Penalize emotional wording.
    const emotionalWords = ['震惊', '愤怒', '可怕', '惊人', '令人发指']
    const emotionalCount = emotionalWords.filter(word => content.includes(word)).length
    score -= emotionalCount * 0.03

    return Math.max(0, Math.min(1, score))
  }

  /**
   * Basic toxicity score: number of distinct listed toxic words found, divided
   * by 10 and capped at 1. `provider` is currently unused.
   */
  private async assessToxicity(content: string, provider: AIProvider): Promise<number> {
    const toxicWords = ['仇恨', '歧视', '暴力', '威胁', '诽谤', '侮辱']
    const toxicCount = toxicWords.filter(word => content.includes(word)).length

    return Math.min(toxicCount / 10, 1)
  }

  /**
   * Naive extractive summary: the first two sentences longer than 10 characters,
   * joined with '。'. A trailing '。' is appended only when more sentences follow.
   * Returns '' when 'summary' was not requested.
   */
  private async generateSummary(content: string, provider: AIProvider, options: AnalysisOptions): Promise<string> {
    if (!options.types.includes('summary')) return ''

    const sentences = content
      .split(this.SENTENCE_DELIMITERS)
      .map(s => s.trim())
      .filter(s => s.length > 10)
    return sentences.slice(0, 2).join('。') + (sentences.length > 2 ? '。' : '')
  }

  /**
   * Simple frequency-based keyword extraction: up to 10 most frequent
   * whitespace-separated tokens (lower-cased, longer than one character).
   * Returns [] when 'keywords' was not requested.
   */
  private extractKeywords(content: string, options: AnalysisOptions): string[] {
    if (!options.types.includes('keywords')) return []

    const words = content
      .replace(/[^\u4e00-\u9fa5\w\s]/g, '') // keep Chinese characters, word characters and whitespace
      .split(/\s+/)
      .filter(word => word.length > 1)

    // Count frequencies in a Map: a plain object would collide with inherited
    // properties for tokens such as "constructor".
    const frequency = new Map<string, number>()
    for (const word of words) {
      const lower = word.toLowerCase()
      frequency.set(lower, (frequency.get(lower) ?? 0) + 1)
    }

    // Sort by frequency (ties keep first-seen order) and return the top 10.
    return Array.from(frequency.entries())
      .sort((a, b) => b[1] - a[1])
      .slice(0, 10)
      .map(entry => entry[0])
  }

  /**
   * Extract up to five distinct two- and three-word phrases, in order of
   * appearance. Two-word phrases must be longer than 4 characters and
   * three-word phrases longer than 6.
   */
  private extractKeyPhrases(content: string): string[] {
    const phrases: string[] = []
    // Drop empty tokens produced by leading/trailing whitespace.
    const words = content.split(/\s+/).filter(word => word.length > 0)

    for (let i = 0; i < words.length - 1; i++) {
      const twoWordPhrase = words.slice(i, i + 2).join(' ')
      if (twoWordPhrase.length > 4) {
        phrases.push(twoWordPhrase)
      }

      if (i < words.length - 2) {
        const threeWordPhrase = words.slice(i, i + 3).join(' ')
        if (threeWordPhrase.length > 6) {
          phrases.push(threeWordPhrase)
        }
      }
    }

    // De-duplicate and return the first five.
    return [...new Set(phrases)].slice(0, 5)
  }

  /**
   * Basic language detection by counting Chinese characters versus ASCII letters.
   * Returns 'zh-CN', 'en', or 'auto' on a tie (including empty content).
   */
  private async detectLanguage(content: string): Promise<string> {
    // The global flag is required: without it match() returns a single match
    // and the "count" is never more than 1.
    const chineseMatches = (content.match(/[\u4e00-\u9fff]/g) || []).length
    const englishMatches = (content.match(/[a-zA-Z]/g) || []).length

    if (chineseMatches > englishMatches) return 'zh-CN'
    if (englishMatches > chineseMatches) return 'en'
    return 'auto'
  }

  /** Map a sentiment score to a label using a ±0.1 neutral band. */
  private getSentimentLabel(score: number): 'positive' | 'negative' | 'neutral' {
    if (score > 0.1) return 'positive'
    if (score < -0.1) return 'negative'
    return 'neutral'
  }

  /** Build a content id from the current time and the first 10 characters of the content. */
  private generateContentId(content: string): string {
    return `content_${Date.now()}_${content.substring(0, 10).replace(/\s/g, '_')}`
  }

  /** Unwrap a settled promise result, falling back to `defaultValue` on rejection. */
  private extractResult<T>(result: PromiseSettledResult<T>, defaultValue: T): T {
    return result.status === 'fulfilled' ? result.value : defaultValue
  }

  /** Pick the first provider that has an API key configured; defaults to 'openai'. */
  private selectBestProvider(): AIProvider {
    if (this.config.openai?.apiKey) return 'openai'
    if (this.config.google?.apiKey) return 'google'
    if (this.config.baidu?.apiKey) return 'baidu'
    return 'openai'
  }

  /**
   * Split `items` into consecutive groups of at most `batchSize` elements.
   * A missing, non-finite or smaller-than-1 batch size falls back to 1, because
   * a zero or negative step would never advance the loop.
   */
  private createBatches<T>(items: T[], batchSize: number): T[][] {
    const size = Number.isFinite(batchSize) && batchSize >= 1 ? Math.floor(batchSize) : 1
    const batches: T[][] = []
    for (let i = 0; i < items.length; i += size) {
      batches.push(items.slice(i, i + size))
    }
    return batches
  }

  /** Resolve after `ms` milliseconds. */
  private async delay(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms))
  }

  /** Seed the per-provider and per-analysis-type counters with zero. */
  private initializeStats(): void {
    const providers: AIProvider[] = ['openai', 'google', 'baidu', 'custom']
    const analysisTypes: AnalysisType[] = ['sentiment', 'entities', 'topics', 'categories', 'readability', 'credibility', 'toxicity', 'summary', 'keywords']

    providers.forEach(provider => {
      this.stats.byProvider[provider] = 0
    })

    analysisTypes.forEach(type => {
      this.stats.byAnalysisType[type] = 0
    })
  }

  /**
   * Record one successful analysis. Expects `stats.successCount` to already
   * include the analysis being recorded.
   */
  private updateStats(provider: AIProvider, types: AnalysisType[], processingTime: number): void {
    // `?? 0` keeps counters numeric for keys that were not pre-seeded.
    this.stats.byProvider[provider] = (this.stats.byProvider[provider] ?? 0) + 1
    types.forEach(type => {
      this.stats.byAnalysisType[type] = (this.stats.byAnalysisType[type] ?? 0) + 1
    })

    // Running mean over successful analyses only; failed runs contribute no
    // processing time and must not dilute the average.
    const samples = Math.max(this.stats.successCount, 1)
    this.stats.avgProcessingTimeMs = (this.stats.avgProcessingTimeMs * (samples - 1) + processingTime) / samples
  }

  /** True for errors with a retryable `code` or a `status` of 500 or above. */
  private isRetryableError(error: any): boolean {
    // Anything can be thrown, including null/undefined.
    if (error == null) return false
    const retryableCodes = ['TIMEOUT', 'RATE_LIMIT', 'SERVER_ERROR']
    return retryableCodes.includes(error.code) || error.status >= 500
  }

  /** Read a non-empty `message` from a thrown value, or return `fallback`. */
  private getErrorMessage(error: any, fallback: string): string {
    if (error != null && typeof error.message === 'string' && error.message.length > 0) {
      return error.message
    }
    return fallback
  }

  // Quality assessment methods

  /**
   * Score in [0.5, 0.9] based on the presence of verifiable-looking facts:
   * dates, figures, place markers and person/organization markers.
   */
  private async assessFactualAccuracy(content: string): Promise<number> {
    let score = 0.5

    // Contains a date.
    if (/\d{4}年|\d{1,2}月|\d{1,2}日/.test(content)) score += 0.1

    // Contains concrete numbers.
    if (/\d+\.?\d*%|\d+万|\d+亿|\d+千/.test(content)) score += 0.1

    // Contains a place marker.
    if (/市|省|县|区|国|州/.test(content)) score += 0.1

    // Contains a person or organization marker.
    if (/先生|女士|部长|主席|公司|集团|大学|医院/.test(content)) score += 0.1

    return Math.min(1, score)
  }

  /**
   * Simple source reliability score from substring checks on the URL.
   * Returns 0.3 when no URL is given.
   *
   * NOTE(review): the checks run against the whole URL (path included), not
   * just the host name.
   */
  private async assessSourceReliability(sourceUrl: string): Promise<number> {
    if (!sourceUrl) return 0.3

    const reliableDomains = ['gov.cn', 'edu.cn', 'xinhuanet.com', 'people.com.cn', 'cctv.com']
    const domain = sourceUrl.toLowerCase()

    for (const reliableDomain of reliableDomains) {
      if (domain.includes(reliableDomain)) return 0.9
    }

    if (domain.includes('.gov') || domain.includes('.edu')) return 0.8
    if (domain.includes('news') || domain.includes('media')) return 0.6

    return 0.4
  }

  /**
   * Writing quality score: 0.5 base, +0.1 for more than two sentences, +0.1 for
   * more than one paragraph, plus up to 0.3 for vocabulary diversity.
   */
  private async assessWritingQuality(content: string): Promise<number> {
    let score = 0.5

    // Sentence structure.
    const sentences = content.split(this.SENTENCE_DELIMITERS).filter(s => s.trim().length > 0)
    if (sentences.length > 2) score += 0.1

    // Paragraph structure.
    const paragraphs = content.split('\n\n').filter(p => p.trim().length > 0)
    if (paragraphs.length > 1) score += 0.1

    // Vocabulary diversity; guarded so empty content does not divide by zero
    // and turn the score (and the overall quality score) into NaN.
    const words = content.split(/\s+/).filter(w => w.length > 0)
    const uniqueWords = new Set(words.map(w => w.toLowerCase()))
    const diversity = words.length > 0 ? uniqueWords.size / words.length : 0
    score += diversity * 0.3

    return Math.min(1, score)
  }

  /** Objectivity score in [0, 1]: 0.7 base, penalized for subjective and emotional wording. */
  private async assessObjectivity(content: string): Promise<number> {
    let score = 0.7 // base objectivity score

    // Subjective wording penalty.
    const subjectiveWords = ['我认为', '个人觉得', '显然', '明显', '无疑', '肯定']
    const subjectiveCount = subjectiveWords.filter(word => content.includes(word)).length
    score -= subjectiveCount * 0.1

    // Emotional wording penalty.
    const emotionalWords = ['愤怒', '激动', '兴奋', '失望', '震惊', '惊喜']
    const emotionalCount = emotionalWords.filter(word => content.includes(word)).length
    score -= emotionalCount * 0.05

    return Math.max(0, Math.min(1, score))
  }

  /**
   * Completeness score: 0.3 base, up to +0.6 for length, +0.06 for each of the
   * five news elements detected (who, what, when, where, why), capped at 1.
   */
  private async assessCompleteness(content: string): Promise<number> {
    let score = 0.3

    // Length-based contribution (steps are cumulative).
    if (content.length > 200) score += 0.2
    if (content.length > 500) score += 0.2
    if (content.length > 1000) score += 0.2

    // Key news elements.
    const hasWho = /人|者|员|家|国|公司|组织/.test(content)
    const hasWhat = /事件|活动|发生|进行|宣布|决定/.test(content)
    const hasWhen = /\d{4}年|\d{1,2}月|\d{1,2}日|今天|昨天|明天/.test(content)
    const hasWhere = /市|省|县|区|国|地区|地点/.test(content)
    const hasWhy = /因为|由于|原因|目的|为了/.test(content)

    const elements = [hasWho, hasWhat, hasWhen, hasWhere, hasWhy].filter(Boolean).length
    score += elements * 0.06

    return Math.min(1, score)
  }

  /**
   * Timeliness score from the age of the content: the newer, the higher.
   * @param publishedAt Publication time as epoch milliseconds (compared with Date.now()).
   */
  private async assessTimeliness(publishedAt: number): Promise<number> {
    const now = Date.now()
    const ageHours = (now - publishedAt) / (1000 * 60 * 60)

    if (ageHours < 1) return 1.0
    if (ageHours < 6) return 0.9
    if (ageHours < 24) return 0.7
    if (ageHours < 72) return 0.5
    if (ageHours < 168) return 0.3
    return 0.1
  }

  /**
   * Relevance of the content to a category: fraction of the category's keywords
   * found plus 0.3, capped at 1. Returns 0.5 when the category is missing or unknown.
   */
  private async assessRelevance(content: string, categoryId?: string): Promise<number> {
    if (!categoryId) return 0.5

    const category = this.NEWS_CATEGORIES.find(c => c.id === categoryId)
    if (!category) return 0.5

    const haystack = content.toLowerCase()
    const matches = category.keywords.filter(keyword =>
      haystack.includes(keyword.toLowerCase())
    ).length

    return Math.min(1, matches / category.keywords.length + 0.3)
  }

  // Mock AI service methods

  /** Mock OpenAI sentiment: simulated latency plus a keyword-based score in [-1, 1]. */
  private async analyzeSentimentWithOpenAI(content: string): Promise<number> {
    await this.delay(Math.random() * 500 + 200)

    const positiveWords = ['好', '棒', '优秀', '成功', '胜利', '喜悦', '高兴', '满意']
    const negativeWords = ['坏', '糟糕', '失败', '问题', '困难', '悲伤', '愤怒', '失望']

    const positiveCount = positiveWords.filter(word => content.includes(word)).length
    const negativeCount = negativeWords.filter(word => content.includes(word)).length

    const score = (positiveCount - negativeCount) / Math.max(positiveCount + negativeCount, 1)
    return Math.max(-1, Math.min(1, score))
  }

  /** Mock Google sentiment: simulated latency and a random score in [-1, 1). */
  private async analyzeSentimentWithGoogle(content: string): Promise<number> {
    await this.delay(Math.random() * 400 + 150)
    return Math.random() * 2 - 1 // -1 to 1
  }

  /** Mock Baidu sentiment: simulated latency and a random score in [-1, 1). */
  private async analyzeSentimentWithBaidu(content: string): Promise<number> {
    await this.delay(Math.random() * 300 + 100)
    return Math.random() * 2 - 1
  }

  /** Basic keyword-based sentiment score in [-1, 1] (Chinese and English word lists). */
  private analyzeSentimentBasic(content: string): number {
    const positiveWords = ['好', '棒', '优秀', '成功', '胜利', 'great', 'good', 'excellent']
    const negativeWords = ['坏', '糟糕', '失败', '问题', 'bad', 'terrible', 'awful']

    const haystack = content.toLowerCase()
    const positiveCount = positiveWords.filter(word => haystack.includes(word)).length
    const negativeCount = negativeWords.filter(word => haystack.includes(word)).length

    return (positiveCount - negativeCount) / Math.max(positiveCount + negativeCount, 1)
  }

  /**
   * Mock OpenAI entity extraction: simulated latency plus regex patterns for
   * organizations, locations, persons, dates and money. Returns at most 10 entities.
   */
  private async extractEntitiesWithOpenAI(content: string): Promise<EntityResult[]> {
    await this.delay(Math.random() * 600 + 300)

    const entities: EntityResult[] = []
    const patterns = [
      { regex: /[\u4e00-\u9fa5]{2,4}(公司|集团|企业|机构)/g, type: 'organization' as const },
      { regex: /[\u4e00-\u9fa5]{2,3}(市|省|县|区)/g, type: 'location' as const },
      { regex: /[\u4e00-\u9fa5]{2,4}(先生|女士|部长|主席|总裁|经理)/g, type: 'person' as const },
      { regex: /\d{4}年\d{1,2}月\d{1,2}日/g, type: 'date' as const },
      { regex: /\d+\.?\d*(万|亿|千)?(元|美元|英镑)/g, type: 'money' as const }
    ]

    patterns.forEach(pattern => {
      const matches = content.matchAll(pattern.regex)
      for (const match of matches) {
        const start = match.index || 0
        entities.push({
          text: match[0],
          type: pattern.type,
          confidence: 0.8 + Math.random() * 0.2,
          startPosition: start,
          endPosition: start + match[0].length
        })
      }
    })

    return entities.slice(0, 10)
  }

  /** Mock Google entity extraction: simulated latency, then the basic extractor. */
  private async extractEntitiesWithGoogle(content: string): Promise<EntityResult[]> {
    await this.delay(Math.random() * 500 + 250)
    return this.extractEntitiesBasic(content)
  }

  /** Basic entity extraction: organizations matched by a company/group suffix. */
  private extractEntitiesBasic(content: string): EntityResult[] {
    const entities: EntityResult[] = []

    const orgMatches = content.matchAll(/[\u4e00-\u9fa5]{2,4}(公司|集团)/g)
    for (const match of orgMatches) {
      const start = match.index || 0
      entities.push({
        text: match[0],
        type: 'organization',
        confidence: 0.7,
        startPosition: start,
        endPosition: start + match[0].length
      })
    }

    return entities
  }

  /**
   * Mock OpenAI topic extraction: simulated latency plus keyword matching
   * against the first three entries of NEWS_CATEGORIES, sorted by confidence.
   */
  private async extractTopicsWithOpenAI(content: string): Promise<TopicResult[]> {
    await this.delay(Math.random() * 400 + 200)

    const topics: TopicResult[] = []
    const haystack = content.toLowerCase()

    for (const category of this.NEWS_CATEGORIES.slice(0, 3)) {
      const matches = category.keywords.filter(keyword =>
        haystack.includes(keyword.toLowerCase())
      )

      if (matches.length > 0) {
        topics.push({
          name: category.name,
          confidence: matches.length / category.keywords.length,
          keywords: matches
        })
      }
    }

    return topics.sort((a, b) => b.confidence - a.confidence)
  }

  /** Basic topic extraction: detects only the technology topic via a fixed keyword list. */
  private extractTopicsBasic(content: string): TopicResult[] {
    const topics: TopicResult[] = []

    const techKeywords = ['科技', '技术', '互联网', 'AI', '人工智能']
    const techMatches = techKeywords.filter(keyword => content.includes(keyword))

    if (techMatches.length > 0) {
      topics.push({
        name: '科技',
        confidence: techMatches.length / techKeywords.length,
        keywords: techMatches
      })
    }

    return topics
  }
}