Initial commit of akmon project

2026-01-20 08:04:15 +08:00
commit 77a2bab985
1309 changed files with 343305 additions and 0 deletions
--- a/uni_modules/ak-ai-news/services/ai-content-analysis-service.uts
+++ b/uni_modules/ak-ai-news/services/ai-content-analysis-service.uts
@@ -0,0 +1,563 @@
+// AI content analysis service
+// filepath: h:\blews\akmon\uni_modules\ak-ai-news\services\ai-content-analysis-service.uts
+
+import { AkReq } from '@/uni_modules/ak-req/index.uts'
+import type { 
+  ContentAnalysisResult, 
+  EntityResult, 
+  TopicResult, 
+  CategoryResult,
+  AIProvider, 
+  AIResponse,
+  AIServiceConfig,
+  ContentInfo
+} from '../types/ai-types.uts'
+
+export class AIContentAnalysisService {
+  private config: AIServiceConfig
+  private req: AkReq
+  private cache: Map<string, ContentAnalysisResult> = new Map()
+  
+  /**
+   * Creates the service with a fresh HTTP client.
+   *
+   * @param config Service configuration; its `openai` section is read by the
+   *               LLM-backed analyses of this class.
+   */
+  constructor(config: AIServiceConfig) {
+    this.req = new AkReq()
+    this.config = config
+  }
+  
+  /**
+   * Runs the complete analysis pipeline for a single piece of content.
+   *
+   * Sentiment, entities, topics, readability and credibility each run unless
+   * the matching `include*` option is explicitly `false`. Summary, keywords,
+   * categories and key phrases are always produced. Results are memoised in
+   * an in-memory map keyed by content id and option set.
+   *
+   * @param content Content to analyse.
+   * @param options Optional switches, plus a `language` override that takes
+   *                precedence over `content.originalLanguage` in the result.
+   * @returns A success response carrying the analysis. Any error raised while
+   *          analysing is caught and reported as a failure response with
+   *          error code `CONTENT_ANALYSIS_FAILED`.
+   */
+  async analyzeContent(
+    content: ContentInfo,
+    options?: {
+      includeEntities?: boolean
+      includeTopics?: boolean
+      includeSentiment?: boolean
+      includeReadability?: boolean
+      includeCredibility?: boolean
+      language?: string
+    }
+  ): Promise<AIResponse<ContentAnalysisResult>> {
+    try {
+      const startTime = Date.now()
+
+      // A cache hit is reported with a processing time of 0.
+      const cacheKey = this.generateContentCacheKey(content.id, options)
+      const cached = this.cache.get(cacheKey)
+      if (cached) {
+        return {
+          success: true,
+          data: cached,
+          processingTimeMs: 0
+        }
+      }
+
+      // Every analysis owns a fixed slot, so switching one off cannot shift
+      // the others: a skipped analysis resolves to null and falls back to its
+      // default below. (Collecting only the enabled promises and then reading
+      // fixed indexes assigned results to the wrong fields whenever an
+      // analysis was disabled.)
+      const skipped: Promise<any> = Promise.resolve(null)
+      const pending: Promise<any>[] = [
+        // [0] sentiment
+        options?.includeSentiment !== false
+          ? this.analyzeSentiment(content.content, content.title)
+          : skipped,
+        // [1] entities
+        options?.includeEntities !== false
+          ? this.extractEntities(content.content)
+          : skipped,
+        // [2] topics
+        options?.includeTopics !== false
+          ? this.extractTopics(content.content)
+          : skipped,
+        // [3] readability
+        options?.includeReadability !== false
+          ? this.analyzeReadability(content.content)
+          : skipped,
+        // [4] credibility
+        options?.includeCredibility !== false
+          ? this.analyzeCredibility(content)
+          : skipped,
+        // [5] summary, [6] keywords, [7] categories: independent of the
+        // analyses above, so they run concurrently instead of one by one.
+        this.generateSummary(content.content),
+        this.extractKeywords(content.content),
+        this.classifyContent(content)
+      ]
+
+      const results = await Promise.all(pending)
+      const sentiment = results[0]
+
+      const analysisResult: ContentAnalysisResult = {
+        contentId: content.id,
+        sentimentScore: sentiment?.score || 0,
+        sentimentLabel: sentiment?.label || 'neutral',
+        readabilityScore: results[3] || 0.5,
+        credibilityScore: results[4] || 0.5,
+        toxicityScore: 0, // toxicity detection is not implemented
+        keywords: results[6] || [],
+        entities: results[1] || [],
+        topics: results[2] || [],
+        categories: results[7] || [],
+        summary: results[5] || '',
+        keyPhrases: this.extractKeyPhrases(content.content),
+        language: options?.language || content.originalLanguage,
+        processingTimeMs: Date.now() - startTime,
+        provider: 'openai'
+      }
+
+      // Store the result for later calls with the same id and options.
+      this.cache.set(cacheKey, analysisResult)
+
+      return {
+        success: true,
+        data: analysisResult,
+        processingTimeMs: analysisResult.processingTimeMs
+      }
+
+    } catch (error) {
+      console.error('内容分析失败:', error)
+      return {
+        success: false,
+        error: error instanceof Error ? error.message : '内容分析服务异常',
+        errorCode: 'CONTENT_ANALYSIS_FAILED'
+      }
+    }
+  }
+  
+  /**
+   * Asks the configured chat-completions endpoint for the sentiment of a text.
+   *
+   * Only the first 2000 characters of the (optionally title-prefixed) text
+   * are sent. Any failure (missing configuration, unsuccessful request,
+   * unparsable reply) is logged and reported as a neutral result.
+   *
+   * @param content Body text to analyse.
+   * @param title Optional title, placed in front of the body.
+   * @returns `score` clamped to [-1, 1] and `label`, which is one of
+   *          `positive`, `negative` or `neutral`.
+   */
+  private async analyzeSentiment(content: string, title?: string): Promise<{score: number, label: string}> {
+    try {
+      const openaiConfig = this.config.openai
+      if (!openaiConfig) {
+        throw new Error('OpenAI配置未找到')
+      }
+
+      const text = title ? `${title}\n\n${content}` : content
+      const prompt = `请分析以下文本的情感倾向，返回一个-1到1之间的数值（-1表示非常负面，0表示中性，1表示非常正面）和对应的标签（positive/negative/neutral）。
+      
+文本：${text.substring(0, 2000)}
+
+请以JSON格式返回：{"score": 数值, "label": "标签"}`
+
+      const response = await this.req.post<any>({
+        url: `${openaiConfig.baseURL || 'https://api.openai.com'}/v1/chat/completions`,
+        headers: {
+          'Authorization': `Bearer ${openaiConfig.apiKey}`,
+          'Content-Type': 'application/json'
+        },
+        data: {
+          model: openaiConfig.model,
+          messages: [
+            {
+              role: 'system',
+              content: '你是一个专业的文本情感分析助手。'
+            },
+            {
+              role: 'user',
+              content: prompt
+            }
+          ],
+          max_tokens: 100,
+          temperature: 0.1
+        }
+      })
+
+      if (!response.success || !response.data?.choices?.[0]) {
+        throw new Error('情感分析API调用失败')
+      }
+
+      // The reply is free text: a Markdown code fence around the JSON would
+      // make JSON.parse fail, so strip one if present.
+      const raw = `${response.data.choices[0].message.content}`.trim()
+      const jsonText = raw.replace(/^```(?:json)?\s*/i, '').replace(/\s*```$/, '')
+      const result = JSON.parse(jsonText)
+
+      const score = Math.max(-1, Math.min(1, parseFloat(result.score) || 0))
+
+      // Accept only the three labels the prompt asks for; anything else is
+      // replaced by a label derived from the sign of the score.
+      let label = typeof result.label === 'string' ? result.label.trim().toLowerCase() : ''
+      if (label !== 'positive' && label !== 'negative' && label !== 'neutral') {
+        if (score > 0) label = 'positive'
+        else if (score < 0) label = 'negative'
+        else label = 'neutral'
+      }
+
+      return { score, label }
+
+    } catch (error) {
+      console.error('情感分析失败:', error)
+      return { score: 0, label: 'neutral' }
+    }
+  }
+  
+  /**
+   * Asks the configured chat-completions endpoint to list named entities
+   * (people, places, organisations, dates, amounts, ...) found in the text.
+   *
+   * Only the first 2000 characters are sent. A missing configuration, an
+   * unsuccessful request or a reply that is not a JSON array yields `[]`.
+   *
+   * @param content Text to scan.
+   * @returns The recognised entities. Positions are not located in the
+   *          text: `startPosition` is always 0 and `endPosition` is the
+   *          length of the entity text.
+   */
+  private async extractEntities(content: string): Promise<EntityResult[]> {
+    try {
+      const openaiConfig = this.config.openai
+      if (!openaiConfig) {
+        return []
+      }
+
+      const prompt = `请从以下文本中识别出人名、地名、机构名、日期、金额等实体。
+      
+文本：${content.substring(0, 2000)}
+
+请以JSON数组格式返回，每个实体包含：text(实体文本)、type(类型：person/location/organization/date/money/other)、confidence(置信度0-1)。`
+
+      const response = await this.req.post<any>({
+        url: `${openaiConfig.baseURL || 'https://api.openai.com'}/v1/chat/completions`,
+        headers: {
+          'Authorization': `Bearer ${openaiConfig.apiKey}`,
+          'Content-Type': 'application/json'
+        },
+        data: {
+          model: openaiConfig.model,
+          messages: [
+            {
+              role: 'system',
+              content: '你是一个专业的命名实体识别助手。'
+            },
+            {
+              role: 'user',
+              content: prompt
+            }
+          ],
+          max_tokens: 500,
+          temperature: 0.1
+        }
+      })
+
+      if (!response.success || !response.data?.choices?.[0]) {
+        return []
+      }
+
+      // The reply is free text: strip a Markdown code fence if the JSON is
+      // wrapped in one, otherwise JSON.parse would fail.
+      const raw = `${response.data.choices[0].message.content}`.trim()
+      const jsonText = raw.replace(/^```(?:json)?\s*/i, '').replace(/\s*```$/, '')
+      const parsed = JSON.parse(jsonText)
+      if (!Array.isArray(parsed)) {
+        return []
+      }
+
+      return parsed.map((entity: any) => {
+        const rawConfidence = entity.confidence
+        return {
+          text: entity.text || '',
+          type: entity.type || 'other',
+          // A reported confidence of 0 is kept; only a missing or non-numeric
+          // value falls back to 0.8. Numeric values are clamped to [0, 1].
+          confidence: typeof rawConfidence === 'number' && !isNaN(rawConfidence)
+            ? Math.max(0, Math.min(1, rawConfidence))
+            : 0.8,
+          startPosition: 0, // simplified: the entity is not located in the text
+          endPosition: entity.text?.length || 0
+        }
+      }) as EntityResult[]
+
+    } catch (error) {
+      console.error('实体识别失败:', error)
+      return []
+    }
+  }
+  
+  /**
+   * Asks the configured chat-completions endpoint for the 3-5 main topics of
+   * the text.
+   *
+   * Only the first 2000 characters are sent. A missing configuration, an
+   * unsuccessful request or a reply that is not a JSON array yields `[]`.
+   *
+   * @param content Text to analyse.
+   * @returns Topics with a confidence in [0, 1] and their related keywords.
+   */
+  private async extractTopics(content: string): Promise<TopicResult[]> {
+    try {
+      const openaiConfig = this.config.openai
+      if (!openaiConfig) {
+        return []
+      }
+
+      const prompt = `请分析以下文本的主要主题，提取3-5个核心主题。
+      
+文本：${content.substring(0, 2000)}
+
+请以JSON数组格式返回，每个主题包含：name(主题名称)、confidence(置信度0-1)、keywords(相关关键词数组)。`
+
+      const response = await this.req.post<any>({
+        url: `${openaiConfig.baseURL || 'https://api.openai.com'}/v1/chat/completions`,
+        headers: {
+          'Authorization': `Bearer ${openaiConfig.apiKey}`,
+          'Content-Type': 'application/json'
+        },
+        data: {
+          model: openaiConfig.model,
+          messages: [
+            {
+              role: 'system',
+              content: '你是一个专业的文本主题分析助手。'
+            },
+            {
+              role: 'user',
+              content: prompt
+            }
+          ],
+          max_tokens: 400,
+          temperature: 0.2
+        }
+      })
+
+      if (!response.success || !response.data?.choices?.[0]) {
+        return []
+      }
+
+      // The reply is free text: strip a Markdown code fence if the JSON is
+      // wrapped in one, otherwise JSON.parse would fail.
+      const raw = `${response.data.choices[0].message.content}`.trim()
+      const jsonText = raw.replace(/^```(?:json)?\s*/i, '').replace(/\s*```$/, '')
+      const parsed = JSON.parse(jsonText)
+      if (!Array.isArray(parsed)) {
+        return []
+      }
+
+      return parsed.map((topic: any) => {
+        const rawConfidence = topic.confidence
+        return {
+          name: topic.name || '',
+          // A reported confidence of 0 is kept; only a missing or non-numeric
+          // value falls back to 0.8. Numeric values are clamped to [0, 1].
+          confidence: typeof rawConfidence === 'number' && !isNaN(rawConfidence)
+            ? Math.max(0, Math.min(1, rawConfidence))
+            : 0.8,
+          keywords: Array.isArray(topic.keywords) ? topic.keywords : []
+        }
+      }) as TopicResult[]
+
+    } catch (error) {
+      console.error('主题提取失败:', error)
+      return []
+    }
+  }
+  
+  /**
+   * Scores readability from the average number of whitespace-separated
+   * words per sentence; shorter sentences score higher.
+   *
+   * Sentences are delimited by `.`, `!`, `?` and their full-width
+   * counterparts. Empty fragments (for example the one after a trailing
+   * full stop) are ignored so they do not lower the average.
+   * NOTE(review): words are split on whitespace only, so text written
+   * without spaces counts as one word per run; confirm whether such text
+   * needs its own measure.
+   *
+   * @param content Text to score.
+   * @returns 1.0, 0.9, 0.7, 0.5 or 0.3 as the average sentence length grows
+   *          past 10, 15, 20 and 30 words; 1.0 when the text contains no
+   *          sentence; 0.5 if scoring fails.
+   */
+  private async analyzeReadability(content: string): Promise<number> {
+    try {
+      const sentenceCount = content
+        .split(/[.!?。！？]/)
+        .filter((part: string) => part.trim().length > 0)
+        .length
+      const wordCount = content
+        .split(/\s+/)
+        .filter((part: string) => part.length > 0)
+        .length
+
+      // Nothing to measure: keep the best score instead of dividing by zero.
+      if (sentenceCount === 0) {
+        return 1.0
+      }
+
+      const avgWordsPerSentence = wordCount / sentenceCount
+
+      // Step function over the average sentence length.
+      let score = 1.0
+      if (avgWordsPerSentence > 30) score = 0.3
+      else if (avgWordsPerSentence > 20) score = 0.5
+      else if (avgWordsPerSentence > 15) score = 0.7
+      else if (avgWordsPerSentence > 10) score = 0.9
+
+      return score
+
+    } catch (error) {
+      console.error('可读性分析失败:', error)
+      return 0.5
+    }
+  }
+  
+  /**
+   * Heuristic credibility score built from metadata of the content.
+   *
+   * Starts at 0.5 and adds 0.2 for a source on the trusted-domain list,
+   * 0.1 for a non-empty author, 0.1 for a body longer than 500 characters
+   * and 0.1 when less than one day has passed since `publishedAt`.
+   *
+   * @param content Content whose metadata is inspected.
+   * @returns A score capped at 1.0; 0.5 if scoring fails.
+   */
+  private async analyzeCredibility(content: ContentInfo): Promise<number> {
+    try {
+      let score = 0.5 // base score
+
+      // Source credibility
+      if (content.sourceUrl) {
+        // Normalise to a bare lower-case host name: drop credentials and port.
+        let host = this.extractDomain(content.sourceUrl).toLowerCase()
+        const atIndex = host.lastIndexOf('@')
+        if (atIndex >= 0) {
+          host = host.substring(atIndex + 1)
+        }
+        const colonIndex = host.indexOf(':')
+        if (colonIndex >= 0) {
+          host = host.substring(0, colonIndex)
+        }
+
+        const credibleDomains = ['reuters.com', 'bbc.com', 'xinhuanet.com', 'nhk.or.jp']
+        // Match the domain itself or one of its subdomains. A plain substring
+        // test would also accept look-alikes such as "fakebbc.com" or
+        // "bbc.com.example.org".
+        if (credibleDomains.some((d: string) => host === d || host.endsWith('.' + d))) {
+          score += 0.2
+        }
+      }
+
+      // Author information
+      if (content.author && content.author.length > 0) {
+        score += 0.1
+      }
+
+      // Content length
+      if (content.content.length > 500) {
+        score += 0.1
+      }
+
+      // Freshness
+      const daysSincePublished = (Date.now() - content.publishedAt) / (1000 * 60 * 60 * 24)
+      if (daysSincePublished < 1) {
+        score += 0.1
+      }
+
+      return Math.min(1.0, score)
+
+    } catch (error) {
+      console.error('可信度分析失败:', error)
+      return 0.5
+    }
+  }
+  
+  /**
+   * Produces a short summary of the text.
+   *
+   * Text shorter than 200 characters is returned unchanged and needs no API
+   * call, so this shortcut applies even when no OpenAI configuration is
+   * present. Longer text is summarised by the configured chat-completions
+   * endpoint from its first 2000 characters.
+   *
+   * @param content Text to summarise.
+   * @returns The summary, or `''` when the configuration is missing, the
+   *          request is unsuccessful or the reply carries no text.
+   */
+  private async generateSummary(content: string): Promise<string> {
+    try {
+      // Short text is its own summary.
+      if (content.length < 200) {
+        return content
+      }
+
+      const openaiConfig = this.config.openai
+      if (!openaiConfig) {
+        return ''
+      }
+
+      const prompt = `请为以下文本生成一个简洁的摘要（100字以内）：\n\n${content.substring(0, 2000)}`
+
+      const response = await this.req.post<any>({
+        url: `${openaiConfig.baseURL || 'https://api.openai.com'}/v1/chat/completions`,
+        headers: {
+          'Authorization': `Bearer ${openaiConfig.apiKey}`,
+          'Content-Type': 'application/json'
+        },
+        data: {
+          model: openaiConfig.model,
+          messages: [
+            {
+              role: 'system',
+              content: '你是一个专业的文本摘要助手。'
+            },
+            {
+              role: 'user',
+              content: prompt
+            }
+          ],
+          max_tokens: 200,
+          temperature: 0.3
+        }
+      })
+
+      if (!response.success || !response.data?.choices?.[0]) {
+        return ''
+      }
+
+      // Guard against a reply without textual content instead of relying on
+      // the catch block to absorb the resulting TypeError.
+      const summaryText = response.data.choices[0].message?.content
+      return typeof summaryText === 'string' ? summaryText.trim() : ''
+
+    } catch (error) {
+      console.error('摘要生成失败:', error)
+      return ''
+    }
+  }
+  
+  /**
+   * Asks the configured chat-completions endpoint for 5-10 keywords of the
+   * text.
+   *
+   * Only the first 1500 characters are sent. A missing configuration, an
+   * unsuccessful request or a reply that is not a JSON array yields `[]`.
+   *
+   * @param content Text to analyse.
+   * @returns The trimmed, non-empty string elements of the returned array.
+   */
+  private async extractKeywords(content: string): Promise<string[]> {
+    try {
+      const openaiConfig = this.config.openai
+      if (!openaiConfig) {
+        return []
+      }
+
+      const prompt = `请从以下文本中提取5-10个关键词：\n\n${content.substring(0, 1500)}\n\n请以JSON数组格式返回关键词。`
+
+      const response = await this.req.post<any>({
+        url: `${openaiConfig.baseURL || 'https://api.openai.com'}/v1/chat/completions`,
+        headers: {
+          'Authorization': `Bearer ${openaiConfig.apiKey}`,
+          'Content-Type': 'application/json'
+        },
+        data: {
+          model: openaiConfig.model,
+          messages: [
+            {
+              role: 'system',
+              content: '你是一个专业的关键词提取助手。'
+            },
+            {
+              role: 'user',
+              content: prompt
+            }
+          ],
+          max_tokens: 200,
+          temperature: 0.1
+        }
+      })
+
+      if (!response.success || !response.data?.choices?.[0]) {
+        return []
+      }
+
+      // The reply is free text: strip a Markdown code fence if the JSON is
+      // wrapped in one, otherwise JSON.parse would fail.
+      const raw = `${response.data.choices[0].message.content}`.trim()
+      const jsonText = raw.replace(/^```(?:json)?\s*/i, '').replace(/\s*```$/, '')
+      const parsed = JSON.parse(jsonText)
+      if (!Array.isArray(parsed)) {
+        return []
+      }
+
+      // Keep only real strings so the declared string[] return type holds.
+      return parsed
+        .filter((item: any) => typeof item === 'string' && item.trim().length > 0)
+        .map((item: string) => item.trim())
+
+    } catch (error) {
+      console.error('关键词提取失败:', error)
+      return []
+    }
+  }
+  
+  /**
+   * Assigns the content to predefined categories by counting keyword
+   * occurrences in the lower-cased title and body.
+   *
+   * Each occurrence adds one point; confidence is `points / 10`, capped at
+   * 1.0. Categories without any hit are omitted.
+   *
+   * @param content Content to classify.
+   * @returns At most three categories, highest confidence first; `[]` if
+   *          classification fails.
+   */
+  private async classifyContent(content: ContentInfo): Promise<CategoryResult[]> {
+    try {
+      // Predefined categories
+      const categories = [
+        { id: 'politics', name: '政治', keywords: ['政治', '政府', '选举', '政策', 'politics', 'government'] },
+        { id: 'technology', name: '科技', keywords: ['科技', '技术', '人工智能', 'AI', 'technology', 'tech'] },
+        { id: 'business', name: '商业', keywords: ['商业', '经济', '金融', '市场', 'business', 'economy'] },
+        { id: 'sports', name: '体育', keywords: ['体育', '运动', '比赛', '足球', 'sports', 'game'] },
+        { id: 'entertainment', name: '娱乐', keywords: ['娱乐', '电影', '音乐', '明星', 'entertainment', 'movie'] },
+        { id: 'health', name: '健康', keywords: ['健康', '医疗', '疾病', '医院', 'health', 'medical'] }
+      ]
+
+      const text = `${content.title} ${content.content}`.toLowerCase()
+      const results: CategoryResult[] = []
+
+      for (const category of categories) {
+        let score = 0
+        for (const keyword of category.keywords) {
+          const needle = keyword.toLowerCase()
+          // Very short Latin keywords such as "AI" must match as whole words:
+          // as a bare substring "ai" also hits "said", "again", "main", ...
+          // and would tag almost any English text as technology. All other
+          // keywords keep plain substring matching.
+          const pattern = /^[a-z]{1,3}$/.test(needle) ? `\\b${needle}\\b` : needle
+          const matches = (text.match(new RegExp(pattern, 'g')) || []).length
+          score += matches
+        }
+
+        if (score > 0) {
+          results.push({
+            categoryId: category.id,
+            categoryName: category.name,
+            confidence: Math.min(1.0, score / 10),
+            level: 1
+          })
+        }
+      }
+
+      return results.sort((a, b) => b.confidence - a.confidence).slice(0, 3)
+
+    } catch (error) {
+      console.error('内容分类失败:', error)
+      return []
+    }
+  }
+  
+  /**
+   * Picks short sentences as key phrases.
+   *
+   * The text is cut at `.`, `!`, `?` and their full-width counterparts; a
+   * trimmed fragment qualifies when it consists of two to five
+   * whitespace-separated words.
+   *
+   * @param content Text to scan.
+   * @returns Up to ten qualifying fragments in their original order; `[]`
+   *          if extraction fails.
+   */
+  private extractKeyPhrases(content: string): string[] {
+    try {
+      return content
+        .split(/[.!?。！？]/)
+        .map((fragment: string) => fragment.trim())
+        .filter((fragment: string) => {
+          const wordCount = fragment.split(/\s+/).length
+          return wordCount >= 2 && wordCount <= 5
+        })
+        .slice(0, 10)
+
+    } catch (error) {
+      console.error('关键短语提取失败:', error)
+      return []
+    }
+  }
+  
+  /**
+   * Extracts the host name from an http(s) URL.
+   *
+   * Credentials (`user:pass@`), the port, the path, the query and the
+   * fragment are excluded, and the host is lower-cased, so
+   * `https://user@News.Example.com:8443/a?b` yields `news.example.com`.
+   * A bracketed IPv6 literal is returned with its brackets.
+   *
+   * @param url URL to inspect.
+   * @returns The host name, or `''` when no http(s) URL is found.
+   */
+  private extractDomain(url: string): string {
+    try {
+      // Optional greedy userinfo up to the last "@" before the path, then
+      // either a bracketed IPv6 literal or a host that stops at ":", "/",
+      // "?" or "#".
+      const matches = url.match(/https?:\/\/(?:[^\/?#]*@)?(\[[^\]\/]*\]|[^\/?#:@]+)/i)
+      return matches ? matches[1].toLowerCase() : ''
+    } catch (error) {
+      return ''
+    }
+  }
+  
+  /**
+   * Builds the cache key for one content id and option set.
+   *
+   * @param contentId Identifier of the content.
+   * @param options Analysis options; serialised as JSON (`{}` when absent)
+   *                and reduced to a short hash.
+   * @returns A key of the form `content-<id>-<hash>`.
+   */
+  private generateContentCacheKey(contentId: string, options?: any): string {
+    const serialized = JSON.stringify(options || {})
+    const digest = this.simpleHash(serialized)
+    return 'content-' + contentId + '-' + digest
+  }
+  
+  /**
+   * Small non-cryptographic string hash.
+   *
+   * @param str Text to hash.
+   * @returns The absolute value of a signed 32-bit rolling hash, written in
+   *          base 36.
+   */
+  private simpleHash(str: string): string {
+    let acc = 0
+    let idx = 0
+    while (idx < str.length) {
+      // acc * 31 + code unit, wrapped to a signed 32-bit integer
+      acc = ((acc << 5) - acc + str.charCodeAt(idx)) | 0
+      idx++
+    }
+    return Math.abs(acc).toString(36)
+  }
+}