// AI content analysis service
// filepath: h:\blews\akmon\uni_modules\ak-ai-news\services\ai-content-analysis-service.uts

import { AkReq } from '@/uni_modules/ak-req/index.uts'
import type {
  ContentAnalysisResult,
  EntityResult,
  TopicResult,
  CategoryResult,
  AIProvider,
  AIResponse,
  AIServiceConfig,
  ContentInfo
} from '../types/ai-types.uts'

/**
 * Sentence terminators used by the readability and key-phrase heuristics:
 * ASCII `. ! ?`, the CJK full stop, and the full-width `!` / `?` forms
 * (written as escapes so they cannot be confused with their ASCII look-alikes).
 */
const SENTENCE_DELIMITERS = /[.!?。\uFF01\uFF1F]/

/** Source domains that earn a credibility bonus (exact host or any subdomain). */
const CREDIBLE_DOMAINS = ['reuters.com', 'bbc.com', 'xinhuanet.com', 'nhk.or.jp']

/** Keyword tables for the local, rule-based classifier. */
const CONTENT_CATEGORIES = [
  { id: 'politics', name: '政治', keywords: ['政治', '政府', '选举', '政策', 'politics', 'government'] },
  { id: 'technology', name: '科技', keywords: ['科技', '技术', '人工智能', 'AI', 'technology', 'tech'] },
  { id: 'business', name: '商业', keywords: ['商业', '经济', '金融', '市场', 'business', 'economy'] },
  { id: 'sports', name: '体育', keywords: ['体育', '运动', '比赛', '足球', 'sports', 'game'] },
  { id: 'entertainment', name: '娱乐', keywords: ['娱乐', '电影', '音乐', '明星', 'entertainment', 'movie'] },
  { id: 'health', name: '健康', keywords: ['健康', '医疗', '疾病', '医院', 'health', 'medical'] }
]

/**
 * Analyses a piece of content by combining chat-completion requests
 * (sentiment, entities, topics, summary, keywords) with local heuristics
 * (readability, credibility, category, key phrases).
 *
 * Results are memoised in an in-memory map keyed by content id plus a hash of
 * the options object. The cache is never evicted.
 */
export class AIContentAnalysisService {
  private config: AIServiceConfig
  private req: AkReq
  private cache: Map<string, ContentAnalysisResult> = new Map()

  constructor(config: AIServiceConfig) {
    this.config = config
    this.req = new AkReq()
  }

  /**
   * Runs the full analysis pipeline for one content item.
   *
   * Every `include*` option defaults to enabled; only an explicit `false`
   * disables that step. A disabled step contributes its neutral default
   * (score 0 / 'neutral', empty list, or 0.5) instead of shifting the other
   * results into the wrong field.
   *
   * @param content Content item to analyse.
   * @param options Switches for the optional analyses and a language override.
   * @returns A success response carrying the analysis (with `processingTimeMs`
   *          of 0 when served from cache), or a failure response with
   *          `errorCode` 'CONTENT_ANALYSIS_FAILED'.
   */
  async analyzeContent(
    content: ContentInfo,
    options?: {
      includeEntities?: boolean
      includeTopics?: boolean
      includeSentiment?: boolean
      includeReadability?: boolean
      includeCredibility?: boolean
      language?: string
    }
  ): Promise<AIResponse<ContentAnalysisResult>> {
    try {
      const startTime = Date.now()

      // Serve from cache when the same content/options pair was analysed before.
      const cacheKey = this.generateContentCacheKey(content.id, options)
      const cached = this.cache.get(cacheKey)
      if (cached) {
        return { success: true, data: cached, processingTimeMs: 0 }
      }

      // Each slot has a fixed position; a disabled analysis yields null rather
      // than being omitted, so results can never be read from the wrong index.
      // All steps are independent, so they run concurrently. Every helper
      // handles its own errors and resolves to a fallback value.
      const [
        sentiment,
        entities,
        topics,
        readability,
        credibility,
        summary,
        keywords,
        categories
      ] = await Promise.all([
        options?.includeSentiment !== false ? this.analyzeSentiment(content.content, content.title) : null,
        options?.includeEntities !== false ? this.extractEntities(content.content) : null,
        options?.includeTopics !== false ? this.extractTopics(content.content) : null,
        options?.includeReadability !== false ? this.analyzeReadability(content.content) : null,
        options?.includeCredibility !== false ? this.analyzeCredibility(content) : null,
        this.generateSummary(content.content),
        this.extractKeywords(content.content),
        this.classifyContent(content)
      ])

      const analysisResult: ContentAnalysisResult = {
        contentId: content.id,
        sentimentScore: sentiment?.score ?? 0,
        sentimentLabel: sentiment?.label || 'neutral',
        readabilityScore: readability ?? 0.5,
        credibilityScore: credibility ?? 0.5,
        toxicityScore: 0, // toxicity detection is not implemented yet
        keywords: keywords || [],
        entities: entities ?? [],
        topics: topics ?? [],
        categories: categories || [],
        summary: summary || '',
        keyPhrases: this.extractKeyPhrases(content.content),
        language: options?.language || content.originalLanguage,
        processingTimeMs: Date.now() - startTime,
        provider: 'openai'
      }

      this.cache.set(cacheKey, analysisResult)

      return {
        success: true,
        data: analysisResult,
        processingTimeMs: analysisResult.processingTimeMs
      }
    } catch (error) {
      console.error('内容分析失败:', error)
      return {
        success: false,
        error: error instanceof Error ? error.message : '内容分析服务异常',
        errorCode: 'CONTENT_ANALYSIS_FAILED'
      }
    }
  }

  /**
   * Sends one system + user prompt pair to the configured chat-completions
   * endpoint.
   *
   * @returns The first choice's message content, or null when the OpenAI
   *          config is missing, the request reports failure, or the response
   *          has no choices. Transport exceptions propagate to the caller.
   */
  private async requestChatCompletion(
    systemPrompt: string,
    userPrompt: string,
    maxTokens: number,
    temperature: number
  ): Promise<string | null> {
    const openaiConfig = this.config.openai
    if (!openaiConfig) {
      return null
    }

    const response = await this.req.post({
      url: `${openaiConfig.baseURL || 'https://api.openai.com'}/v1/chat/completions`,
      headers: {
        'Authorization': `Bearer ${openaiConfig.apiKey}`,
        'Content-Type': 'application/json'
      },
      data: {
        model: openaiConfig.model,
        messages: [
          { role: 'system', content: systemPrompt },
          { role: 'user', content: userPrompt }
        ],
        max_tokens: maxTokens,
        temperature: temperature
      }
    })

    if (!response.success || !response.data?.choices?.[0]) {
      return null
    }
    return response.data.choices[0].message.content
  }

  /**
   * Parses JSON produced by the model. Replies wrapped in a Markdown code
   * fence (three backticks, optionally tagged `json`) are unwrapped first,
   * since a fenced reply would otherwise make `JSON.parse` throw.
   *
   * @throws SyntaxError when the (unwrapped) text is not valid JSON.
   */
  private parseModelJson(raw: string): any {
    const trimmed = raw.trim()
    const fenced = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i)
    return JSON.parse(fenced !== null ? fenced[1] : trimmed)
  }

  /**
   * Scores sentiment of the title + content via the chat model.
   *
   * @returns `score` clamped to [-1, 1] and a label; falls back to
   *          `{ score: 0, label: 'neutral' }` on any failure.
   */
  private async analyzeSentiment(content: string, title?: string): Promise<{ score: number, label: string }> {
    try {
      if (!this.config.openai) {
        throw new Error('OpenAI配置未找到')
      }

      const text = title ? `${title}\n\n${content}` : content
      // Only the first 2000 characters are sent to bound the prompt size.
      const prompt = `请分析以下文本的情感倾向,返回一个-1到1之间的数值(-1表示非常负面,0表示中性,1表示非常正面)和对应的标签(positive/negative/neutral)。 文本:${text.substring(0, 2000)} 请以JSON格式返回:{"score": 数值, "label": "标签"}`

      const reply = await this.requestChatCompletion('你是一个专业的文本情感分析助手。', prompt, 100, 0.1)
      if (reply === null) {
        throw new Error('情感分析API调用失败')
      }

      const result = this.parseModelJson(reply)
      return {
        // A non-numeric score becomes NaN and then falls back to 0.
        score: Math.max(-1, Math.min(1, parseFloat(result.score) || 0)),
        label: result.label || 'neutral'
      }
    } catch (error) {
      console.error('情感分析失败:', error)
      return { score: 0, label: 'neutral' }
    }
  }

  /**
   * Extracts named entities via the chat model.
   *
   * Positions are not resolved against the text: `startPosition` is always 0
   * and `endPosition` is the entity text length.
   *
   * @returns The entities, or an empty array on missing config, request
   *          failure, non-array reply, or parse error.
   */
  private async extractEntities(content: string): Promise<EntityResult[]> {
    try {
      if (!this.config.openai) {
        return []
      }

      const prompt = `请从以下文本中识别出人名、地名、机构名、日期、金额等实体。 文本:${content.substring(0, 2000)} 请以JSON数组格式返回,每个实体包含:text(实体文本)、type(类型:person/location/organization/date/money/other)、confidence(置信度0-1)。`

      const reply = await this.requestChatCompletion('你是一个专业的命名实体识别助手。', prompt, 500, 0.1)
      if (reply === null) {
        return []
      }

      const parsed = this.parseModelJson(reply)
      if (!Array.isArray(parsed)) {
        return []
      }

      return parsed.map((entity: any) => ({
        text: entity.text || '',
        type: entity.type || 'other',
        // Keep a reported confidence of 0; default only when it is missing.
        confidence: typeof entity.confidence === 'number' ? entity.confidence : 0.8,
        startPosition: 0, // simplified: offsets are not computed
        endPosition: entity.text?.length || 0
      })) as EntityResult[]
    } catch (error) {
      console.error('实体识别失败:', error)
      return []
    }
  }

  /**
   * Extracts the main topics via the chat model.
   *
   * @returns The topics, or an empty array on missing config, request
   *          failure, non-array reply, or parse error.
   */
  private async extractTopics(content: string): Promise<TopicResult[]> {
    try {
      if (!this.config.openai) {
        return []
      }

      const prompt = `请分析以下文本的主要主题,提取3-5个核心主题。 文本:${content.substring(0, 2000)} 请以JSON数组格式返回,每个主题包含:name(主题名称)、confidence(置信度0-1)、keywords(相关关键词数组)。`

      const reply = await this.requestChatCompletion('你是一个专业的文本主题分析助手。', prompt, 400, 0.2)
      if (reply === null) {
        return []
      }

      const parsed = this.parseModelJson(reply)
      if (!Array.isArray(parsed)) {
        return []
      }

      return parsed.map((topic: any) => ({
        name: topic.name || '',
        confidence: typeof topic.confidence === 'number' ? topic.confidence : 0.8,
        keywords: Array.isArray(topic.keywords) ? topic.keywords : []
      })) as TopicResult[]
    } catch (error) {
      console.error('主题提取失败:', error)
      return []
    }
  }

  /**
   * Heuristic readability score derived from the average number of
   * whitespace-separated words per sentence: shorter sentences score higher.
   *
   * NOTE(review): text without whitespace between words (e.g. Chinese) counts
   * as one word per run, so the score is only indicative there.
   *
   * @returns One of 1.0, 0.9, 0.7, 0.5 or 0.3; 0.5 if the computation throws.
   */
  private async analyzeReadability(content: string): Promise<number> {
    try {
      // Empty segments (e.g. after a trailing full stop) are not sentences;
      // the floor of 1 avoids dividing by zero on empty input.
      const sentenceCount = Math.max(
        1,
        content.split(SENTENCE_DELIMITERS).filter(part => part.trim().length > 0).length
      )
      const wordCount = content.split(/\s+/).filter(word => word.length > 0).length
      const avgWordsPerSentence = wordCount / sentenceCount

      if (avgWordsPerSentence > 30) return 0.3
      if (avgWordsPerSentence > 20) return 0.5
      if (avgWordsPerSentence > 15) return 0.7
      if (avgWordsPerSentence > 10) return 0.9
      return 1.0
    } catch (error) {
      console.error('可读性分析失败:', error)
      return 0.5
    }
  }

  /**
   * Heuristic credibility score: starts at 0.5 and adds bonuses for a trusted
   * source domain (+0.2), a non-empty author (+0.1), content longer than 500
   * characters (+0.1) and publication within the last day (+0.1).
   *
   * @returns A score capped at 1.0; 0.5 if the computation throws.
   */
  private async analyzeCredibility(content: ContentInfo): Promise<number> {
    try {
      let score = 0.5 // baseline

      // Source reputation: the host must BE a trusted domain or a subdomain of
      // one. A plain substring test would also accept look-alikes such as
      // "reuters.com.evil.example".
      if (content.sourceUrl) {
        const domain = this.extractDomain(content.sourceUrl)
        if (CREDIBLE_DOMAINS.some(d => domain === d || domain.endsWith(`.${d}`))) {
          score += 0.2
        }
      }

      // Author attribution
      if (content.author && content.author.length > 0) {
        score += 0.1
      }

      // Content length
      if (content.content.length > 500) {
        score += 0.1
      }

      // Freshness. NOTE(review): assumes publishedAt is an epoch-millisecond
      // number; confirm against ContentInfo.
      const daysSincePublished = (Date.now() - content.publishedAt) / (1000 * 60 * 60 * 24)
      if (daysSincePublished < 1) {
        score += 0.1
      }

      return Math.min(1.0, score)
    } catch (error) {
      console.error('可信度分析失败:', error)
      return 0.5
    }
  }

  /**
   * Produces a short summary via the chat model.
   *
   * @returns The content itself when it is shorter than 200 characters, the
   *          trimmed model reply otherwise, or '' on missing config or failure.
   */
  private async generateSummary(content: string): Promise<string> {
    try {
      if (!this.config.openai) {
        return ''
      }

      // Short content is already its own summary.
      if (content.length < 200) {
        return content
      }

      const prompt = `请为以下文本生成一个简洁的摘要(100字以内):\n\n${content.substring(0, 2000)}`

      const reply = await this.requestChatCompletion('你是一个专业的文本摘要助手。', prompt, 200, 0.3)
      if (reply === null) {
        return ''
      }
      return reply.trim()
    } catch (error) {
      console.error('摘要生成失败:', error)
      return ''
    }
  }

  /**
   * Extracts keywords via the chat model.
   *
   * @returns The parsed array, or an empty array on missing config, request
   *          failure, non-array reply, or parse error.
   */
  private async extractKeywords(content: string): Promise<string[]> {
    try {
      if (!this.config.openai) {
        return []
      }

      const prompt = `请从以下文本中提取5-10个关键词:\n\n${content.substring(0, 1500)}\n\n请以JSON数组格式返回关键词。`

      const reply = await this.requestChatCompletion('你是一个专业的关键词提取助手。', prompt, 200, 0.1)
      if (reply === null) {
        return []
      }

      const keywords = this.parseModelJson(reply)
      return Array.isArray(keywords) ? keywords : []
    } catch (error) {
      console.error('关键词提取失败:', error)
      return []
    }
  }

  /**
   * Rule-based classification: counts case-insensitive keyword occurrences in
   * title + content for each predefined category.
   *
   * @returns Up to three matching categories, highest confidence first, where
   *          confidence is `min(1, occurrences / 10)`; [] on error.
   */
  private async classifyContent(content: ContentInfo): Promise<CategoryResult[]> {
    try {
      const text = `${content.title} ${content.content}`.toLowerCase()
      const results: CategoryResult[] = []

      for (const category of CONTENT_CATEGORIES) {
        let score = 0
        for (const keyword of category.keywords) {
          // Splitting on a literal counts non-overlapping occurrences without
          // building a RegExp per keyword.
          score += text.split(keyword.toLowerCase()).length - 1
        }

        if (score > 0) {
          results.push({
            categoryId: category.id,
            categoryName: category.name,
            confidence: Math.min(1.0, score / 10),
            level: 1
          })
        }
      }

      return results.sort((a, b) => b.confidence - a.confidence).slice(0, 3)
    } catch (error) {
      console.error('内容分类失败:', error)
      return []
    }
  }

  /**
   * Naive key-phrase extraction: keeps sentences made of 2 to 5
   * whitespace-separated words.
   *
   * @returns At most the first 10 such sentences (trimmed); [] on error.
   */
  private extractKeyPhrases(content: string): string[] {
    try {
      const phrases: string[] = []

      for (const sentence of content.split(SENTENCE_DELIMITERS)) {
        const trimmed = sentence.trim()
        const wordCount = trimmed.split(/\s+/).length
        if (wordCount >= 2 && wordCount <= 5) {
          phrases.push(trimmed)
        }
      }

      return phrases.slice(0, 10)
    } catch (error) {
      console.error('关键短语提取失败:', error)
      return []
    }
  }

  /**
   * Extracts the lower-cased hostname from an http(s) URL, without userinfo
   * ("user@") or port, so neither can be used to fake a trusted domain.
   *
   * @returns The hostname, or '' when the URL does not start with http(s)://.
   */
  private extractDomain(url: string): string {
    try {
      const matches = url.match(/^https?:\/\/(?:[^\/?#@]*@)?([^\/?#:]+)/i)
      return matches ? matches[1].toLowerCase() : ''
    } catch (error) {
      return ''
    }
  }

  /**
   * Builds the cache key from the content id and a hash of the serialised
   * options (a missing options object hashes like `{}`).
   */
  private generateContentCacheKey(contentId: string, options?: any): string {
    const optionsStr = JSON.stringify(options || {})
    return `content-${contentId}-${this.simpleHash(optionsStr)}`
  }

  /**
   * 32-bit string hash (`hash * 31 + charCode`, wrapped to a signed 32-bit
   * integer each step), returned as the base-36 text of its absolute value.
   */
  private simpleHash(str: string): string {
    let hash = 0
    for (let i = 0; i < str.length; i++) {
      // `| 0` wraps to a signed 32-bit integer.
      hash = (((hash << 5) - hash) + str.charCodeAt(i)) | 0
    }
    return Math.abs(hash).toString(36)
  }
}