Files
akmon/uni_modules/ak-ai-news/services/ai-content-analysis-service.uts
2026-01-20 08:04:15 +08:00

564 lines
16 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// AI内容分析服务
// filepath: h:\blews\akmon\uni_modules\ak-ai-news\services\ai-content-analysis-service.uts
import { AkReq } from '@/uni_modules/ak-req/index.uts'
import type {
ContentAnalysisResult,
EntityResult,
TopicResult,
CategoryResult,
AIProvider,
AIResponse,
AIServiceConfig,
ContentInfo
} from '../types/ai-types.uts'
/**
 * Content analysis service.
 *
 * Combines OpenAI chat-completion calls (sentiment, entities, topics, summary,
 * keywords) with local heuristics (readability, credibility, keyword-based
 * classification, key phrases) and memoizes the combined result per
 * content id + options in an in-memory map.
 */
export class AIContentAnalysisService {
  private config: AIServiceConfig
  private req: AkReq
  private cache: Map<string, ContentAnalysisResult> = new Map()

  constructor(config: AIServiceConfig) {
    this.config = config
    this.req = new AkReq()
  }

  /**
   * Runs the full analysis pipeline for one piece of content.
   *
   * Every `include*` flag defaults to enabled; only an explicit `false`
   * disables that analysis, in which case the corresponding field gets its
   * neutral default (score 0 / 'neutral', 0.5, or an empty list).
   *
   * @param content Content to analyze.
   * @param options Optional switches and a language override.
   * @returns A success response carrying the analysis, or a failure response
   *          with `errorCode: 'CONTENT_ANALYSIS_FAILED'`. Never rejects.
   */
  async analyzeContent(
    content: ContentInfo,
    options?: {
      includeEntities?: boolean
      includeTopics?: boolean
      includeSentiment?: boolean
      includeReadability?: boolean
      includeCredibility?: boolean
      language?: string
    }
  ): Promise<AIResponse<ContentAnalysisResult>> {
    try {
      const startTime = Date.now()

      // Serve from cache when the same content/options pair was analyzed before.
      const cacheKey = this.generateContentCacheKey(content.id, options)
      const cached = this.cache.get(cacheKey)
      if (cached) {
        return {
          success: true,
          data: cached,
          processingTimeMs: 0
        }
      }

      // Each analysis gets its own named task. Reading results by name instead
      // of by array position keeps the fields aligned even when some analyses
      // are switched off.
      const sentimentTask: Promise<{ score: number, label: string }> =
        options?.includeSentiment !== false
          ? this.analyzeSentiment(content.content, content.title)
          : Promise.resolve({ score: 0, label: 'neutral' })
      const entitiesTask: Promise<EntityResult[]> =
        options?.includeEntities !== false
          ? this.extractEntities(content.content)
          : Promise.resolve([])
      const topicsTask: Promise<TopicResult[]> =
        options?.includeTopics !== false
          ? this.extractTopics(content.content)
          : Promise.resolve([])
      const readabilityTask: Promise<number> =
        options?.includeReadability !== false
          ? this.analyzeReadability(content.content)
          : Promise.resolve(0.5)
      const credibilityTask: Promise<number> =
        options?.includeCredibility !== false
          ? this.analyzeCredibility(content)
          : Promise.resolve(0.5)
      // Summary, keywords and categories are always produced; they do not
      // depend on the tasks above, so they run concurrently with them.
      const summaryTask = this.generateSummary(content.content)
      const keywordsTask = this.extractKeywords(content.content)
      const categoriesTask = this.classifyContent(content)

      // Wait for everything at once so the tasks overlap.
      await Promise.all([
        sentimentTask,
        entitiesTask,
        topicsTask,
        readabilityTask,
        credibilityTask,
        summaryTask,
        keywordsTask,
        categoriesTask
      ])

      const sentiment = await sentimentTask
      const entities = await entitiesTask
      const topics = await topicsTask
      const readability = await readabilityTask
      const credibility = await credibilityTask
      const summary = await summaryTask
      const keywords = await keywordsTask
      const categories = await categoriesTask

      const analysisResult: ContentAnalysisResult = {
        contentId: content.id,
        sentimentScore: sentiment.score || 0,
        sentimentLabel: sentiment.label || 'neutral',
        readabilityScore: readability || 0.5,
        credibilityScore: credibility || 0.5,
        toxicityScore: 0, // toxicity detection is not implemented yet
        keywords: keywords || [],
        entities: entities || [],
        topics: topics || [],
        categories: categories || [],
        summary: summary || '',
        keyPhrases: this.extractKeyPhrases(content.content),
        language: options?.language || content.originalLanguage,
        processingTimeMs: Date.now() - startTime,
        provider: 'openai'
      }

      this.cache.set(cacheKey, analysisResult)

      return {
        success: true,
        data: analysisResult,
        processingTimeMs: analysisResult.processingTimeMs
      }
    } catch (error) {
      console.error('内容分析失败:', error)
      return {
        success: false,
        error: error instanceof Error ? error.message : '内容分析服务异常',
        errorCode: 'CONTENT_ANALYSIS_FAILED'
      }
    }
  }

  /**
   * Sends one system + user prompt pair to the chat-completions endpoint.
   *
   * @returns The text of the first choice, or `null` when the OpenAI
   *          configuration is missing, the request reports failure, or the
   *          response carries no usable text.
   */
  private async requestCompletion(
    systemPrompt: string,
    userPrompt: string,
    maxTokens: number,
    temperature: number
  ): Promise<string | null> {
    const openaiConfig = this.config.openai
    if (!openaiConfig) {
      return null
    }

    // Drop trailing slashes so a configured "https://host/" does not yield "//v1/...".
    const baseURL = String(openaiConfig.baseURL || 'https://api.openai.com').replace(/\/+$/, '')

    const response = await this.req.post<any>({
      url: `${baseURL}/v1/chat/completions`,
      headers: {
        'Authorization': `Bearer ${openaiConfig.apiKey}`,
        'Content-Type': 'application/json'
      },
      data: {
        model: openaiConfig.model,
        messages: [
          { role: 'system', content: systemPrompt },
          { role: 'user', content: userPrompt }
        ],
        max_tokens: maxTokens,
        temperature: temperature
      }
    })

    if (!response.success || !response.data?.choices?.[0]) {
      return null
    }

    const text = response.data.choices[0].message?.content
    return typeof text === 'string' ? text : null
  }

  /**
   * Parses JSON from a model reply, tolerating a surrounding Markdown code
   * fence (``` or ```json).
   *
   * @throws SyntaxError when the remaining text is not valid JSON.
   */
  private parseModelJson(raw: string): any {
    let text = raw.trim()
    const fenced = text.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i)
    if (fenced) {
      text = fenced[1]
    }
    return JSON.parse(text)
  }

  /**
   * Converts a model-reported confidence into a number within [0, 1].
   * A genuine 0 is kept; only a missing or non-numeric value uses `fallback`.
   */
  private normalizeConfidence(value: any, fallback: number): number {
    const parsed = typeof value === 'number' ? value : parseFloat(value)
    if (isNaN(parsed)) {
      return fallback
    }
    return Math.max(0, Math.min(1, parsed))
  }

  /**
   * Splits text into trimmed, non-empty sentences on ASCII `. ! ?`, the CJK
   * full stop and the full-width `!` / `?`.
   */
  private splitSentences(content: string): string[] {
    return content
      .split(/[.!?\u3002\uFF01\uFF1F]/)
      .map((part: string) => part.trim())
      .filter((part: string) => part.length > 0)
  }

  /**
   * Sentiment analysis via the chat model.
   *
   * @returns A score clamped to [-1, 1] plus the label reported by the model;
   *          `{ score: 0, label: 'neutral' }` on any failure.
   */
  private async analyzeSentiment(content: string, title?: string): Promise<{score: number, label: string}> {
    try {
      if (!this.config.openai) {
        throw new Error('OpenAI配置未找到')
      }

      const text = title ? `${title}\n\n${content}` : content
      const prompt = `请分析以下文本的情感倾向,返回一个-1到1之间的数值-1表示非常负面0表示中性1表示非常正面和对应的标签positive/negative/neutral\n文本:${text.substring(0, 2000)}\n请以JSON格式返回{"score": 数值, "label": "标签"}`

      const reply = await this.requestCompletion('你是一个专业的文本情感分析助手。', prompt, 100, 0.1)
      if (reply == null) {
        throw new Error('情感分析API调用失败')
      }

      const result = this.parseModelJson(reply)
      return {
        score: Math.max(-1, Math.min(1, parseFloat(result?.score) || 0)),
        label: result?.label || 'neutral'
      }
    } catch (error) {
      console.error('情感分析失败:', error)
      return { score: 0, label: 'neutral' }
    }
  }

  /**
   * Named-entity recognition via the chat model.
   *
   * Positions are placeholders: `startPosition` is always 0 and `endPosition`
   * is the length of the entity text.
   *
   * @returns The recognized entities, or an empty list on any failure.
   */
  private async extractEntities(content: string): Promise<EntityResult[]> {
    try {
      const prompt = `请从以下文本中识别出人名、地名、机构名、日期、金额等实体。\n文本:${content.substring(0, 2000)}\n请以JSON数组格式返回每个实体包含text(实体文本)、type(类型person/location/organization/date/money/other)、confidence(置信度0-1)。`

      const reply = await this.requestCompletion('你是一个专业的命名实体识别助手。', prompt, 500, 0.1)
      if (reply == null) {
        return []
      }

      const parsed = this.parseModelJson(reply)
      if (!Array.isArray(parsed)) {
        return []
      }

      return parsed
        .filter((entity: any) => entity != null && typeof entity === 'object')
        .map((entity: any) => {
          const entityText = entity.text ? String(entity.text) : ''
          return {
            text: entityText,
            type: entity.type || 'other',
            confidence: this.normalizeConfidence(entity.confidence, 0.8),
            startPosition: 0, // simplified: real offsets are not computed
            endPosition: entityText.length
          }
        }) as EntityResult[]
    } catch (error) {
      console.error('实体识别失败:', error)
      return []
    }
  }

  /**
   * Topic extraction via the chat model.
   *
   * @returns The extracted topics, or an empty list on any failure.
   */
  private async extractTopics(content: string): Promise<TopicResult[]> {
    try {
      const prompt = `请分析以下文本的主要主题提取3-5个核心主题。\n文本:${content.substring(0, 2000)}\n请以JSON数组格式返回每个主题包含name(主题名称)、confidence(置信度0-1)、keywords(相关关键词数组)。`

      const reply = await this.requestCompletion('你是一个专业的文本主题分析助手。', prompt, 400, 0.2)
      if (reply == null) {
        return []
      }

      const parsed = this.parseModelJson(reply)
      if (!Array.isArray(parsed)) {
        return []
      }

      return parsed
        .filter((topic: any) => topic != null && typeof topic === 'object')
        .map((topic: any) => ({
          name: topic.name || '',
          confidence: this.normalizeConfidence(topic.confidence, 0.8),
          keywords: Array.isArray(topic.keywords) ? topic.keywords : []
        })) as TopicResult[]
    } catch (error) {
      console.error('主题提取失败:', error)
      return []
    }
  }

  /**
   * Heuristic readability score from the average number of
   * whitespace-separated words per sentence (shorter sentences score higher).
   *
   * NOTE(review): unspaced CJK text counts as one word per sentence, so it
   * always lands in the top band; a CJK-aware tokenizer would be needed.
   *
   * @returns One of 1.0, 0.9, 0.7, 0.5, 0.3; 0.5 if the computation throws.
   */
  private async analyzeReadability(content: string): Promise<number> {
    try {
      const sentences = this.splitSentences(content)
      let wordCount = 0
      for (const sentence of sentences) {
        wordCount += sentence.split(/\s+/).length
      }
      // Empty input has no sentences; avoid dividing by zero.
      const avgWordsPerSentence = wordCount / Math.max(1, sentences.length)

      let score = 1.0
      if (avgWordsPerSentence > 30) score = 0.3
      else if (avgWordsPerSentence > 20) score = 0.5
      else if (avgWordsPerSentence > 15) score = 0.7
      else if (avgWordsPerSentence > 10) score = 0.9
      return score
    } catch (error) {
      console.error('可读性分析失败:', error)
      return 0.5
    }
  }

  /**
   * Heuristic credibility score: starts at 0.5 and adds 0.2 for a trusted
   * source domain and 0.1 each for a non-empty author, a body longer than 500
   * characters, and publication within the last day. Capped at 1.0.
   *
   * @returns The score, or 0.5 if the computation throws.
   */
  private async analyzeCredibility(content: ContentInfo): Promise<number> {
    try {
      let score = 0.5

      // Source reputation: the host must be the trusted domain itself or one
      // of its subdomains. A plain substring test would also accept look-alikes
      // such as "evilreuters.com" or "reuters.com.evil.net".
      if (content.sourceUrl) {
        const domain = this.extractDomain(content.sourceUrl)
        const credibleDomains = ['reuters.com', 'bbc.com', 'xinhuanet.com', 'nhk.or.jp']
        const trusted = credibleDomains.some(
          (d: string) => domain === d || domain.endsWith('.' + d)
        )
        if (trusted) {
          score += 0.2
        }
      }

      // Author attribution
      if (content.author && content.author.length > 0) {
        score += 0.1
      }

      // Content length
      if (content.content.length > 500) {
        score += 0.1
      }

      // Freshness; presumably publishedAt is an epoch-millisecond timestamp — verify against ContentInfo
      const daysSincePublished = (Date.now() - content.publishedAt) / (1000 * 60 * 60 * 24)
      if (daysSincePublished < 1) {
        score += 0.1
      }

      return Math.min(1.0, score)
    } catch (error) {
      console.error('可信度分析失败:', error)
      return 0.5
    }
  }

  /**
   * Generates a short summary via the chat model.
   *
   * @returns '' when OpenAI is not configured or the call fails; the content
   *          itself when it is shorter than 200 characters; otherwise the
   *          trimmed model reply.
   */
  private async generateSummary(content: string): Promise<string> {
    try {
      if (!this.config.openai) {
        return ''
      }
      if (content.length < 200) {
        return content
      }

      const prompt = `请为以下文本生成一个简洁的摘要100字以内\n\n${content.substring(0, 2000)}`
      const reply = await this.requestCompletion('你是一个专业的文本摘要助手。', prompt, 200, 0.3)
      return reply == null ? '' : reply.trim()
    } catch (error) {
      console.error('摘要生成失败:', error)
      return ''
    }
  }

  /**
   * Extracts keywords via the chat model.
   *
   * @returns The string items of the returned JSON array, or an empty list
   *          when the reply is not an array or the call fails.
   */
  private async extractKeywords(content: string): Promise<string[]> {
    try {
      const prompt = `请从以下文本中提取5-10个关键词\n\n${content.substring(0, 1500)}\n\n请以JSON数组格式返回关键词。`
      const reply = await this.requestCompletion('你是一个专业的关键词提取助手。', prompt, 200, 0.1)
      if (reply == null) {
        return []
      }

      const parsed = this.parseModelJson(reply)
      if (!Array.isArray(parsed)) {
        return []
      }
      // Keep only strings so the declared string[] contract holds.
      return parsed.filter((item: any) => typeof item === 'string')
    } catch (error) {
      console.error('关键词提取失败:', error)
      return []
    }
  }

  /**
   * Counts occurrences of `keyword` in already lower-cased `text`.
   *
   * Pure ASCII keywords must match as whole words, so "ai" does not match
   * inside "said" or "rain"; other keywords (e.g. CJK) match as substrings.
   */
  private countKeywordHits(text: string, keyword: string): number {
    const needle = keyword.toLowerCase()
    const escaped = needle.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
    const isAsciiWord = /^[a-z0-9]+$/.test(needle)
    const pattern = isAsciiWord
      ? new RegExp('\\b' + escaped + '\\b', 'g')
      : new RegExp(escaped, 'g')
    const hits = text.match(pattern)
    return hits ? hits.length : 0
  }

  /**
   * Keyword-based classification into a fixed set of categories.
   *
   * Confidence is `min(1, hits / 10)`.
   *
   * @returns Up to three categories with at least one hit, highest confidence
   *          first; an empty list if the computation throws.
   */
  private async classifyContent(content: ContentInfo): Promise<CategoryResult[]> {
    try {
      const categories = [
        { id: 'politics', name: '政治', keywords: ['政治', '政府', '选举', '政策', 'politics', 'government'] },
        { id: 'technology', name: '科技', keywords: ['科技', '技术', '人工智能', 'AI', 'technology', 'tech'] },
        { id: 'business', name: '商业', keywords: ['商业', '经济', '金融', '市场', 'business', 'economy'] },
        { id: 'sports', name: '体育', keywords: ['体育', '运动', '比赛', '足球', 'sports', 'game'] },
        { id: 'entertainment', name: '娱乐', keywords: ['娱乐', '电影', '音乐', '明星', 'entertainment', 'movie'] },
        { id: 'health', name: '健康', keywords: ['健康', '医疗', '疾病', '医院', 'health', 'medical'] }
      ]

      const text = `${content.title} ${content.content}`.toLowerCase()
      const results: CategoryResult[] = []

      for (const category of categories) {
        let score = 0
        for (const keyword of category.keywords) {
          score += this.countKeywordHits(text, keyword)
        }
        if (score > 0) {
          results.push({
            categoryId: category.id,
            categoryName: category.name,
            confidence: Math.min(1.0, score / 10),
            level: 1
          })
        }
      }

      return results.sort((a, b) => b.confidence - a.confidence).slice(0, 3)
    } catch (error) {
      console.error('内容分类失败:', error)
      return []
    }
  }

  /**
   * Naive key-phrase extraction: returns the first 10 sentences that consist
   * of two to five whitespace-separated words.
   */
  private extractKeyPhrases(content: string): string[] {
    try {
      const phrases: string[] = []
      for (const sentence of this.splitSentences(content)) {
        const wordCount = sentence.split(/\s+/).length
        if (wordCount >= 2 && wordCount <= 5) {
          phrases.push(sentence)
        }
      }
      return phrases.slice(0, 10)
    } catch (error) {
      console.error('关键短语提取失败:', error)
      return []
    }
  }

  /**
   * Extracts the lower-cased host name from an http(s) URL, without userinfo,
   * port, query or fragment.
   *
   * @returns The host, or '' when `url` is not an http(s) URL.
   */
  private extractDomain(url: string): string {
    try {
      const matches = url.trim().match(/^https?:\/\/([^\/?#]+)/i)
      if (!matches) {
        return ''
      }
      let host = matches[1]
      const at = host.lastIndexOf('@')
      if (at >= 0) {
        host = host.substring(at + 1)
      }
      return host.replace(/:\d+$/, '').toLowerCase()
    } catch (error) {
      return ''
    }
  }

  /**
   * Builds the cache key for a content id + options pair.
   *
   * Option keys are sorted (and undefined values dropped) so that logically
   * identical option objects map to the same key regardless of property order.
   */
  private generateContentCacheKey(contentId: string, options?: any): string {
    const source = options || {}
    const optionsStr = Object.keys(source)
      .sort()
      .filter((key: string) => source[key] !== undefined)
      .map((key: string) => `${key}=${JSON.stringify(source[key])}`)
      .join('&')
    return `content-${contentId}-${this.simpleHash(optionsStr)}`
  }

  /**
   * Simple 32-bit string hash (hash * 31 + charCode), rendered as the
   * base-36 representation of its absolute value.
   */
  private simpleHash(str: string): string {
    let hash = 0
    for (let i = 0; i < str.length; i++) {
      const char = str.charCodeAt(i)
      hash = ((hash << 5) - hash) + char
      hash = hash & hash // truncate to a 32-bit integer
    }
    return Math.abs(hash).toString(36)
  }
}