// AI content analysis service
// filepath: h:\blews\akmon\uni_modules\ak-ai-news\services\ai-content-analysis-service.uts

import { AkReq } from '@/uni_modules/ak-req/index.uts'
|
||
import type {
|
||
ContentAnalysisResult,
|
||
EntityResult,
|
||
TopicResult,
|
||
CategoryResult,
|
||
AIProvider,
|
||
AIResponse,
|
||
AIServiceConfig,
|
||
ContentInfo
|
||
} from '../types/ai-types.uts'
|
||
|
||
/**
 * Content analysis service for news articles.
 *
 * Combines OpenAI chat-completion calls (sentiment, entities, topics, summary,
 * keywords) with local heuristics (readability, credibility, keyword-based
 * classification, key phrases). Successful results are kept in an in-memory
 * map keyed by content id and the effective option set.
 */
export class AIContentAnalysisService {
  /** Base URL used when the OpenAI configuration does not provide `baseURL`. */
  private static readonly DEFAULT_OPENAI_BASE_URL = 'https://api.openai.com'

  /** Sentence terminators: ASCII `.`, `!`, `?` plus the CJK full stop and full-width `!` / `?`. */
  private static readonly SENTENCE_BREAK = /[.!?。\uFF01\uFF1F]/

  /** Registrable domains whose articles receive the source-credibility bonus. */
  private static readonly CREDIBLE_DOMAINS = ['reuters.com', 'bbc.com', 'xinhuanet.com', 'nhk.or.jp']

  /**
   * Compiles one classification keyword into a global, lower-case matcher.
   *
   * Pure ASCII keywords are matched as whole words (with an optional plural
   * `s`) so that e.g. `ai` does not match inside "said" or "main". Other
   * keywords (CJK) are matched as plain substrings because `\b` has no
   * meaning between CJK characters.
   */
  private static compileKeyword(keyword: string): RegExp {
    const lowered = keyword.toLowerCase()
    if (/^[a-z0-9]+$/.test(lowered)) {
      return new RegExp(`\\b${lowered}s?\\b`, 'g')
    }
    return new RegExp(lowered.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'g')
  }

  /** Predefined categories with their keyword matchers, compiled once. */
  private static readonly CATEGORY_RULES = [
    { id: 'politics', name: '政治', keywords: ['政治', '政府', '选举', '政策', 'politics', 'government'] },
    { id: 'technology', name: '科技', keywords: ['科技', '技术', '人工智能', 'AI', 'technology', 'tech'] },
    { id: 'business', name: '商业', keywords: ['商业', '经济', '金融', '市场', 'business', 'economy'] },
    { id: 'sports', name: '体育', keywords: ['体育', '运动', '比赛', '足球', 'sports', 'game'] },
    { id: 'entertainment', name: '娱乐', keywords: ['娱乐', '电影', '音乐', '明星', 'entertainment', 'movie'] },
    { id: 'health', name: '健康', keywords: ['健康', '医疗', '疾病', '医院', 'health', 'medical'] }
  ].map(rule => ({
    id: rule.id,
    name: rule.name,
    patterns: rule.keywords.map(keyword => AIContentAnalysisService.compileKeyword(keyword))
  }))

  private config: AIServiceConfig
  private req: AkReq
  private cache: Map<string, ContentAnalysisResult> = new Map()

  /**
   * @param config Service configuration; `config.openai` is read for every
   *   model-backed analysis.
   */
  constructor(config: AIServiceConfig) {
    this.config = config
    this.req = new AkReq()
  }

  /**
   * Runs the full analysis pipeline for one piece of content.
   *
   * Every `include*` option defaults to enabled; only an explicit `false`
   * disables that analysis, in which case its field falls back to a neutral
   * default (score 0 / 'neutral', 0.5, or an empty list).
   *
   * @param content Content to analyse.
   * @param options Optional switches plus a language override.
   * @returns A success response carrying the analysis, or a failure response
   *   with `errorCode: 'CONTENT_ANALYSIS_FAILED'`. This method does not throw.
   */
  async analyzeContent(
    content: ContentInfo,
    options?: {
      includeEntities?: boolean
      includeTopics?: boolean
      includeSentiment?: boolean
      includeReadability?: boolean
      includeCredibility?: boolean
      language?: string
    }
  ): Promise<AIResponse<ContentAnalysisResult>> {
    try {
      const startTime = Date.now()

      // Serve from cache when the same content/options pair was analysed before.
      const cacheKey = this.generateContentCacheKey(content.id, options)
      const cached = this.cache.get(cacheKey)
      if (cached) {
        return {
          success: true,
          data: cached,
          processingTimeMs: 0
        }
      }

      // Each analysis occupies a fixed slot, so disabling one can never shift
      // another analysis' result into the wrong field. All tasks are
      // independent and therefore run concurrently.
      const [
        sentiment,
        entities,
        topics,
        readability,
        credibility,
        summary,
        keywords,
        categories
      ] = await Promise.all([
        options?.includeSentiment !== false ? this.analyzeSentiment(content.content, content.title) : null,
        options?.includeEntities !== false ? this.extractEntities(content.content) : null,
        options?.includeTopics !== false ? this.extractTopics(content.content) : null,
        options?.includeReadability !== false ? this.analyzeReadability(content.content) : null,
        options?.includeCredibility !== false ? this.analyzeCredibility(content) : null,
        this.generateSummary(content.content),
        this.extractKeywords(content.content),
        this.classifyContent(content)
      ])

      const analysisResult: ContentAnalysisResult = {
        contentId: content.id,
        sentimentScore: sentiment?.score ?? 0,
        sentimentLabel: sentiment?.label ?? 'neutral',
        readabilityScore: readability ?? 0.5,
        credibilityScore: credibility ?? 0.5,
        toxicityScore: 0, // toxicity detection is not implemented yet
        keywords: keywords || [],
        entities: entities ?? [],
        topics: topics ?? [],
        categories: categories || [],
        summary: summary || '',
        keyPhrases: this.extractKeyPhrases(content.content),
        language: options?.language || content.originalLanguage,
        processingTimeMs: Date.now() - startTime,
        provider: 'openai'
      }

      this.cache.set(cacheKey, analysisResult)

      return {
        success: true,
        data: analysisResult,
        processingTimeMs: analysisResult.processingTimeMs
      }
    } catch (error) {
      console.error('内容分析失败:', error)
      return {
        success: false,
        error: error instanceof Error ? error.message : '内容分析服务异常',
        errorCode: 'CONTENT_ANALYSIS_FAILED'
      }
    }
  }

  /**
   * Sends one system + user prompt pair to the chat-completions endpoint.
   *
   * @returns The text of the first choice, or `null` when the OpenAI
   *   configuration is missing, the request reports failure, or the response
   *   carries no textual first choice.
   * @throws Whatever `this.req.post` rejects with; callers wrap this in their
   *   own try/catch.
   */
  private async requestChatCompletion(
    systemPrompt: string,
    userPrompt: string,
    maxTokens: number,
    temperature: number
  ): Promise<string | null> {
    const openaiConfig = this.config.openai
    if (!openaiConfig) {
      return null
    }

    const response = await this.req.post<any>({
      url: `${openaiConfig.baseURL || AIContentAnalysisService.DEFAULT_OPENAI_BASE_URL}/v1/chat/completions`,
      headers: {
        'Authorization': `Bearer ${openaiConfig.apiKey}`,
        'Content-Type': 'application/json'
      },
      data: {
        model: openaiConfig.model,
        messages: [
          { role: 'system', content: systemPrompt },
          { role: 'user', content: userPrompt }
        ],
        max_tokens: maxTokens,
        temperature: temperature
      }
    })

    if (!response.success || !response.data?.choices?.[0]) {
      return null
    }

    const text = response.data.choices[0].message?.content
    return typeof text === 'string' ? text : null
  }

  /**
   * Parses JSON produced by the model, tolerating a surrounding Markdown code
   * fence (with or without a `json` tag), which chat models frequently add.
   *
   * @throws SyntaxError when the payload is not valid JSON.
   */
  private parseModelJson(raw: string): any {
    const trimmed = raw.trim()
    const fenced = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i)
    return JSON.parse(fenced ? fenced[1] : trimmed)
  }

  /**
   * Sentiment analysis via the chat model.
   *
   * @param content Body text; only the first 2000 characters of the combined
   *   title + body are sent.
   * @param title Optional title, prepended to the body when present.
   * @returns Score clamped to [-1, 1] and the model's label. Any failure is
   *   logged and yields `{ score: 0, label: 'neutral' }`.
   */
  private async analyzeSentiment(content: string, title?: string): Promise<{score: number, label: string}> {
    try {
      if (!this.config.openai) {
        throw new Error('OpenAI配置未找到')
      }

      const text = title ? `${title}\n\n${content}` : content
      const prompt = `请分析以下文本的情感倾向,返回一个-1到1之间的数值(-1表示非常负面,0表示中性,1表示非常正面)和对应的标签(positive/negative/neutral)。\n\n文本:${text.substring(0, 2000)}\n\n请以JSON格式返回:{"score": 数值, "label": "标签"}`

      const reply = await this.requestChatCompletion('你是一个专业的文本情感分析助手。', prompt, 100, 0.1)
      if (reply === null) {
        throw new Error('情感分析API调用失败')
      }

      const result = this.parseModelJson(reply)
      return {
        score: Math.max(-1, Math.min(1, parseFloat(result.score) || 0)),
        label: result.label || 'neutral'
      }
    } catch (error) {
      console.error('情感分析失败:', error)
      return { score: 0, label: 'neutral' }
    }
  }

  /**
   * Named-entity recognition via the chat model.
   *
   * `startPosition` / `endPosition` are placeholders (0 and the entity text
   * length); the entity is not located inside the source text.
   *
   * @returns Recognised entities, or an empty list when the configuration is
   *   missing, the call fails, or the reply is not a JSON array.
   */
  private async extractEntities(content: string): Promise<EntityResult[]> {
    try {
      const prompt = `请从以下文本中识别出人名、地名、机构名、日期、金额等实体。\n\n文本:${content.substring(0, 2000)}\n\n请以JSON数组格式返回,每个实体包含:text(实体文本)、type(类型:person/location/organization/date/money/other)、confidence(置信度0-1)。`

      const reply = await this.requestChatCompletion('你是一个专业的命名实体识别助手。', prompt, 500, 0.1)
      if (reply === null) {
        return []
      }

      const entities = this.parseModelJson(reply)
      if (!Array.isArray(entities)) {
        return []
      }

      return entities.map((entity: any) => ({
        text: entity.text || '',
        type: entity.type || 'other',
        // Keep a reported confidence of 0; only a missing value defaults to 0.8.
        confidence: typeof entity.confidence === 'number' ? entity.confidence : 0.8,
        startPosition: 0,
        endPosition: entity.text?.length || 0
      })) as EntityResult[]
    } catch (error) {
      console.error('实体识别失败:', error)
      return []
    }
  }

  /**
   * Topic extraction via the chat model (asks for 3-5 core topics).
   *
   * @returns Extracted topics, or an empty list when the configuration is
   *   missing, the call fails, or the reply is not a JSON array.
   */
  private async extractTopics(content: string): Promise<TopicResult[]> {
    try {
      const prompt = `请分析以下文本的主要主题,提取3-5个核心主题。\n\n文本:${content.substring(0, 2000)}\n\n请以JSON数组格式返回,每个主题包含:name(主题名称)、confidence(置信度0-1)、keywords(相关关键词数组)。`

      const reply = await this.requestChatCompletion('你是一个专业的文本主题分析助手。', prompt, 400, 0.2)
      if (reply === null) {
        return []
      }

      const topics = this.parseModelJson(reply)
      if (!Array.isArray(topics)) {
        return []
      }

      return topics.map((topic: any) => ({
        name: topic.name || '',
        confidence: typeof topic.confidence === 'number' ? topic.confidence : 0.8,
        keywords: Array.isArray(topic.keywords) ? topic.keywords : []
      })) as TopicResult[]
    } catch (error) {
      console.error('主题提取失败:', error)
      return []
    }
  }

  /**
   * Heuristic readability score derived from the average number of
   * whitespace-separated words per sentence.
   *
   * Empty sentence segments (e.g. the one after the final terminator) and
   * empty tokens are ignored. Text without whitespace between words is
   * counted as one word per whitespace-delimited run.
   *
   * @returns 1.0 (short sentences) down to 0.3 (more than 30 words per
   *   sentence); 0.5 if the computation throws.
   */
  private async analyzeReadability(content: string): Promise<number> {
    try {
      const sentenceCount = content
        .split(AIContentAnalysisService.SENTENCE_BREAK)
        .filter(segment => segment.trim().length > 0).length
      const wordCount = content.split(/\s+/).filter(token => token.length > 0).length
      // Text without any terminator still counts as one sentence.
      const avgWordsPerSentence = wordCount / Math.max(1, sentenceCount)

      if (avgWordsPerSentence > 30) return 0.3
      if (avgWordsPerSentence > 20) return 0.5
      if (avgWordsPerSentence > 15) return 0.7
      if (avgWordsPerSentence > 10) return 0.9
      return 1.0
    } catch (error) {
      console.error('可读性分析失败:', error)
      return 0.5
    }
  }

  /**
   * Heuristic credibility score: starts at 0.5 and adds a bonus for a trusted
   * source domain (+0.2), a non-empty author (+0.1), a body longer than 500
   * characters (+0.1) and publication less than one day ago (+0.1).
   *
   * The source bonus requires the host to be a trusted domain or one of its
   * subdomains; look-alike hosts such as `bbc.com.evil.org` do not qualify.
   *
   * @returns Score capped at 1.0; 0.5 if the computation throws.
   */
  private async analyzeCredibility(content: ContentInfo): Promise<number> {
    try {
      let score = 0.5

      if (content.sourceUrl) {
        const host = this.extractDomain(content.sourceUrl)
        const trusted = AIContentAnalysisService.CREDIBLE_DOMAINS.some(
          domain => host === domain || host.endsWith(`.${domain}`)
        )
        if (trusted) {
          score += 0.2
        }
      }

      if (content.author && content.author.length > 0) {
        score += 0.1
      }

      if (content.content.length > 500) {
        score += 0.1
      }

      // publishedAt is subtracted from Date.now(), i.e. treated as epoch milliseconds.
      const daysSincePublished = (Date.now() - content.publishedAt) / (1000 * 60 * 60 * 24)
      if (daysSincePublished < 1) {
        score += 0.1
      }

      return Math.min(1.0, score)
    } catch (error) {
      console.error('可信度分析失败:', error)
      return 0.5
    }
  }

  /**
   * Generates a short summary via the chat model.
   *
   * @returns '' when the OpenAI configuration is missing or the call fails;
   *   the content itself when it is shorter than 200 characters; otherwise the
   *   trimmed model reply.
   */
  private async generateSummary(content: string): Promise<string> {
    try {
      if (!this.config.openai) {
        return ''
      }

      // Short texts are already their own summary; skip the API call.
      if (content.length < 200) {
        return content
      }

      const prompt = `请为以下文本生成一个简洁的摘要(100字以内):\n\n${content.substring(0, 2000)}`
      const reply = await this.requestChatCompletion('你是一个专业的文本摘要助手。', prompt, 200, 0.3)
      return reply === null ? '' : reply.trim()
    } catch (error) {
      console.error('摘要生成失败:', error)
      return ''
    }
  }

  /**
   * Extracts 5-10 keywords via the chat model.
   *
   * @returns The string entries of the returned JSON array, or an empty list
   *   when the configuration is missing, the call fails, or the reply is not
   *   a JSON array.
   */
  private async extractKeywords(content: string): Promise<string[]> {
    try {
      const prompt = `请从以下文本中提取5-10个关键词:\n\n${content.substring(0, 1500)}\n\n请以JSON数组格式返回关键词。`

      const reply = await this.requestChatCompletion('你是一个专业的关键词提取助手。', prompt, 200, 0.1)
      if (reply === null) {
        return []
      }

      const keywords = this.parseModelJson(reply)
      if (!Array.isArray(keywords)) {
        return []
      }
      return keywords.filter((keyword: unknown): keyword is string => typeof keyword === 'string')
    } catch (error) {
      console.error('关键词提取失败:', error)
      return []
    }
  }

  /**
   * Keyword-based classification against the predefined categories.
   *
   * The score of a category is the number of keyword hits in the lower-cased
   * title + body; confidence is `min(1, score / 10)`.
   *
   * @returns Up to three matching categories, highest confidence first; an
   *   empty list when nothing matches or the computation throws.
   */
  private async classifyContent(content: ContentInfo): Promise<CategoryResult[]> {
    try {
      const text = `${content.title} ${content.content}`.toLowerCase()
      const results: CategoryResult[] = []

      for (const rule of AIContentAnalysisService.CATEGORY_RULES) {
        let score = 0
        for (const pattern of rule.patterns) {
          // String.prototype.match resets lastIndex for global patterns, so
          // the shared compiled matchers are safe to reuse across calls.
          score += (text.match(pattern) || []).length
        }

        if (score > 0) {
          results.push({
            categoryId: rule.id,
            categoryName: rule.name,
            confidence: Math.min(1.0, score / 10),
            level: 1
          })
        }
      }

      return results.sort((a, b) => b.confidence - a.confidence).slice(0, 3)
    } catch (error) {
      console.error('内容分类失败:', error)
      return []
    }
  }

  /**
   * Simple key-phrase extraction: keeps sentences that consist of two to five
   * whitespace-separated words.
   *
   * @returns At most ten trimmed sentences, in document order; an empty list
   *   if the computation throws.
   */
  private extractKeyPhrases(content: string): string[] {
    try {
      const phrases: string[] = []

      for (const sentence of content.split(AIContentAnalysisService.SENTENCE_BREAK)) {
        const trimmed = sentence.trim()
        const wordCount = trimmed.split(/\s+/).length
        if (wordCount >= 2 && wordCount <= 5) {
          phrases.push(trimmed)
        }
      }

      return phrases.slice(0, 10)
    } catch (error) {
      console.error('关键短语提取失败:', error)
      return []
    }
  }

  /**
   * Extracts the lower-cased host name from an http(s) URL, without userinfo
   * or port.
   *
   * @returns The host name, or '' when no http(s) URL is found.
   */
  private extractDomain(url: string): string {
    try {
      const matches = url.match(/https?:\/\/([^\/?#]+)/i)
      if (!matches) {
        return ''
      }
      return matches[1]
        .replace(/^.*@/, '') // drop "user:password@"
        .replace(/:\d*$/, '') // drop ":port"
        .toLowerCase()
    } catch (error) {
      return ''
    }
  }

  /**
   * Builds the cache key for a content id and option set.
   *
   * Options are normalised to their effective values first, so key order and
   * explicitly passing a default (e.g. `includeSentiment: true`) do not
   * produce distinct keys for the same analysis.
   */
  private generateContentCacheKey(contentId: string, options?: any): string {
    const effective = {
      includeEntities: options?.includeEntities !== false,
      includeTopics: options?.includeTopics !== false,
      includeSentiment: options?.includeSentiment !== false,
      includeReadability: options?.includeReadability !== false,
      includeCredibility: options?.includeCredibility !== false,
      language: options?.language || ''
    }
    return `content-${contentId}-${this.simpleHash(JSON.stringify(effective))}`
  }

  /**
   * Simple 32-bit string hash (hash * 31 + char code), returned as the
   * base-36 representation of its absolute value.
   */
  private simpleHash(str: string): string {
    let hash = 0
    for (let i = 0; i < str.length; i++) {
      // `| 0` truncates to a signed 32-bit integer after every step.
      hash = (((hash << 5) - hash) + str.charCodeAt(i)) | 0
    }
    return Math.abs(hash).toString(36)
  }
}
|