Initial commit of akmon project

This commit is contained in:
2026-01-20 08:04:15 +08:00
commit 77a2bab985
1309 changed files with 343305 additions and 0 deletions

View File

@@ -0,0 +1,563 @@
// AI内容分析服务
// filepath: h:\blews\akmon\uni_modules\ak-ai-news\services\ai-content-analysis-service.uts
import { AkReq } from '@/uni_modules/ak-req/index.uts'
import type {
ContentAnalysisResult,
EntityResult,
TopicResult,
CategoryResult,
AIProvider,
AIResponse,
AIServiceConfig,
ContentInfo
} from '../types/ai-types.uts'
/**
 * Content analysis service backed by an OpenAI-compatible chat-completion API.
 *
 * `analyzeContent` is the single public entry point. It combines model-backed
 * analyses (sentiment, entities, topics, summary, keywords) with local
 * heuristics (readability, credibility, keyword-based classification, key
 * phrases) into one `ContentAnalysisResult`.
 *
 * Every private analyzer catches its own errors and returns a neutral default,
 * so one failing analysis never fails the whole request.
 *
 * NOTE(review): results are cached per content id + options for the lifetime of
 * this instance, with no eviction, and a result built from fallback defaults
 * (e.g. after a network failure) is cached like any other.
 */
export class AIContentAnalysisService {
  private config: AIServiceConfig
  private req: AkReq
  private cache: Map<string, ContentAnalysisResult> = new Map()

  /**
   * @param config Service configuration; the `openai` section (apiKey, model,
   *               optional baseURL) is required for the model-backed analyses.
   */
  constructor(config: AIServiceConfig) {
    this.config = config
    this.req = new AkReq()
  }

  /**
   * Runs the full analysis pipeline for one piece of content.
   *
   * Each `include*` option defaults to enabled; pass `false` to skip that
   * analysis, in which case the matching result field gets its neutral default
   * (score 0 / 'neutral', 0.5, or an empty list). Summary, keywords, categories
   * and key phrases are always produced.
   *
   * @param content Content to analyse.
   * @param options Optional switches plus a `language` override.
   * @returns `success: true` with the analysis (served from cache with
   *          `processingTimeMs: 0` on a cache hit), or `success: false` with
   *          `errorCode: 'CONTENT_ANALYSIS_FAILED'` on an unexpected error.
   */
  async analyzeContent(
    content: ContentInfo,
    options?: {
      includeEntities?: boolean
      includeTopics?: boolean
      includeSentiment?: boolean
      includeReadability?: boolean
      includeCredibility?: boolean
      language?: string
    }
  ): Promise<AIResponse<ContentAnalysisResult>> {
    try {
      const startTime = Date.now()

      // Serve from cache when the same content/options pair was analysed before.
      const cacheKey = this.generateContentCacheKey(content.id, options)
      const cached = this.cache.get(cacheKey)
      if (cached) {
        return {
          success: true,
          data: cached,
          processingTimeMs: 0
        }
      }

      const body = content.content

      // Every analysis owns a FIXED slot. A disabled analysis resolves to null
      // instead of being omitted, so the indices used below stay valid whatever
      // combination of options is passed.
      const tasks: Promise<any>[] = [
        // 0: sentiment
        options?.includeSentiment !== false
          ? this.analyzeSentiment(body, content.title)
          : Promise.resolve(null),
        // 1: named entities
        options?.includeEntities !== false
          ? this.extractEntities(body)
          : Promise.resolve(null),
        // 2: topics
        options?.includeTopics !== false
          ? this.extractTopics(body)
          : Promise.resolve(null),
        // 3: readability
        options?.includeReadability !== false
          ? this.analyzeReadability(body)
          : Promise.resolve(null),
        // 4: credibility
        options?.includeCredibility !== false
          ? this.analyzeCredibility(content)
          : Promise.resolve(null),
        // 5-7: always computed; independent of the above, so run in parallel too.
        this.generateSummary(body),
        this.extractKeywords(body),
        this.classifyContent(content)
      ]

      const results = await Promise.all(tasks)
      const sentiment = results[0]
      const entities = results[1]
      const topics = results[2]
      const readability = results[3]
      const credibility = results[4]
      const summary = results[5]
      const keywords = results[6]
      const categories = results[7]

      const analysisResult: ContentAnalysisResult = {
        contentId: content.id,
        sentimentScore: sentiment?.score || 0,
        sentimentLabel: sentiment?.label || 'neutral',
        readabilityScore: readability || 0.5,
        credibilityScore: credibility || 0.5,
        toxicityScore: 0, // placeholder: toxicity detection is not implemented
        keywords: keywords || [],
        entities: entities || [],
        topics: topics || [],
        categories: categories || [],
        summary: summary || '',
        keyPhrases: this.extractKeyPhrases(body),
        language: options?.language || content.originalLanguage,
        processingTimeMs: Date.now() - startTime,
        provider: 'openai'
      }

      this.cache.set(cacheKey, analysisResult)

      return {
        success: true,
        data: analysisResult,
        processingTimeMs: analysisResult.processingTimeMs
      }
    } catch (error) {
      console.error('内容分析失败:', error)
      return {
        success: false,
        error: error instanceof Error ? error.message : '内容分析服务异常',
        errorCode: 'CONTENT_ANALYSIS_FAILED'
      }
    }
  }

  /**
   * Sends one system + user message pair to the configured chat-completion
   * endpoint (`baseURL`, defaulting to https://api.openai.com).
   *
   * @returns The text of the first choice, or `null` when the OpenAI config is
   *          missing, the request reports failure, or the response carries no
   *          textual choice. Transport or shape errors propagate to the caller.
   */
  private async requestChatCompletion(
    systemPrompt: string,
    userPrompt: string,
    maxTokens: number,
    temperature: number
  ): Promise<string | null> {
    const openaiConfig = this.config.openai
    if (!openaiConfig) {
      return null
    }

    const response = await this.req.post<any>({
      url: `${openaiConfig.baseURL || 'https://api.openai.com'}/v1/chat/completions`,
      headers: {
        'Authorization': `Bearer ${openaiConfig.apiKey}`,
        'Content-Type': 'application/json'
      },
      data: {
        model: openaiConfig.model,
        messages: [
          {
            role: 'system',
            content: systemPrompt
          },
          {
            role: 'user',
            content: userPrompt
          }
        ],
        max_tokens: maxTokens,
        temperature: temperature
      }
    })

    if (!response.success || !response.data?.choices?.[0]) {
      return null
    }
    const text = response.data.choices[0].message.content
    return typeof text === 'string' ? text : null
  }

  /**
   * Parses JSON returned by the model. Models frequently wrap JSON in a
   * Markdown code fence, so a surrounding fence is removed before parsing.
   *
   * @throws SyntaxError when the remaining text is not valid JSON.
   */
  private parseModelJson(raw: string): any {
    let text = raw.trim()
    const fenced = text.match(/^```[a-zA-Z]*\s*([\s\S]*?)\s*```$/)
    if (fenced) {
      text = fenced[1]
    }
    return JSON.parse(text)
  }

  /**
   * Coerces a model-supplied confidence into [0, 1]. `fallback` is used only
   * when the value is missing or not numeric, so a legitimate 0 is preserved.
   */
  private normalizeConfidence(value: any, fallback: number): number {
    const parsed = typeof value === 'number' ? value : parseFloat(value)
    if (!isFinite(parsed)) {
      return fallback
    }
    return Math.max(0, Math.min(1, parsed))
  }

  /**
   * Sentiment analysis via the model.
   *
   * @param content Body text; only the first 2000 characters of the combined
   *                title + body are sent.
   * @param title   Optional title, prepended to the body.
   * @returns Score clamped to [-1, 1] and a label; `{ score: 0, label: 'neutral' }`
   *          on any failure (missing config, API failure, unparsable reply).
   */
  private async analyzeSentiment(content: string, title?: string): Promise<{score: number, label: string}> {
    try {
      const openaiConfig = this.config.openai
      if (!openaiConfig) {
        throw new Error('OpenAI配置未找到')
      }

      const text = title ? `${title}\n\n${content}` : content
      const prompt = `请分析以下文本的情感倾向,返回一个-1到1之间的数值-1表示非常负面0表示中性1表示非常正面和对应的标签positive/negative/neutral\n文本:${text.substring(0, 2000)}\n请以JSON格式返回{"score": 数值, "label": "标签"}`

      const completion = await this.requestChatCompletion(
        '你是一个专业的文本情感分析助手。',
        prompt,
        100,
        0.1
      )
      if (completion === null) {
        throw new Error('情感分析API调用失败')
      }

      const result = this.parseModelJson(completion)
      return {
        score: Math.max(-1, Math.min(1, parseFloat(result.score) || 0)),
        label: result.label || 'neutral'
      }
    } catch (error) {
      console.error('情感分析失败:', error)
      return { score: 0, label: 'neutral' }
    }
  }

  /**
   * Named-entity recognition via the model (first 2000 characters only).
   *
   * @returns Recognised entities; an empty list when the config is missing,
   *          the call fails, or the reply is not a JSON array.
   */
  private async extractEntities(content: string): Promise<EntityResult[]> {
    try {
      const openaiConfig = this.config.openai
      if (!openaiConfig) {
        return []
      }

      const prompt = `请从以下文本中识别出人名、地名、机构名、日期、金额等实体。\n文本:${content.substring(0, 2000)}\n请以JSON数组格式返回每个实体包含text(实体文本)、type(类型person/location/organization/date/money/other)、confidence(置信度0-1)。`

      const completion = await this.requestChatCompletion(
        '你是一个专业的命名实体识别助手。',
        prompt,
        500,
        0.1
      )
      if (completion === null) {
        return []
      }

      const entities = this.parseModelJson(completion)
      if (!Array.isArray(entities)) {
        return []
      }
      return entities.map((entity: any) => ({
        text: entity.text || '',
        type: entity.type || 'other',
        confidence: this.normalizeConfidence(entity.confidence, 0.8),
        // Simplified: positions are not located in the source text; they only
        // span the entity's own length.
        startPosition: 0,
        endPosition: entity.text?.length || 0
      })) as EntityResult[]
    } catch (error) {
      console.error('实体识别失败:', error)
      return []
    }
  }

  /**
   * Topic extraction via the model (first 2000 characters only).
   *
   * @returns Topics with name, confidence and keywords; an empty list when the
   *          config is missing, the call fails, or the reply is not a JSON array.
   */
  private async extractTopics(content: string): Promise<TopicResult[]> {
    try {
      const openaiConfig = this.config.openai
      if (!openaiConfig) {
        return []
      }

      const prompt = `请分析以下文本的主要主题提取3-5个核心主题。\n文本:${content.substring(0, 2000)}\n请以JSON数组格式返回每个主题包含name(主题名称)、confidence(置信度0-1)、keywords(相关关键词数组)。`

      const completion = await this.requestChatCompletion(
        '你是一个专业的文本主题分析助手。',
        prompt,
        400,
        0.2
      )
      if (completion === null) {
        return []
      }

      const topics = this.parseModelJson(completion)
      if (!Array.isArray(topics)) {
        return []
      }
      return topics.map((topic: any) => ({
        name: topic.name || '',
        confidence: this.normalizeConfidence(topic.confidence, 0.8),
        keywords: Array.isArray(topic.keywords) ? topic.keywords : []
      })) as TopicResult[]
    } catch (error) {
      console.error('主题提取失败:', error)
      return []
    }
  }

  /**
   * Heuristic readability score derived from the average number of
   * whitespace-separated words per sentence: longer sentences score lower.
   *
   * Empty sentence segments (such as the one after trailing punctuation) and
   * empty word tokens are ignored so they do not distort the average.
   *
   * @returns 1.0, 0.9, 0.7, 0.5 or 0.3; 0.5 if the computation throws.
   */
  private async analyzeReadability(content: string): Promise<number> {
    try {
      // Sentence terminators: ASCII . ! ? plus CJK full stop and the
      // full-width exclamation and question marks.
      const sentenceCount = Math.max(
        1,
        content
          .split(/[.!?\u3002\uFF01\uFF1F]/)
          .filter(segment => segment.trim().length > 0).length
      )
      const wordCount = content.split(/\s+/).filter(token => token.length > 0).length
      const avgWordsPerSentence = wordCount / sentenceCount

      let score = 1.0
      if (avgWordsPerSentence > 30) score = 0.3
      else if (avgWordsPerSentence > 20) score = 0.5
      else if (avgWordsPerSentence > 15) score = 0.7
      else if (avgWordsPerSentence > 10) score = 0.9

      return score
    } catch (error) {
      console.error('可读性分析失败:', error)
      return 0.5
    }
  }

  /**
   * Heuristic credibility score. Starts at 0.5 and adds:
   * +0.2 for a whitelisted source host, +0.1 for a non-empty author,
   * +0.1 for a body longer than 500 characters, +0.1 when published less than
   * one day ago. The total is capped at 1.0.
   *
   * @returns The score, or 0.5 if the computation throws.
   */
  private async analyzeCredibility(content: ContentInfo): Promise<number> {
    try {
      let score = 0.5 // base score

      // Source reputation. The host must BE a trusted domain or a subdomain of
      // one; a substring test would also accept e.g. "reuters.com.evil.example".
      if (content.sourceUrl) {
        const domain = this.extractDomain(content.sourceUrl)
        const credibleDomains = ['reuters.com', 'bbc.com', 'xinhuanet.com', 'nhk.or.jp']
        if (credibleDomains.some(d => domain === d || domain.endsWith('.' + d))) {
          score += 0.2
        }
      }

      // Author information present.
      if (content.author && content.author.length > 0) {
        score += 0.1
      }

      // Content length as a rough proxy for substance.
      if (content.content.length > 500) {
        score += 0.1
      }

      // Freshness. NOTE(review): assumes publishedAt is an epoch-milliseconds
      // number — confirm against the ContentInfo type.
      const daysSincePublished = (Date.now() - content.publishedAt) / (1000 * 60 * 60 * 24)
      if (daysSincePublished < 1) {
        score += 0.1
      }

      return Math.min(1.0, score)
    } catch (error) {
      console.error('可信度分析失败:', error)
      return 0.5
    }
  }

  /**
   * Summary generation via the model (first 2000 characters only).
   *
   * @returns '' when the OpenAI config is missing or the call fails; the
   *          content itself when it is shorter than 200 characters; otherwise
   *          the trimmed model reply.
   */
  private async generateSummary(content: string): Promise<string> {
    try {
      const openaiConfig = this.config.openai
      if (!openaiConfig) {
        return ''
      }

      // Short texts are their own summary; no API call needed.
      if (content.length < 200) {
        return content
      }

      const prompt = `请为以下文本生成一个简洁的摘要100字以内\n\n${content.substring(0, 2000)}`

      const completion = await this.requestChatCompletion(
        '你是一个专业的文本摘要助手。',
        prompt,
        200,
        0.3
      )
      if (completion === null) {
        return ''
      }

      return completion.trim()
    } catch (error) {
      console.error('摘要生成失败:', error)
      return ''
    }
  }

  /**
   * Keyword extraction via the model (first 1500 characters only).
   *
   * @returns The string elements of the returned JSON array; an empty list
   *          when the config is missing, the call fails, or the reply is not a
   *          JSON array.
   */
  private async extractKeywords(content: string): Promise<string[]> {
    try {
      const openaiConfig = this.config.openai
      if (!openaiConfig) {
        return []
      }

      const prompt = `请从以下文本中提取5-10个关键词\n\n${content.substring(0, 1500)}\n\n请以JSON数组格式返回关键词。`

      const completion = await this.requestChatCompletion(
        '你是一个专业的关键词提取助手。',
        prompt,
        200,
        0.1
      )
      if (completion === null) {
        return []
      }

      const keywords = this.parseModelJson(completion)
      if (!Array.isArray(keywords)) {
        return []
      }
      // Keep only real strings so the declared string[] contract holds.
      return keywords.filter((keyword: any) => typeof keyword === 'string')
    } catch (error) {
      console.error('关键词提取失败:', error)
      return []
    }
  }

  /**
   * Local keyword-based classification: counts case-insensitive occurrences of
   * each category's keywords in title + body.
   *
   * @returns Up to three categories with at least one hit, ordered by
   *          confidence (hits / 10, capped at 1.0); an empty list on error.
   */
  private async classifyContent(content: ContentInfo): Promise<CategoryResult[]> {
    try {
      // Predefined categories and their trigger keywords.
      const categories = [
        { id: 'politics', name: '政治', keywords: ['政治', '政府', '选举', '政策', 'politics', 'government'] },
        { id: 'technology', name: '科技', keywords: ['科技', '技术', '人工智能', 'AI', 'technology', 'tech'] },
        { id: 'business', name: '商业', keywords: ['商业', '经济', '金融', '市场', 'business', 'economy'] },
        { id: 'sports', name: '体育', keywords: ['体育', '运动', '比赛', '足球', 'sports', 'game'] },
        { id: 'entertainment', name: '娱乐', keywords: ['娱乐', '电影', '音乐', '明星', 'entertainment', 'movie'] },
        { id: 'health', name: '健康', keywords: ['健康', '医疗', '疾病', '医院', 'health', 'medical'] }
      ]

      const text = `${content.title} ${content.content}`.toLowerCase()
      const results: CategoryResult[] = []

      for (const category of categories) {
        let score = 0
        for (const keyword of category.keywords) {
          // Literal (non-overlapping) occurrence count. Splitting avoids building
          // a RegExp from raw keyword text, which would misbehave on keywords
          // containing regex metacharacters.
          score += text.split(keyword.toLowerCase()).length - 1
        }

        if (score > 0) {
          results.push({
            categoryId: category.id,
            categoryName: category.name,
            confidence: Math.min(1.0, score / 10),
            level: 1
          })
        }
      }

      return results.sort((a, b) => b.confidence - a.confidence).slice(0, 3)
    } catch (error) {
      console.error('内容分类失败:', error)
      return []
    }
  }

  /**
   * Naive key-phrase extraction: returns up to ten sentences that consist of
   * two to five whitespace-separated words.
   *
   * @returns The trimmed short sentences in document order; [] on error.
   */
  private extractKeyPhrases(content: string): string[] {
    try {
      // Same sentence terminators as analyzeReadability (ASCII + CJK).
      const sentences = content.split(/[.!?\u3002\uFF01\uFF1F]/)
      const phrases: string[] = []

      for (const sentence of sentences) {
        const trimmed = sentence.trim()
        const wordCount = trimmed.split(/\s+/).length
        if (wordCount >= 2 && wordCount <= 5) {
          phrases.push(trimmed)
        }
      }

      return phrases.slice(0, 10)
    } catch (error) {
      console.error('关键短语提取失败:', error)
      return []
    }
  }

  /**
   * Extracts the lower-cased host name from an http(s) URL, without userinfo,
   * port, query or fragment.
   *
   * @returns The host name, or '' when no http(s) authority is found.
   */
  private extractDomain(url: string): string {
    try {
      const matches = url.match(/https?:\/\/([^\/?#]+)/i)
      if (!matches) {
        return ''
      }
      let host = matches[1]
      // Drop "user:password@" so it cannot masquerade as the host.
      const atIndex = host.lastIndexOf('@')
      if (atIndex >= 0) {
        host = host.substring(atIndex + 1)
      }
      // Drop a trailing ":port".
      host = host.replace(/:\d*$/, '')
      return host.toLowerCase()
    } catch (error) {
      return ''
    }
  }

  /**
   * Builds the cache key from the content id and a hash of the serialised
   * options (missing options serialise as `{}`).
   */
  private generateContentCacheKey(contentId: string, options?: any): string {
    const optionsStr = JSON.stringify(options || {})
    return `content-${contentId}-${this.simpleHash(optionsStr)}`
  }

  /**
   * Small non-cryptographic string hash (hash * 31 + charCode, kept in 32-bit
   * integer range), returned as the base-36 text of its absolute value.
   */
  private simpleHash(str: string): string {
    let hash = 0
    for (let i = 0; i < str.length; i++) {
      const char = str.charCodeAt(i)
      hash = ((hash << 5) - hash) + char
      hash = hash & hash // force back into 32-bit integer range
    }
    return Math.abs(hash).toString(36)
  }
}