Initial commit of akmon project
This commit is contained in:
806
uni_modules/ak-ai-news/services/AIContentAnalysisService.uts
Normal file
806
uni_modules/ak-ai-news/services/AIContentAnalysisService.uts
Normal file
@@ -0,0 +1,806 @@
|
||||
// AI Content Analysis Service - Content classification, sentiment analysis, and quality assessment
|
||||
|
||||
import {
|
||||
ContentAnalysisResult,
|
||||
EntityResult,
|
||||
TopicResult,
|
||||
CategoryResult,
|
||||
AIProvider,
|
||||
AIResponse,
|
||||
AIServiceConfig,
|
||||
BatchProcessingOptions,
|
||||
AIServiceError,
|
||||
ContentInfo
|
||||
} from '../types/ai-types.uts'
|
||||
|
||||
// 分析类型枚举
|
||||
/** The kinds of analysis a caller can request for a piece of content. */
type AnalysisType =
  | 'sentiment'
  | 'entities'
  | 'topics'
  | 'categories'
  | 'readability'
  | 'credibility'
  | 'toxicity'
  | 'summary'
  | 'keywords'
|
||||
|
||||
/** Options accepted by the content analysis entry points. */
type AnalysisOptions = {
  /** Analyses to run; analyses gated on this list return an empty/neutral value when omitted. */
  types: AnalysisType[]
  /** Provider to use; when omitted the service picks one itself. */
  provider?: AIProvider
  model?: string
  includeScores?: boolean
  detailedResults?: boolean
  /** Language tag of the content; when omitted the service detects it. */
  language?: string
  customCategories?: string[]
}
|
||||
|
||||
/** Result of a content quality assessment: one score per dimension plus their aggregate. */
type QualityAssessment = {
  overallScore: number
  factualAccuracy: number
  sourceReliability: number
  writingQuality: number
  objectivity: number
  completeness: number
  timeliness: number
  relevance: number
}
|
||||
|
||||
/** A single extracted keyword with its occurrence count, importance and word class. */
type KeywordResult = {
  keyword: string
  frequency: number
  importance: number
  type: 'noun' | 'verb' | 'adjective' | 'entity' | 'concept'
}
|
||||
|
||||
/** Running usage statistics kept by the analysis service. */
type AnalysisStats = {
  totalAnalyses: number
  successCount: number
  errorCount: number
  avgProcessingTimeMs: number
  totalCost: number
  /** Number of analyses recorded per provider. */
  byProvider: Record<AIProvider, number>
  /** Number of times each analysis type was requested. */
  byAnalysisType: Record<AnalysisType, number>
}
|
||||
|
||||
/**
 * AI content analysis service.
 *
 * Offers sentiment analysis, entity recognition, topic extraction, content
 * classification, readability / credibility / toxicity scoring, summarisation,
 * keyword extraction and a multi-dimensional quality assessment.
 *
 * The provider-specific methods (`...WithOpenAI`, `...WithGoogle`, `...WithBaidu`)
 * in this class are mocks: they wait for a random delay and then run local
 * heuristics or return a random score.
 */
export class AIContentAnalysisService {
  private config: AIServiceConfig

  /** Running counters; callers receive a detached copy through getStatistics(). */
  private stats: AnalysisStats = {
    totalAnalyses: 0,
    successCount: 0,
    errorCount: 0,
    avgProcessingTimeMs: 0,
    totalCost: 0,
    byProvider: {} as Record<AIProvider, number>,
    byAnalysisType: {} as Record<AnalysisType, number>
  }

  // Predefined news categories with the keywords used for keyword-based matching.
  private readonly NEWS_CATEGORIES = [
    { id: 'politics', name: '政治', keywords: ['政府', '政策', '选举', '法律', '议会', '总统', '部长'] },
    { id: 'economy', name: '经济', keywords: ['经济', '金融', '股市', '投资', '银行', '贸易', 'GDP'] },
    { id: 'technology', name: '科技', keywords: ['科技', '人工智能', '互联网', '软件', '硬件', '创新', '数字化'] },
    { id: 'sports', name: '体育', keywords: ['体育', '足球', '篮球', '奥运', '比赛', '运动员', '锦标赛'] },
    { id: 'entertainment', name: '娱乐', keywords: ['娱乐', '电影', '音乐', '明星', '综艺', '演出', '艺术'] },
    { id: 'health', name: '健康', keywords: ['健康', '医疗', '病毒', '疫苗', '医院', '药物', '疾病'] },
    { id: 'education', name: '教育', keywords: ['教育', '学校', '大学', '学生', '教师', '考试', '学习'] },
    { id: 'environment', name: '环境', keywords: ['环境', '气候', '污染', '环保', '生态', '绿色', '可持续'] },
    { id: 'international', name: '国际', keywords: ['国际', '外交', '战争', '和平', '联合国', '条约', '全球'] },
    { id: 'social', name: '社会', keywords: ['社会', '社区', '公益', '慈善', '志愿者', '文化', '传统'] }
  ]

  constructor(config: AIServiceConfig) {
    this.config = config
    this.initializeStats()
  }

  /**
   * Analyse a single piece of content.
   *
   * Sentiment, entities, topics, categories, summary and keywords are only
   * computed when listed in `options.types`; readability, credibility and
   * toxicity are always computed. An individual analysis that rejects falls
   * back to a neutral default instead of failing the whole call.
   *
   * @param content Text to analyse
   * @param options Analysis options
   * @returns A response whose `success` flag tells whether `data` or `error` is set
   */
  async analyzeContent(
    content: string,
    options: AnalysisOptions = {
      types: ['sentiment', 'entities', 'topics', 'categories', 'readability', 'summary', 'keywords']
    }
  ): Promise<AIResponse<ContentAnalysisResult>> {
    try {
      this.stats.totalAnalyses++
      const startTime = Date.now()

      // Pick the provider
      const provider = options.provider || this.selectBestProvider()

      // Run every analysis; the positions in this array are relied on below
      const results = await Promise.allSettled([
        this.analyzeSentiment(content, provider, options),
        this.extractEntities(content, provider, options),
        this.extractTopics(content, provider, options),
        this.classifyContent(content, options),
        this.assessReadability(content, options.language),
        this.assessCredibility(content),
        this.assessToxicity(content, provider),
        this.generateSummary(content, provider, options),
        this.extractKeywords(content, options)
      ])

      const processingTime = Date.now() - startTime
      const sentimentScore = this.extractResult(results[0], 0)

      // Merge the individual results
      const analysisResult: ContentAnalysisResult = {
        contentId: this.generateContentId(content),
        sentimentScore,
        sentimentLabel: this.getSentimentLabel(sentimentScore),
        readabilityScore: this.extractResult(results[4], 0.5),
        credibilityScore: this.extractResult(results[5], 0.5),
        toxicityScore: this.extractResult(results[6], 0),
        keywords: this.extractResult(results[8], []),
        entities: this.extractResult(results[1], []),
        topics: this.extractResult(results[2], []),
        categories: this.extractResult(results[3], []),
        summary: this.extractResult(results[7], ''),
        keyPhrases: this.extractKeyPhrases(content),
        language: options.language || await this.detectLanguage(content),
        processingTimeMs: processingTime,
        provider
      }

      // Record the successful run (also bumps successCount)
      this.updateStats(provider, options.types, processingTime)

      return {
        success: true,
        data: analysisResult,
        processingTimeMs: processingTime,
        provider
      }
    } catch (error) {
      this.stats.errorCount++
      const aiError: AIServiceError = {
        code: 'ANALYSIS_ERROR',
        message: this.errorMessage(error, 'Content analysis failed'),
        provider: options.provider,
        retryable: this.isRetryableError(error)
      }

      return {
        success: false,
        error: aiError.message,
        errorCode: aiError.code
      }
    }
  }

  /**
   * Analyse several pieces of content in batches.
   *
   * Items of one batch run concurrently; batches run one after another with an
   * optional pause in between. Failed items are reported through
   * `batchOptions.onError` and left out of the returned array, so the result
   * may be shorter than `contents`. `onProgress` receives the number of
   * successful results so far. Only `batchSize`, `delayMs`, `onError` and
   * `onProgress` are read here; `concurrency` and `retryCount` are not used.
   *
   * @param contents Texts to analyse
   * @param options Analysis options applied to every item
   * @param batchOptions Batch processing options
   */
  async analyzeContentBatch(
    contents: string[],
    options: AnalysisOptions = { types: ['sentiment', 'categories', 'summary'] },
    batchOptions: BatchProcessingOptions = {
      batchSize: 5,
      concurrency: 2,
      retryCount: 2,
      delayMs: 1000
    }
  ): Promise<AIResponse<ContentAnalysisResult[]>> {
    try {
      const results: ContentAnalysisResult[] = []
      const batches = this.createBatches(contents, batchOptions.batchSize)

      for (let i = 0; i < batches.length; i++) {
        const batchPromises = batches[i].map(async (content) => {
          try {
            const response = await this.analyzeContent(content, options)
            if (response.success && response.data) {
              return response.data
            }
            throw new Error(response.error || 'Analysis failed')
          } catch (error) {
            if (batchOptions.onError) {
              batchOptions.onError(error, content)
            }
            throw error
          }
        })

        const batchResults = await Promise.allSettled(batchPromises)
        for (const result of batchResults) {
          if (result.status === 'fulfilled') {
            results.push(result.value)
          }
        }

        // Progress callback
        if (batchOptions.onProgress) {
          batchOptions.onProgress(results.length, contents.length)
        }

        // Pause between batches (not after the last one)
        if (i < batches.length - 1 && batchOptions.delayMs > 0) {
          await this.delay(batchOptions.delayMs)
        }
      }

      return { success: true, data: results }
    } catch (error) {
      return {
        success: false,
        error: this.errorMessage(error, 'Batch analysis failed')
      }
    }
  }

  /**
   * Assess content quality along seven dimensions.
   *
   * `overallScore` is the unweighted mean of the seven dimension scores.
   *
   * @param content Text to assess
   * @param metadata Optional content metadata (source URL, publish time, category)
   */
  async assessQuality(content: string, metadata?: Partial<ContentInfo>): Promise<AIResponse<QualityAssessment>> {
    try {
      const [
        factualScore,
        sourceScore,
        writingScore,
        objectivityScore,
        completenessScore,
        timelinessScore,
        relevanceScore
      ] = await Promise.all([
        this.assessFactualAccuracy(content),
        this.assessSourceReliability(metadata?.sourceUrl || ''),
        this.assessWritingQuality(content),
        this.assessObjectivity(content),
        this.assessCompleteness(content),
        this.assessTimeliness(metadata?.publishedAt || Date.now()),
        this.assessRelevance(content, metadata?.categoryId)
      ])

      const overallScore = (
        factualScore + sourceScore + writingScore + objectivityScore +
        completenessScore + timelinessScore + relevanceScore
      ) / 7

      const assessment: QualityAssessment = {
        overallScore,
        factualAccuracy: factualScore,
        sourceReliability: sourceScore,
        writingQuality: writingScore,
        objectivity: objectivityScore,
        completeness: completenessScore,
        timeliness: timelinessScore,
        relevance: relevanceScore
      }

      return { success: true, data: assessment }
    } catch (error) {
      return {
        success: false,
        error: this.errorMessage(error, 'Quality assessment failed')
      }
    }
  }

  /**
   * Get a snapshot of the usage statistics.
   *
   * The nested per-provider / per-type records are copied too, so mutating the
   * returned object never changes the service's internal counters.
   */
  getStatistics(): AnalysisStats {
    return {
      ...this.stats,
      byProvider: { ...this.stats.byProvider },
      byAnalysisType: { ...this.stats.byAnalysisType }
    }
  }

  // Private methods

  /** Sentiment score for `content`, or 0 when sentiment was not requested. */
  private async analyzeSentiment(content: string, provider: AIProvider, options: AnalysisOptions): Promise<number> {
    if (!options.types.includes('sentiment')) return 0

    switch (provider) {
      case 'openai':
        return await this.analyzeSentimentWithOpenAI(content)
      case 'google':
        return await this.analyzeSentimentWithGoogle(content)
      case 'baidu':
        return await this.analyzeSentimentWithBaidu(content)
      default:
        return this.analyzeSentimentBasic(content)
    }
  }

  /** Entities found in `content`, or an empty list when entities were not requested. */
  private async extractEntities(content: string, provider: AIProvider, options: AnalysisOptions): Promise<EntityResult[]> {
    if (!options.types.includes('entities')) return []

    switch (provider) {
      case 'openai':
        return await this.extractEntitiesWithOpenAI(content)
      case 'google':
        return await this.extractEntitiesWithGoogle(content)
      default:
        return this.extractEntitiesBasic(content)
    }
  }

  /** Topics found in `content`, or an empty list when topics were not requested. */
  private async extractTopics(content: string, provider: AIProvider, options: AnalysisOptions): Promise<TopicResult[]> {
    if (!options.types.includes('topics')) return []

    switch (provider) {
      case 'openai':
        return await this.extractTopicsWithOpenAI(content)
      default:
        return this.extractTopicsBasic(content)
    }
  }

  /**
   * Keyword-based classification against NEWS_CATEGORIES.
   *
   * Confidence is the share of a category's keywords that occur in the content;
   * the three best-matching categories are returned, highest confidence first.
   */
  private async classifyContent(content: string, options: AnalysisOptions): Promise<CategoryResult[]> {
    if (!options.types.includes('categories')) return []

    const haystack = content.toLowerCase()
    const categories: CategoryResult[] = []

    for (const category of this.NEWS_CATEGORIES) {
      const matches = category.keywords.filter(keyword => haystack.includes(keyword.toLowerCase()))
      if (matches.length > 0) {
        categories.push({
          categoryId: category.id,
          categoryName: category.name,
          confidence: Math.min(matches.length / category.keywords.length, 1),
          level: 1
        })
      }
    }

    // Sort by confidence, best first
    return categories.sort((a, b) => b.confidence - a.confidence).slice(0, 3)
  }

  /**
   * Split text on sentence terminators. Both ASCII (. ! ?) and full-width CJK
   * (。！？) terminators are recognised so Chinese text is not treated as one
   * single sentence. Fragments are returned untrimmed and may be empty.
   */
  private splitSentences(content: string): string[] {
    return content.split(/[.!?。！？]+/)
  }

  /**
   * Simplified readability score in [0, 1]: starts at 1 and is reduced for long
   * sentences and long words. Words are whitespace-delimited tokens. Returns 0
   * for content without any sentence or word. `language` is currently unused.
   */
  private assessReadability(content: string, language?: string): number {
    const sentences = this.splitSentences(content).filter(s => s.trim().length > 0)
    const words = content.split(/\s+/).filter(w => w.length > 0)
    const characters = content.replace(/\s/g, '').length

    if (sentences.length === 0 || words.length === 0) return 0

    const avgWordsPerSentence = words.length / sentences.length
    const avgCharsPerWord = characters / words.length

    let score = 1.0

    // Sentence-length penalty (cumulative)
    if (avgWordsPerSentence > 20) score -= 0.2
    if (avgWordsPerSentence > 30) score -= 0.3

    // Vocabulary-complexity penalty (cumulative)
    if (avgCharsPerWord > 6) score -= 0.1
    if (avgCharsPerWord > 8) score -= 0.2

    return Math.max(0, Math.min(1, score))
  }

  /** Heuristic credibility score in [0, 1], starting from a base of 0.5. */
  private assessCredibility(content: string): number {
    let score = 0.5 // base score

    // Mentions a citation or a source
    if (content.includes('据') || content.includes('根据') || content.includes('来源')) {
      score += 0.2
    }

    // Contains concrete figures
    if (/\d+%|\d+万|\d+亿|\d{4}年/.test(content)) {
      score += 0.15
    }

    // Penalise absolute wording
    const extremeWords = ['绝对', '必然', '完全', '永远', '从来', '所有']
    const extremeCount = extremeWords.filter(word => content.includes(word)).length
    score -= extremeCount * 0.05

    // Penalise emotional wording
    const emotionalWords = ['震惊', '愤怒', '可怕', '惊人', '令人发指']
    const emotionalCount = emotionalWords.filter(word => content.includes(word)).length
    score -= emotionalCount * 0.03

    return Math.max(0, Math.min(1, score))
  }

  /** Basic toxicity score: number of distinct toxic words present, divided by 10. `provider` is unused. */
  private async assessToxicity(content: string, provider: AIProvider): Promise<number> {
    const toxicWords = ['仇恨', '歧视', '暴力', '威胁', '诽谤', '侮辱']
    const toxicCount = toxicWords.filter(word => content.includes(word)).length

    return Math.min(toxicCount / 10, 1)
  }

  /**
   * Naive summary: the first two sentences longer than 10 characters, joined
   * with '。'. A trailing '。' is appended only when further sentences were cut.
   * Returns '' when a summary was not requested. `provider` is unused.
   */
  private async generateSummary(content: string, provider: AIProvider, options: AnalysisOptions): Promise<string> {
    if (!options.types.includes('summary')) return ''

    const sentences = this.splitSentences(content)
      .map(s => s.trim())
      .filter(s => s.length > 10)
    return sentences.slice(0, 2).join('。') + (sentences.length > 2 ? '。' : '')
  }

  /**
   * Frequency-based keyword extraction: the ten most frequent lower-cased
   * tokens longer than one character. Returns [] when keywords were not requested.
   */
  private extractKeywords(content: string, options: AnalysisOptions): string[] {
    if (!options.types.includes('keywords')) return []

    const words = content
      .replace(/[^\u4e00-\u9fa5\w\s]/g, '') // keep Chinese characters, word characters and whitespace
      .split(/\s+/)
      .filter(word => word.length > 1)

    // Count in a Map: a plain object would collide with inherited keys such as "constructor"
    const counts = new Map<string, number>()
    for (const word of words) {
      const lower = word.toLowerCase()
      counts.set(lower, (counts.get(lower) || 0) + 1)
    }

    // Most frequent first, top 10
    return [...counts.entries()]
      .sort((a, b) => b[1] - a[1])
      .slice(0, 10)
      .map(entry => entry[0])
  }

  /** Collect the first five distinct two- and three-token phrases from whitespace-separated text. */
  private extractKeyPhrases(content: string): string[] {
    const phrases: string[] = []
    const words = content.split(/\s+/).filter(w => w.length > 0)

    for (let i = 0; i < words.length - 1; i++) {
      const twoWordPhrase = words.slice(i, i + 2).join(' ')
      if (twoWordPhrase.length > 4) {
        phrases.push(twoWordPhrase)
      }

      if (i < words.length - 2) {
        const threeWordPhrase = words.slice(i, i + 3).join(' ')
        if (threeWordPhrase.length > 6) {
          phrases.push(threeWordPhrase)
        }
      }
    }

    // De-duplicate and keep the first 5
    return [...new Set(phrases)].slice(0, 5)
  }

  /**
   * Basic language detection: compares the number of Chinese characters with
   * the number of Latin letters. Returns 'zh-CN', 'en', or 'auto' on a tie.
   */
  private async detectLanguage(content: string): Promise<string> {
    // Global flag is required: without it match() reports at most one hit
    const chineseMatches = (content.match(/[\u4e00-\u9fff]/g) || []).length
    const englishMatches = (content.match(/[a-zA-Z]/g) || []).length

    if (chineseMatches > englishMatches) return 'zh-CN'
    if (englishMatches > chineseMatches) return 'en'
    return 'auto'
  }

  /** Map a sentiment score to a label; scores within ±0.1 count as neutral. */
  private getSentimentLabel(score: number): 'positive' | 'negative' | 'neutral' {
    if (score > 0.1) return 'positive'
    if (score < -0.1) return 'negative'
    return 'neutral'
  }

  /**
   * Simple content id built from the current timestamp and the first ten
   * characters of the content. Two calls in the same millisecond with the same
   * prefix yield the same id.
   */
  private generateContentId(content: string): string {
    return `content_${Date.now()}_${content.substring(0, 10).replace(/\s/g, '_')}`
  }

  /** Value of a settled promise, or `defaultValue` when it was rejected. */
  private extractResult<T>(result: PromiseSettledResult<T>, defaultValue: T): T {
    return result.status === 'fulfilled' ? result.value : defaultValue
  }

  /** First provider with an API key configured (openai, google, baidu); 'openai' when none has one. */
  private selectBestProvider(): AIProvider {
    if (this.config.openai?.apiKey) return 'openai'
    if (this.config.google?.apiKey) return 'google'
    if (this.config.baidu?.apiKey) return 'baidu'
    return 'openai'
  }

  /**
   * Split `items` into consecutive chunks of `batchSize` elements.
   * A non-positive, fractional-below-one or non-numeric size is treated as 1 so
   * the loop always advances.
   */
  private createBatches<T>(items: T[], batchSize: number): T[][] {
    const size = Math.max(1, Math.floor(batchSize) || 1)
    const batches: T[][] = []
    for (let i = 0; i < items.length; i += size) {
      batches.push(items.slice(i, i + size))
    }
    return batches
  }

  /** Resolve after `ms` milliseconds. */
  private async delay(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms))
  }

  /** Zero the per-provider and per-analysis-type counters. */
  private initializeStats(): void {
    const providers: AIProvider[] = ['openai', 'google', 'baidu', 'custom']
    const analysisTypes: AnalysisType[] = ['sentiment', 'entities', 'topics', 'categories', 'readability', 'credibility', 'toxicity', 'summary', 'keywords']

    providers.forEach(provider => {
      this.stats.byProvider[provider] = 0
    })

    analysisTypes.forEach(type => {
      this.stats.byAnalysisType[type] = 0
    })
  }

  /**
   * Record one successful analysis: bumps `successCount`, the per-provider and
   * per-type counters, and folds `processingTime` into the running mean.
   *
   * The mean is taken over successful analyses only; `totalAnalyses` also
   * counts runs that are still in flight or that failed.
   */
  private updateStats(provider: AIProvider, types: AnalysisType[], processingTime: number): void {
    this.stats.successCount++

    // "|| 0" keeps counters numeric for keys that were not pre-initialised
    this.stats.byProvider[provider] = (this.stats.byProvider[provider] || 0) + 1
    types.forEach(type => {
      this.stats.byAnalysisType[type] = (this.stats.byAnalysisType[type] || 0) + 1
    })

    const completed = this.stats.successCount
    this.stats.avgProcessingTimeMs = (this.stats.avgProcessingTimeMs * (completed - 1) + processingTime) / completed
  }

  /** True for errors carrying a retryable `code` or a `status` of 500 or above; false for null/undefined. */
  private isRetryableError(error: any): boolean {
    if (error === null || error === undefined) return false
    const retryableCodes = ['TIMEOUT', 'RATE_LIMIT', 'SERVER_ERROR']
    return retryableCodes.includes(error.code) || Number(error.status) >= 500
  }

  /** Message of a thrown value, or `fallback` when it has no non-empty string message. */
  private errorMessage(error: any, fallback: string): string {
    if (error !== null && error !== undefined && typeof error.message === 'string' && error.message.length > 0) {
      return error.message
    }
    return fallback
  }

  // Quality assessment methods

  /** Scores the presence of verifiable facts (dates, figures, places, people/organisations). */
  private async assessFactualAccuracy(content: string): Promise<number> {
    let score = 0.5

    // Contains a date
    if (/\d{4}年|\d{1,2}月|\d{1,2}日/.test(content)) score += 0.1

    // Contains concrete numbers
    if (/\d+\.?\d*%|\d+万|\d+亿|\d+千/.test(content)) score += 0.1

    // Contains a place
    if (/市|省|县|区|国|州/.test(content)) score += 0.1

    // Contains a person or organisation marker
    if (/先生|女士|部长|主席|公司|集团|大学|医院/.test(content)) score += 0.1

    return Math.min(1, score)
  }

  /** Lower-cased host part of a URL-like string (scheme, userinfo, port, path, query and fragment removed). */
  private extractHostname(sourceUrl: string): string {
    const withoutScheme = sourceUrl.trim().toLowerCase().replace(/^[a-z][a-z0-9+.-]*:\/\//, '')
    const authority = withoutScheme.split(/[\/?#]/)[0]
    const hostAndPort = authority.substring(authority.lastIndexOf('@') + 1)
    return hostAndPort.split(':')[0]
  }

  /**
   * Simple source reliability score.
   *
   * Trusted domains and the .gov / .edu markers are matched against the host
   * only, so a trusted name appearing in the path, the query string or as a
   * prefix of another domain does not raise the score. The weaker
   * 'news' / 'media' hint is looked up anywhere in the URL.
   */
  private async assessSourceReliability(sourceUrl: string): Promise<number> {
    if (!sourceUrl) return 0.3

    const reliableDomains = ['gov.cn', 'edu.cn', 'xinhuanet.com', 'people.com.cn', 'cctv.com']
    const host = this.extractHostname(sourceUrl)

    if (reliableDomains.some(domain => host === domain || host.endsWith('.' + domain))) return 0.9
    if (/\.(gov|edu)(\.|$)/.test(host)) return 0.8

    const url = sourceUrl.toLowerCase()
    if (url.includes('news') || url.includes('media')) return 0.6

    return 0.4
  }

  /** Scores structure (sentences, paragraphs) and vocabulary diversity; 0.5 for empty content. */
  private async assessWritingQuality(content: string): Promise<number> {
    let score = 0.5

    // Sentence structure
    const sentences = this.splitSentences(content).filter(s => s.trim().length > 0)
    if (sentences.length > 2) score += 0.1

    // Paragraph structure
    const paragraphs = content.split('\n\n').filter(p => p.trim().length > 0)
    if (paragraphs.length > 1) score += 0.1

    // Vocabulary diversity; skipped without words to avoid 0 / 0 = NaN
    const words = content.split(/\s+/).filter(w => w.length > 0)
    if (words.length > 0) {
      const uniqueWords = new Set(words.map(w => w.toLowerCase()))
      score += (uniqueWords.size / words.length) * 0.3
    }

    return Math.min(1, score)
  }

  /** Objectivity score: base 0.7 minus penalties for subjective and emotional wording. */
  private async assessObjectivity(content: string): Promise<number> {
    let score = 0.7 // base objectivity score

    // Subjective wording penalty
    const subjectiveWords = ['我认为', '个人觉得', '显然', '明显', '无疑', '肯定']
    const subjectiveCount = subjectiveWords.filter(word => content.includes(word)).length
    score -= subjectiveCount * 0.1

    // Emotional wording penalty
    const emotionalWords = ['愤怒', '激动', '兴奋', '失望', '震惊', '惊喜']
    const emotionalCount = emotionalWords.filter(word => content.includes(word)).length
    score -= emotionalCount * 0.05

    return Math.max(0, Math.min(1, score))
  }

  /** Completeness score from content length and presence of the basic news elements (who/what/when/where/why). */
  private async assessCompleteness(content: string): Promise<number> {
    let score = 0.3

    // Length-based bonus (cumulative)
    if (content.length > 200) score += 0.2
    if (content.length > 500) score += 0.2
    if (content.length > 1000) score += 0.2

    // Key news elements
    const hasWho = /人|者|员|家|国|公司|组织/.test(content)
    const hasWhat = /事件|活动|发生|进行|宣布|决定/.test(content)
    const hasWhen = /\d{4}年|\d{1,2}月|\d{1,2}日|今天|昨天|明天/.test(content)
    const hasWhere = /市|省|县|区|国|地区|地点/.test(content)
    const hasWhy = /因为|由于|原因|目的|为了/.test(content)

    const elements = [hasWho, hasWhat, hasWhen, hasWhere, hasWhy].filter(Boolean).length
    score += elements * 0.06

    return Math.min(1, score)
  }

  /** Timeliness score from the age of the content: the newer, the higher. */
  private async assessTimeliness(publishedAt: number): Promise<number> {
    const ageHours = (Date.now() - publishedAt) / (1000 * 60 * 60)

    if (ageHours < 1) return 1.0
    if (ageHours < 6) return 0.9
    if (ageHours < 24) return 0.7
    if (ageHours < 72) return 0.5
    if (ageHours < 168) return 0.3
    return 0.1
  }

  /** Relevance of the content to its category; 0.5 when the category is missing or unknown. */
  private async assessRelevance(content: string, categoryId?: string): Promise<number> {
    if (!categoryId) return 0.5

    const category = this.NEWS_CATEGORIES.find(c => c.id === categoryId)
    if (!category) return 0.5

    const haystack = content.toLowerCase()
    const matches = category.keywords.filter(keyword => haystack.includes(keyword.toLowerCase())).length

    return Math.min(1, matches / category.keywords.length + 0.3)
  }

  // Mock AI service methods

  /** Mock OpenAI sentiment analysis: random delay, then a word-list based score in [-1, 1]. */
  private async analyzeSentimentWithOpenAI(content: string): Promise<number> {
    await this.delay(Math.random() * 500 + 200)

    // Simple word-list sentiment detection
    const positiveWords = ['好', '棒', '优秀', '成功', '胜利', '喜悦', '高兴', '满意']
    const negativeWords = ['坏', '糟糕', '失败', '问题', '困难', '悲伤', '愤怒', '失望']

    const positiveCount = positiveWords.filter(word => content.includes(word)).length
    const negativeCount = negativeWords.filter(word => content.includes(word)).length

    const score = (positiveCount - negativeCount) / Math.max(positiveCount + negativeCount, 1)
    return Math.max(-1, Math.min(1, score))
  }

  /** Mock Google sentiment analysis: random delay, then a random score in [-1, 1). */
  private async analyzeSentimentWithGoogle(content: string): Promise<number> {
    await this.delay(Math.random() * 400 + 150)
    return Math.random() * 2 - 1 // -1 to 1
  }

  /** Mock Baidu sentiment analysis: random delay, then a random score in [-1, 1). */
  private async analyzeSentimentWithBaidu(content: string): Promise<number> {
    await this.delay(Math.random() * 300 + 100)
    return Math.random() * 2 - 1
  }

  /** Basic word-list sentiment score in [-1, 1] covering Chinese and English words. */
  private analyzeSentimentBasic(content: string): number {
    const positiveWords = ['好', '棒', '优秀', '成功', '胜利', 'great', 'good', 'excellent']
    const negativeWords = ['坏', '糟糕', '失败', '问题', 'bad', 'terrible', 'awful']

    const haystack = content.toLowerCase()
    const positiveCount = positiveWords.filter(word => haystack.includes(word)).length
    const negativeCount = negativeWords.filter(word => haystack.includes(word)).length

    return (positiveCount - negativeCount) / Math.max(positiveCount + negativeCount, 1)
  }

  /** Mock OpenAI entity extraction: random delay, then regex matching; at most 10 entities. */
  private async extractEntitiesWithOpenAI(content: string): Promise<EntityResult[]> {
    await this.delay(Math.random() * 600 + 300)

    const entities: EntityResult[] = []
    const patterns = [
      { regex: /[\u4e00-\u9fa5]{2,4}(公司|集团|企业|机构)/g, type: 'organization' as const },
      { regex: /[\u4e00-\u9fa5]{2,3}(市|省|县|区)/g, type: 'location' as const },
      { regex: /[\u4e00-\u9fa5]{2,4}(先生|女士|部长|主席|总裁|经理)/g, type: 'person' as const },
      { regex: /\d{4}年\d{1,2}月\d{1,2}日/g, type: 'date' as const },
      { regex: /\d+\.?\d*(万|亿|千)?(元|美元|英镑)/g, type: 'money' as const }
    ]

    patterns.forEach(pattern => {
      for (const match of content.matchAll(pattern.regex)) {
        const start = match.index || 0
        entities.push({
          text: match[0],
          type: pattern.type,
          confidence: 0.8 + Math.random() * 0.2,
          startPosition: start,
          endPosition: start + match[0].length
        })
      }
    })

    return entities.slice(0, 10)
  }

  /** Mock Google entity extraction: random delay, then the basic extractor. */
  private async extractEntitiesWithGoogle(content: string): Promise<EntityResult[]> {
    await this.delay(Math.random() * 500 + 250)
    return this.extractEntitiesBasic(content)
  }

  /** Basic entity extraction: organisations ending in 公司 / 集团 only. */
  private extractEntitiesBasic(content: string): EntityResult[] {
    const entities: EntityResult[] = []

    for (const match of content.matchAll(/[\u4e00-\u9fa5]{2,4}(公司|集团)/g)) {
      const start = match.index || 0
      entities.push({
        text: match[0],
        type: 'organization',
        confidence: 0.7,
        startPosition: start,
        endPosition: start + match[0].length
      })
    }

    return entities
  }

  /**
   * Mock OpenAI topic extraction: random delay, then keyword matching.
   *
   * NOTE(review): only the first three entries of NEWS_CATEGORIES are scanned,
   * so content about any later category yields no topic — confirm whether
   * "top three matches across all categories" was intended.
   */
  private async extractTopicsWithOpenAI(content: string): Promise<TopicResult[]> {
    await this.delay(Math.random() * 400 + 200)

    const haystack = content.toLowerCase()
    const topics: TopicResult[] = []

    for (const category of this.NEWS_CATEGORIES.slice(0, 3)) {
      const matches = category.keywords.filter(keyword => haystack.includes(keyword.toLowerCase()))
      if (matches.length > 0) {
        topics.push({
          name: category.name,
          confidence: matches.length / category.keywords.length,
          keywords: matches
        })
      }
    }

    return topics.sort((a, b) => b.confidence - a.confidence)
  }

  /** Basic topic extraction: detects only the technology topic from a fixed keyword list. */
  private extractTopicsBasic(content: string): TopicResult[] {
    const topics: TopicResult[] = []

    // Technology-related keywords
    const techKeywords = ['科技', '技术', '互联网', 'AI', '人工智能']
    const techMatches = techKeywords.filter(keyword => content.includes(keyword))

    if (techMatches.length > 0) {
      topics.push({
        name: '科技',
        confidence: techMatches.length / techKeywords.length,
        keywords: techMatches
      })
    }

    return topics
  }
}
|
||||
Reference in New Issue
Block a user