Files
akmon/uni_modules/ak-ai-news/services/AITranslationService.uts
2026-01-20 08:04:15 +08:00

702 lines
20 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// AI Translation Service - Multi-provider translation implementation
import {
TranslationResult,
TranslationOptions,
AIProvider,
AIResponse,
AIServiceConfig,
BatchProcessingOptions,
CacheOptions,
AIServiceError
} from '../types/ai-types.uts'
// A single entry stored in the in-memory translation cache.
type TranslationCacheEntry = {
// Cache key this entry is stored under.
key: string
// The cached translation result.
result: TranslationResult
// Creation timestamp in milliseconds, taken from Date.now().
createdAt: number
// Time-to-live in HOURS (it is multiplied by 60 * 60 * 1000 when checked for expiry).
ttl: number
}
// Running counters describing how the translation service has been used.
type TranslationStats = {
// Number of translateText calls, including those answered from the cache.
totalRequests: number
// Number of translations that completed via a provider (cache hits are not counted here).
successCount: number
// Number of translateText calls that ended in an error.
errorCount: number
// Sum of tokensUsed over all successful provider translations.
totalTokens: number
// Sum of costUSD over all successful provider translations.
totalCost: number
// Running mean of qualityScore over all successful provider translations.
avgQuality: number
// (totalRequests - successCount - errorCount) / totalRequests, refreshed when statistics are read.
cacheHitRate: number
}
/**
 * AI translation service.
 *
 * Translates text through one of several providers (OpenAI, Google, Baidu)
 * and layers an in-memory result cache, batch processing, a heuristic quality
 * score and usage statistics on top.
 *
 * All provider calls in this class are mocks (`mock*Request`); replace them
 * with real HTTP requests in production.
 *
 * When the cache is enabled the constructor starts an hourly cleanup timer.
 * Call `dispose()` when the service is no longer needed so the timer is
 * released.
 */
export class AITranslationService {
  private config: AIServiceConfig
  private cache: Map<string, TranslationCacheEntry> = new Map()
  private cacheOptions: CacheOptions
  // Handle returned by setInterval for the hourly cache cleanup, or null when
  // no timer is running. Typed `any` because the handle is a number on some
  // runtimes and a Timeout object on others.
  private cleanupTimer: any = null
  private stats: TranslationStats = {
    totalRequests: 0,
    successCount: 0,
    errorCount: 0,
    totalTokens: 0,
    totalCost: 0,
    avgQuality: 0,
    cacheHitRate: 0
  }

  /**
   * @param config Provider configuration (API keys, models, quality thresholds).
   * @param cacheOptions Cache behaviour; defaults to an enabled LRU cache of
   *   10000 entries with a 24 hour TTL.
   */
  constructor(config: AIServiceConfig, cacheOptions: CacheOptions = {
    enabled: true,
    ttlHours: 24,
    maxSize: 10000,
    strategy: 'lru'
  }) {
    this.config = config
    this.cacheOptions = cacheOptions
    this.initializeCache()
  }

  /**
   * Translate a single text.
   *
   * Never throws: failures are reported through `success: false` together
   * with `error` and `errorCode`.
   *
   * @param text Source text.
   * @param targetLang Target language code.
   * @param sourceLang Source language code (optional; provider auto-detects when omitted).
   * @param options Translation options (provider, model, temperature, ...).
   * @returns The translation result, either fresh from a provider or from the cache.
   */
  async translateText(
    text: string,
    targetLang: string,
    sourceLang?: string,
    options: TranslationOptions = {}
  ): Promise<AIResponse<TranslationResult>> {
    try {
      this.stats.totalRequests++

      // Serve from the cache when possible.
      const cacheKey = this.generateCacheKey(text, targetLang, sourceLang, options)
      const cached = this.getFromCache(cacheKey)
      if (cached) {
        return { success: true, data: cached }
      }

      // Pick the provider: explicit choice wins, otherwise the best configured one.
      const provider = options.provider || this.selectBestProvider()

      let result: TranslationResult
      switch (provider) {
        case 'openai':
          result = await this.translateWithOpenAI(text, targetLang, sourceLang, options)
          break
        case 'google':
          result = await this.translateWithGoogle(text, targetLang, sourceLang, options)
          break
        case 'baidu':
          result = await this.translateWithBaidu(text, targetLang, sourceLang, options)
          break
        default:
          throw new Error(`Unsupported translation provider: ${provider}`)
      }

      // Quality check. `??` (not `||`) so an explicit threshold of 0 is honoured.
      const qualityThreshold = this.config.qualityThresholds?.translation ?? 0.7
      if (result.qualityScore < qualityThreshold) {
        console.warn(`Translation quality below threshold: ${result.qualityScore}`)
      }

      this.addToCache(cacheKey, result)
      this.updateStats(result)

      return {
        success: true,
        data: result,
        tokensUsed: result.tokensUsed,
        processingTimeMs: result.processingTimeMs,
        costUSD: result.costUSD,
        provider: result.provider
      }
    } catch (error) {
      this.stats.errorCount++
      const aiError: AIServiceError = {
        code: 'TRANSLATION_ERROR',
        message: this.errorMessage(error, 'Translation failed'),
        provider: options.provider,
        retryable: this.isRetryableError(error)
      }
      return {
        success: false,
        error: aiError.message,
        errorCode: aiError.code
      }
    }
  }

  /**
   * Translate many texts.
   *
   * Texts are split into batches of `batchOptions.batchSize`. Inside a batch
   * at most `batchOptions.concurrency` translations run at the same time.
   * Between batches the method waits `batchOptions.delayMs` milliseconds.
   *
   * Texts that fail are reported through `batchOptions.onError` and are
   * omitted from the returned array, so the result may be shorter than
   * `texts`; the relative order of the successful results follows `texts`.
   * `batchOptions.retryCount` is currently not acted upon.
   *
   * @param texts Texts to translate.
   * @param targetLang Target language code.
   * @param sourceLang Source language code (optional).
   * @param options Translation options applied to every text.
   * @param batchOptions Batch size, concurrency, delay and callbacks.
   */
  async translateBatch(
    texts: string[],
    targetLang: string,
    sourceLang?: string,
    options: TranslationOptions = {},
    batchOptions: BatchProcessingOptions = {
      batchSize: 10,
      concurrency: 3,
      retryCount: 2,
      delayMs: 1000
    }
  ): Promise<AIResponse<TranslationResult[]>> {
    try {
      const results: TranslationResult[] = []
      const batches = this.createBatches(texts, batchOptions.batchSize)

      for (let i = 0; i < batches.length; i++) {
        const batch = batches[i]

        // Honour the concurrency limit; an absent or invalid value means
        // "no limit inside a batch".
        const requested = batchOptions.concurrency
        const limit = requested != null && requested >= 1 ? Math.floor(requested) : batch.length

        for (let start = 0; start < batch.length; start += limit) {
          const group = batch.slice(start, start + limit)
          const settled = await Promise.allSettled(
            group.map(text => this.translateBatchItem(text, targetLang, sourceLang, options, batchOptions))
          )
          for (const outcome of settled) {
            if (outcome.status === 'fulfilled') {
              results.push(outcome.value)
            }
          }
        }

        // Progress callback: (successful translations so far, total texts).
        if (batchOptions.onProgress) {
          batchOptions.onProgress(results.length, texts.length)
        }

        // Pause between batches, but not after the last one.
        if (i < batches.length - 1 && batchOptions.delayMs > 0) {
          await this.delay(batchOptions.delayMs)
        }
      }

      return { success: true, data: results }
    } catch (error) {
      return {
        success: false,
        error: this.errorMessage(error, 'Batch translation failed')
      }
    }
  }

  /**
   * Detect the language of a text.
   *
   * A character-set heuristic is tried first; only when its confidence is
   * not above 0.8 is a provider consulted (Google or Baidu). For any other
   * provider the heuristic result is returned.
   *
   * @param text Text to inspect.
   */
  async detectLanguage(text: string): Promise<AIResponse<string>> {
    try {
      const basicDetection = this.basicLanguageDetection(text)
      if (basicDetection.confidence > 0.8) {
        return { success: true, data: basicDetection.language }
      }

      const provider = this.selectBestProvider()
      let detectedLang: string
      switch (provider) {
        case 'google':
          detectedLang = await this.detectLanguageWithGoogle(text)
          break
        case 'baidu':
          detectedLang = await this.detectLanguageWithBaidu(text)
          break
        default:
          detectedLang = basicDetection.language
      }
      return { success: true, data: detectedLang, provider }
    } catch (error) {
      return {
        success: false,
        error: this.errorMessage(error, 'Language detection failed')
      }
    }
  }

  /**
   * List the language codes this service advertises as supported.
   */
  getSupportedLanguages(): string[] {
    return [
      'zh-CN', 'zh-TW', 'en', 'ja', 'ko', 'es', 'fr', 'de', 'it', 'pt',
      'ru', 'ar', 'hi', 'th', 'vi', 'id', 'ms', 'tl', 'tr', 'nl'
    ]
  }

  /**
   * Return a snapshot (shallow copy) of the usage statistics.
   */
  getStatistics(): TranslationStats {
    this.updateCacheHitRate()
    return { ...this.stats }
  }

  /**
   * Remove every entry from the translation cache.
   */
  clearCache(): void {
    this.cache.clear()
  }

  /**
   * Release the resources held by this service: stops the periodic cache
   * cleanup timer and empties the cache. Safe to call more than once.
   */
  dispose(): void {
    if (this.cleanupTimer != null) {
      clearInterval(this.cleanupTimer)
      this.cleanupTimer = null
    }
    this.cache.clear()
  }

  // Private methods

  /**
   * Translate one text for `translateBatch`. Resolves with the result on
   * success; on failure notifies `batchOptions.onError` and rejects.
   */
  private async translateBatchItem(
    text: string,
    targetLang: string,
    sourceLang: string | undefined,
    options: TranslationOptions,
    batchOptions: BatchProcessingOptions
  ): Promise<TranslationResult> {
    try {
      const response = await this.translateText(text, targetLang, sourceLang, options)
      if (response.success && response.data) {
        return response.data
      }
      throw new Error(response.error || 'Translation failed')
    } catch (error) {
      if (batchOptions.onError) {
        batchOptions.onError(error, text)
      }
      throw error
    }
  }

  /**
   * Translate through the (mocked) OpenAI chat completion API.
   */
  private async translateWithOpenAI(
    text: string,
    targetLang: string,
    sourceLang?: string,
    options: TranslationOptions = {}
  ): Promise<TranslationResult> {
    const startTime = Date.now()
    const systemPrompt = this.buildOpenAISystemPrompt(targetLang, sourceLang, options)
    const userPrompt = `请翻译以下文本到${this.getLanguageName(targetLang)}\n\n${text}`
    const requestBody = {
      model: options.model || this.config.openai?.model || 'gpt-3.5-turbo',
      messages: [
        { role: 'system', content: systemPrompt },
        { role: 'user', content: userPrompt }
      ],
      // `??` so that an explicit temperature of 0 (fully deterministic) is kept.
      temperature: options.temperature ?? this.config.openai?.temperature ?? 0.3,
      max_tokens: options.maxTokens || this.config.openai?.maxTokens || 2000
    }

    // Mocked API call; replace with a real HTTP request in production.
    const response = await this.mockOpenAIRequest(requestBody)

    const processingTime = Date.now() - startTime
    const tokensUsed = response.usage.total_tokens
    const cost = this.calculateOpenAICost(tokensUsed, requestBody.model)
    // Score the same (trimmed) text that is returned to the caller.
    const translatedText = response.choices[0].message.content.trim()

    return {
      translatedText,
      originalText: text,
      sourceLang: sourceLang || 'auto',
      targetLang,
      confidence: 0.95,
      qualityScore: this.evaluateTranslationQuality(text, translatedText),
      provider: 'openai',
      tokensUsed,
      processingTimeMs: processingTime,
      costUSD: cost
    }
  }

  /**
   * Translate through the (mocked) Google Translate API.
   */
  private async translateWithGoogle(
    text: string,
    targetLang: string,
    sourceLang?: string,
    options: TranslationOptions = {}
  ): Promise<TranslationResult> {
    const startTime = Date.now()

    // Mocked Google Translate API call.
    const response = await this.mockGoogleRequest({
      q: text,
      target: this.convertToGoogleLangCode(targetLang),
      source: sourceLang ? this.convertToGoogleLangCode(sourceLang) : undefined,
      format: 'text'
    })

    const processingTime = Date.now() - startTime
    const tokensUsed = Math.ceil(text.length / 4) // estimate: roughly 4 characters per token
    const cost = this.calculateGoogleCost(text.length)
    const translation = response.data.translations[0]

    return {
      translatedText: translation.translatedText,
      originalText: text,
      sourceLang: translation.detectedSourceLanguage || sourceLang || 'auto',
      targetLang,
      confidence: 0.92,
      qualityScore: this.evaluateTranslationQuality(text, translation.translatedText),
      provider: 'google',
      tokensUsed,
      processingTimeMs: processingTime,
      costUSD: cost
    }
  }

  /**
   * Translate through the (mocked) Baidu Translate API.
   */
  private async translateWithBaidu(
    text: string,
    targetLang: string,
    sourceLang?: string,
    options: TranslationOptions = {}
  ): Promise<TranslationResult> {
    const startTime = Date.now()

    // Mocked Baidu Translate API call.
    const response = await this.mockBaiduRequest({
      q: text,
      from: sourceLang ? this.convertToBaiduLangCode(sourceLang) : 'auto',
      to: this.convertToBaiduLangCode(targetLang),
      appid: this.config.baidu?.apiKey || '',
      salt: Date.now().toString(),
      sign: 'mock_sign'
    })

    const processingTime = Date.now() - startTime
    const tokensUsed = Math.ceil(text.length / 4) // estimate: roughly 4 characters per token
    const cost = this.calculateBaiduCost(text.length)
    const translatedText = response.trans_result[0].dst

    return {
      translatedText,
      originalText: text,
      sourceLang: response.from || sourceLang || 'auto',
      targetLang,
      confidence: 0.90,
      qualityScore: this.evaluateTranslationQuality(text, translatedText),
      provider: 'baidu',
      tokensUsed,
      processingTimeMs: processingTime,
      costUSD: cost
    }
  }

  /**
   * Pick the first provider that has an API key configured, in the order
   * OpenAI, Google, Baidu. Falls back to OpenAI when none is configured.
   */
  private selectBestProvider(): AIProvider {
    if (this.config.openai?.apiKey) return 'openai'
    if (this.config.google?.apiKey) return 'google'
    if (this.config.baidu?.apiKey) return 'baidu'
    return 'openai' // default
  }

  /**
   * Build the cache key for a translation request.
   *
   * The key is the JSON encoding of every input that can change the output.
   * JSON encoding keeps the components unambiguous: texts that differ only in
   * whitespace or that contain the separator character can no longer collide.
   */
  private generateCacheKey(
    text: string,
    targetLang: string,
    sourceLang?: string,
    options: TranslationOptions = {}
  ): string {
    return JSON.stringify([
      text,
      sourceLang || 'auto',
      targetLang,
      options.provider ?? null,
      options.model ?? null,
      options.temperature ?? null,
      options.maxTokens ?? null,
      options.culturalAdaptation ?? null,
      options.preserveFormatting ?? null
    ])
  }

  /**
   * Tell whether a cache entry has outlived its TTL (stored in hours).
   */
  private isExpired(entry: TranslationCacheEntry, now: number): boolean {
    return now > entry.createdAt + (entry.ttl * 60 * 60 * 1000)
  }

  /**
   * Look up a cached translation. Expired entries are removed and reported
   * as a miss. Returns null when the cache is disabled or has no live entry.
   */
  private getFromCache(key: string): TranslationResult | null {
    if (!this.cacheOptions.enabled) return null

    const entry = this.cache.get(key)
    if (!entry) return null

    if (this.isExpired(entry, Date.now())) {
      this.cache.delete(key)
      return null
    }

    if (this.cacheOptions.strategy === 'lru') {
      // A Map iterates in insertion order; re-inserting on every hit moves the
      // entry to the end so the first key is always the least recently used.
      this.cache.delete(key)
      this.cache.set(key, entry)
    }
    return entry.result
  }

  /**
   * Store a translation in the cache, evicting one entry first when the
   * cache is full. Does nothing when the cache is disabled.
   */
  private addToCache(key: string, result: TranslationResult): void {
    if (!this.cacheOptions.enabled) return

    // Overwriting an existing key does not grow the cache, so no eviction then.
    if (!this.cache.has(key) && this.cache.size >= this.cacheOptions.maxSize) {
      this.evictCache()
    }

    // Delete first so an overwritten key moves to the most-recent position.
    this.cache.delete(key)
    this.cache.set(key, {
      key,
      result,
      createdAt: Date.now(),
      ttl: this.cacheOptions.ttlHours
    })
  }

  /**
   * Evict the entry at the front of the map: the least recently used one
   * under the 'lru' strategy, otherwise the oldest inserted one.
   */
  private evictCache(): void {
    const oldestKey = this.cache.keys().next().value
    if (oldestKey != null) {
      this.cache.delete(oldestKey)
    }
  }

  /**
   * Start the hourly sweep that removes expired cache entries. No timer is
   * created when the cache is disabled. The handle is kept so that
   * `dispose()` can stop it.
   */
  private initializeCache(): void {
    if (!this.cacheOptions.enabled) return
    this.cleanupTimer = setInterval(() => {
      this.cleanupExpiredCache()
    }, 60 * 60 * 1000) // once per hour
  }

  /**
   * Remove every expired entry from the cache.
   */
  private cleanupExpiredCache(): void {
    const now = Date.now()
    // Collect first, delete afterwards, so the map is not mutated while it
    // is being iterated.
    const expiredKeys: string[] = []
    for (const [key, entry] of this.cache.entries()) {
      if (this.isExpired(entry, now)) {
        expiredKeys.push(key)
      }
    }
    for (const key of expiredKeys) {
      this.cache.delete(key)
    }
  }

  /**
   * Split `items` into consecutive chunks of `batchSize` elements.
   * A batch size below 1 (or NaN) is treated as 1; without this guard the
   * loop index would never advance and the method would not terminate.
   */
  private createBatches<T>(items: T[], batchSize: number): T[][] {
    const size = batchSize >= 1 ? Math.floor(batchSize) : 1
    const batches: T[][] = []
    for (let i = 0; i < items.length; i += size) {
      batches.push(items.slice(i, i + size))
    }
    return batches
  }

  /**
   * Resolve after `ms` milliseconds.
   */
  private async delay(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms))
  }

  /**
   * Fold one successful provider translation into the statistics.
   */
  private updateStats(result: TranslationResult): void {
    this.stats.successCount++
    this.stats.totalTokens += result.tokensUsed
    this.stats.totalCost += result.costUSD
    // Incremental running mean over all successful translations.
    this.stats.avgQuality = (this.stats.avgQuality * (this.stats.successCount - 1) + result.qualityScore) / this.stats.successCount
  }

  /**
   * Recompute the cache hit rate. Every request is either a provider
   * success, an error, or a cache hit, so hits are what remains.
   */
  private updateCacheHitRate(): void {
    if (this.stats.totalRequests > 0) {
      const cacheHits = this.stats.totalRequests - this.stats.successCount - this.stats.errorCount
      this.stats.cacheHitRate = cacheHits / this.stats.totalRequests
    }
  }

  /**
   * Heuristic quality score in [0, 1].
   * Averages a length-ratio score (1 when the translation is between half
   * and twice the original length, else 0.7) and a similarity score (0.3
   * when the translation equals the original ignoring case, else 1).
   * An empty translation scores 0.
   */
  private evaluateTranslationQuality(original: string, translated: string): number {
    if (!translated || translated.length === 0) return 0

    const lengthRatio = translated.length / original.length
    const lengthScore = lengthRatio > 0.5 && lengthRatio < 2 ? 1 : 0.7

    // Output identical to the input usually means nothing was translated.
    const similarityScore = original.toLowerCase() === translated.toLowerCase() ? 0.3 : 1

    return (lengthScore + similarityScore) / 2
  }

  /**
   * Character-set based language guess.
   *
   * Kana and Hangul are tested before Han characters: Japanese text normally
   * mixes kana with kanji (which live in the Han range), so testing Han first
   * would label Japanese text as Chinese.
   */
  private basicLanguageDetection(text: string): { language: string, confidence: number } {
    const japaneseRegex = /[\u3040-\u309f\u30a0-\u30ff]/
    const koreanRegex = /[\uac00-\ud7af]/
    const chineseRegex = /[\u4e00-\u9fff]/
    const arabicRegex = /[\u0600-\u06ff]/
    const russianRegex = /[\u0400-\u04ff]/

    if (japaneseRegex.test(text)) return { language: 'ja', confidence: 0.9 }
    if (koreanRegex.test(text)) return { language: 'ko', confidence: 0.9 }
    if (chineseRegex.test(text)) return { language: 'zh-CN', confidence: 0.9 }
    if (arabicRegex.test(text)) return { language: 'ar', confidence: 0.8 }
    if (russianRegex.test(text)) return { language: 'ru', confidence: 0.8 }
    return { language: 'en', confidence: 0.5 }
  }

  /**
   * Detect the language through the (mocked) Google detection API.
   */
  private async detectLanguageWithGoogle(text: string): Promise<string> {
    const response = await this.mockGoogleDetectRequest({ q: text })
    return this.convertFromGoogleLangCode(response.data.detections[0][0].language)
  }

  /**
   * Detect the language through the (mocked) Baidu detection API.
   */
  private async detectLanguageWithBaidu(text: string): Promise<string> {
    const response = await this.mockBaiduDetectRequest({ q: text })
    return this.convertFromBaiduLangCode(response.lan)
  }

  /**
   * Build the system prompt for OpenAI, adding the optional instructions
   * for cultural adaptation and format preservation.
   */
  private buildOpenAISystemPrompt(targetLang: string, sourceLang?: string, options: TranslationOptions = {}): string {
    let prompt = `你是一个专业的翻译助手。请将文本翻译成${this.getLanguageName(targetLang)}。`
    if (options.culturalAdaptation) {
      prompt += ' 请注意文化适应性,确保翻译符合目标文化的表达习惯。'
    }
    if (options.preserveFormatting) {
      prompt += ' 请保持原文的格式和结构。'
    }
    prompt += ' 只返回翻译结果,不需要其他说明。'
    return prompt
  }

  /**
   * Human-readable name of a language code, covering every code returned by
   * `getSupportedLanguages`. Unknown codes are returned unchanged.
   */
  private getLanguageName(langCode: string): string {
    const languageNames: Record<string, string> = {
      'zh-CN': '简体中文',
      'zh-TW': '繁体中文',
      'en': 'English',
      'ja': '日本語',
      'ko': '한국어',
      'es': 'Español',
      'fr': 'Français',
      'de': 'Deutsch',
      'it': 'Italiano',
      'pt': 'Português',
      'ru': 'Русский',
      'ar': 'العربية',
      'hi': 'हिन्दी',
      'th': 'ไทย',
      'vi': 'Tiếng Việt',
      'id': 'Bahasa Indonesia',
      'ms': 'Bahasa Melayu',
      'tl': 'Filipino',
      'tr': 'Türkçe',
      'nl': 'Nederlands'
    }
    return languageNames[langCode] || langCode
  }

  /**
   * Map a service language code to the code sent to Google.
   */
  private convertToGoogleLangCode(langCode: string): string {
    const mapping: Record<string, string> = {
      'zh-CN': 'zh',
      'zh-TW': 'zh-tw'
    }
    return mapping[langCode] || langCode
  }

  /**
   * Map a Google language code back to the service language code.
   */
  private convertFromGoogleLangCode(langCode: string): string {
    const mapping: Record<string, string> = {
      'zh': 'zh-CN',
      'zh-tw': 'zh-TW'
    }
    return mapping[langCode] || langCode
  }

  /**
   * Map a service language code to the code sent to Baidu.
   * Unmapped codes (including 'auto') are passed through unchanged instead of
   * being silently replaced by English, which would translate into the
   * wrong language without any error.
   */
  private convertToBaiduLangCode(langCode: string): string {
    const mapping: Record<string, string> = {
      'zh-CN': 'zh',
      'zh-TW': 'cht',
      'en': 'en',
      'ja': 'jp',
      'ko': 'kor',
      'es': 'spa',
      'fr': 'fra',
      'de': 'de',
      'it': 'it',
      'pt': 'pt',
      'ru': 'ru',
      'ar': 'ara',
      'th': 'th',
      'vi': 'vie',
      'nl': 'nl'
    }
    return mapping[langCode] || langCode
  }

  /**
   * Map a Baidu language code back to the service language code.
   */
  private convertFromBaiduLangCode(langCode: string): string {
    const mapping: Record<string, string> = {
      'zh': 'zh-CN',
      'cht': 'zh-TW',
      'en': 'en',
      'jp': 'ja',
      'kor': 'ko',
      'spa': 'es',
      'fra': 'fr',
      'de': 'de',
      'ru': 'ru',
      'ara': 'ar',
      'vie': 'vi'
    }
    return mapping[langCode] || langCode
  }

  /**
   * Estimate the OpenAI cost in USD. Only the total token count is known
   * here, so the mean of the input and output price per 1000 tokens is
   * used. Unknown models are priced as gpt-3.5-turbo.
   */
  private calculateOpenAICost(tokens: number, model: string): number {
    const pricing: Record<string, { input: number, output: number }> = {
      'gpt-3.5-turbo': { input: 0.0015, output: 0.002 },
      'gpt-4': { input: 0.03, output: 0.06 },
      'gpt-4-turbo': { input: 0.01, output: 0.03 }
    }
    const modelPricing = pricing[model] || pricing['gpt-3.5-turbo']
    return (tokens / 1000) * ((modelPricing.input + modelPricing.output) / 2)
  }

  /**
   * Estimate the Google Translate cost in USD at $20 per million characters.
   */
  private calculateGoogleCost(textLength: number): number {
    return (textLength / 1000000) * 20
  }

  /**
   * Estimate the Baidu cost at 10 per million characters (returned in the
   * `costUSD` field by the caller).
   */
  private calculateBaiduCost(textLength: number): number {
    return (textLength / 1000000) * 10
  }

  /**
   * Tell whether an error is worth retrying: a known transient error code
   * or a status of 500 and above. A null/undefined error is not retryable.
   */
  private isRetryableError(error: any): boolean {
    if (error == null) return false
    const retryableCodes = ['TIMEOUT', 'RATE_LIMIT', 'SERVER_ERROR']
    return retryableCodes.includes(error.code) || error.status >= 500
  }

  /**
   * Extract a message from a caught value, falling back to `fallback` when
   * the value carries no non-empty string message.
   */
  private errorMessage(error: any, fallback: string): string {
    const message = error == null ? null : error.message
    return typeof message === 'string' && message.length > 0 ? message : fallback
  }

  // Mock API methods (replace with real HTTP requests in production)

  /**
   * Mock of the OpenAI chat completion endpoint. Echoes the text that
   * follows the first blank line of the user prompt.
   */
  private async mockOpenAIRequest(requestBody: any): Promise<any> {
    await this.delay(Math.random() * 1000 + 500) // simulated network latency

    const prompt: string = requestBody.messages[1].content
    // Everything after the FIRST blank line is the source text; slicing (rather
    // than splitting) keeps texts that themselves contain blank lines intact.
    const separatorAt = prompt.indexOf('\n\n')
    const sourceText = separatorAt >= 0 ? prompt.slice(separatorAt + 2) : ''

    return {
      choices: [{
        message: {
          content: `[Translated by OpenAI] ${sourceText || 'Translation result'}`
        }
      }],
      usage: {
        total_tokens: Math.ceil(prompt.length / 4) + 50
      }
    }
  }

  /**
   * Mock of the Google Translate endpoint.
   */
  private async mockGoogleRequest(params: any): Promise<any> {
    await this.delay(Math.random() * 800 + 400)
    return {
      data: {
        translations: [{
          translatedText: `[Translated by Google] ${params.q}`,
          detectedSourceLanguage: 'zh'
        }]
      }
    }
  }

  /**
   * Mock of the Baidu Translate endpoint.
   */
  private async mockBaiduRequest(params: any): Promise<any> {
    await this.delay(Math.random() * 600 + 300)
    return {
      trans_result: [{
        src: params.q,
        dst: `[Translated by Baidu] ${params.q}`
      }],
      from: params.from
    }
  }

  /**
   * Mock of the Google language detection endpoint.
   */
  private async mockGoogleDetectRequest(params: any): Promise<any> {
    await this.delay(200)
    return {
      data: {
        detections: [[{
          language: 'zh',
          confidence: 0.95
        }]]
      }
    }
  }

  /**
   * Mock of the Baidu language detection endpoint.
   */
  private async mockBaiduDetectRequest(params: any): Promise<any> {
    await this.delay(200)
    return {
      lan: 'zh',
      confidence: 0.92
    }
  }
}