Initial commit of akmon project

This commit is contained in:
2026-01-20 08:04:15 +08:00
commit 77a2bab985
1309 changed files with 343305 additions and 0 deletions

View File

@@ -0,0 +1,701 @@
// AI Translation Service - Multi-provider translation implementation
import {
TranslationResult,
TranslationOptions,
AIProvider,
AIResponse,
AIServiceConfig,
BatchProcessingOptions,
CacheOptions,
AIServiceError
} from '../types/ai-types.uts'
// Translation cache entry: one stored translation plus the bookkeeping needed
// to expire it.
type TranslationCacheEntry = {
// Cache key; the same string the entry is stored under in the service's cache Map.
key: string
// The cached translation result.
result: TranslationResult
// Creation time in epoch milliseconds (taken from Date.now()).
createdAt: number
// Time-to-live in HOURS (copied from CacheOptions.ttlHours), not milliseconds.
ttl: number
}
// Translation statistics accumulated by AITranslationService.
type TranslationStats = {
// Number of translateText calls, including cache hits and failures.
totalRequests: number
// Number of translations completed by a provider; cache hits are not counted here.
successCount: number
// Number of translateText calls that ended in the error path.
errorCount: number
// Sum of tokensUsed over provider-completed translations.
totalTokens: number
// Sum of costUSD over provider-completed translations.
totalCost: number
// Running mean of qualityScore over provider-completed translations.
avgQuality: number
// Derived value: (totalRequests - successCount - errorCount) / totalRequests,
// recomputed when statistics are read.
cacheHitRate: number
}
/**
 * AI translation service.
 *
 * Routes translation requests to one of several providers (OpenAI, Google,
 * Baidu) and adds an in-memory result cache, batch processing, a heuristic
 * quality score and usage statistics. The provider calls in this class are
 * mocks (`mock*Request`) standing in for real HTTP requests.
 *
 * When caching is enabled the constructor starts an hourly cleanup timer;
 * call `dispose()` when the instance is no longer needed.
 */
export class AITranslationService {
  private config: AIServiceConfig
  private cache: Map<string, TranslationCacheEntry> = new Map()
  private cacheOptions: CacheOptions
  // Handle of the periodic cleanup timer, kept so dispose() can cancel it.
  private cleanupTimer: any = null
  private stats: TranslationStats = {
    totalRequests: 0,
    successCount: 0,
    errorCount: 0,
    totalTokens: 0,
    totalCost: 0,
    avgQuality: 0,
    cacheHitRate: 0
  }

  /**
   * @param config Provider configuration (API keys, models, thresholds).
   * @param cacheOptions Cache behaviour; defaults to an enabled cache holding
   *   up to 10000 entries for 24 hours.
   */
  constructor(config: AIServiceConfig, cacheOptions: CacheOptions = {
    enabled: true,
    ttlHours: 24,
    maxSize: 10000,
    strategy: 'lru'
  }) {
    this.config = config
    this.cacheOptions = cacheOptions
    this.initializeCache()
  }

  /**
   * Translate a single text.
   *
   * Never rejects: failures are reported as `{ success: false, ... }`.
   * A cache hit returns only `success` and `data` (no token/cost fields).
   *
   * @param text Source text.
   * @param targetLang Target language code.
   * @param sourceLang Source language code (optional; providers auto-detect).
   * @param options Translation options (provider, model, temperature, ...).
   * @returns The translation result wrapped in an AIResponse.
   */
  async translateText(
    text: string,
    targetLang: string,
    sourceLang?: string,
    options: TranslationOptions = {}
  ): Promise<AIResponse<TranslationResult>> {
    try {
      this.stats.totalRequests++

      // Serve from cache when possible.
      const cacheKey = this.generateCacheKey(text, targetLang, sourceLang, options)
      const cached = this.getFromCache(cacheKey)
      if (cached) {
        return { success: true, data: cached }
      }

      // Pick the provider: explicit option first, otherwise by configuration.
      const provider = options.provider || this.selectBestProvider()

      let result: TranslationResult
      switch (provider) {
        case 'openai':
          result = await this.translateWithOpenAI(text, targetLang, sourceLang, options)
          break
        case 'google':
          result = await this.translateWithGoogle(text, targetLang, sourceLang, options)
          break
        case 'baidu':
          result = await this.translateWithBaidu(text, targetLang, sourceLang, options)
          break
        default:
          throw new Error(`Unsupported translation provider: ${provider}`)
      }

      // Quality check. `??` (not `||`) so a configured threshold of 0 is honoured.
      const qualityThreshold = this.config.qualityThresholds?.translation ?? 0.7
      if (result.qualityScore < qualityThreshold) {
        console.warn(`Translation quality below threshold: ${result.qualityScore}`)
      }

      this.addToCache(cacheKey, result)
      this.updateStats(result)

      return {
        success: true,
        data: result,
        tokensUsed: result.tokensUsed,
        processingTimeMs: result.processingTimeMs,
        costUSD: result.costUSD,
        provider: result.provider
      }
    } catch (error) {
      this.stats.errorCount++
      const aiError: AIServiceError = {
        code: 'TRANSLATION_ERROR',
        message: this.describeError(error, 'Translation failed'),
        provider: options.provider,
        retryable: this.isRetryableError(error)
      }
      return {
        success: false,
        error: aiError.message,
        errorCode: aiError.code
      }
    }
  }

  /**
   * Translate several texts.
   *
   * Texts are split into batches of `batchOptions.batchSize`; every text in a
   * batch is translated concurrently and batches run one after another with
   * `batchOptions.delayMs` between them.
   *
   * Items that fail are reported through `batchOptions.onError` and are left
   * out of the returned array, so the result may be shorter than `texts` and
   * is not index-aligned with it.
   *
   * NOTE(review): `batchOptions.concurrency` and `batchOptions.retryCount` are
   * accepted but not applied by this implementation.
   *
   * @param texts Texts to translate.
   * @param targetLang Target language code.
   * @param sourceLang Source language code (optional).
   * @param options Translation options applied to every text.
   * @param batchOptions Batch size, delay and progress/error callbacks.
   */
  async translateBatch(
    texts: string[],
    targetLang: string,
    sourceLang?: string,
    options: TranslationOptions = {},
    batchOptions: BatchProcessingOptions = {
      batchSize: 10,
      concurrency: 3,
      retryCount: 2,
      delayMs: 1000
    }
  ): Promise<AIResponse<TranslationResult[]>> {
    try {
      const results: TranslationResult[] = []
      const batches = this.createBatches(texts, batchOptions.batchSize)

      for (let i = 0; i < batches.length; i++) {
        const batch = batches[i]
        const batchPromises = batch.map(async (text) => {
          try {
            const response = await this.translateText(text, targetLang, sourceLang, options)
            if (response.success && response.data) {
              return response.data
            }
            throw new Error(response.error || 'Translation failed')
          } catch (error) {
            if (batchOptions.onError) {
              batchOptions.onError(error, text)
            }
            throw error
          }
        })

        // allSettled: one failed item must not abort the rest of the batch.
        const batchResults = await Promise.allSettled(batchPromises)
        for (const result of batchResults) {
          if (result.status === 'fulfilled') {
            results.push(result.value)
          }
        }

        // Progress callback: (successful translations so far, total texts).
        if (batchOptions.onProgress) {
          batchOptions.onProgress(results.length, texts.length)
        }

        // Pause between batches, but not after the last one.
        if (i < batches.length - 1 && batchOptions.delayMs > 0) {
          await this.delay(batchOptions.delayMs)
        }
      }

      return { success: true, data: results }
    } catch (error) {
      return {
        success: false,
        error: this.describeError(error, 'Batch translation failed')
      }
    }
  }

  /**
   * Detect the language of a text.
   *
   * Uses the character-set heuristic first and only asks a provider when the
   * heuristic's confidence is 0.8 or lower. Only Google and Baidu offer
   * detection here; for any other provider the heuristic result is returned.
   *
   * @param text Text to inspect.
   */
  async detectLanguage(text: string): Promise<AIResponse<string>> {
    try {
      const basicDetection = this.basicLanguageDetection(text)
      if (basicDetection.confidence > 0.8) {
        return { success: true, data: basicDetection.language }
      }

      const provider = this.selectBestProvider()
      let detectedLang: string
      switch (provider) {
        case 'google':
          detectedLang = await this.detectLanguageWithGoogle(text)
          break
        case 'baidu':
          detectedLang = await this.detectLanguageWithBaidu(text)
          break
        default:
          detectedLang = basicDetection.language
      }

      return { success: true, data: detectedLang, provider }
    } catch (error) {
      return {
        success: false,
        error: this.describeError(error, 'Language detection failed')
      }
    }
  }

  /**
   * List the language codes this service advertises.
   */
  getSupportedLanguages(): string[] {
    return [
      'zh-CN', 'zh-TW', 'en', 'ja', 'ko', 'es', 'fr', 'de', 'it', 'pt',
      'ru', 'ar', 'hi', 'th', 'vi', 'id', 'ms', 'tl', 'tr', 'nl'
    ]
  }

  /**
   * Return a snapshot (copy) of the statistics, with the cache hit rate
   * refreshed.
   */
  getStatistics(): TranslationStats {
    this.updateCacheHitRate()
    return { ...this.stats }
  }

  /**
   * Remove every cached translation.
   */
  clearCache(): void {
    this.cache.clear()
  }

  /**
   * Stop the periodic cache-cleanup timer and drop all cached entries.
   *
   * The instance stays usable afterwards; expired entries are then only
   * removed lazily when they are read.
   */
  dispose(): void {
    if (this.cleanupTimer != null) {
      clearInterval(this.cleanupTimer)
      this.cleanupTimer = null
    }
    this.cache.clear()
  }

  // Private methods

  /**
   * Translate via OpenAI chat completions (currently the mock request).
   */
  private async translateWithOpenAI(
    text: string,
    targetLang: string,
    sourceLang?: string,
    options: TranslationOptions = {}
  ): Promise<TranslationResult> {
    const startTime = Date.now()
    const systemPrompt = this.buildOpenAISystemPrompt(targetLang, sourceLang, options)
    const userPrompt = `请翻译以下文本到${this.getLanguageName(targetLang)}\n\n${text}`
    const requestBody = {
      model: options.model || this.config.openai?.model || 'gpt-3.5-turbo',
      messages: [
        { role: 'system', content: systemPrompt },
        { role: 'user', content: userPrompt }
      ],
      // `??` so an explicit temperature of 0 is not replaced by the default.
      temperature: options.temperature ?? this.config.openai?.temperature ?? 0.3,
      max_tokens: options.maxTokens || this.config.openai?.maxTokens || 2000
    }

    // Mock API call; replace with a real HTTP request in production.
    const response = await this.mockOpenAIRequest(requestBody)

    const processingTime = Date.now() - startTime
    const tokensUsed = response.usage.total_tokens
    const cost = this.calculateOpenAICost(tokensUsed, requestBody.model)
    // Score the same (trimmed) text that is returned to the caller.
    const translatedText = response.choices[0].message.content.trim()

    return {
      translatedText,
      originalText: text,
      sourceLang: sourceLang || 'auto',
      targetLang,
      confidence: 0.95,
      qualityScore: this.evaluateTranslationQuality(text, translatedText),
      provider: 'openai',
      tokensUsed,
      processingTimeMs: processingTime,
      costUSD: cost
    }
  }

  /**
   * Translate via Google Translate (currently the mock request).
   */
  private async translateWithGoogle(
    text: string,
    targetLang: string,
    sourceLang?: string,
    options: TranslationOptions = {}
  ): Promise<TranslationResult> {
    const startTime = Date.now()

    // Mock Google Translate API call.
    const response = await this.mockGoogleRequest({
      q: text,
      target: this.convertToGoogleLangCode(targetLang),
      source: sourceLang ? this.convertToGoogleLangCode(sourceLang) : undefined,
      format: 'text'
    })

    const processingTime = Date.now() - startTime
    const tokensUsed = Math.ceil(text.length / 4) // estimate: ~4 characters per token
    const cost = this.calculateGoogleCost(text.length)
    const translation = response.data.translations[0]

    return {
      translatedText: translation.translatedText,
      originalText: text,
      sourceLang: translation.detectedSourceLanguage || sourceLang || 'auto',
      targetLang,
      confidence: 0.92,
      qualityScore: this.evaluateTranslationQuality(text, translation.translatedText),
      provider: 'google',
      tokensUsed,
      processingTimeMs: processingTime,
      costUSD: cost
    }
  }

  /**
   * Translate via Baidu Translate (currently the mock request).
   */
  private async translateWithBaidu(
    text: string,
    targetLang: string,
    sourceLang?: string,
    options: TranslationOptions = {}
  ): Promise<TranslationResult> {
    const startTime = Date.now()

    // Mock Baidu Translate API call.
    const response = await this.mockBaiduRequest({
      q: text,
      from: sourceLang ? this.convertToBaiduLangCode(sourceLang) : 'auto',
      to: this.convertToBaiduLangCode(targetLang),
      appid: this.config.baidu?.apiKey || '',
      salt: Date.now().toString(),
      sign: 'mock_sign'
    })

    const processingTime = Date.now() - startTime
    const tokensUsed = Math.ceil(text.length / 4) // estimate: ~4 characters per token
    const cost = this.calculateBaiduCost(text.length)
    const translatedText = response.trans_result[0].dst

    return {
      translatedText,
      originalText: text,
      sourceLang: response.from || sourceLang || 'auto',
      targetLang,
      confidence: 0.90,
      qualityScore: this.evaluateTranslationQuality(text, translatedText),
      provider: 'baidu',
      tokensUsed,
      processingTimeMs: processingTime,
      costUSD: cost
    }
  }

  /**
   * Pick the first provider that has an API key configured, in the order
   * OpenAI, Google, Baidu; falls back to OpenAI when none is configured.
   */
  private selectBestProvider(): AIProvider {
    if (this.config.openai?.apiKey) return 'openai'
    if (this.config.google?.apiKey) return 'google'
    if (this.config.baidu?.apiKey) return 'baidu'
    return 'openai' // default
  }

  /**
   * Build the cache key for a request.
   *
   * The key is a JSON encoding of the text, the languages and every option
   * that changes the output. JSON escaping keeps the fields unambiguous, so
   * texts that differ only in whitespace or underscores can no longer collide
   * and be served each other's translation.
   */
  private generateCacheKey(
    text: string,
    targetLang: string,
    sourceLang?: string,
    options: TranslationOptions = {}
  ): string {
    return JSON.stringify({
      text: text,
      sourceLang: sourceLang || 'auto',
      targetLang: targetLang,
      provider: options.provider,
      model: options.model,
      temperature: options.temperature,
      maxTokens: options.maxTokens,
      culturalAdaptation: options.culturalAdaptation,
      preserveFormatting: options.preserveFormatting
    })
  }

  /**
   * Look up a cached result; expired entries are removed and reported as a
   * miss. With the 'lru' strategy a hit marks the entry as most recently used.
   */
  private getFromCache(key: string): TranslationResult | null {
    if (!this.cacheOptions.enabled) return null

    const entry = this.cache.get(key)
    if (!entry) return null

    if (this.isExpired(entry, Date.now())) {
      this.cache.delete(key)
      return null
    }

    if (this.cacheOptions.strategy === 'lru') {
      // Map iterates in insertion order; re-inserting moves the entry to the
      // "newest" end so evictCache() removes the least recently used one.
      this.cache.delete(key)
      this.cache.set(key, entry)
    }
    return entry.result
  }

  /**
   * Store a result, evicting the oldest entry first when the cache is full.
   */
  private addToCache(key: string, result: TranslationResult): void {
    if (!this.cacheOptions.enabled) return

    // Drop any previous entry for this key first: overwriting must not evict
    // an unrelated entry, and the refreshed entry should count as newest.
    this.cache.delete(key)

    if (this.cache.size >= this.cacheOptions.maxSize) {
      this.evictCache()
    }

    this.cache.set(key, {
      key,
      result,
      createdAt: Date.now(),
      ttl: this.cacheOptions.ttlHours
    })
  }

  /**
   * Remove the entry at the front of the Map's iteration order (the least
   * recently used one under 'lru', otherwise the oldest inserted one).
   */
  private evictCache(): void {
    const oldestKey = this.cache.keys().next().value
    // `!= null` rather than a truthiness test so an empty-string key is evictable.
    if (oldestKey != null) {
      this.cache.delete(oldestKey)
    }
  }

  /**
   * Start the hourly sweep that removes expired cache entries.
   *
   * The handle is stored for dispose(). Where the runtime returns a timer
   * object with `unref` (Node.js), the timer is unref'd so it does not keep
   * the process alive by itself.
   */
  private initializeCache(): void {
    // Nothing is ever stored when caching is disabled, so no sweep is needed.
    if (!this.cacheOptions.enabled) return

    const timer: any = setInterval(() => {
      this.cleanupExpiredCache()
    }, 60 * 60 * 1000) // sweep once per hour

    if (timer != null && typeof timer.unref === 'function') {
      timer.unref()
    }
    this.cleanupTimer = timer
  }

  /**
   * Delete every cache entry whose TTL has elapsed.
   */
  private cleanupExpiredCache(): void {
    const now = Date.now()
    for (const [key, entry] of this.cache.entries()) {
      if (this.isExpired(entry, now)) {
        this.cache.delete(key)
      }
    }
  }

  /**
   * Whether an entry's TTL (stored in hours) has elapsed at time `now`
   * (epoch milliseconds).
   */
  private isExpired(entry: TranslationCacheEntry, now: number): boolean {
    return now > entry.createdAt + (entry.ttl * 60 * 60 * 1000)
  }

  /**
   * Split `items` into consecutive chunks of at most `batchSize` elements.
   *
   * A batch size below 1 (or NaN/undefined) is treated as 1; previously zero
   * or a negative value made the loop never terminate.
   */
  private createBatches<T>(items: T[], batchSize: number): T[][] {
    const size = batchSize >= 1 ? Math.floor(batchSize) : 1
    const batches: T[][] = []
    for (let i = 0; i < items.length; i += size) {
      batches.push(items.slice(i, i + size))
    }
    return batches
  }

  /**
   * Resolve after `ms` milliseconds.
   */
  private async delay(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms))
  }

  /**
   * Fold one provider-completed translation into the statistics.
   */
  private updateStats(result: TranslationResult): void {
    this.stats.successCount++
    this.stats.totalTokens += result.tokensUsed
    this.stats.totalCost += result.costUSD
    // Incremental mean: previous mean weighted by the previous count.
    this.stats.avgQuality = (this.stats.avgQuality * (this.stats.successCount - 1) + result.qualityScore) / this.stats.successCount
  }

  /**
   * Recompute the cache hit rate. Cache hits are not counted directly: every
   * request that was neither a provider success nor an error was a hit.
   */
  private updateCacheHitRate(): void {
    if (this.stats.totalRequests > 0) {
      const cacheHits = this.stats.totalRequests - this.stats.successCount - this.stats.errorCount
      this.stats.cacheHitRate = cacheHits / this.stats.totalRequests
    }
  }

  /**
   * Heuristic quality score in [0, 1]: the mean of a length-ratio score and a
   * "was anything actually translated" score. Empty output scores 0.
   */
  private evaluateTranslationQuality(original: string, translated: string): number {
    if (!translated || translated.length === 0) return 0

    const lengthRatio = translated.length / original.length
    const lengthScore = lengthRatio > 0.5 && lengthRatio < 2 ? 1 : 0.7

    // Output identical to the input suggests the translation did not happen.
    const similarityScore = original.toLowerCase() === translated.toLowerCase() ? 0.3 : 1

    return (lengthScore + similarityScore) / 2
  }

  /**
   * Character-set based language guess.
   *
   * Kana and Hangul are tested before Han characters: Japanese (and sometimes
   * Korean) text contains Han characters too, so testing Han first would
   * report such text as Chinese.
   */
  private basicLanguageDetection(text: string): { language: string, confidence: number } {
    const japaneseRegex = /[\u3040-\u309f\u30a0-\u30ff]/
    const koreanRegex = /[\uac00-\ud7af]/
    const chineseRegex = /[\u4e00-\u9fff]/
    const arabicRegex = /[\u0600-\u06ff]/
    const russianRegex = /[\u0400-\u04ff]/

    if (japaneseRegex.test(text)) return { language: 'ja', confidence: 0.9 }
    if (koreanRegex.test(text)) return { language: 'ko', confidence: 0.9 }
    if (chineseRegex.test(text)) return { language: 'zh-CN', confidence: 0.9 }
    if (arabicRegex.test(text)) return { language: 'ar', confidence: 0.8 }
    if (russianRegex.test(text)) return { language: 'ru', confidence: 0.8 }

    return { language: 'en', confidence: 0.5 }
  }

  /**
   * Detect the language via Google (currently the mock request).
   */
  private async detectLanguageWithGoogle(text: string): Promise<string> {
    const response = await this.mockGoogleDetectRequest({ q: text })
    return this.convertFromGoogleLangCode(response.data.detections[0][0].language)
  }

  /**
   * Detect the language via Baidu (currently the mock request).
   */
  private async detectLanguageWithBaidu(text: string): Promise<string> {
    const response = await this.mockBaiduDetectRequest({ q: text })
    return this.convertFromBaiduLangCode(response.lan)
  }

  /**
   * Build the system prompt for OpenAI from the target language and the
   * cultural-adaptation / preserve-formatting options.
   */
  private buildOpenAISystemPrompt(targetLang: string, sourceLang?: string, options: TranslationOptions = {}): string {
    let prompt = `你是一个专业的翻译助手。请将文本翻译成${this.getLanguageName(targetLang)}。`

    if (options.culturalAdaptation) {
      prompt += ' 请注意文化适应性,确保翻译符合目标文化的表达习惯。'
    }
    if (options.preserveFormatting) {
      prompt += ' 请保持原文的格式和结构。'
    }

    prompt += ' 只返回翻译结果,不需要其他说明。'
    return prompt
  }

  /**
   * Native display name for a language code; unknown codes are returned as-is.
   */
  private getLanguageName(langCode: string): string {
    const languageNames: Record<string, string> = {
      'zh-CN': '简体中文',
      'zh-TW': '繁体中文',
      'en': 'English',
      'ja': '日本語',
      'ko': '한국어',
      'es': 'Español',
      'fr': 'Français',
      'de': 'Deutsch',
      'it': 'Italiano',
      'pt': 'Português',
      'ru': 'Русский',
      'ar': 'العربية',
      'hi': 'हिन्दी',
      'th': 'ไทย',
      'vi': 'Tiếng Việt',
      'id': 'Bahasa Indonesia',
      'ms': 'Bahasa Melayu',
      'tl': 'Tagalog',
      'tr': 'Türkçe',
      'nl': 'Nederlands'
    }
    return languageNames[langCode] || langCode
  }

  /**
   * Map a service language code to Google's code; unmapped codes pass through.
   */
  private convertToGoogleLangCode(langCode: string): string {
    const mapping: Record<string, string> = {
      'zh-CN': 'zh',
      'zh-TW': 'zh-tw'
    }
    return mapping[langCode] || langCode
  }

  /**
   * Map a Google language code back to the service's code.
   */
  private convertFromGoogleLangCode(langCode: string): string {
    const mapping: Record<string, string> = {
      'zh': 'zh-CN',
      'zh-tw': 'zh-TW'
    }
    return mapping[langCode] || langCode
  }

  /**
   * Map a service language code to Baidu's code.
   *
   * Unmapped codes pass through unchanged (as the other converters do).
   * Falling back to 'en' made a request for an unmapped target language
   * silently come back in English.
   */
  private convertToBaiduLangCode(langCode: string): string {
    const mapping: Record<string, string> = {
      'zh-CN': 'zh',
      'zh-TW': 'cht',
      'en': 'en',
      'ja': 'jp',
      'ko': 'kor',
      'es': 'spa',
      'fr': 'fra',
      'de': 'de',
      'it': 'it',
      'pt': 'pt',
      'ru': 'ru',
      'ar': 'ara',
      'th': 'th',
      'vi': 'vie',
      'nl': 'nl'
    }
    return mapping[langCode] || langCode
  }

  /**
   * Map a Baidu language code back to the service's code.
   */
  private convertFromBaiduLangCode(langCode: string): string {
    const mapping: Record<string, string> = {
      'zh': 'zh-CN',
      'cht': 'zh-TW',
      'en': 'en',
      'jp': 'ja',
      'kor': 'ko',
      'spa': 'es',
      'fra': 'fr',
      'de': 'de',
      'it': 'it',
      'pt': 'pt',
      'ru': 'ru',
      'ara': 'ar',
      'th': 'th',
      'vie': 'vi',
      'nl': 'nl'
    }
    return mapping[langCode] || langCode
  }

  /**
   * Estimate the OpenAI cost in USD. Only the total token count is known
   * here, so the input and output prices (per 1000 tokens) are averaged.
   * Unknown models are priced as gpt-3.5-turbo.
   */
  private calculateOpenAICost(tokens: number, model: string): number {
    const pricing: Record<string, { input: number, output: number }> = {
      'gpt-3.5-turbo': { input: 0.0015, output: 0.002 },
      'gpt-4': { input: 0.03, output: 0.06 },
      'gpt-4-turbo': { input: 0.01, output: 0.03 }
    }
    const modelPricing = pricing[model] || pricing['gpt-3.5-turbo']
    return (tokens / 1000) * ((modelPricing.input + modelPricing.output) / 2)
  }

  /**
   * Estimate the Google cost in USD at $20 per million characters.
   */
  private calculateGoogleCost(textLength: number): number {
    return (textLength / 1000000) * 20
  }

  /**
   * Estimate the Baidu cost at 10 per million characters (returned as USD).
   */
  private calculateBaiduCost(textLength: number): number {
    return (textLength / 1000000) * 10
  }

  /**
   * Whether an error is worth retrying: a known transient error code or an
   * HTTP-style status of 500 or above. Null/undefined is not retryable.
   */
  private isRetryableError(error: any): boolean {
    if (error == null) return false
    const retryableCodes = ['TIMEOUT', 'RATE_LIMIT', 'SERVER_ERROR']
    return retryableCodes.includes(error.code) || error.status >= 500
  }

  /**
   * Extract a human-readable message from a caught value without assuming it
   * is an Error: anything (including null) can be thrown.
   */
  private describeError(error: any, fallback: string): string {
    if (typeof error === 'string' && error.length > 0) return error
    const message = error != null ? error.message : null
    return typeof message === 'string' && message.length > 0 ? message : fallback
  }

  // Mock API methods (replace with real HTTP requests in production)

  /**
   * Mock OpenAI chat completion: echoes the source text with a prefix after a
   * random 500-1500 ms delay.
   */
  private async mockOpenAIRequest(requestBody: any): Promise<any> {
    await this.delay(Math.random() * 1000 + 500) // simulated network latency
    const userContent: string = requestBody.messages[1].content
    // The source text is everything after the first blank line of the user
    // prompt; taking only one split segment would truncate multi-paragraph text.
    const separatorIndex = userContent.indexOf('\n\n')
    const sourceText = separatorIndex >= 0 ? userContent.slice(separatorIndex + 2) : ''
    return {
      choices: [{
        message: {
          content: `[Translated by OpenAI] ${sourceText || 'Translation result'}`
        }
      }],
      usage: {
        total_tokens: Math.ceil(userContent.length / 4) + 50
      }
    }
  }

  /**
   * Mock Google translate response (random 400-1200 ms delay).
   */
  private async mockGoogleRequest(params: any): Promise<any> {
    await this.delay(Math.random() * 800 + 400)
    return {
      data: {
        translations: [{
          translatedText: `[Translated by Google] ${params.q}`,
          detectedSourceLanguage: 'zh'
        }]
      }
    }
  }

  /**
   * Mock Baidu translate response (random 300-900 ms delay).
   */
  private async mockBaiduRequest(params: any): Promise<any> {
    await this.delay(Math.random() * 600 + 300)
    return {
      trans_result: [{
        src: params.q,
        dst: `[Translated by Baidu] ${params.q}`
      }],
      from: params.from
    }
  }

  /**
   * Mock Google language detection; always reports 'zh'.
   */
  private async mockGoogleDetectRequest(params: any): Promise<any> {
    await this.delay(200)
    return {
      data: {
        detections: [[{
          language: 'zh',
          confidence: 0.95
        }]]
      }
    }
  }

  /**
   * Mock Baidu language detection; always reports 'zh'.
   */
  private async mockBaiduDetectRequest(params: any): Promise<any> {
    await this.delay(200)
    return {
      lan: 'zh',
      confidence: 0.92
    }
  }
}