Initial commit of akmon project
This commit is contained in:
trans_LLM/simple_translation_service.py
Normal file (403 lines added)
@@ -0,0 +1,403 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
简化版自动翻译服务
|
||||
专门用于翻译 ak_contents 表的内容
|
||||
使用 Supabase + RageFlow 接口
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import json
|
||||
from typing import List, Dict, Optional
|
||||
from dataclasses import dataclass
|
||||
import logging
|
||||
from datetime import datetime
|
||||
|
||||
# Logging setup: timestamped INFO-level records, plus a module-level logger.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

logger = logging.getLogger(__name__)
|
||||
|
||||
@dataclass
class TranslationConfig:
    """Connection and model settings for the translation service."""

    # Base URL of the Supabase project; REST paths are appended to it.
    supabase_url: str
    # Sent both as the ``apikey`` header and as the Bearer token.
    supabase_key: str
    # Bearer token for the RageFlow chat-completions endpoint.
    rageflow_api_key: str
    # Root of the RageFlow API; ``/chat/completions`` is appended to it.
    rageflow_base_url: str = "https://api.rageflow.ai/v1"
    # Model name placed in the request payload.
    model: str = "gpt-4"
    # Sampling temperature placed in the request payload.
    temperature: float = 0.3
    # ``max_tokens`` value placed in the request payload.
    max_tokens: int = 2048
|
||||
|
||||
@dataclass
class ContentItem:
    """One source row read from the ``ak_contents`` table."""

    # Row id; used as ``content_id`` when a translation is stored.
    id: str
    # Title text to translate.
    title: str
    # Body text to translate.
    content: str
    # Category reference carried along from the source row.
    category_id: str
    # Language code of the source text.
    current_language: str = "zh"
|
||||
|
||||
@dataclass
class TranslationResult:
    """Outcome of translating one content item into one language."""

    # Id of the source content item.
    original_id: str
    # Translated title (empty when the title translation failed).
    title: str
    # Translated body (empty when the body translation failed).
    content: str
    # Target language code.
    language: str
    # Heuristic score; stays 0.0 unless a score was computed.
    quality_score: float = 0.0
    # False when either the title or the body could not be translated.
    success: bool = True
    # Failure description; empty on success.
    error_message: str = ""
|
||||
|
||||
class SimpleTranslationService:
    """Translate ``ak_contents`` rows via RageFlow and store them in Supabase.

    Use it as an async context manager so the shared aiohttp session is
    created on entry and closed on exit::

        async with SimpleTranslationService(config) as service:
            await service.batch_translate(["en"])
    """

    # Language code -> name used in the prompt; unknown codes are passed
    # through unchanged.
    _LANGUAGE_NAMES = {
        "en": "English",
        "ja": "Japanese",
        "fr": "French",
        "de": "German",
        "es": "Spanish",
    }

    def __init__(self, config: "TranslationConfig"):
        """Store the configuration; the HTTP session is opened in ``__aenter__``."""
        self.config = config
        self.session = None

    async def __aenter__(self):
        """Open the aiohttp session and return the service itself."""
        self.session = aiohttp.ClientSession()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the aiohttp session if one was opened."""
        if self.session:
            await self.session.close()
            # Drop the reference so a closed session is never reused.
            self.session = None

    def _supabase_headers(self) -> Dict[str, str]:
        """Build the headers sent with every Supabase REST request."""
        return {
            "apikey": self.config.supabase_key,
            "Authorization": f"Bearer {self.config.supabase_key}",
            "Content-Type": "application/json",
        }

    async def get_contents_to_translate(self, limit: int = 10) -> "List[ContentItem]":
        """Fetch the most recently created rows of ``ak_contents``.

        No "not yet translated" filter is applied here; existing translations
        are detected per language when translating or saving.

        Args:
            limit: Maximum number of rows to request.

        Returns:
            The rows as ``ContentItem`` objects, or an empty list when the
            request fails or the response cannot be processed.
        """
        # Pass the query as ``params`` so it is encoded properly instead of
        # embedding a multi-line string (newlines, indentation) in the URL.
        params = {
            "select": "id,title,content,category_id",
            "limit": str(limit),
            "order": "created_at.desc",
        }
        url = f"{self.config.supabase_url}/rest/v1/ak_contents"

        try:
            async with self.session.get(
                url, params=params, headers=self._supabase_headers()
            ) as response:
                if response.status != 200:
                    logger.error("获取内容失败: %s", response.status)
                    return []

                rows = await response.json()
                # ``or ""`` also covers JSON null, which ``.get(key, "")``
                # would pass through as None.
                contents = [
                    ContentItem(
                        id=row["id"],
                        title=row.get("title") or "",
                        content=row.get("content") or "",
                        category_id=row.get("category_id") or "",
                    )
                    for row in rows
                ]
                logger.info("获取到 %s 条待翻译内容", len(contents))
                return contents
        except Exception as e:
            logger.error("获取内容异常: %s", e)
            return []

    async def translate_text(self, text: str, target_language: str = "en") -> Dict:
        """Translate one piece of Chinese text through the RageFlow chat API.

        Args:
            text: Source text; ``None``, empty or whitespace-only text is
                returned as an empty successful translation without any
                request being made.
            target_language: Target language code.

        Returns:
            A dict with ``translated_text`` and ``success``; on success also
            ``model``, on failure also ``error``.
        """
        if not text or not text.strip():
            return {"translated_text": "", "success": True}

        target_lang_name = self._LANGUAGE_NAMES.get(target_language, target_language)

        prompt = (
            f"Please translate the following Chinese text to {target_lang_name}.\n"
            "\n"
            "Requirements:\n"
            "1. Maintain the original meaning and tone\n"
            "2. Use natural and fluent expressions\n"
            "3. Preserve any technical terms appropriately\n"
            "4. Keep HTML tags and markdown formatting intact\n"
            "\n"
            "Text to translate:\n"
            f"{text}\n"
            "\n"
            "Translation:"
        )

        payload = {
            "model": self.config.model,
            "messages": [
                {"role": "system", "content": "You are a professional translator."},
                {"role": "user", "content": prompt},
            ],
            "temperature": self.config.temperature,
            "max_tokens": self.config.max_tokens,
            "stream": False,
        }
        headers = {
            "Authorization": f"Bearer {self.config.rageflow_api_key}",
            "Content-Type": "application/json",
        }

        try:
            async with self.session.post(
                f"{self.config.rageflow_base_url}/chat/completions",
                json=payload,
                headers=headers,
            ) as response:
                if response.status != 200:
                    error_data = await response.text()
                    logger.error("RageFlow API 错误: %s - %s", response.status, error_data)
                    return {
                        "translated_text": "",
                        "success": False,
                        "error": f"API错误: {response.status}",
                    }

                data = await response.json()
                translated_text = data["choices"][0]["message"]["content"].strip()
                return {
                    "translated_text": translated_text,
                    "success": True,
                    "model": self.config.model,
                }
        except Exception as e:
            # Covers transport errors as well as an unexpected response shape.
            logger.error("翻译请求异常: %s", e)
            return {
                "translated_text": "",
                "success": False,
                "error": str(e),
            }

    async def translate_content_item(
        self, content: "ContentItem", target_language: str = "en"
    ) -> "TranslationResult":
        """Translate the title and then the body of one content item.

        Returns:
            A ``TranslationResult``; ``success`` is False and
            ``error_message`` is set as soon as either part fails.
        """
        logger.info("开始翻译内容: %s", content.id)

        title_result = await self.translate_text(content.title, target_language)
        if not title_result["success"]:
            return TranslationResult(
                original_id=content.id,
                title="",
                content="",
                language=target_language,
                success=False,
                error_message=f"标题翻译失败: {title_result.get('error', '')}",
            )

        content_result = await self.translate_text(content.content, target_language)
        if not content_result["success"]:
            return TranslationResult(
                original_id=content.id,
                title=title_result["translated_text"],
                content="",
                language=target_language,
                success=False,
                error_message=f"内容翻译失败: {content_result.get('error', '')}",
            )

        quality_score = self._calculate_quality_score(
            (content.title or "") + (content.content or ""),
            title_result["translated_text"] + content_result["translated_text"],
        )

        return TranslationResult(
            original_id=content.id,
            title=title_result["translated_text"],
            content=content_result["translated_text"],
            language=target_language,
            quality_score=quality_score,
            success=True,
        )

    @staticmethod
    def _count_tokens(text: str) -> int:
        """Count tokens: each CJK/kana character, plus each other word.

        Chinese and Japanese text has no spaces between words, so a plain
        ``str.split()`` would count a whole paragraph as a single token.
        """
        count = 0
        in_word = False
        for ch in text:
            if ch.isspace():
                in_word = False
            elif (
                "\u4e00" <= ch <= "\u9fff"      # CJK unified ideographs
                or "\u3400" <= ch <= "\u4dbf"   # CJK extension A
                or "\u3040" <= ch <= "\u30ff"   # hiragana and katakana
                or "\uf900" <= ch <= "\ufaff"   # CJK compatibility ideographs
            ):
                count += 1
                in_word = False
            elif not in_word:
                count += 1
                in_word = True
        return count

    def _calculate_quality_score(self, original: str, translated: str) -> float:
        """Score a translation from the ratio of translated to original tokens.

        Returns:
            0.0 when either text is empty or the original has no tokens;
            0.9 when the ratio lies in [0.5, 2.0]; otherwise a value that
            shrinks with the distance from that range, never below 0.3.
        """
        if not original or not translated:
            return 0.0

        original_len = self._count_tokens(original)
        if original_len == 0:
            return 0.0

        ratio = self._count_tokens(translated) / original_len

        if ratio < 0.5:
            return max(0.3, ratio * 1.8)
        if ratio > 2.0:
            return max(0.3, 2.0 / ratio)
        return 0.9

    async def _translation_exists(self, content_id: str, language: str) -> bool:
        """Report whether ``ak_content_translations`` already has this pair.

        Best effort: a failed lookup is logged and reported as "not found".
        """
        url = f"{self.config.supabase_url}/rest/v1/ak_content_translations"
        params = {
            "select": "id",
            "content_id": f"eq.{content_id}",
            "language": f"eq.{language}",
        }
        try:
            async with self.session.get(
                url, params=params, headers=self._supabase_headers()
            ) as response:
                if response.status == 200:
                    return bool(await response.json())
        except Exception as e:
            logger.warning("检查翻译记录时出错: %s", e)
        return False

    async def save_translation(
        self, original_content: "ContentItem", translation: "TranslationResult"
    ) -> bool:
        """Insert a translation into ``ak_content_translations``.

        Returns:
            True when the row was inserted or a translation for that content
            and language already exists; False for an unsuccessful
            translation or a failed insert.
        """
        if not translation.success:
            return False

        if await self._translation_exists(original_content.id, translation.language):
            logger.info(
                "内容 %s 的 %s 翻译已存在，跳过",
                original_content.id,
                translation.language,
            )
            return True

        translation_data = {
            "content_id": original_content.id,
            "language": translation.language,
            "title": translation.title,
            "content": translation.content,
            "quality_score": translation.quality_score,
            # Include the UTC offset so the timestamp is unambiguous.
            "translated_at": datetime.now().astimezone().isoformat(),
            "translation_source": "rageflow_auto",
        }

        try:
            async with self.session.post(
                f"{self.config.supabase_url}/rest/v1/ak_content_translations",
                json=translation_data,
                headers=self._supabase_headers(),
            ) as response:
                if response.status in (200, 201):
                    logger.info(
                        "翻译保存成功: %s -> %s",
                        original_content.id,
                        translation.language,
                    )
                    return True

                error_text = await response.text()
                logger.error("保存翻译失败: %s - %s", response.status, error_text)
                return False
        except Exception as e:
            logger.error("保存翻译异常: %s", e)
            return False

    async def batch_translate(
        self, target_languages: Optional[List[str]] = None, limit: int = 10
    ) -> Dict:
        """Translate recent content into each target language and save it.

        Args:
            target_languages: Language codes to translate into; defaults to
                ``["en"]`` when omitted.
            limit: Maximum number of content rows to process.

        Returns:
            ``{"success": False, "message": ...}`` when no content was
            fetched; otherwise a dict with ``total_contents``,
            ``target_languages``, per-content ``results`` and a ``summary``
            of successful / failed / skipped counts. Pairs that already have
            a stored translation are counted as skipped and are not sent to
            the translation API.
        """
        # Copy so neither a shared default nor the caller's list ends up
        # inside the returned dict.
        target_languages = ["en"] if target_languages is None else list(target_languages)
        logger.info("开始批量翻译，目标语言: %s，限制: %s", target_languages, limit)

        contents = await self.get_contents_to_translate(limit)
        if not contents:
            return {"success": False, "message": "没有找到待翻译的内容"}

        summary = {"successful": 0, "failed": 0, "skipped": 0}
        results = {
            "total_contents": len(contents),
            "target_languages": target_languages,
            "results": {},
            "summary": summary,
        }

        for content in contents:
            content_results = {}

            for target_lang in target_languages:
                try:
                    # Check first so no translation request is spent on a
                    # pair that is already stored.
                    if await self._translation_exists(content.id, target_lang):
                        logger.info(
                            "内容 %s 的 %s 翻译已存在，跳过", content.id, target_lang
                        )
                        content_results[target_lang] = {"success": True, "skipped": True}
                        summary["skipped"] += 1
                        continue

                    translation = await self.translate_content_item(content, target_lang)

                    if translation.success:
                        saved = await self.save_translation(content, translation)
                        title = translation.title
                        content_results[target_lang] = {
                            "success": saved,
                            "quality_score": translation.quality_score,
                            "title_preview": title[:50] + "..." if len(title) > 50 else title,
                        }
                        summary["successful" if saved else "failed"] += 1
                    else:
                        content_results[target_lang] = {
                            "success": False,
                            "error": translation.error_message,
                        }
                        summary["failed"] += 1

                    # Pause between items to stay under API rate limits.
                    await asyncio.sleep(1)

                except Exception as e:
                    logger.error(
                        "翻译内容 %s 到 %s 时出错: %s", content.id, target_lang, e
                    )
                    content_results[target_lang] = {"success": False, "error": str(e)}
                    summary["failed"] += 1

            results["results"][content.id] = content_results

        logger.info(
            "批量翻译完成: 成功 %s, 失败 %s", summary["successful"], summary["failed"]
        )
        return results
|
||||
|
||||
# Example entry point
async def main():
    """Run a small batch translation (English and Japanese, 5 rows) and print it.

    Credentials are read from the ``SUPABASE_URL``, ``SUPABASE_KEY`` and
    ``RAGEFLOW_API_KEY`` environment variables; when a variable is unset the
    original placeholder value is used instead.

    To translate a single item instead, call ``get_contents_to_translate(1)``
    and pass the first result to ``translate_content_item``.
    """
    import os  # local import: only this example entry point needs it

    config = TranslationConfig(
        supabase_url=os.environ.get("SUPABASE_URL", "YOUR_SUPABASE_URL"),
        supabase_key=os.environ.get("SUPABASE_KEY", "YOUR_SUPABASE_KEY"),
        rageflow_api_key=os.environ.get("RAGEFLOW_API_KEY", "YOUR_RAGEFLOW_KEY"),
        model="gpt-4",
        temperature=0.3,
    )

    async with SimpleTranslationService(config) as service:
        results = await service.batch_translate(
            target_languages=["en", "ja"],
            limit=5,  # small batch for a trial run
        )

        print(json.dumps(results, indent=2, ensure_ascii=False))


if __name__ == "__main__":
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user