Initial commit of akmon project
This commit is contained in:
195
trans_LLM/SIMPLE_TRANSLATION_README.md
Normal file
195
trans_LLM/SIMPLE_TRANSLATION_README.md
Normal file
@@ -0,0 +1,195 @@
|
||||
# 简化版 Supabase 自动翻译服务
|
||||
|
||||
这是一个简化版的自动翻译脚本,专门用于翻译 `ak_contents` 表的内容,使用 Supabase 和 RageFlow 接口。
|
||||
|
||||
## 功能特点
|
||||
|
||||
- ✅ 直接连接 Supabase 数据库
|
||||
- ✅ 使用 RageFlow API 调用大模型翻译
|
||||
- ✅ 支持批量翻译多种语言
|
||||
- ✅ 自动保存翻译结果到 `ak_content_translations` 表
|
||||
- ✅ 简单的质量评估
|
||||
- ✅ 避免重复翻译
|
||||
- ✅ 错误处理和日志记录
|
||||
|
||||
## 快速开始
|
||||
|
||||
### 1. 安装依赖
|
||||
|
||||
```bash
|
||||
pip install aiohttp
|
||||
```
|
||||
|
||||
### 2. 配置参数
|
||||
|
||||
复制 `.env.example` 为 `.env` 并填入您的配置:
|
||||
|
||||
```bash
|
||||
cp .env.example .env
|
||||
```
|
||||
|
||||
编辑 `.env` 文件:
|
||||
```env
|
||||
SUPABASE_URL=https://your-project.supabase.co
|
||||
SUPABASE_KEY=your-supabase-anon-key
|
||||
RAGEFLOW_API_KEY=your-rageflow-api-key
|
||||
```
|
||||
|
||||
### 3. 使用脚本
|
||||
|
||||
#### 方法1:直接修改脚本配置
|
||||
编辑 `simple_translation_service.py` 中的配置:
|
||||
|
||||
```python
|
||||
config = TranslationConfig(
|
||||
supabase_url="YOUR_SUPABASE_URL",
|
||||
supabase_key="YOUR_SUPABASE_KEY",
|
||||
rageflow_api_key="YOUR_RAGEFLOW_KEY"
|
||||
)
|
||||
```
|
||||
|
||||
然后运行:
|
||||
```bash
|
||||
python simple_translation_service.py
|
||||
```
|
||||
|
||||
#### 方法2:作为模块使用
|
||||
```python
|
||||
import asyncio
|
||||
from simple_translation_service import SimpleTranslationService, TranslationConfig
|
||||
|
||||
async def translate_contents():
|
||||
config = TranslationConfig(
|
||||
supabase_url="YOUR_SUPABASE_URL",
|
||||
supabase_key="YOUR_SUPABASE_KEY",
|
||||
rageflow_api_key="YOUR_RAGEFLOW_KEY"
|
||||
)
|
||||
|
||||
async with SimpleTranslationService(config) as service:
|
||||
# 翻译为英文和日文,限制5条记录
|
||||
results = await service.batch_translate(
|
||||
target_languages=["en", "ja"],
|
||||
limit=5
|
||||
)
|
||||
print(f"翻译完成: {results['summary']}")
|
||||
|
||||
# 运行
|
||||
asyncio.run(translate_contents())
|
||||
```
|
||||
|
||||
## 数据库要求
|
||||
|
||||
确保您的 Supabase 数据库中存在以下表:
|
||||
|
||||
### ak_contents 表(源内容)
|
||||
```sql
|
||||
CREATE TABLE ak_contents (
|
||||
id UUID PRIMARY KEY,
|
||||
title TEXT,
|
||||
content TEXT,
|
||||
category_id UUID,
|
||||
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
|
||||
);
|
||||
```
|
||||
|
||||
### ak_content_translations 表(翻译结果)
|
||||
```sql
|
||||
CREATE TABLE ak_content_translations (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
content_id UUID REFERENCES ak_contents(id),
|
||||
language VARCHAR(10) NOT NULL,
|
||||
title TEXT,
|
||||
content TEXT,
|
||||
quality_score FLOAT DEFAULT 0.0,
|
||||
translated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
|
||||
translation_source VARCHAR(50) DEFAULT 'manual',
|
||||
UNIQUE(content_id, language)
|
||||
);
|
||||
```
|
||||
|
||||
## API 使用方法
|
||||
|
||||
### 获取待翻译内容
|
||||
```python
|
||||
contents = await service.get_contents_to_translate(limit=10)
|
||||
```
|
||||
|
||||
### 翻译单个内容
|
||||
```python
|
||||
translation = await service.translate_content_item(content, "en")
|
||||
```
|
||||
|
||||
### 批量翻译
|
||||
```python
|
||||
results = await service.batch_translate(
|
||||
target_languages=["en", "ja", "fr"],
|
||||
limit=20
|
||||
)
|
||||
```
|
||||
|
||||
## 支持的语言
|
||||
|
||||
- `en`: English (英文)
|
||||
- `ja`: Japanese (日文)
|
||||
- `fr`: French (法文)
|
||||
- `de`: German (德文)
|
||||
- `es`: Spanish (西班牙文)
|
||||
|
||||
## 输出示例
|
||||
|
||||
```json
|
||||
{
|
||||
"total_contents": 5,
|
||||
"target_languages": ["en", "ja"],
|
||||
"results": {
|
||||
"content-uuid-1": {
|
||||
"en": {
|
||||
"success": true,
|
||||
"quality_score": 0.9,
|
||||
"title_preview": "Translated title preview..."
|
||||
},
|
||||
"ja": {
|
||||
"success": true,
|
||||
"quality_score": 0.85,
|
||||
"title_preview": "翻訳されたタイトルのプレビュー..."
|
||||
}
|
||||
}
|
||||
},
|
||||
"summary": {
|
||||
"successful": 8,
|
||||
"failed": 2,
|
||||
"skipped": 0
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 注意事项
|
||||
|
||||
1. **API 限制**: 脚本会在每次翻译请求之间添加1秒延迟,避免触发API限制
|
||||
2. **避免重复**: 自动检查是否已存在翻译,避免重复翻译
|
||||
3. **错误处理**: 包含完整的错误处理和日志记录
|
||||
4. **质量评估**: 提供基于长度比例的简单质量评分
|
||||
|
||||
## 扩展功能
|
||||
|
||||
如果需要更多功能,可以考虑:
|
||||
- 添加更多翻译提供商(OpenAI、Google Translate等)
|
||||
- 实现更复杂的质量评估算法
|
||||
- 添加人工审核工作流
|
||||
- 支持增量翻译(定时任务)
|
||||
- 添加翻译缓存机制
|
||||
|
||||
## 故障排除
|
||||
|
||||
### 常见错误
|
||||
|
||||
1. **连接错误**: 检查 Supabase URL 和 API Key 是否正确
|
||||
2. **权限错误**: 确保 Supabase API Key 有读写权限
|
||||
3. **翻译失败**: 检查 RageFlow API Key 和网络连接
|
||||
4. **数据库错误**: 确保表结构正确且存在
|
||||
|
||||
### 调试模式
|
||||
将日志级别设置为 DEBUG:
|
||||
```python
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
```
|
||||
204
trans_LLM/quick_translate.py
Normal file
204
trans_LLM/quick_translate.py
Normal file
@@ -0,0 +1,204 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
快速翻译脚本
|
||||
用于快速翻译 ak_contents 表内容
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import json
|
||||
import logging
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ========== Settings - edit the values below before running ==========
|
||||
SUPABASE_URL = "YOUR_SUPABASE_URL" # e.g. https://abcdefgh.supabase.co
|
||||
SUPABASE_KEY = "YOUR_SUPABASE_KEY" # your Supabase anon key
|
||||
RAGEFLOW_API_KEY = "YOUR_RAGEFLOW_KEY" # your RageFlow API key
|
||||
|
||||
# Translation settings
|
||||
TARGET_LANGUAGES = ["en", "ja"] # target language codes
|
||||
CONTENT_LIMIT = 5 # number of content rows fetched per run
|
||||
MODEL = "gpt-4" # model name sent in the chat-completions payload
|
||||
# ===============================================
|
||||
|
||||
class QuickTranslator:
    """Translate ``ak_contents`` rows and store the results in Supabase.

    Use it as an async context manager so the HTTP session is opened and
    closed around the work::

        async with QuickTranslator() as translator:
            await translator.run_translation()

    The class reads the module-level settings ``SUPABASE_URL``,
    ``SUPABASE_KEY``, ``RAGEFLOW_API_KEY``, ``TARGET_LANGUAGES``,
    ``CONTENT_LIMIT`` and ``MODEL``.
    """

    def __init__(self):
        # Opened in __aenter__ and closed in __aexit__.
        self.session = None

    async def __aenter__(self):
        self.session = aiohttp.ClientSession()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        if self.session:
            await self.session.close()

    @staticmethod
    def _supabase_headers(with_json_body=False):
        """Return the Supabase auth headers, plus Content-Type for JSON bodies."""
        headers = {
            "apikey": SUPABASE_KEY,
            "Authorization": f"Bearer {SUPABASE_KEY}",
        }
        if with_json_body:
            headers["Content-Type"] = "application/json"
        return headers

    async def get_contents(self):
        """Fetch the newest ``CONTENT_LIMIT`` rows of ``ak_contents``.

        Returns:
            The decoded JSON list of rows (``id``, ``title``, ``content``), or
            an empty list when the request fails or does not answer HTTP 200.
        """
        # Passing the query as a mapping lets the HTTP client do the encoding.
        params = {
            "select": "id,title,content",
            "limit": str(CONTENT_LIMIT),
            "order": "created_at.desc",
        }
        try:
            async with self.session.get(
                f"{SUPABASE_URL}/rest/v1/ak_contents",
                params=params,
                headers=self._supabase_headers(),
            ) as response:
                if response.status == 200:
                    return await response.json()
                logger.error(f"获取内容失败: {response.status}")
                return []
        except (aiohttp.ClientError, asyncio.TimeoutError) as e:
            # A network failure is reported the same way as a bad status so
            # run_translation() ends cleanly instead of crashing.
            logger.error(f"获取内容异常: {e}")
            return []

    async def translate_text(self, text, target_lang):
        """Translate Chinese ``text`` into ``target_lang`` via the chat API.

        Args:
            text: Source text. ``None`` or blank text is not sent to the API.
            target_lang: Language code; "en", "ja" and "fr" are expanded to
                their English names, any other value is used verbatim.

        Returns:
            The stripped translation, or "" when the input is blank, the API
            does not answer HTTP 200, or any exception occurs.
        """
        # Guard against None as well as blank strings: NULL columns arrive
        # as JSON null.
        if not text or not text.strip():
            return ""

        lang_map = {"en": "English", "ja": "Japanese", "fr": "French"}
        target_name = lang_map.get(target_lang, target_lang)

        prompt = f"Please translate the following Chinese text to {target_name}:\n\n{text}\n\nTranslation:"

        payload = {
            "model": MODEL,
            "messages": [
                {"role": "user", "content": prompt}
            ],
            "temperature": 0.3,
            "max_tokens": 2048,
        }

        headers = {
            "Authorization": f"Bearer {RAGEFLOW_API_KEY}",
            "Content-Type": "application/json",
        }

        try:
            async with self.session.post(
                "https://api.rageflow.ai/v1/chat/completions",
                json=payload,
                headers=headers,
            ) as response:
                if response.status == 200:
                    data = await response.json()
                    return data["choices"][0]["message"]["content"].strip()
                logger.error(f"翻译失败: {response.status}")
                return ""
        except Exception as e:
            # Also covers an unexpected response shape (missing keys, null content).
            logger.error(f"翻译异常: {e}")
            return ""

    async def _translation_exists(self, content_id, language):
        """Return True when a translation row for this content/language exists.

        A non-200 answer is treated as "not found", so the caller falls
        through to the insert attempt.
        """
        params = {
            "content_id": f"eq.{content_id}",
            "language": f"eq.{language}",
            "select": "id",
        }
        async with self.session.get(
            f"{SUPABASE_URL}/rest/v1/ak_content_translations",
            params=params,
            headers=self._supabase_headers(),
        ) as response:
            if response.status == 200:
                return bool(await response.json())
        return False

    async def save_translation(self, content_id, language, title, content):
        """Insert one translation row unless it already exists.

        Returns:
            True when the row already existed or the insert answered
            HTTP 200/201, False otherwise.
        """
        data = {
            "content_id": content_id,
            "language": language,
            "title": title,
            "content": content,
            "translation_source": "rageflow_auto",
        }

        # Check first so an existing translation is never overwritten.
        if await self._translation_exists(content_id, language):
            logger.info(f"翻译已存在,跳过: {content_id} -> {language}")
            return True

        async with self.session.post(
            f"{SUPABASE_URL}/rest/v1/ak_content_translations",
            json=data,
            headers=self._supabase_headers(with_json_body=True),
        ) as response:
            success = response.status in [200, 201]
            if success:
                logger.info(f"翻译保存成功: {content_id} -> {language}")
            else:
                logger.error(f"保存失败: {response.status}")
            return success

    async def run_translation(self):
        """Translate every fetched row into each of ``TARGET_LANGUAGES``.

        Logs a success/failure total at the end. Rows that already have a
        translation are counted as successes without calling the
        translation API.
        """
        logger.info("开始获取内容...")
        contents = await self.get_contents()

        if not contents:
            logger.warning("没有找到内容")
            return

        logger.info(f"找到 {len(contents)} 条内容,开始翻译...")

        total_success = 0
        total_failed = 0

        for content in contents:
            content_id = content["id"]
            # dict.get(key, "") still returns None for JSON null values, so
            # normalise with `or`.
            title = content.get("title") or ""
            content_text = content.get("content") or ""

            logger.info(f"翻译内容: {title[:30]}...")

            if not title.strip() and not content_text.strip():
                # Nothing to translate; do not report it as a failure.
                logger.warning(f"内容为空,跳过: {content_id}")
                continue

            for target_lang in TARGET_LANGUAGES:
                try:
                    # Check before translating so no API calls are spent on
                    # work that is already stored.
                    if await self._translation_exists(content_id, target_lang):
                        logger.info(f"翻译已存在,跳过: {content_id} -> {target_lang}")
                        total_success += 1
                        continue

                    # Translate title and body, pausing to stay under API limits.
                    translated_title = await self.translate_text(title, target_lang)
                    await asyncio.sleep(1)

                    translated_content = await self.translate_text(content_text, target_lang)
                    await asyncio.sleep(1)

                    # An empty result is a failure only when there was
                    # source text to translate.
                    title_ok = bool(translated_title) or not title.strip()
                    content_ok = bool(translated_content) or not content_text.strip()

                    if title_ok and content_ok:
                        success = await self.save_translation(
                            content_id, target_lang, translated_title, translated_content
                        )
                        if success:
                            total_success += 1
                        else:
                            total_failed += 1
                    else:
                        logger.error(f"翻译失败: {content_id} -> {target_lang}")
                        total_failed += 1

                except Exception as e:
                    # One failing pair must not abort the remaining work.
                    logger.error(f"处理异常: {content_id} -> {target_lang}: {e}")
                    total_failed += 1

        logger.info(f"翻译完成!成功: {total_success}, 失败: {total_failed}")
|
||||
|
||||
async def main():
    """Print the run banner, validate the settings and run the translation."""
    rule = "=" * 50
    banner = (
        rule,
        "🌍 简化版自动翻译服务",
        rule,
        f"📝 目标语言: {', '.join(TARGET_LANGUAGES)}",
        f"📊 内容限制: {CONTENT_LIMIT} 条",
        f"🤖 使用模型: {MODEL}",
        rule,
    )
    for line in banner:
        print(line)

    # Refuse to run while any setting still holds its "YOUR_..." placeholder.
    settings = (SUPABASE_URL, SUPABASE_KEY, RAGEFLOW_API_KEY)
    if any("YOUR_" in value for value in settings):
        print("❌ 请先在脚本顶部配置正确的 API 参数!")
        return

    async with QuickTranslator() as translator:
        await translator.run_translation()

    print("✅ 翻译任务完成!")


if __name__ == "__main__":
    asyncio.run(main())
|
||||
403
trans_LLM/simple_translation_service.py
Normal file
403
trans_LLM/simple_translation_service.py
Normal file
@@ -0,0 +1,403 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
简化版自动翻译服务
|
||||
专门用于翻译 ak_contents 表的内容
|
||||
使用 Supabase + RageFlow 接口
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import json
|
||||
from typing import List, Dict, Optional
|
||||
from dataclasses import dataclass
|
||||
import logging
|
||||
from datetime import datetime
|
||||
|
||||
# Configure logging: INFO level, timestamp + level + message
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(levelname)s - %(message)s'
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@dataclass
class TranslationConfig:
    """Connection and model settings for the translation service."""

    # Supabase project base URL and API key (required).
    supabase_url: str
    supabase_key: str
    # API key sent as a Bearer token to the RageFlow endpoint (required).
    rageflow_api_key: str
    # Base URL; "/chat/completions" is appended by the service.
    rageflow_base_url: str = "https://api.rageflow.ai/v1"
    # Values forwarded in the chat-completions payload.
    model: str = "gpt-4"
    temperature: float = 0.3
    max_tokens: int = 2048
|
||||
|
||||
@dataclass
class ContentItem:
    """One source row read from the ``ak_contents`` table."""

    # Column values copied from the fetched row.
    id: str
    title: str
    content: str
    category_id: str
    # Language code of the source text; defaults to Chinese.
    current_language: str = "zh"
|
||||
|
||||
@dataclass
class TranslationResult:
    """Outcome of translating one content item into one language."""

    # Id of the source content item.
    original_id: str
    # Translated title and body (empty strings when translation failed).
    title: str
    content: str
    # Target language code.
    language: str
    # Score assigned by the service's quality heuristic.
    quality_score: float = 0.0
    # False when either translation request failed; error_message says why.
    success: bool = True
    error_message: str = ""
|
||||
|
||||
class SimpleTranslationService:
    """Translate ``ak_contents`` rows through the RageFlow chat API.

    Results are written to the ``ak_content_translations`` table through the
    Supabase REST interface. Use the service as an async context manager so
    the HTTP session is opened and closed around the work::

        async with SimpleTranslationService(config) as service:
            results = await service.batch_translate(["en", "ja"], limit=5)
    """

    # Inclusive code-point ranges whose characters each count as one length
    # unit in the quality heuristic (scripts written without spaces).
    _UNSEGMENTED_RANGES = (
        (0x3040, 0x30FF),  # Hiragana and Katakana
        (0x3400, 0x4DBF),  # CJK Unified Ideographs Extension A
        (0x4E00, 0x9FFF),  # CJK Unified Ideographs
        (0xAC00, 0xD7AF),  # Hangul syllables
        (0xF900, 0xFAFF),  # CJK Compatibility Ideographs
    )

    def __init__(self, config: "TranslationConfig"):
        self.config = config
        # Opened in __aenter__ and closed in __aexit__.
        self.session = None

    async def __aenter__(self):
        self.session = aiohttp.ClientSession()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        if self.session:
            await self.session.close()

    def _supabase_headers(self) -> Dict[str, str]:
        """Return the headers sent with every Supabase REST request."""
        return {
            "apikey": self.config.supabase_key,
            "Authorization": f"Bearer {self.config.supabase_key}",
            "Content-Type": "application/json",
        }

    async def get_contents_to_translate(self, limit: int = 10) -> List["ContentItem"]:
        """Fetch the newest ``limit`` rows of ``ak_contents``.

        No filter on existing translations is applied here; duplicates are
        detected later, per language, before translating and before saving.

        Returns:
            The rows as ``ContentItem`` objects, or an empty list when the
            request fails, raises, or does not answer HTTP 200.
        """
        # A mapping keeps stray whitespace and newlines out of the URL and
        # lets the HTTP client encode the values.
        params = {
            "select": "id,title,content,category_id",
            "limit": str(limit),
            "order": "created_at.desc",
        }
        url = f"{self.config.supabase_url}/rest/v1/ak_contents"

        try:
            async with self.session.get(
                url, params=params, headers=self._supabase_headers()
            ) as response:
                if response.status != 200:
                    logger.error(f"获取内容失败: {response.status}")
                    return []

                data = await response.json()
                # NULL columns arrive as JSON null, so normalise with `or`;
                # dict.get(key, "") would still return None for them.
                contents = [
                    ContentItem(
                        id=item["id"],
                        title=item.get("title") or "",
                        content=item.get("content") or "",
                        category_id=item.get("category_id") or "",
                    )
                    for item in data
                ]
                logger.info(f"获取到 {len(contents)} 条待翻译内容")
                return contents
        except Exception as e:
            logger.error(f"获取内容异常: {str(e)}")
            return []

    async def translate_text(self, text: str, target_language: str = "en") -> Dict:
        """Translate Chinese ``text`` into ``target_language``.

        Args:
            text: Source text. ``None`` or blank text is not sent to the API.
            target_language: Language code; known codes are expanded to
                English names, any other value is used verbatim.

        Returns:
            A dict with ``translated_text`` and ``success``. On success it
            also carries ``model``; on failure it carries ``error``.
        """
        if not text or not text.strip():
            return {"translated_text": "", "success": True}

        # Build the translation prompt.
        language_map = {
            "en": "English",
            "ja": "Japanese",
            "fr": "French",
            "de": "German",
            "es": "Spanish",
        }
        target_lang_name = language_map.get(target_language, target_language)

        prompt = "\n".join([
            f"Please translate the following Chinese text to {target_lang_name}.",
            "",
            "Requirements:",
            "1. Maintain the original meaning and tone",
            "2. Use natural and fluent expressions",
            "3. Preserve any technical terms appropriately",
            "4. Keep HTML tags and markdown formatting intact",
            "",
            "Text to translate:",
            f"{text}",
            "",
            "Translation:",
        ])

        # Call the RageFlow chat-completions endpoint.
        payload = {
            "model": self.config.model,
            "messages": [
                {"role": "system", "content": "You are a professional translator."},
                {"role": "user", "content": prompt},
            ],
            "temperature": self.config.temperature,
            "max_tokens": self.config.max_tokens,
            "stream": False,
        }
        headers = {
            "Authorization": f"Bearer {self.config.rageflow_api_key}",
            "Content-Type": "application/json",
        }

        try:
            async with self.session.post(
                f"{self.config.rageflow_base_url}/chat/completions",
                json=payload,
                headers=headers,
            ) as response:
                if response.status == 200:
                    data = await response.json()
                    translated_text = data["choices"][0]["message"]["content"].strip()
                    return {
                        "translated_text": translated_text,
                        "success": True,
                        "model": self.config.model,
                    }

                error_data = await response.text()
                logger.error(f"RageFlow API 错误: {response.status} - {error_data}")
                return {
                    "translated_text": "",
                    "success": False,
                    "error": f"API错误: {response.status}",
                }
        except Exception as e:
            # Also covers an unexpected response shape (missing keys, null content).
            logger.error(f"翻译请求异常: {str(e)}")
            return {
                "translated_text": "",
                "success": False,
                "error": str(e),
            }

    async def translate_content_item(self, content: "ContentItem", target_language: str = "en") -> "TranslationResult":
        """Translate the title and body of one content item.

        Returns:
            A ``TranslationResult``. ``success`` is False, with
            ``error_message`` set, as soon as either request fails.
        """
        logger.info(f"开始翻译内容: {content.id}")

        # Translate the title.
        title_result = await self.translate_text(content.title, target_language)
        if not title_result["success"]:
            return TranslationResult(
                original_id=content.id,
                title="",
                content="",
                language=target_language,
                success=False,
                error_message=f"标题翻译失败: {title_result.get('error', '')}",
            )

        # Translate the body.
        content_result = await self.translate_text(content.content, target_language)
        if not content_result["success"]:
            return TranslationResult(
                original_id=content.id,
                title=title_result["translated_text"],
                content="",
                language=target_language,
                success=False,
                error_message=f"内容翻译失败: {content_result.get('error', '')}",
            )

        # Score the pair with the length-ratio heuristic.
        quality_score = self._calculate_quality_score(
            (content.title or "") + (content.content or ""),
            title_result["translated_text"] + content_result["translated_text"],
        )

        return TranslationResult(
            original_id=content.id,
            title=title_result["translated_text"],
            content=content_result["translated_text"],
            language=target_language,
            quality_score=quality_score,
            success=True,
        )

    @classmethod
    def _count_text_units(cls, text: str) -> int:
        """Count length units in ``text``.

        Each character in ``_UNSEGMENTED_RANGES`` is one unit; every other
        run of non-whitespace characters is one unit. Whitespace splitting
        alone would count a whole unsegmented sentence as a single word.
        """
        units = 0
        in_word = False
        for ch in text:
            if ch.isspace():
                in_word = False
                continue
            code = ord(ch)
            if any(low <= code <= high for low, high in cls._UNSEGMENTED_RANGES):
                units += 1
                in_word = False
            elif not in_word:
                units += 1
                in_word = True
        return units

    def _calculate_quality_score(self, original: str, translated: str) -> float:
        """Return a rough quality score from the translated/original length ratio.

        Returns:
            0.0 when either text is empty; 0.9 when the ratio lies in
            [0.5, 2.0]; otherwise a value that shrinks as the ratio moves
            away from that range, never below 0.3.
        """
        if not original or not translated:
            return 0.0

        original_len = self._count_text_units(original)
        translated_len = self._count_text_units(translated)

        if original_len == 0:
            return 0.0

        ratio = translated_len / original_len

        # Ideal ratio range is 0.5 - 2.0.
        if 0.5 <= ratio <= 2.0:
            return 0.9
        elif ratio < 0.5:
            return max(0.3, ratio * 1.8)
        else:
            return max(0.3, 2.0 / ratio)

    async def _translation_exists(self, content_id: str, language: str) -> bool:
        """Return True when a translation row for this content/language exists.

        Errors and non-200 answers are treated as "not found" (with a
        warning on exceptions) so the caller falls through to its normal path.
        """
        params = {
            "select": "id",
            "content_id": f"eq.{content_id}",
            "language": f"eq.{language}",
        }
        try:
            async with self.session.get(
                f"{self.config.supabase_url}/rest/v1/ak_content_translations",
                params=params,
                headers=self._supabase_headers(),
            ) as response:
                if response.status == 200:
                    return bool(await response.json())
        except Exception as e:
            logger.warning(f"检查翻译记录时出错: {str(e)}")
        return False

    async def save_translation(self, original_content: "ContentItem", translation: "TranslationResult") -> bool:
        """Insert a translation into ``ak_content_translations``.

        Returns:
            False for an unsuccessful ``translation`` or a failed insert;
            True when the row already existed or the insert answered
            HTTP 200/201.
        """
        if not translation.success:
            return False

        # Never overwrite an existing translation for this language.
        if await self._translation_exists(original_content.id, translation.language):
            logger.info(f"内容 {original_content.id} 的 {translation.language} 翻译已存在,跳过")
            return True

        translation_data = {
            "content_id": original_content.id,
            "language": translation.language,
            "title": translation.title,
            "content": translation.content,
            "quality_score": translation.quality_score,
            # Timezone-aware timestamp: the README schema declares this
            # column as TIMESTAMP WITH TIME ZONE.
            "translated_at": datetime.now().astimezone().isoformat(),
            "translation_source": "rageflow_auto",
        }

        try:
            async with self.session.post(
                f"{self.config.supabase_url}/rest/v1/ak_content_translations",
                json=translation_data,
                headers=self._supabase_headers(),
            ) as response:
                if response.status in [200, 201]:
                    logger.info(f"翻译保存成功: {original_content.id} -> {translation.language}")
                    return True

                error_text = await response.text()
                logger.error(f"保存翻译失败: {response.status} - {error_text}")
                return False
        except Exception as e:
            logger.error(f"保存翻译异常: {str(e)}")
            return False

    async def batch_translate(self, target_languages: Optional[List[str]] = None, limit: int = 10) -> Dict:
        """Translate up to ``limit`` content rows into each target language.

        Args:
            target_languages: Language codes; defaults to ``["en"]``.
            limit: Maximum number of content rows to fetch.

        Returns:
            A dict with ``total_contents``, ``target_languages``, per-content
            ``results`` and a ``summary`` of ``successful`` / ``failed`` /
            ``skipped`` counts. When no content is found it additionally has
            ``success: False`` and ``message``; ``summary`` is still present.
        """
        # Copy so the returned dict never aliases the caller's list.
        target_languages = ["en"] if target_languages is None else list(target_languages)
        logger.info(f"开始批量翻译,目标语言: {target_languages},限制: {limit}")

        results = {
            "total_contents": 0,
            "target_languages": target_languages,
            "results": {},
            "summary": {
                "successful": 0,
                "failed": 0,
                "skipped": 0,
            },
        }

        # Fetch the content to translate.
        contents = await self.get_contents_to_translate(limit)
        if not contents:
            results["success"] = False
            results["message"] = "没有找到待翻译的内容"
            return results

        results["total_contents"] = len(contents)
        summary = results["summary"]

        for content in contents:
            content_results = {}

            for target_lang in target_languages:
                try:
                    # Check before translating so no API calls are spent on
                    # translations that are already stored.
                    if await self._translation_exists(content.id, target_lang):
                        logger.info(f"内容 {content.id} 的 {target_lang} 翻译已存在,跳过")
                        content_results[target_lang] = {"success": True, "skipped": True}
                        summary["skipped"] += 1
                        continue

                    translation = await self.translate_content_item(content, target_lang)

                    if translation.success:
                        saved = await self.save_translation(content, translation)

                        content_results[target_lang] = {
                            "success": saved,
                            "quality_score": translation.quality_score,
                            "title_preview": translation.title[:50] + "..." if len(translation.title) > 50 else translation.title,
                        }

                        if saved:
                            summary["successful"] += 1
                        else:
                            summary["failed"] += 1
                    else:
                        content_results[target_lang] = {
                            "success": False,
                            "error": translation.error_message,
                        }
                        summary["failed"] += 1

                    # Pause between items to stay under API rate limits.
                    await asyncio.sleep(1)

                except Exception as e:
                    # One failing pair must not abort the remaining work.
                    logger.error(f"翻译内容 {content.id} 到 {target_lang} 时出错: {str(e)}")
                    content_results[target_lang] = {
                        "success": False,
                        "error": str(e),
                    }
                    summary["failed"] += 1

            results["results"][content.id] = content_results

        logger.info(f"批量翻译完成: 成功 {summary['successful']}, 失败 {summary['failed']}")
        return results
|
||||
|
||||
# Usage example
async def main():
    """Run a small example batch translation and print the result as JSON.

    Returns early with a hint when the credentials below still hold their
    "YOUR_..." placeholders, so no request is sent to a bogus URL.
    """
    # Settings - replace these with your own values.
    supabase_url = "YOUR_SUPABASE_URL"  # your Supabase URL
    supabase_key = "YOUR_SUPABASE_KEY"  # your Supabase API key
    rageflow_api_key = "YOUR_RAGEFLOW_KEY"  # your RageFlow API key

    # Same placeholder guard as the other scripts in this directory.
    if any("YOUR_" in value for value in (supabase_url, supabase_key, rageflow_api_key)):
        print("❌ 请先配置正确的 API 参数!")
        return

    config = TranslationConfig(
        supabase_url=supabase_url,
        supabase_key=supabase_key,
        rageflow_api_key=rageflow_api_key,
        model="gpt-4",
        temperature=0.3,
    )

    # Run the translation.
    async with SimpleTranslationService(config) as service:
        # Translate into English and Japanese.
        results = await service.batch_translate(
            target_languages=["en", "ja"],
            limit=5,  # only 5 rows, as a test run
        )

        # Print the result.
        print(json.dumps(results, indent=2, ensure_ascii=False))

        # Example of translating a single item:
        # contents = await service.get_contents_to_translate(1)
        # if contents:
        #     translation = await service.translate_content_item(contents[0], "en")
        #     print(f"翻译结果: {translation}")


if __name__ == "__main__":
    asyncio.run(main())
|
||||
160
trans_LLM/test_config.py
Normal file
160
trans_LLM/test_config.py
Normal file
@@ -0,0 +1,160 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
配置测试脚本
|
||||
用于测试 Supabase 和 RageFlow 连接是否正常
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import json
|
||||
|
||||
# ========== Settings - edit the values below before running ==========
|
||||
SUPABASE_URL = "YOUR_SUPABASE_URL"
|
||||
SUPABASE_KEY = "YOUR_SUPABASE_KEY"
|
||||
RAGEFLOW_API_KEY = "YOUR_RAGEFLOW_KEY"
|
||||
# ===============================================
|
||||
|
||||
async def test_supabase_connection():
    """Probe Supabase by reading one row of ``ak_contents``.

    Returns:
        True when the request answers HTTP 200, False on any other status
        or on an exception. Progress and errors are printed.
    """
    print("🔄 测试 Supabase 连接...")

    auth_headers = {
        "apikey": SUPABASE_KEY,
        "Authorization": f"Bearer {SUPABASE_KEY}",
    }
    # A single-row read of ak_contents is enough to prove URL and key work.
    probe_url = f"{SUPABASE_URL}/rest/v1/ak_contents?select=id,title&limit=1"

    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(probe_url, headers=auth_headers) as response:
                if response.status != 200:
                    print(f"❌ Supabase 连接失败: HTTP {response.status}")
                    error_text = await response.text()
                    print(f" 错误信息: {error_text}")
                    return False

                rows = await response.json()
                print(f"✅ Supabase 连接成功!找到 {len(rows)} 条记录")
                if rows:
                    print(f" 示例内容 ID: {rows[0]['id']}")
                return True
    except Exception as e:
        print(f"❌ Supabase 连接异常: {e}")
        return False
|
||||
|
||||
async def test_rageflow_connection():
    """Probe the RageFlow chat-completions endpoint with a tiny request.

    Returns:
        True when the request answers HTTP 200, False on any other status
        or on an exception. Progress and errors are printed.
    """
    print("🔄 测试 RageFlow 连接...")

    request_headers = {
        "Authorization": f"Bearer {RAGEFLOW_API_KEY}",
        "Content-Type": "application/json",
    }
    # Minimal chat request; max_tokens is capped at 50.
    probe_payload = {
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "Hello, this is a test."}],
        "max_tokens": 50,
    }
    endpoint = "https://api.rageflow.ai/v1/chat/completions"

    try:
        async with aiohttp.ClientSession() as session:
            async with session.post(
                endpoint, json=probe_payload, headers=request_headers
            ) as response:
                if response.status != 200:
                    print(f"❌ RageFlow 连接失败: HTTP {response.status}")
                    error_text = await response.text()
                    print(f" 错误信息: {error_text}")
                    return False

                body = await response.json()
                reply = body["choices"][0]["message"]["content"]
                print("✅ RageFlow 连接成功!")
                print(f" 测试回复: {reply[:50]}...")
                return True
    except Exception as e:
        print(f"❌ RageFlow 连接异常: {e}")
        return False
|
||||
|
||||
async def test_translation_tables():
    """Check that the ``ak_content_translations`` table can be queried.

    Returns:
        True on HTTP 200; False on HTTP 404 (reported as "table missing"),
        on any other status, or on an exception.
    """
    print("🔄 测试数据库表结构...")

    auth_headers = {
        "apikey": SUPABASE_KEY,
        "Authorization": f"Bearer {SUPABASE_KEY}",
    }
    # Probe the translations table with a one-row select.
    probe_url = f"{SUPABASE_URL}/rest/v1/ak_content_translations?select=id&limit=1"

    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(probe_url, headers=auth_headers) as response:
                status = response.status
                if status == 200:
                    print("✅ ak_content_translations 表存在")
                    return True
                if status == 404:
                    print("❌ ak_content_translations 表不存在")
                    print(" 请创建翻译表,参考 README 中的 SQL")
                    return False
                print(f"❓ 表结构检查异常: HTTP {status}")
                return False
    except Exception as e:
        print(f"❌ 表结构检查异常: {e}")
        return False
|
||||
|
||||
def _mask_secret(value, visible=4):
    """Return ``value`` cut to its first ``visible`` characters plus "..."."""
    return f"{value[:visible]}..."


async def run_full_test():
    """Run all three configuration checks and print a summary.

    Returns early with instructions when any setting still holds its
    "YOUR_..." placeholder. Otherwise the Supabase, RageFlow and table
    checks are run in turn and a hint is printed for each failed one.
    """
    rule = "=" * 60
    print(rule)
    print("🧪 翻译服务配置测试")
    print(rule)

    # Validate the settings first.
    if "YOUR_" in SUPABASE_URL or "YOUR_" in SUPABASE_KEY or "YOUR_" in RAGEFLOW_API_KEY:
        print("❌ 请先配置正确的 API 参数!")
        print("\n请修改脚本顶部的以下参数:")
        print("- SUPABASE_URL: 您的 Supabase 项目 URL")
        print("- SUPABASE_KEY: 您的 Supabase API Key")
        print("- RAGEFLOW_API_KEY: 您的 RageFlow API Key")
        return

    print(f"📍 Supabase URL: {SUPABASE_URL}")
    # Show only a short prefix so the keys do not end up in terminal
    # scrollback or CI logs.
    print(f"🔑 使用 API Key: {_mask_secret(SUPABASE_KEY)}")
    print(f"🤖 RageFlow Key: {_mask_secret(RAGEFLOW_API_KEY)}")
    print()

    # Run the checks.
    supabase_ok = await test_supabase_connection()
    print()

    rageflow_ok = await test_rageflow_connection()
    print()

    tables_ok = await test_translation_tables()
    print()

    # Summary.
    print(rule)
    if supabase_ok and rageflow_ok and tables_ok:
        print("🎉 所有测试通过!可以开始使用翻译服务了")
        print("\n下一步:")
        print("1. 运行 python quick_translate.py 开始翻译")
        print("2. 或者使用 simple_translation_service.py 进行更复杂的操作")
    else:
        print("❌ 部分测试失败,请检查配置:")
        if not supabase_ok:
            print("- 检查 Supabase URL 和 API Key")
        if not rageflow_ok:
            print("- 检查 RageFlow API Key")
        if not tables_ok:
            print("- 检查数据库表结构")
    print(rule)


if __name__ == "__main__":
    asyncio.run(run_full_test())
|
||||
Reference in New Issue
Block a user