Initial commit of akmon project
This commit is contained in:
761
uni_modules/ak-ai-news/services/AIErrorHandler.uts
Normal file
761
uni_modules/ak-ai-news/services/AIErrorHandler.uts
Normal file
@@ -0,0 +1,761 @@
|
||||
// Advanced Error Handling and Retry Mechanism System
|
||||
// Comprehensive error recovery, circuit breaker, and resilience patterns
|
||||
|
||||
import { type AIProvider, type AIResponse } from '../types/ai-types.uts'
|
||||
|
||||
/**
 * Error classification and handling configuration.
 *
 * Tunes every resilience mechanism used by AIErrorHandler: retry/backoff,
 * circuit breaking, per-provider rate limiting, provider fallback and
 * monitoring.
 */
export type ErrorHandlingConfig = {
  retryPolicy: {
    maxAttempts: number            // total attempts, including the initial call
    baseDelayMs: number            // delay before the first retry
    maxDelayMs: number             // upper bound for any backoff delay
    backoffMultiplier: number      // exponential growth factor per retry
    jitterEnabled: boolean         // adds up to 10% random jitter to each delay
  }
  circuitBreaker: {
    failureThreshold: number       // failures before the breaker opens
    recoveryTimeoutMs: number      // time open before half-open probing starts
    halfOpenMaxCalls: number       // successful probes needed to close again
    monitoringWindowMs: number     // NOTE(review): not referenced by AIErrorHandler in this file — confirm intended use
  }
  rateLimit: {
    maxRequestsPerSecond: number   // per-provider budget per 1-second window
    burstSize: number              // NOTE(review): not referenced by AIErrorHandler in this file — confirm intended use
    enabled: boolean
  }
  fallback: {
    enabled: boolean
    fallbackProviders: AIProvider[] // providers to try when the primary fails
    gracefulDegradation: boolean    // NOTE(review): not referenced by AIErrorHandler in this file — confirm intended use
  }
  monitoring: {
    enableMetrics: boolean         // NOTE(review): not referenced by AIErrorHandler in this file — confirm intended use
    alertOnPatterns: boolean       // log alerts when error patterns repeat
    maxErrorHistorySize: number    // cap on retained ErrorInfo history entries
  }
}
|
||||
|
||||
/**
 * Error categories for different handling strategies.
 *
 * Only TRANSIENT, RATE_LIMIT, SERVICE_ERROR and NETWORK are considered
 * retryable by AIErrorHandler.shouldRetry(); the rest fail immediately.
 */
export enum ErrorCategory {
  TRANSIENT = 'transient',     // Network timeouts, temporary unavailability
  AUTHENTICATION = 'auth',     // API key issues, token expiration
  RATE_LIMIT = 'rate_limit',   // API rate limiting
  QUOTA_EXCEEDED = 'quota',    // API quota exceeded
  INVALID_REQUEST = 'invalid', // Bad request data
  SERVICE_ERROR = 'service',   // Internal service errors
  NETWORK = 'network',         // Network connectivity issues
  PERMANENT = 'permanent'      // Permanent failures that shouldn't be retried
}
|
||||
|
||||
/**
 * Detailed information captured for every failure observed by the handler.
 */
export type ErrorInfo = {
  category: ErrorCategory       // classification driving the retry strategy
  code?: string                 // provider/HTTP error code, stringified
  message: string
  provider?: AIProvider         // provider the failing call targeted, if any
  operation: string             // logical operation name (e.g. 'translate')
  timestamp: number             // epoch ms when the error was recorded
  retryCount: number            // zero-based retry index at the time of the error
  context?: Record<string, any> // extra data; analyzeError() stores the original error here
  isRetryable: boolean          // whether a retry could plausibly succeed
  suggestedAction?: string      // human-readable remediation hint
}
|
||||
|
||||
/**
 * Circuit breaker states (classic three-state breaker lifecycle).
 */
export enum CircuitBreakerState {
  CLOSED = 'closed',       // Normal operation, calls pass through
  OPEN = 'open',           // Circuit is open, failing fast without calling
  HALF_OPEN = 'half_open'  // Probing whether the service has recovered
}
|
||||
|
||||
/**
 * Mutable per-breaker status tracked by AIErrorHandler.
 */
export type CircuitBreakerStatus = {
  state: CircuitBreakerState
  failureCount: number     // failures seen; compared against failureThreshold
  successCount: number     // lifetime success counter
  lastFailureTime?: number // epoch ms; drives the open -> half-open transition
  nextAttemptTime?: number // NOTE(review): never written in this file — confirm intended use
  halfOpenAttempts: number // successful probe calls made while half-open
}
|
||||
|
||||
/**
 * Per-provider rate limiter status (fixed one-second window).
 */
export type RateLimiterStatus = {
  requestsRemaining: number // requests left in the current window
  resetTime: number         // epoch ms when the window's budget refills
  isLimited: boolean        // true once the current window is exhausted
  queueSize: number         // NOTE(review): never updated in this file — confirm intended use
}
|
||||
|
||||
/**
 * Record of a single attempt made by executeWithRetry().
 */
export type RetryAttempt = {
  attemptNumber: number // 1-based attempt index
  timestamp: number     // epoch ms when the attempt started
  error?: ErrorInfo     // present only when the attempt failed
  delayMs: number       // backoff delay associated with the attempt
  success: boolean
}
|
||||
|
||||
/**
 * Final outcome of an operation run through executeWithRetry(),
 * including the full per-attempt history.
 */
export type OperationResult<T> = {
  success: boolean
  data?: T                   // present on success
  error?: ErrorInfo          // last error observed when success is false
  attempts: RetryAttempt[]
  totalDuration: number      // wall-clock ms including backoff delays
  finalProvider?: AIProvider // provider that ultimately served the request
}
|
||||
|
||||
/**
|
||||
* Advanced error handler and retry manager
|
||||
*/
|
||||
export class AIErrorHandler {
|
||||
private config: ErrorHandlingConfig
|
||||
private circuitBreakers = new Map<string, CircuitBreakerStatus>()
|
||||
private rateLimiters = new Map<string, RateLimiterStatus>()
|
||||
private errorHistory: ErrorInfo[] = []
|
||||
private requestQueues = new Map<string, Array<() => Promise<any>>>()
|
||||
|
||||
constructor(config: ErrorHandlingConfig) {
|
||||
this.config = config
|
||||
this.initializeCircuitBreakers()
|
||||
this.initializeRateLimiters()
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute operation with advanced error handling and retry logic
|
||||
*/
|
||||
async executeWithRetry<T>(
|
||||
operation: () => Promise<T>,
|
||||
context: {
|
||||
operationName: string
|
||||
provider?: AIProvider
|
||||
retryable?: boolean
|
||||
metadata?: Record<string, any>
|
||||
}
|
||||
): Promise<OperationResult<T>> {
|
||||
const startTime = Date.now()
|
||||
const attempts: RetryAttempt[] = []
|
||||
let lastError: ErrorInfo | undefined
|
||||
|
||||
// Check circuit breaker
|
||||
const breakerKey = this.getBreakerKey(context.operationName, context.provider)
|
||||
if (this.isCircuitOpen(breakerKey)) {
|
||||
const error = this.createError(
|
||||
ErrorCategory.SERVICE_ERROR,
|
||||
`Circuit breaker is open for ${breakerKey}`,
|
||||
context.operationName,
|
||||
context.provider
|
||||
)
|
||||
return {
|
||||
success: false,
|
||||
error,
|
||||
attempts: [],
|
||||
totalDuration: Date.now() - startTime
|
||||
}
|
||||
}
|
||||
|
||||
// Check rate limits
|
||||
if (this.config.rateLimit.enabled && context.provider) {
|
||||
const rateLimitResult = await this.checkRateLimit(context.provider)
|
||||
if (!rateLimitResult.allowed) {
|
||||
const error = this.createError(
|
||||
ErrorCategory.RATE_LIMIT,
|
||||
'Rate limit exceeded',
|
||||
context.operationName,
|
||||
context.provider
|
||||
)
|
||||
return {
|
||||
success: false,
|
||||
error,
|
||||
attempts: [],
|
||||
totalDuration: Date.now() - startTime
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Execute with retry logic
|
||||
for (let attempt = 1; attempt <= this.config.retryPolicy.maxAttempts; attempt++) {
|
||||
const attemptStart = Date.now()
|
||||
|
||||
try {
|
||||
// Add delay for retry attempts
|
||||
if (attempt > 1) {
|
||||
const delay = this.calculateRetryDelay(attempt - 1)
|
||||
await this.sleep(delay)
|
||||
attempts[attempts.length - 1].delayMs = delay
|
||||
}
|
||||
|
||||
// Execute the operation
|
||||
const result = await operation()
|
||||
|
||||
// Record successful attempt
|
||||
const attemptInfo: RetryAttempt = {
|
||||
attemptNumber: attempt,
|
||||
timestamp: attemptStart,
|
||||
delayMs: 0,
|
||||
success: true
|
||||
}
|
||||
attempts.push(attemptInfo)
|
||||
|
||||
// Update circuit breaker on success
|
||||
this.recordSuccess(breakerKey)
|
||||
|
||||
return {
|
||||
success: true,
|
||||
data: result,
|
||||
attempts,
|
||||
totalDuration: Date.now() - startTime,
|
||||
finalProvider: context.provider
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
const errorInfo = this.analyzeError(error, context.operationName, context.provider, attempt - 1)
|
||||
lastError = errorInfo
|
||||
|
||||
// Record failed attempt
|
||||
const attemptInfo: RetryAttempt = {
|
||||
attemptNumber: attempt,
|
||||
timestamp: attemptStart,
|
||||
error: errorInfo,
|
||||
delayMs: 0,
|
||||
success: false
|
||||
}
|
||||
attempts.push(attemptInfo)
|
||||
|
||||
// Update error history
|
||||
this.recordError(errorInfo)
|
||||
|
||||
// Update circuit breaker on failure
|
||||
this.recordFailure(breakerKey)
|
||||
|
||||
// Check if we should retry
|
||||
if (!this.shouldRetry(errorInfo, attempt)) {
|
||||
break
|
||||
}
|
||||
|
||||
// Try fallback provider if available
|
||||
if (this.config.fallback.enabled && attempt === this.config.retryPolicy.maxAttempts) {
|
||||
const fallbackResult = await this.tryFallbackProviders(
|
||||
operation,
|
||||
context,
|
||||
startTime,
|
||||
attempts
|
||||
)
|
||||
if (fallbackResult) {
|
||||
return fallbackResult
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
success: false,
|
||||
error: lastError,
|
||||
attempts,
|
||||
totalDuration: Date.now() - startTime
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle bulk operations with advanced error recovery
|
||||
*/
|
||||
async executeBulkWithRetry<T, R>(
|
||||
items: T[],
|
||||
operation: (item: T) => Promise<R>,
|
||||
options: {
|
||||
operationName: string
|
||||
batchSize?: number
|
||||
concurrency?: number
|
||||
failFast?: boolean
|
||||
partialFailureThreshold?: number
|
||||
}
|
||||
): Promise<{
|
||||
results: Array<{ item: T; result?: R; error?: ErrorInfo }>
|
||||
summary: {
|
||||
successful: number
|
||||
failed: number
|
||||
totalTime: number
|
||||
throughput: number
|
||||
}
|
||||
}> {
|
||||
const startTime = Date.now()
|
||||
const batchSize = options.batchSize || 10
|
||||
const concurrency = options.concurrency || 3
|
||||
const results: Array<{ item: T; result?: R; error?: ErrorInfo }> = []
|
||||
|
||||
// Process in batches
|
||||
for (let i = 0; i < items.length; i += batchSize) {
|
||||
const batch = items.slice(i, i + batchSize)
|
||||
|
||||
// Process batch with controlled concurrency
|
||||
const batchPromises = batch.map(async (item) => {
|
||||
const operationResult = await this.executeWithRetry(
|
||||
() => operation(item),
|
||||
{
|
||||
operationName: options.operationName,
|
||||
metadata: { batchIndex: Math.floor(i / batchSize), itemIndex: i + batch.indexOf(item) }
|
||||
}
|
||||
)
|
||||
|
||||
return {
|
||||
item,
|
||||
result: operationResult.data,
|
||||
error: operationResult.error
|
||||
}
|
||||
})
|
||||
|
||||
// Execute with concurrency control
|
||||
const batchResults = await this.executeConcurrently(batchPromises, concurrency)
|
||||
results.push(...batchResults)
|
||||
|
||||
// Check failure threshold
|
||||
const failedCount = results.filter(r => r.error).length
|
||||
const failureRate = failedCount / results.length
|
||||
|
||||
if (options.failFast && failureRate > (options.partialFailureThreshold || 0.5)) {
|
||||
console.log(`⚠️ Bulk operation failing fast due to high failure rate: ${(failureRate * 100).toFixed(1)}%`)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
const endTime = Date.now()
|
||||
const successful = results.filter(r => !r.error).length
|
||||
const failed = results.filter(r => r.error).length
|
||||
const totalTime = endTime - startTime
|
||||
const throughput = results.length / (totalTime / 1000)
|
||||
|
||||
return {
|
||||
results,
|
||||
summary: {
|
||||
successful,
|
||||
failed,
|
||||
totalTime,
|
||||
throughput
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get current error handling status
|
||||
*/
|
||||
getErrorHandlingStatus(): {
|
||||
circuitBreakers: Array<{ key: string; status: CircuitBreakerStatus }>
|
||||
rateLimiters: Array<{ key: string; status: RateLimiterStatus }>
|
||||
recentErrors: ErrorInfo[]
|
||||
errorPatterns: Array<{ pattern: string; count: number; lastSeen: number }>
|
||||
} {
|
||||
const recentErrors = this.errorHistory.slice(-50) // Last 50 errors
|
||||
const errorPatterns = this.analyzeErrorPatterns(recentErrors)
|
||||
|
||||
return {
|
||||
circuitBreakers: Array.from(this.circuitBreakers.entries()).map(([key, status]) => ({ key, status })),
|
||||
rateLimiters: Array.from(this.rateLimiters.entries()).map(([key, status]) => ({ key, status })),
|
||||
recentErrors,
|
||||
errorPatterns
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Reset circuit breakers and error state
|
||||
*/
|
||||
resetErrorState(): void {
|
||||
this.circuitBreakers.clear()
|
||||
this.rateLimiters.clear()
|
||||
this.errorHistory = []
|
||||
this.requestQueues.clear()
|
||||
|
||||
this.initializeCircuitBreakers()
|
||||
this.initializeRateLimiters()
|
||||
|
||||
console.log('🔄 Error handling state reset')
|
||||
}
|
||||
|
||||
/**
|
||||
* Update configuration
|
||||
*/
|
||||
updateConfig(newConfig: Partial<ErrorHandlingConfig>): void {
|
||||
this.config = { ...this.config, ...newConfig }
|
||||
console.log('⚙️ Error handling configuration updated')
|
||||
}
|
||||
|
||||
// Private methods
|
||||
|
||||
private initializeCircuitBreakers(): void {
|
||||
const providers: AIProvider[] = ['openai', 'google', 'baidu']
|
||||
const operations = ['translate', 'analyze', 'chat', 'recommend']
|
||||
|
||||
providers.forEach(provider => {
|
||||
operations.forEach(operation => {
|
||||
const key = this.getBreakerKey(operation, provider)
|
||||
this.circuitBreakers.set(key, {
|
||||
state: CircuitBreakerState.CLOSED,
|
||||
failureCount: 0,
|
||||
successCount: 0,
|
||||
halfOpenAttempts: 0
|
||||
})
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
private initializeRateLimiters(): void {
|
||||
const providers: AIProvider[] = ['openai', 'google', 'baidu']
|
||||
|
||||
providers.forEach(provider => {
|
||||
this.rateLimiters.set(provider, {
|
||||
requestsRemaining: this.config.rateLimit.maxRequestsPerSecond,
|
||||
resetTime: Date.now() + 1000,
|
||||
isLimited: false,
|
||||
queueSize: 0
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
private getBreakerKey(operation: string, provider?: AIProvider): string {
|
||||
return provider ? `${provider}:${operation}` : operation
|
||||
}
|
||||
|
||||
private isCircuitOpen(breakerKey: string): boolean {
|
||||
const breaker = this.circuitBreakers.get(breakerKey)
|
||||
if (!breaker) return false
|
||||
|
||||
if (breaker.state === CircuitBreakerState.OPEN) {
|
||||
// Check if we should transition to half-open
|
||||
const now = Date.now()
|
||||
if (breaker.lastFailureTime &&
|
||||
now - breaker.lastFailureTime > this.config.circuitBreaker.recoveryTimeoutMs) {
|
||||
breaker.state = CircuitBreakerState.HALF_OPEN
|
||||
breaker.halfOpenAttempts = 0
|
||||
console.log(`🔄 Circuit breaker ${breakerKey} transitioning to half-open`)
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
private recordSuccess(breakerKey: string): void {
|
||||
const breaker = this.circuitBreakers.get(breakerKey)
|
||||
if (!breaker) return
|
||||
|
||||
breaker.successCount++
|
||||
|
||||
if (breaker.state === CircuitBreakerState.HALF_OPEN) {
|
||||
breaker.halfOpenAttempts++
|
||||
if (breaker.halfOpenAttempts >= this.config.circuitBreaker.halfOpenMaxCalls) {
|
||||
breaker.state = CircuitBreakerState.CLOSED
|
||||
breaker.failureCount = 0
|
||||
console.log(`✅ Circuit breaker ${breakerKey} closed after successful recovery`)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private recordFailure(breakerKey: string): void {
|
||||
const breaker = this.circuitBreakers.get(breakerKey)
|
||||
if (!breaker) return
|
||||
|
||||
breaker.failureCount++
|
||||
breaker.lastFailureTime = Date.now()
|
||||
|
||||
if (breaker.state === CircuitBreakerState.CLOSED) {
|
||||
if (breaker.failureCount >= this.config.circuitBreaker.failureThreshold) {
|
||||
breaker.state = CircuitBreakerState.OPEN
|
||||
console.log(`⚠️ Circuit breaker ${breakerKey} opened due to ${breaker.failureCount} failures`)
|
||||
}
|
||||
} else if (breaker.state === CircuitBreakerState.HALF_OPEN) {
|
||||
breaker.state = CircuitBreakerState.OPEN
|
||||
console.log(`❌ Circuit breaker ${breakerKey} re-opened after failed recovery attempt`)
|
||||
}
|
||||
}
|
||||
|
||||
private async checkRateLimit(provider: AIProvider): Promise<{ allowed: boolean; waitTime?: number }> {
|
||||
const limiter = this.rateLimiters.get(provider)
|
||||
if (!limiter) return { allowed: true }
|
||||
|
||||
const now = Date.now()
|
||||
|
||||
// Reset if time window has passed
|
||||
if (now >= limiter.resetTime) {
|
||||
limiter.requestsRemaining = this.config.rateLimit.maxRequestsPerSecond
|
||||
limiter.resetTime = now + 1000
|
||||
limiter.isLimited = false
|
||||
}
|
||||
|
||||
if (limiter.requestsRemaining <= 0) {
|
||||
limiter.isLimited = true
|
||||
return {
|
||||
allowed: false,
|
||||
waitTime: limiter.resetTime - now
|
||||
}
|
||||
}
|
||||
|
||||
limiter.requestsRemaining--
|
||||
return { allowed: true }
|
||||
}
|
||||
|
||||
private analyzeError(
|
||||
error: any,
|
||||
operation: string,
|
||||
provider?: AIProvider,
|
||||
retryCount: number = 0
|
||||
): ErrorInfo {
|
||||
const errorMessage = error?.message || String(error)
|
||||
const errorCode = error?.code || error?.status
|
||||
|
||||
let category = ErrorCategory.PERMANENT
|
||||
let isRetryable = false
|
||||
let suggestedAction = 'Review error and fix manually'
|
||||
|
||||
// Analyze error to determine category and retry strategy
|
||||
if (errorMessage.toLowerCase().includes('timeout') ||
|
||||
errorMessage.toLowerCase().includes('network')) {
|
||||
category = ErrorCategory.TRANSIENT
|
||||
isRetryable = true
|
||||
suggestedAction = 'Retry with exponential backoff'
|
||||
} else if (errorMessage.toLowerCase().includes('rate limit') || errorCode === 429) {
|
||||
category = ErrorCategory.RATE_LIMIT
|
||||
isRetryable = true
|
||||
suggestedAction = 'Wait and retry, consider implementing rate limiting'
|
||||
} else if (errorMessage.toLowerCase().includes('quota') ||
|
||||
errorMessage.toLowerCase().includes('exceeded')) {
|
||||
category = ErrorCategory.QUOTA_EXCEEDED
|
||||
isRetryable = false
|
||||
suggestedAction = 'Check API quota and billing'
|
||||
} else if (errorMessage.toLowerCase().includes('auth') ||
|
||||
errorMessage.toLowerCase().includes('unauthorized') ||
|
||||
errorCode === 401) {
|
||||
category = ErrorCategory.AUTHENTICATION
|
||||
isRetryable = false
|
||||
suggestedAction = 'Check API keys and authentication'
|
||||
} else if (errorCode >= 400 && errorCode < 500) {
|
||||
category = ErrorCategory.INVALID_REQUEST
|
||||
isRetryable = false
|
||||
suggestedAction = 'Review request parameters'
|
||||
} else if (errorCode >= 500) {
|
||||
category = ErrorCategory.SERVICE_ERROR
|
||||
isRetryable = true
|
||||
suggestedAction = 'Retry or use fallback provider'
|
||||
}
|
||||
|
||||
return {
|
||||
category,
|
||||
code: String(errorCode || 'unknown'),
|
||||
message: errorMessage,
|
||||
provider,
|
||||
operation,
|
||||
timestamp: Date.now(),
|
||||
retryCount,
|
||||
isRetryable,
|
||||
suggestedAction,
|
||||
context: {
|
||||
originalError: error
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private shouldRetry(error: ErrorInfo, attemptNumber: number): boolean {
|
||||
if (attemptNumber >= this.config.retryPolicy.maxAttempts) {
|
||||
return false
|
||||
}
|
||||
|
||||
return error.isRetryable && [
|
||||
ErrorCategory.TRANSIENT,
|
||||
ErrorCategory.RATE_LIMIT,
|
||||
ErrorCategory.SERVICE_ERROR,
|
||||
ErrorCategory.NETWORK
|
||||
].includes(error.category)
|
||||
}
|
||||
|
||||
private calculateRetryDelay(attemptNumber: number): number {
|
||||
const baseDelay = this.config.retryPolicy.baseDelayMs
|
||||
const maxDelay = this.config.retryPolicy.maxDelayMs
|
||||
const multiplier = this.config.retryPolicy.backoffMultiplier
|
||||
|
||||
let delay = baseDelay * Math.pow(multiplier, attemptNumber)
|
||||
delay = Math.min(delay, maxDelay)
|
||||
|
||||
// Add jitter if enabled
|
||||
if (this.config.retryPolicy.jitterEnabled) {
|
||||
const jitter = delay * 0.1 * Math.random()
|
||||
delay += jitter
|
||||
}
|
||||
|
||||
return Math.floor(delay)
|
||||
}
|
||||
|
||||
private async tryFallbackProviders<T>(
|
||||
operation: () => Promise<T>,
|
||||
context: any,
|
||||
startTime: number,
|
||||
existingAttempts: RetryAttempt[]
|
||||
): Promise<OperationResult<T> | null> {
|
||||
if (!this.config.fallback.enabled || !context.provider) {
|
||||
return null
|
||||
}
|
||||
|
||||
const fallbackProviders = this.config.fallback.fallbackProviders.filter(
|
||||
p => p !== context.provider
|
||||
)
|
||||
|
||||
for (const fallbackProvider of fallbackProviders) {
|
||||
try {
|
||||
console.log(`🔄 Attempting fallback to provider: ${fallbackProvider}`)
|
||||
|
||||
const result = await operation() // Note: In real implementation, this would use the fallback provider
|
||||
|
||||
return {
|
||||
success: true,
|
||||
data: result,
|
||||
attempts: existingAttempts,
|
||||
totalDuration: Date.now() - startTime,
|
||||
finalProvider: fallbackProvider
|
||||
}
|
||||
} catch (error) {
|
||||
console.log(`❌ Fallback provider ${fallbackProvider} also failed:`, error)
|
||||
}
|
||||
}
|
||||
|
||||
return null
|
||||
}
|
||||
|
||||
private recordError(error: ErrorInfo): void {
|
||||
this.errorHistory.push(error)
|
||||
|
||||
// Maintain history size limit
|
||||
if (this.errorHistory.length > this.config.monitoring.maxErrorHistorySize) {
|
||||
this.errorHistory = this.errorHistory.slice(-this.config.monitoring.maxErrorHistorySize)
|
||||
}
|
||||
|
||||
// Alert on error patterns if enabled
|
||||
if (this.config.monitoring.alertOnPatterns) {
|
||||
this.checkErrorPatterns(error)
|
||||
}
|
||||
}
|
||||
|
||||
private checkErrorPatterns(error: ErrorInfo): void {
|
||||
const recentErrors = this.errorHistory.filter(
|
||||
e => Date.now() - e.timestamp < 300000 // Last 5 minutes
|
||||
)
|
||||
|
||||
// Check for repeated errors from same provider
|
||||
if (error.provider) {
|
||||
const providerErrors = recentErrors.filter(e => e.provider === error.provider)
|
||||
if (providerErrors.length >= 5) {
|
||||
console.log(`🚨 High error rate detected for provider ${error.provider}: ${providerErrors.length} errors in 5 minutes`)
|
||||
}
|
||||
}
|
||||
|
||||
// Check for repeated error categories
|
||||
const categoryErrors = recentErrors.filter(e => e.category === error.category)
|
||||
if (categoryErrors.length >= 10) {
|
||||
console.log(`🚨 High error rate detected for category ${error.category}: ${categoryErrors.length} errors in 5 minutes`)
|
||||
}
|
||||
}
|
||||
|
||||
private analyzeErrorPatterns(errors: ErrorInfo[]): Array<{ pattern: string; count: number; lastSeen: number }> {
|
||||
const patterns = new Map<string, { count: number; lastSeen: number }>()
|
||||
|
||||
errors.forEach(error => {
|
||||
const pattern = `${error.category}:${error.provider || 'unknown'}`
|
||||
const existing = patterns.get(pattern) || { count: 0, lastSeen: 0 }
|
||||
patterns.set(pattern, {
|
||||
count: existing.count + 1,
|
||||
lastSeen: Math.max(existing.lastSeen, error.timestamp)
|
||||
})
|
||||
})
|
||||
|
||||
return Array.from(patterns.entries())
|
||||
.map(([pattern, data]) => ({ pattern, ...data }))
|
||||
.sort((a, b) => b.count - a.count)
|
||||
}
|
||||
|
||||
private async executeConcurrently<T>(promises: Promise<T>[], concurrency: number): Promise<T[]> {
|
||||
const results: T[] = []
|
||||
const executing: Promise<void>[] = []
|
||||
|
||||
for (const promise of promises) {
|
||||
const p = promise.then(result => {
|
||||
results.push(result)
|
||||
})
|
||||
|
||||
executing.push(p)
|
||||
|
||||
if (executing.length >= concurrency) {
|
||||
await Promise.race(executing)
|
||||
executing.splice(executing.findIndex(x => x === p), 1)
|
||||
}
|
||||
}
|
||||
|
||||
await Promise.all(executing)
|
||||
return results
|
||||
}
|
||||
|
||||
private createError(
|
||||
category: ErrorCategory,
|
||||
message: string,
|
||||
operation: string,
|
||||
provider?: AIProvider
|
||||
): ErrorInfo {
|
||||
return {
|
||||
category,
|
||||
message,
|
||||
operation,
|
||||
provider,
|
||||
timestamp: Date.now(),
|
||||
retryCount: 0,
|
||||
isRetryable: category !== ErrorCategory.PERMANENT
|
||||
}
|
||||
}
|
||||
|
||||
private sleep(ms: number): Promise<void> {
|
||||
return new Promise(resolve => setTimeout(resolve, ms))
|
||||
}
|
||||
}
|
||||
|
||||
// Default configuration
/**
 * Sensible defaults: 3 attempts with 1s → 2s backoff (capped at 30s, with
 * jitter), breakers opening after 5 failures and probing again after 1
 * minute, and a 10 req/s per-provider rate limit.
 */
export const defaultErrorHandlingConfig: ErrorHandlingConfig = {
  retryPolicy: {
    maxAttempts: 3,
    baseDelayMs: 1000, // 1 second
    maxDelayMs: 30000, // 30 seconds
    backoffMultiplier: 2,
    jitterEnabled: true
  },
  circuitBreaker: {
    failureThreshold: 5,
    recoveryTimeoutMs: 60000, // 1 minute
    halfOpenMaxCalls: 3,
    monitoringWindowMs: 300000 // 5 minutes
  },
  rateLimit: {
    maxRequestsPerSecond: 10,
    burstSize: 5,
    enabled: true
  },
  fallback: {
    enabled: true,
    fallbackProviders: ['openai', 'google', 'baidu'],
    gracefulDegradation: true
  },
  monitoring: {
    enableMetrics: true,
    alertOnPatterns: true,
    maxErrorHistorySize: 1000
  }
}
|
||||
Reference in New Issue
Block a user