// Performance Monitor and Optimization System
// Real-time monitoring, metrics collection, and automatic optimization

import { type AIProvider, type AIServiceConfig } from '../types/ai-types.uts'

/**
 * Performance metrics data structure
 */
export type PerformanceMetrics = {
  timestamp: number
  service: string
  operation: string
  provider?: AIProvider
  duration: number
  success: boolean
  error?: string
  tokensUsed?: number
  costUSD?: number
  cacheHit?: boolean
  memoryUsage?: number
  cpuUsage?: number
  networkLatency?: number
  queueSize?: number
  throughput?: number
}

/**
 * System health status
 */
export type SystemHealth = {
  status: 'healthy' | 'warning' | 'critical'
  score: number // 0-100
  checks: {
    apiConnectivity: boolean
    memoryUsage: number
    errorRate: number
    responseTime: number
    costBudget: number
    cacheEfficiency: number
  }
  alerts: HealthAlert[]
}

/**
 * Health alert
 */
export type HealthAlert = {
  id: string
  severity: 'info' | 'warning' | 'error' | 'critical'
  message: string
  timestamp: number
  source: string
  resolved: boolean
}

/**
 * Optimization recommendation
 */
export type OptimizationRecommendation = {
  type: 'cache' | 'provider' | 'batch' | 'model' | 'timeout' | 'retry'
  priority: 'low' | 'medium' | 'high' | 'critical'
  description: string
  expectedImpact: {
    performanceGain?: string
    costSaving?: string
    reliabilityImprovement?: string
  }
  implementation: {
    action: string
    // Free-form, action-specific settings (e.g. maxSize/ttl, batchSize, newPrimary).
    parameters: Record<string, any>
    estimatedEffort: string
  }
}

/**
 * Performance statistics aggregation
 */
export type PerformanceStats = {
  timeRange: {
    start: number
    end: number
    duration: number
  }
  requests: {
    total: number
    successful: number
    failed: number
    successRate: number
  }
  timing: {
    averageLatency: number
    medianLatency: number
    p95Latency: number
    p99Latency: number
  }
  costs: {
    total: number
    average: number
    byProvider: Record<string, number>
  }
  cache: {
    hitRate: number
    totalRequests: number
    hits: number
    misses: number
  }
  errors: {
    byType: Record<string, number>
    byProvider: Record<string, number>
    topErrors: Array<{ error: string; count: number }>
  }
}

/**
 * Performance monitoring and optimization service
 *
 * Keeps a bounded in-memory history of recorded metrics and alerts, derives
 * health scores and aggregate statistics from that history, and produces
 * (optionally auto-applied) optimization recommendations.
 */
export class AIPerformanceMonitor {
  private metrics: PerformanceMetrics[] = []
  private alerts: HealthAlert[] = []
  private isMonitoring = false
  // Typed via ReturnType so the handle is valid whether setInterval returns a
  // number or a timer object in the host environment.
  private monitoringInterval?: ReturnType<typeof setInterval>
  private maxMetricsHistory = 10000
  private alertThresholds = {
    errorRate: 0.05, // 5%
    responseTime: 5000, // 5 seconds
    memoryUsage: 0.8, // 80%
    costBudget: 0.9, // 90% of daily budget
    cacheHitRate: 0.3 // 30% minimum
  }

  /**
   * @param config Monitoring settings: polling interval in milliseconds, the
   *   maximum number of metrics retained, an optional alert webhook, and
   *   whether recommendations may be applied automatically.
   */
  constructor(
    private config: {
      monitoringInterval: number
      maxHistory: number
      alertWebhook?: string
      enableAutoOptimization: boolean
    }
  ) {
    this.maxMetricsHistory = config.maxHistory
  }

  /**
   * Start performance monitoring
   *
   * Schedules a periodic tick that samples system metrics, evaluates health,
   * and looks for optimization opportunities. Calling it while monitoring is
   * already active only logs a notice.
   */
  startMonitoring(): void {
    if (this.isMonitoring) {
      console.log('⚠️ Performance monitoring is already running')
      return
    }

    this.isMonitoring = true
    console.log('🚀 Starting performance monitoring...')

    this.monitoringInterval = setInterval(() => {
      this.collectSystemMetrics()
      this.checkSystemHealth()
      this.generateOptimizationRecommendations()
    }, this.config.monitoringInterval)
  }

  /**
   * Stop performance monitoring
   *
   * Cancels the periodic tick if one is scheduled and marks monitoring as
   * inactive.
   */
  stopMonitoring(): void {
    if (this.monitoringInterval !== undefined) {
      clearInterval(this.monitoringInterval)
      this.monitoringInterval = undefined
    }

    this.isMonitoring = false
    console.log('🛑 Performance monitoring stopped')
  }

  /**
   * Record a performance metric
   *
   * A falsy timestamp is replaced with the current time. The metric is copied
   * before being stored, so the caller's object is never modified.
   *
   * @param metric The measurement to append to the history.
   */
  recordMetric(metric: PerformanceMetrics): void {
    const entry: PerformanceMetrics = {
      ...metric,
      timestamp: metric.timestamp || Date.now()
    }
    this.metrics.push(entry)

    // Maintain history limit. Dropping from the front with splice also works
    // for a limit of 0, where slice(-0) would have kept the whole array.
    const overflow = this.metrics.length - Math.max(0, this.maxMetricsHistory)
    if (overflow > 0) {
      this.metrics.splice(0, overflow)
    }

    // Real-time analysis for critical metrics
    if (!entry.success || entry.duration > 10000) {
      this.checkForImmediateAlerts(entry)
    }
  }

  /**
   * Get current system health
   *
   * Evaluates the metrics recorded during the last 5 minutes. When there are
   * none, a neutral `warning` result with a score of 50 is returned.
   *
   * @returns Status, 0-100 score, individual check values and active alerts.
   */
  getSystemHealth(): SystemHealth {
    const now = Date.now()
    const recentMetrics = this.metrics.filter(m => now - m.timestamp < 300000) // Last 5 minutes

    if (recentMetrics.length === 0) {
      return {
        status: 'warning',
        score: 50,
        checks: {
          apiConnectivity: false,
          memoryUsage: 0,
          errorRate: 0,
          responseTime: 0,
          costBudget: 0,
          cacheEfficiency: 0
        },
        alerts: this.getActiveAlerts()
      }
    }

    const errorRate = recentMetrics.filter(m => !m.success).length / recentMetrics.length
    const avgResponseTime =
      recentMetrics.reduce((sum, m) => sum + m.duration, 0) / recentMetrics.length
    const cacheHitRate = this.calculateCacheHitRate(recentMetrics)
    const memoryUsage = this.getMemoryUsage()
    const costBudget = this.calculateCostBudgetUsage()

    const checks = {
      apiConnectivity: errorRate < 0.1,
      memoryUsage,
      errorRate,
      responseTime: avgResponseTime,
      costBudget,
      cacheEfficiency: cacheHitRate
    }

    const score = this.calculateHealthScore(checks)
    const status = this.determineHealthStatus(score, checks)

    return {
      status,
      score,
      checks,
      alerts: this.getActiveAlerts()
    }
  }

  /**
   * Get performance statistics for a time range
   *
   * @param startTime Inclusive lower bound (epoch milliseconds).
   * @param endTime Inclusive upper bound (epoch milliseconds).
   * @returns Aggregated request, timing, cost, cache and error statistics;
   *   all-zero statistics when no metric falls inside the range.
   */
  getPerformanceStats(
    startTime: number,
    endTime: number
  ): PerformanceStats {
    const rangeMetrics = this.metrics.filter(
      m => m.timestamp >= startTime && m.timestamp <= endTime
    )

    if (rangeMetrics.length === 0) {
      return this.getEmptyStats(startTime, endTime)
    }

    const successful = rangeMetrics.filter(m => m.success)
    const failed = rangeMetrics.filter(m => !m.success)
    const successRate = successful.length / rangeMetrics.length

    // Calculate timing statistics
    const durations = rangeMetrics.map(m => m.duration).sort((a, b) => a - b)
    const averageLatency = durations.reduce((sum, d) => sum + d, 0) / durations.length
    const medianLatency = this.median(durations)
    const p95Latency = this.percentile(durations, 95)
    const p99Latency = this.percentile(durations, 99)

    // Calculate cost statistics
    const totalCost = rangeMetrics.reduce((sum, m) => sum + (m.costUSD || 0), 0)
    const averageCost = totalCost / rangeMetrics.length
    const costByProvider = this.groupCostsByProvider(rangeMetrics)

    // Calculate cache statistics (only metrics that report cacheHit count)
    const cacheRequests = rangeMetrics.filter(m => m.cacheHit !== undefined)
    const cacheHits = cacheRequests.filter(m => m.cacheHit).length
    const cacheMisses = cacheRequests.length - cacheHits
    const cacheHitRate = cacheRequests.length > 0 ? cacheHits / cacheRequests.length : 0

    // Calculate error statistics
    const errorsByType = this.groupErrorsByType(failed)
    const errorsByProvider = this.groupErrorsByProvider(failed)
    const topErrors = this.getTopErrors(failed)

    return {
      timeRange: {
        start: startTime,
        end: endTime,
        duration: endTime - startTime
      },
      requests: {
        total: rangeMetrics.length,
        successful: successful.length,
        failed: failed.length,
        successRate
      },
      timing: {
        averageLatency,
        medianLatency,
        p95Latency,
        p99Latency
      },
      costs: {
        total: totalCost,
        average: averageCost,
        byProvider: costByProvider
      },
      cache: {
        hitRate: cacheHitRate,
        totalRequests: cacheRequests.length,
        hits: cacheHits,
        misses: cacheMisses
      },
      errors: {
        byType: errorsByType,
        byProvider: errorsByProvider,
        topErrors
      }
    }
  }

  /**
   * Get optimization recommendations
   *
   * Derived from the statistics of the last hour.
   *
   * @returns Zero or more recommendations (cache, provider, batch, model).
   */
  getOptimizationRecommendations(): OptimizationRecommendation[] {
    const recommendations: OptimizationRecommendation[] = []
    const now = Date.now()
    const recentStats = this.getPerformanceStats(
      now - 3600000, // Last hour
      now
    )

    // Cache optimization recommendations. Requires at least one request that
    // reported cache data: a hit rate of 0 with no samples means "unknown",
    // not "low".
    if (recentStats.cache.totalRequests > 0 && recentStats.cache.hitRate < 0.4) {
      recommendations.push({
        type: 'cache',
        priority: 'high',
        description: `Cache hit rate is low (${(recentStats.cache.hitRate * 100).toFixed(1)}%). Consider increasing cache size or TTL.`,
        expectedImpact: {
          performanceGain: '30-50% faster response times',
          costSaving: '20-40% reduction in AI API costs'
        },
        implementation: {
          action: 'increase_cache_size',
          parameters: {
            maxSize: Math.max(1000, recentStats.cache.totalRequests * 2),
            ttl: 3600000 // 1 hour
          },
          estimatedEffort: 'Low - Configuration change'
        }
      })
    }

    // Provider optimization recommendations
    const providerErrors = recentStats.errors.byProvider
    const worstProvider = Object.entries(providerErrors)
      .sort(([, a], [, b]) => b - a)[0]

    if (worstProvider && worstProvider[1] > recentStats.requests.total * 0.1) {
      recommendations.push({
        type: 'provider',
        priority: 'medium',
        description: `Provider ${worstProvider[0]} has high error rate (${worstProvider[1]} errors). Consider switching primary provider.`,
        expectedImpact: {
          reliabilityImprovement: '80-90% reduction in errors'
        },
        implementation: {
          action: 'switch_primary_provider',
          parameters: {
            newPrimary: this.recommendBestProvider(recentStats),
            fallbackProviders: ['openai', 'google', 'baidu']
          },
          estimatedEffort: 'Medium - Code changes required'
        }
      })
    }

    // Batch processing recommendations
    if (recentStats.timing.averageLatency > 3000 && recentStats.requests.total > 100) {
      recommendations.push({
        type: 'batch',
        priority: 'medium',
        description: 'High latency with significant request volume. Consider implementing batch processing.',
        expectedImpact: {
          performanceGain: '50-70% improvement in throughput',
          costSaving: '15-25% cost reduction'
        },
        implementation: {
          action: 'enable_batch_processing',
          parameters: {
            batchSize: 10,
            batchTimeout: 1000,
            concurrency: 3
          },
          estimatedEffort: 'High - Significant code changes'
        }
      })
    }

    // Model optimization recommendations
    if (recentStats.costs.average > 0.01) { // More than 1 cent per request
      recommendations.push({
        type: 'model',
        priority: 'low',
        description: 'Request costs are high. Consider using smaller/cheaper models for simple tasks.',
        expectedImpact: {
          costSaving: '40-60% cost reduction'
        },
        implementation: {
          action: 'implement_model_selection',
          parameters: {
            simpleTaskModel: 'gpt-3.5-turbo',
            complexTaskModel: 'gpt-4',
            costThreshold: 0.005
          },
          estimatedEffort: 'Medium - Logic implementation required'
        }
      })
    }

    return recommendations
  }

  /**
   * Apply automatic optimization
   *
   * Does nothing (and reports zero applied/failed) when auto-optimization is
   * disabled in the configuration. Recommendations are applied one after
   * another; a failure of one does not stop the rest.
   *
   * @param recommendations Recommendations to apply, in order.
   * @returns Counts of applied/failed optimizations and the per-item results.
   */
  async applyOptimizations(
    recommendations: OptimizationRecommendation[]
  ): Promise<{ applied: number; failed: number; results: Array<{ success: boolean; error?: string }> }> {
    if (!this.config.enableAutoOptimization) {
      console.log('⚠️ Auto-optimization is disabled')
      return { applied: 0, failed: 0, results: [] }
    }

    const results: Array<{ success: boolean; error?: string }> = []
    let applied = 0
    let failed = 0

    for (const rec of recommendations) {
      try {
        const result = await this.applyOptimization(rec)
        if (result.success) {
          applied++
          console.log(`✅ Applied optimization: ${rec.description}`)
        } else {
          failed++
          console.log(`❌ Failed to apply optimization: ${rec.description} - ${result.error}`)
        }
        results.push(result)
      } catch (error) {
        failed++
        console.log(`💥 Error applying optimization: ${rec.description} - ${error}`)
        results.push({ success: false, error: String(error) })
      }
    }

    console.log(`🔧 Auto-optimization completed: ${applied} applied, ${failed} failed`)
    return { applied, failed, results }
  }

  /**
   * Export performance data for external analysis
   *
   * @param format `'json'` for a full snapshot (metrics, alerts, health and
   *   last-24h statistics) or `'csv'` for one row per recorded metric.
   * @returns The serialized data.
   */
  exportPerformanceData(format: 'json' | 'csv'): string {
    if (format === 'json') {
      const now = Date.now()
      return JSON.stringify({
        exportTime: now,
        metrics: this.metrics,
        alerts: this.alerts,
        systemHealth: this.getSystemHealth(),
        stats: this.getPerformanceStats(now - 86400000, now) // Last 24h
      }, null, 2)
    }

    // CSV format
    const headers = [
      'timestamp', 'service', 'operation', 'provider', 'duration',
      'success', 'tokensUsed', 'costUSD', 'cacheHit', 'error'
    ]

    const rows = this.metrics.map(m => [
      m.timestamp,
      m.service,
      m.operation,
      m.provider || '',
      m.duration,
      m.success,
      m.tokensUsed || 0,
      m.costUSD || 0,
      m.cacheHit || false,
      m.error || ''
    ])

    // Quote fields containing a delimiter, quote or line break so free-text
    // values (error messages in particular) cannot break the row structure.
    const escapeCsvField = (value: unknown): string => {
      const text = String(value)
      return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text
    }

    return [headers, ...rows]
      .map(row => row.map(escapeCsvField).join(','))
      .join('\n')
  }

  // Private methods for internal calculations and operations

  /**
   * Records a synthetic `system`/`health_check` metric carrying the current
   * memory and CPU readings.
   *
   * NOTE: these samples are stored in the same history as request metrics
   * (duration 0, success true) and are therefore included in the aggregates
   * computed by getSystemHealth and getPerformanceStats.
   */
  private collectSystemMetrics(): void {
    const memoryUsage = this.getMemoryUsage()
    const cpuUsage = this.getCpuUsage()

    this.recordMetric({
      timestamp: Date.now(),
      service: 'system',
      operation: 'health_check',
      duration: 0,
      success: true,
      memoryUsage,
      cpuUsage
    })
  }

  /** Raises an alert when the current health status is critical or warning. */
  private checkSystemHealth(): void {
    const health = this.getSystemHealth()

    if (health.status === 'critical') {
      this.createAlert({
        severity: 'critical',
        message: `System health is critical (score: ${health.score})`,
        source: 'health_monitor',
        resolved: false
      })
    } else if (health.status === 'warning') {
      this.createAlert({
        severity: 'warning',
        message: `System health degraded (score: ${health.score})`,
        source: 'health_monitor',
        resolved: false
      })
    }
  }

  /**
   * Periodic hook: when auto-optimization is enabled, computes the current
   * recommendations and applies them in the background.
   */
  private generateOptimizationRecommendations(): void {
    // Nothing is done with the recommendations unless they can be applied,
    // so skip the computation entirely when auto-optimization is off.
    if (!this.config.enableAutoOptimization) {
      return
    }

    const recommendations = this.getOptimizationRecommendations()
    if (recommendations.length > 0) {
      console.log(`🔧 Found ${recommendations.length} optimization opportunities`)
      // Fire-and-forget from the timer tick; make sure a rejection can never
      // surface as an unhandled promise rejection.
      void this.applyOptimizations(recommendations).catch(error => {
        console.log(`💥 Auto-optimization run failed: ${error}`)
      })
    }
  }

  /**
   * Creates alerts for a single metric: an `error` alert when it failed and a
   * `warning` alert when it took longer than 10 seconds.
   */
  private checkForImmediateAlerts(metric: PerformanceMetrics): void {
    if (!metric.success) {
      this.createAlert({
        severity: 'error',
        message: `${metric.service} ${metric.operation} failed: ${metric.error}`,
        source: metric.service,
        resolved: false
      })
    }

    if (metric.duration && metric.duration > 10000) {
      this.createAlert({
        severity: 'warning',
        message: `High latency detected: ${metric.service} ${metric.operation} took ${metric.duration}ms`,
        source: metric.service,
        resolved: false
      })
    }
  }

  /**
   * Stores a new alert (assigning id and timestamp), keeps only the 100 most
   * recent alerts, and logs it to the console.
   */
  private createAlert(alert: Omit<HealthAlert, 'id' | 'timestamp'>): void {
    const createdAt = Date.now()
    const newAlert: HealthAlert = {
      id: `alert-${createdAt}-${Math.random().toString(36).slice(2, 11)}`,
      timestamp: createdAt,
      ...alert
    }

    this.alerts.push(newAlert)

    // Keep only last 100 alerts
    if (this.alerts.length > 100) {
      this.alerts.splice(0, this.alerts.length - 100)
    }

    console.log(`🚨 ${alert.severity.toUpperCase()}: ${alert.message}`)
  }

  /** Unresolved alerts raised within the last hour. */
  private getActiveAlerts(): HealthAlert[] {
    const now = Date.now()
    return this.alerts.filter(a => !a.resolved && now - a.timestamp < 3600000) // Last hour
  }

  /**
   * Converts check values into a 0-100 score by subtracting a penalty for
   * each threshold in `alertThresholds` that is violated (error rate,
   * response time, memory usage, cache hit rate).
   */
  private calculateHealthScore(checks: SystemHealth['checks']): number {
    let score = 100

    if (checks.errorRate > this.alertThresholds.errorRate) {
      score -= (checks.errorRate - this.alertThresholds.errorRate) * 500
    }

    if (checks.responseTime > this.alertThresholds.responseTime) {
      // Latency penalty is capped at 30 points.
      score -= Math.min(30, (checks.responseTime - this.alertThresholds.responseTime) / 1000 * 5)
    }

    if (checks.memoryUsage > this.alertThresholds.memoryUsage) {
      score -= (checks.memoryUsage - this.alertThresholds.memoryUsage) * 100
    }

    if (checks.cacheEfficiency < this.alertThresholds.cacheHitRate) {
      score -= (this.alertThresholds.cacheHitRate - checks.cacheEfficiency) * 50
    }

    return Math.max(0, Math.min(100, score))
  }

  /**
   * Maps a score and check values to a status.
   *
   * Because getSystemHealth computes `apiConnectivity` as `errorRate < 0.1`,
   * any error rate of 10% or more already yields `critical` here.
   */
  private determineHealthStatus(
    score: number,
    checks: SystemHealth['checks']
  ): 'healthy' | 'warning' | 'critical' {
    if (score < 30 || checks.errorRate > 0.2 || !checks.apiConnectivity) {
      return 'critical'
    } else if (score < 70 || checks.errorRate > 0.1 || checks.responseTime > 5000) {
      return 'warning'
    } else {
      return 'healthy'
    }
  }

  /** Hit ratio among metrics that report `cacheHit`; 0 when none do. */
  private calculateCacheHitRate(metrics: PerformanceMetrics[]): number {
    const cacheMetrics = metrics.filter(m => m.cacheHit !== undefined)
    if (cacheMetrics.length === 0) return 0

    return cacheMetrics.filter(m => m.cacheHit).length / cacheMetrics.length
  }

  /** Median of an ascending-sorted, non-empty list (mean of the two middle values for even counts). */
  private median(sortedValues: number[]): number {
    if (sortedValues.length === 0) return 0
    const middle = Math.floor(sortedValues.length / 2)
    return sortedValues.length % 2 === 0
      ? (sortedValues[middle - 1] + sortedValues[middle]) / 2
      : sortedValues[middle]
  }

  /** Nearest-rank percentile (`percent` in 0-100) of an ascending-sorted list. */
  private percentile(sortedValues: number[], percent: number): number {
    if (sortedValues.length === 0) return 0
    // Multiply before dividing so whole-number ranks stay exact.
    const rank = Math.ceil((percent * sortedValues.length) / 100)
    const index = Math.min(sortedValues.length - 1, Math.max(0, rank - 1))
    return sortedValues[index]
  }

  private getMemoryUsage(): number {
    // Simulated memory usage - in real implementation, use actual system metrics
    return Math.random() * 0.8 + 0.2
  }

  private getCpuUsage(): number {
    // Simulated CPU usage - in real implementation, use actual system metrics
    return Math.random() * 0.6 + 0.1
  }

  /** Fraction of the daily budget spent since local midnight. */
  private calculateCostBudgetUsage(): number {
    const todayStart = new Date().setHours(0, 0, 0, 0)
    const todayMetrics = this.metrics.filter(m => m.timestamp >= todayStart)
    const todayCost = todayMetrics.reduce((sum, m) => sum + (m.costUSD || 0), 0)

    const dailyBudget = 100 // $100 daily budget - should be configurable
    return todayCost / dailyBudget
  }

  /** Sums `costUSD` per provider; metrics without provider or cost are skipped. */
  private groupCostsByProvider(metrics: PerformanceMetrics[]): Record<string, number> {
    const costs: Record<string, number> = {}

    metrics.forEach(m => {
      if (m.provider && m.costUSD) {
        costs[m.provider] = (costs[m.provider] || 0) + m.costUSD
      }
    })

    return costs
  }

  /** Counts failed metrics per error category (see categorizeError). */
  private groupErrorsByType(failedMetrics: PerformanceMetrics[]): Record<string, number> {
    const errors: Record<string, number> = {}

    failedMetrics.forEach(m => {
      if (m.error) {
        const errorType = this.categorizeError(m.error)
        errors[errorType] = (errors[errorType] || 0) + 1
      }
    })

    return errors
  }

  /** Counts failed metrics per provider; metrics without a provider are skipped. */
  private groupErrorsByProvider(failedMetrics: PerformanceMetrics[]): Record<string, number> {
    const errors: Record<string, number> = {}

    failedMetrics.forEach(m => {
      if (m.provider) {
        errors[m.provider] = (errors[m.provider] || 0) + 1
      }
    })

    return errors
  }

  /** The five most frequent exact error messages, most frequent first. */
  private getTopErrors(failedMetrics: PerformanceMetrics[]): Array<{ error: string; count: number }> {
    const errorCounts: Record<string, number> = {}

    failedMetrics.forEach(m => {
      if (m.error) {
        errorCounts[m.error] = (errorCounts[m.error] || 0) + 1
      }
    })

    return Object.entries(errorCounts)
      .map(([error, count]) => ({ error, count }))
      .sort((a, b) => b.count - a.count)
      .slice(0, 5)
  }

  /** Buckets an error message by case-insensitive keyword; first match wins. */
  private categorizeError(error: string): string {
    const lowerError = error.toLowerCase()

    if (lowerError.includes('timeout')) return 'timeout'
    if (lowerError.includes('rate limit')) return 'rate_limit'
    if (lowerError.includes('auth')) return 'authentication'
    if (lowerError.includes('network')) return 'network'
    if (lowerError.includes('quota')) return 'quota_exceeded'

    return 'unknown'
  }

  /**
   * Picks the provider with the lowest observed share of errors.
   *
   * Every known provider starts with a perfect score of 1 and loses its share
   * of errors relative to all requests, so a provider with no recorded errors
   * always outranks one that has failed. Ties keep declaration order.
   */
  private recommendBestProvider(stats: PerformanceStats): AIProvider {
    const providerScores: Record<string, number> = {
      openai: 1,
      google: 1,
      baidu: 1
    }

    // Simple scoring based on error rates
    if (stats.requests.total > 0) {
      for (const [provider, errors] of Object.entries(stats.errors.byProvider)) {
        providerScores[provider] = 1 - errors / stats.requests.total
      }
    }

    return Object.entries(providerScores)
      .sort(([, a], [, b]) => b - a)[0][0] as AIProvider
  }

  /** All-zero statistics for a time range that contains no metrics. */
  private getEmptyStats(startTime: number, endTime: number): PerformanceStats {
    return {
      timeRange: { start: startTime, end: endTime, duration: endTime - startTime },
      requests: { total: 0, successful: 0, failed: 0, successRate: 0 },
      timing: { averageLatency: 0, medianLatency: 0, p95Latency: 0, p99Latency: 0 },
      costs: { total: 0, average: 0, byProvider: {} },
      cache: { hitRate: 0, totalRequests: 0, hits: 0, misses: 0 },
      errors: { byType: {}, byProvider: {}, topErrors: [] }
    }
  }

  /**
   * Applies one recommendation. Currently only logs what would be done;
   * recommendation types other than cache/provider/batch/model are reported
   * as failures.
   */
  private async applyOptimization(
    recommendation: OptimizationRecommendation
  ): Promise<{ success: boolean; error?: string }> {
    // Simulated optimization application
    // In real implementation, this would apply actual configuration changes

    try {
      switch (recommendation.type) {
        case 'cache':
          // Apply cache optimization
          console.log(`🔧 Applying cache optimization: ${JSON.stringify(recommendation.implementation.parameters)}`)
          break

        case 'provider':
          // Switch provider
          console.log(`🔧 Switching to provider: ${recommendation.implementation.parameters.newPrimary}`)
          break

        case 'batch':
          // Enable batch processing
          console.log(`🔧 Enabling batch processing: batch size ${recommendation.implementation.parameters.batchSize}`)
          break

        case 'model':
          // Implement model selection
          console.log(`🔧 Implementing intelligent model selection`)
          break

        default:
          return { success: false, error: 'Unknown optimization type' }
      }

      return { success: true }
    } catch (error) {
      return { success: false, error: String(error) }
    }
  }
}

// Export default configuration
export const defaultPerformanceConfig = {
  monitoringInterval: 30000, // 30 seconds
  maxHistory: 10000,
  enableAutoOptimization: false, // Disabled by default for safety
  alertWebhook: undefined
}