// Source file: akmon/uni_modules/ak-ai-news/services/AIPerformanceMonitor.uts
// (exported 2026-01-20; original export listed as 759 lines / 22 KiB)

// Performance Monitor and Optimization System
// Real-time monitoring, metrics collection, and automatic optimization
import { type AIProvider, type AIServiceConfig } from '../types/ai-types.uts'
/**
 * Performance metrics data structure.
 * One sample per monitored operation (AI request, cache lookup, periodic
 * health check, ...). Only timestamp..success are always present; the
 * remaining fields are operation-specific extras supplied by callers.
 */
export type PerformanceMetrics = {
  timestamp: number // epoch ms; recordMetric() fills this in when falsy
  service: string // logical service name, e.g. 'system'
  operation: string // operation within the service, e.g. 'health_check'
  provider?: AIProvider // AI provider that served the request, if any
  duration: number // wall-clock duration in ms
  success: boolean // false triggers immediate alert analysis
  error?: string // error text when success is false
  tokensUsed?: number // AI token usage for the request
  costUSD?: number // request cost in US dollars
  cacheHit?: boolean // set only for cache-aware operations
  memoryUsage?: number // 0-1 fraction (sampled system metric)
  cpuUsage?: number // 0-1 fraction (sampled system metric)
  networkLatency?: number // ms — not populated in this file; set by callers
  queueSize?: number // pending queue depth — not populated in this file
  throughput?: number // requests per unit time — not populated in this file
}
/**
 * System health status snapshot, as returned by
 * AIPerformanceMonitor.getSystemHealth() for the last 5 minutes of data.
 */
export type SystemHealth = {
  status: 'healthy' | 'warning' | 'critical'
  score: number // 0-100
  checks: {
    apiConnectivity: boolean // true when recent error rate is below 10%
    memoryUsage: number // 0-1 fraction of memory in use
    errorRate: number // 0-1 fraction of failed recent requests
    responseTime: number // average recent latency in ms
    costBudget: number // fraction of daily budget spent (may exceed 1)
    cacheEfficiency: number // recent cache hit rate, 0-1
  }
  alerts: HealthAlert[] // unresolved alerts from the last hour
}
/**
 * A single health alert raised by the monitor.
 */
export type HealthAlert = {
  id: string // unique id of the form 'alert-<timestamp>-<random>'
  severity: 'info' | 'warning' | 'error' | 'critical'
  message: string // human-readable description
  timestamp: number // creation time, epoch ms
  source: string // component that raised the alert (service name or 'health_monitor')
  resolved: boolean // created as false; nothing in this file flips it — resolution is external
}
/**
 * A suggested optimization: what to change, why it is worth doing, and the
 * machine-readable instructions for applying it automatically.
 */
export type OptimizationRecommendation = {
  type: 'cache' | 'provider' | 'batch' | 'model' | 'timeout' | 'retry'
  priority: 'low' | 'medium' | 'high' | 'critical'
  description: string // human-readable rationale
  expectedImpact: {
    performanceGain?: string // e.g. '30-50% faster response times'
    costSaving?: string // e.g. '20-40% reduction in AI API costs'
    reliabilityImprovement?: string // e.g. '80-90% reduction in errors'
  }
  implementation: {
    action: string // machine-readable action id, e.g. 'increase_cache_size'
    parameters: Record<string, any> // action-specific settings
    estimatedEffort: string // free-form effort estimate
  }
}
/**
 * Aggregated performance statistics over a time range, as produced by
 * AIPerformanceMonitor.getPerformanceStats().
 */
export type PerformanceStats = {
  timeRange: {
    start: number // range start, epoch ms (inclusive)
    end: number // range end, epoch ms (inclusive)
    duration: number // end - start, ms
  }
  requests: {
    total: number
    successful: number
    failed: number
    successRate: number // successful / total, 0-1
  }
  timing: {
    averageLatency: number // ms
    medianLatency: number // ms
    p95Latency: number // ms
    p99Latency: number // ms
  }
  costs: {
    total: number // USD across the range
    average: number // USD per request
    byProvider: Record<string, number> // USD keyed by provider name
  }
  cache: {
    hitRate: number // hits / totalRequests, 0-1 (0 when no cache data)
    totalRequests: number // only metrics that reported a cacheHit value
    hits: number
    misses: number
  }
  errors: {
    byType: Record<string, number> // categorized: timeout, rate_limit, ...
    byProvider: Record<string, number>
    topErrors: Array<{ error: string; count: number }> // top 5 by count
  }
}
/**
 * Shape of the per-dimension health-check values (mirrors SystemHealth.checks).
 */
type HealthChecks = {
  apiConnectivity: boolean
  memoryUsage: number
  errorRate: number
  responseTime: number
  costBudget: number
  cacheEfficiency: number
}

/**
 * Result of attempting to apply a single optimization recommendation.
 */
type OptimizationResult = {
  success: boolean
  error?: string
}

/**
 * Performance monitoring and optimization service.
 *
 * Collects per-request metrics into a bounded in-memory buffer, aggregates
 * them into health scores and time-range statistics, raises alerts on
 * failures and latency spikes, and — only when `enableAutoOptimization` is
 * set — applies optimization recommendations automatically on each
 * monitoring tick.
 */
export class AIPerformanceMonitor {
  // Rolling metric buffer, trimmed to maxMetricsHistory on insert
  private metrics: PerformanceMetrics[] = []
  // Alert history, trimmed to the most recent 100 entries
  private alerts: HealthAlert[] = []
  private isMonitoring = false
  // Handle for the periodic monitoring loop (undefined when stopped)
  private monitoringInterval?: number
  private maxMetricsHistory = 10000
  // Thresholds that trigger health-score deductions and alerts
  private alertThresholds = {
    errorRate: 0.05, // 5%
    responseTime: 5000, // 5 seconds
    memoryUsage: 0.8, // 80%
    costBudget: 0.9, // 90% of daily budget
    cacheHitRate: 0.3 // 30% minimum
  }

  constructor(
    private config: {
      monitoringInterval: number // ms between monitoring ticks
      maxHistory: number // max metric samples to retain
      alertWebhook?: string // accepted but not used anywhere in this class yet
      enableAutoOptimization: boolean // opt-in automatic optimization
    }
  ) {
    this.maxMetricsHistory = config.maxHistory
  }

  /**
   * Start the periodic monitoring loop (no-op if already running).
   * Each tick collects system metrics, evaluates health, and generates
   * (and, when enabled, applies) optimization recommendations.
   */
  startMonitoring(): void {
    if (this.isMonitoring) {
      console.log('⚠️ Performance monitoring is already running')
      return
    }
    this.isMonitoring = true
    console.log('🚀 Starting performance monitoring...')
    this.monitoringInterval = setInterval(() => {
      this.collectSystemMetrics()
      this.checkSystemHealth()
      this.generateOptimizationRecommendations()
    }, this.config.monitoringInterval)
  }

  /**
   * Stop the periodic monitoring loop. Safe to call when not running.
   */
  stopMonitoring(): void {
    if (this.monitoringInterval) {
      clearInterval(this.monitoringInterval)
      this.monitoringInterval = undefined
    }
    this.isMonitoring = false
    console.log('🛑 Performance monitoring stopped')
  }

  /**
   * Record a single performance metric sample.
   * Fills in a missing/zero timestamp with the current time, enforces the
   * history cap (oldest samples dropped), and immediately raises alerts for
   * failures and very slow (>10s) operations.
   */
  recordMetric(metric: PerformanceMetrics): void {
    metric.timestamp = metric.timestamp || Date.now()
    this.metrics.push(metric)
    // Maintain history limit
    if (this.metrics.length > this.maxMetricsHistory) {
      this.metrics = this.metrics.slice(-this.maxMetricsHistory)
    }
    // Real-time analysis for critical metrics
    if (!metric.success || (metric.duration && metric.duration > 10000)) {
      this.checkForImmediateAlerts(metric)
    }
  }

  /**
   * Compute current system health from the last 5 minutes of metrics.
   * With no recent data, returns a neutral 'warning' snapshot (score 50).
   */
  getSystemHealth(): SystemHealth {
    const now = Date.now()
    const recentMetrics = this.metrics.filter(m => now - m.timestamp < 300000) // Last 5 minutes
    if (recentMetrics.length === 0) {
      return {
        status: 'warning',
        score: 50,
        checks: {
          apiConnectivity: false,
          memoryUsage: 0,
          errorRate: 0,
          responseTime: 0,
          costBudget: 0,
          cacheEfficiency: 0
        },
        alerts: this.getActiveAlerts()
      }
    }
    const errorRate = recentMetrics.filter(m => !m.success).length / recentMetrics.length
    const avgResponseTime = recentMetrics.reduce((sum, m) => sum + m.duration, 0) / recentMetrics.length
    const cacheHitRate = this.calculateCacheHitRate(recentMetrics)
    const memoryUsage = this.getMemoryUsage()
    const costBudget = this.calculateCostBudgetUsage()
    const checks: HealthChecks = {
      apiConnectivity: errorRate < 0.1,
      memoryUsage,
      errorRate,
      responseTime: avgResponseTime,
      costBudget,
      cacheEfficiency: cacheHitRate
    }
    const score = this.calculateHealthScore(checks)
    const status = this.determineHealthStatus(score, checks)
    return {
      status,
      score,
      checks,
      alerts: this.getActiveAlerts()
    }
  }

  /**
   * Aggregate statistics for all metrics whose timestamp falls inside
   * [startTime, endTime] (both epoch ms, inclusive). Returns zeroed stats
   * when the range contains no samples.
   */
  getPerformanceStats(
    startTime: number,
    endTime: number
  ): PerformanceStats {
    const rangeMetrics = this.metrics.filter(
      m => m.timestamp >= startTime && m.timestamp <= endTime
    )
    if (rangeMetrics.length === 0) {
      return this.getEmptyStats(startTime, endTime)
    }
    const successful = rangeMetrics.filter(m => m.success)
    const failed = rangeMetrics.filter(m => !m.success)
    const successRate = successful.length / rangeMetrics.length
    // Timing statistics: percentiles over the sorted duration list
    const durations = rangeMetrics.map(m => m.duration).sort((a, b) => a - b)
    const averageLatency = durations.reduce((sum, d) => sum + d, 0) / durations.length
    const medianLatency = durations[Math.floor(durations.length / 2)]
    const p95Latency = durations[Math.floor(durations.length * 0.95)]
    const p99Latency = durations[Math.floor(durations.length * 0.99)]
    // Cost statistics (metrics without costUSD count as $0)
    const totalCost = rangeMetrics.reduce((sum, m) => sum + (m.costUSD || 0), 0)
    const averageCost = totalCost / rangeMetrics.length
    const costByProvider = this.groupCostsByProvider(rangeMetrics)
    // Cache statistics: only metrics that reported a cacheHit value count
    const cacheRequests = rangeMetrics.filter(m => m.cacheHit !== undefined)
    const cacheHits = cacheRequests.filter(m => m.cacheHit).length
    const cacheMisses = cacheRequests.length - cacheHits
    const cacheHitRate = cacheRequests.length > 0 ? cacheHits / cacheRequests.length : 0
    // Error statistics
    const errorsByType = this.groupErrorsByType(failed)
    const errorsByProvider = this.groupErrorsByProvider(failed)
    const topErrors = this.getTopErrors(failed)
    return {
      timeRange: {
        start: startTime,
        end: endTime,
        duration: endTime - startTime
      },
      requests: {
        total: rangeMetrics.length,
        successful: successful.length,
        failed: failed.length,
        successRate
      },
      timing: {
        averageLatency,
        medianLatency,
        p95Latency,
        p99Latency
      },
      costs: {
        total: totalCost,
        average: averageCost,
        byProvider: costByProvider
      },
      cache: {
        hitRate: cacheHitRate,
        totalRequests: cacheRequests.length,
        hits: cacheHits,
        misses: cacheMisses
      },
      errors: {
        byType: errorsByType,
        byProvider: errorsByProvider,
        topErrors
      }
    }
  }

  /**
   * Derive optimization recommendations from the last hour of metrics:
   * low cache hit rate, an error-prone provider, high latency under load,
   * and high per-request cost each produce one recommendation.
   */
  getOptimizationRecommendations(): OptimizationRecommendation[] {
    const recommendations: OptimizationRecommendation[] = []
    const recentStats = this.getPerformanceStats(
      Date.now() - 3600000, // Last hour
      Date.now()
    )
    // Cache optimization recommendations
    if (recentStats.cache.hitRate < 0.4) {
      recommendations.push({
        type: 'cache',
        priority: 'high',
        description: `Cache hit rate is low (${(recentStats.cache.hitRate * 100).toFixed(1)}%). Consider increasing cache size or TTL.`,
        expectedImpact: {
          performanceGain: '30-50% faster response times',
          costSaving: '20-40% reduction in AI API costs'
        },
        implementation: {
          action: 'increase_cache_size',
          parameters: {
            maxSize: Math.max(1000, recentStats.cache.totalRequests * 2),
            ttl: 3600000 // 1 hour
          },
          estimatedEffort: 'Low - Configuration change'
        }
      })
    }
    // Provider optimization: flag the provider with the most errors when it
    // accounts for more than 10% of all requests in the window
    const providerErrors = recentStats.errors.byProvider
    const worstProvider = Object.entries(providerErrors)
      .sort(([, a], [, b]) => b - a)[0]
    if (worstProvider && worstProvider[1] > recentStats.requests.total * 0.1) {
      recommendations.push({
        type: 'provider',
        priority: 'medium',
        description: `Provider ${worstProvider[0]} has high error rate (${worstProvider[1]} errors). Consider switching primary provider.`,
        expectedImpact: {
          reliabilityImprovement: '80-90% reduction in errors'
        },
        implementation: {
          action: 'switch_primary_provider',
          parameters: {
            newPrimary: this.recommendBestProvider(recentStats),
            fallbackProviders: ['openai', 'google', 'baidu']
          },
          estimatedEffort: 'Medium - Code changes required'
        }
      })
    }
    // Batch processing: high latency combined with significant volume
    if (recentStats.timing.averageLatency > 3000 && recentStats.requests.total > 100) {
      recommendations.push({
        type: 'batch',
        priority: 'medium',
        description: 'High latency with significant request volume. Consider implementing batch processing.',
        expectedImpact: {
          performanceGain: '50-70% improvement in throughput',
          costSaving: '15-25% cost reduction'
        },
        implementation: {
          action: 'enable_batch_processing',
          parameters: {
            batchSize: 10,
            batchTimeout: 1000,
            concurrency: 3
          },
          estimatedEffort: 'High - Significant code changes'
        }
      })
    }
    // Model optimization: average cost above 1 cent per request
    if (recentStats.costs.average > 0.01) {
      recommendations.push({
        type: 'model',
        priority: 'low',
        description: 'Request costs are high. Consider using smaller/cheaper models for simple tasks.',
        expectedImpact: {
          costSaving: '40-60% cost reduction'
        },
        implementation: {
          action: 'implement_model_selection',
          parameters: {
            simpleTaskModel: 'gpt-3.5-turbo',
            complexTaskModel: 'gpt-4',
            costThreshold: 0.005
          },
          estimatedEffort: 'Medium - Logic implementation required'
        }
      })
    }
    return recommendations
  }

  /**
   * Apply a list of recommendations sequentially.
   * Returns counts plus the per-recommendation results. Does nothing (and
   * returns zero counts) when auto-optimization is disabled.
   */
  async applyOptimizations(
    recommendations: OptimizationRecommendation[]
  ): Promise<{ applied: number; failed: number; results: OptimizationResult[] }> {
    if (!this.config.enableAutoOptimization) {
      console.log('⚠️ Auto-optimization is disabled')
      return { applied: 0, failed: 0, results: [] }
    }
    const results: OptimizationResult[] = []
    let applied = 0
    let failed = 0
    for (const rec of recommendations) {
      try {
        const result = await this.applyOptimization(rec)
        if (result.success) {
          applied++
          console.log(`✅ Applied optimization: ${rec.description}`)
        } else {
          failed++
          console.log(`❌ Failed to apply optimization: ${rec.description} - ${result.error}`)
        }
        results.push(result)
      } catch (error) {
        failed++
        console.log(`💥 Error applying optimization: ${rec.description} - ${error}`)
        results.push({ success: false, error: String(error) })
      }
    }
    console.log(`🔧 Auto-optimization completed: ${applied} applied, ${failed} failed`)
    return { applied, failed, results }
  }

  /**
   * Export collected data for external analysis.
   * 'json' bundles metrics, alerts, current health, and 24h stats;
   * 'csv' emits one row per metric with RFC 4180 quoting, so fields that
   * contain commas, quotes, or newlines (e.g. error messages) no longer
   * corrupt the output.
   */
  exportPerformanceData(format: 'json' | 'csv'): string {
    if (format === 'json') {
      return JSON.stringify({
        exportTime: Date.now(),
        metrics: this.metrics,
        alerts: this.alerts,
        systemHealth: this.getSystemHealth(),
        stats: this.getPerformanceStats(Date.now() - 86400000, Date.now()) // Last 24h
      }, null, 2)
    } else {
      // CSV format
      const headers = [
        'timestamp', 'service', 'operation', 'provider', 'duration',
        'success', 'tokensUsed', 'costUSD', 'cacheHit', 'error'
      ]
      const rows = this.metrics.map(m => [
        m.timestamp,
        m.service,
        m.operation,
        m.provider || '',
        m.duration,
        m.success,
        m.tokensUsed || 0,
        m.costUSD || 0,
        m.cacheHit || false,
        m.error || ''
      ])
      return [headers, ...rows]
        .map(row => row.map(field => this.csvEscape(String(field))).join(','))
        .join('\n')
    }
  }

  // Private methods for internal calculations and operations

  /**
   * Quote a CSV field per RFC 4180 when it contains a separator, quote, or
   * line break; embedded quotes are doubled.
   */
  private csvEscape(field: string): string {
    if (field.includes(',') || field.includes('"') || field.includes('\n') || field.includes('\r')) {
      return '"' + field.replace(/"/g, '""') + '"'
    }
    return field
  }

  /** Monitoring-tick hook: record a synthetic system health-check sample. */
  private collectSystemMetrics(): void {
    const memoryUsage = this.getMemoryUsage()
    const cpuUsage = this.getCpuUsage()
    this.recordMetric({
      timestamp: Date.now(),
      service: 'system',
      operation: 'health_check',
      duration: 0,
      success: true,
      memoryUsage,
      cpuUsage
    })
  }

  /**
   * Monitoring-tick hook: raise an alert when overall health is degraded.
   * NOTE(review): this creates a fresh alert on every tick while degraded —
   * no de-duplication; confirm whether repeated alerts are intended.
   */
  private checkSystemHealth(): void {
    const health = this.getSystemHealth()
    if (health.status === 'critical') {
      this.createAlert({
        severity: 'critical',
        message: `System health is critical (score: ${health.score})`,
        source: 'health_monitor',
        resolved: false
      })
    } else if (health.status === 'warning') {
      this.createAlert({
        severity: 'warning',
        message: `System health degraded (score: ${health.score})`,
        source: 'health_monitor',
        resolved: false
      })
    }
  }

  /** Monitoring-tick hook: compute recommendations and auto-apply when enabled. */
  private generateOptimizationRecommendations(): void {
    const recommendations = this.getOptimizationRecommendations()
    if (recommendations.length > 0 && this.config.enableAutoOptimization) {
      console.log(`🔧 Found ${recommendations.length} optimization opportunities`)
      // Fire-and-forget, but never leave a rejection unhandled
      this.applyOptimizations(recommendations).catch(err => {
        console.log(`💥 Auto-optimization run failed: ${err}`)
      })
    }
  }

  /** Raise immediate alerts for a failed or very slow (>10s) metric. */
  private checkForImmediateAlerts(metric: PerformanceMetrics): void {
    if (!metric.success) {
      this.createAlert({
        severity: 'error',
        message: `${metric.service} ${metric.operation} failed: ${metric.error}`,
        source: metric.service,
        resolved: false
      })
    }
    if (metric.duration && metric.duration > 10000) {
      this.createAlert({
        severity: 'warning',
        message: `High latency detected: ${metric.service} ${metric.operation} took ${metric.duration}ms`,
        source: metric.service,
        resolved: false
      })
    }
  }

  /**
   * Stamp an alert with a unique id and timestamp, store it (capped at the
   * last 100), and log it.
   */
  private createAlert(alert: Omit<HealthAlert, 'id' | 'timestamp'>): void {
    const newAlert: HealthAlert = {
      // slice(2, 11) yields the same 9 random chars as the deprecated substr(2, 9)
      id: `alert-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`,
      timestamp: Date.now(),
      ...alert
    }
    this.alerts.push(newAlert)
    // Keep only last 100 alerts
    if (this.alerts.length > 100) {
      this.alerts = this.alerts.slice(-100)
    }
    console.log(`🚨 ${alert.severity.toUpperCase()}: ${alert.message}`)
  }

  /** Unresolved alerts raised within the last hour. */
  private getActiveAlerts(): HealthAlert[] {
    return this.alerts.filter(a => !a.resolved && Date.now() - a.timestamp < 3600000)
  }

  /**
   * Score system health 0-100: start at 100 and deduct for each threshold
   * the checks exceed (error rate, latency, memory, cache efficiency).
   */
  private calculateHealthScore(checks: HealthChecks): number {
    let score = 100
    if (checks.errorRate > this.alertThresholds.errorRate) {
      score -= (checks.errorRate - this.alertThresholds.errorRate) * 500
    }
    if (checks.responseTime > this.alertThresholds.responseTime) {
      score -= Math.min(30, (checks.responseTime - this.alertThresholds.responseTime) / 1000 * 5)
    }
    if (checks.memoryUsage > this.alertThresholds.memoryUsage) {
      score -= (checks.memoryUsage - this.alertThresholds.memoryUsage) * 100
    }
    if (checks.cacheEfficiency < this.alertThresholds.cacheHitRate) {
      score -= (this.alertThresholds.cacheHitRate - checks.cacheEfficiency) * 50
    }
    return Math.max(0, Math.min(100, score))
  }

  /** Map a health score plus raw checks onto the three-level status. */
  private determineHealthStatus(score: number, checks: HealthChecks): 'healthy' | 'warning' | 'critical' {
    if (score < 30 || checks.errorRate > 0.2 || !checks.apiConnectivity) {
      return 'critical'
    } else if (score < 70 || checks.errorRate > 0.1 || checks.responseTime > 5000) {
      return 'warning'
    } else {
      return 'healthy'
    }
  }

  /** Hit rate over metrics that reported a cacheHit value (0 when none did). */
  private calculateCacheHitRate(metrics: PerformanceMetrics[]): number {
    const cacheMetrics = metrics.filter(m => m.cacheHit !== undefined)
    if (cacheMetrics.length === 0) return 0
    return cacheMetrics.filter(m => m.cacheHit).length / cacheMetrics.length
  }

  private getMemoryUsage(): number {
    // Simulated memory usage - in real implementation, use actual system metrics
    return Math.random() * 0.8 + 0.2
  }

  private getCpuUsage(): number {
    // Simulated CPU usage - in real implementation, use actual system metrics
    return Math.random() * 0.6 + 0.1
  }

  /** Fraction of today's (local-midnight-to-now) spend against the daily budget. */
  private calculateCostBudgetUsage(): number {
    const todayStart = new Date().setHours(0, 0, 0, 0)
    const todayMetrics = this.metrics.filter(m => m.timestamp >= todayStart)
    const todayCost = todayMetrics.reduce((sum, m) => sum + (m.costUSD || 0), 0)
    const dailyBudget = 100 // $100 daily budget - should be configurable
    return todayCost / dailyBudget
  }

  /** Sum costUSD per provider (metrics without provider or cost are skipped). */
  private groupCostsByProvider(metrics: PerformanceMetrics[]): Record<string, number> {
    const costs: Record<string, number> = {}
    metrics.forEach(m => {
      if (m.provider && m.costUSD) {
        costs[m.provider] = (costs[m.provider] || 0) + m.costUSD
      }
    })
    return costs
  }

  /** Count failures per categorized error type (see categorizeError). */
  private groupErrorsByType(failedMetrics: PerformanceMetrics[]): Record<string, number> {
    const errors: Record<string, number> = {}
    failedMetrics.forEach(m => {
      if (m.error) {
        const errorType = this.categorizeError(m.error)
        errors[errorType] = (errors[errorType] || 0) + 1
      }
    })
    return errors
  }

  /** Count failures per provider (metrics without a provider are skipped). */
  private groupErrorsByProvider(failedMetrics: PerformanceMetrics[]): Record<string, number> {
    const errors: Record<string, number> = {}
    failedMetrics.forEach(m => {
      if (m.provider) {
        errors[m.provider] = (errors[m.provider] || 0) + 1
      }
    })
    return errors
  }

  /** The five most frequent raw error strings among the failed metrics. */
  private getTopErrors(failedMetrics: PerformanceMetrics[]): Array<{ error: string; count: number }> {
    const errorCounts: Record<string, number> = {}
    failedMetrics.forEach(m => {
      if (m.error) {
        errorCounts[m.error] = (errorCounts[m.error] || 0) + 1
      }
    })
    return Object.entries(errorCounts)
      .map(([error, count]) => ({ error, count }))
      .sort((a, b) => b.count - a.count)
      .slice(0, 5)
  }

  /** Bucket a raw error string into a coarse category by substring match. */
  private categorizeError(error: string): string {
    const lowerError = error.toLowerCase()
    if (lowerError.includes('timeout')) return 'timeout'
    if (lowerError.includes('rate limit')) return 'rate_limit'
    if (lowerError.includes('auth')) return 'authentication'
    if (lowerError.includes('network')) return 'network'
    if (lowerError.includes('quota')) return 'quota_exceeded'
    return 'unknown'
  }

  /**
   * Pick the provider with the best (lowest) observed error rate.
   * Every candidate starts at a perfect score of 1 and is reduced by its
   * error rate. (Previously candidates started at 0, so a provider WITH
   * errors scored 1-errorRate and beat error-free providers stuck at 0 —
   * i.e. the failing provider was recommended as "best".)
   */
  private recommendBestProvider(stats: PerformanceStats): AIProvider {
    const providerPerformance = {
      openai: 1,
      google: 1,
      baidu: 1
    }
    // Deduct each provider's share of errors from its perfect score
    Object.entries(stats.errors.byProvider).forEach(([provider, errors]) => {
      const errorRate = errors / stats.requests.total
      providerPerformance[provider as AIProvider] = 1 - errorRate
    })
    return Object.entries(providerPerformance)
      .sort(([, a], [, b]) => b - a)[0][0] as AIProvider
  }

  /** Zeroed statistics for a range that contains no samples. */
  private getEmptyStats(startTime: number, endTime: number): PerformanceStats {
    return {
      timeRange: { start: startTime, end: endTime, duration: endTime - startTime },
      requests: { total: 0, successful: 0, failed: 0, successRate: 0 },
      timing: { averageLatency: 0, medianLatency: 0, p95Latency: 0, p99Latency: 0 },
      costs: { total: 0, average: 0, byProvider: {} },
      cache: { hitRate: 0, totalRequests: 0, hits: 0, misses: 0 },
      errors: { byType: {}, byProvider: {}, topErrors: [] }
    }
  }

  /**
   * Apply one recommendation.
   * Simulated: logs the intended change; a real implementation would apply
   * actual configuration changes here.
   */
  private async applyOptimization(recommendation: OptimizationRecommendation): Promise<OptimizationResult> {
    try {
      switch (recommendation.type) {
        case 'cache':
          // Apply cache optimization
          console.log(`🔧 Applying cache optimization: ${JSON.stringify(recommendation.implementation.parameters)}`)
          break
        case 'provider':
          // Switch provider
          console.log(`🔧 Switching to provider: ${recommendation.implementation.parameters.newPrimary}`)
          break
        case 'batch':
          // Enable batch processing
          console.log(`🔧 Enabling batch processing: batch size ${recommendation.implementation.parameters.batchSize}`)
          break
        case 'model':
          // Implement model selection
          console.log(`🔧 Implementing intelligent model selection`)
          break
        default:
          return { success: false, error: 'Unknown optimization type' }
      }
      return { success: true }
    } catch (error) {
      return { success: false, error: String(error) }
    }
  }
}
// Default monitor configuration: conservative values, auto-optimization off.
export const defaultPerformanceConfig = {
  monitoringInterval: 30000, // run the monitoring tick every 30 seconds
  maxHistory: 10000, // retain at most 10k metric samples
  enableAutoOptimization: false, // opt-in only: applying changes automatically is risky
  alertWebhook: undefined // no alert webhook unless the caller provides one
}