Node.js AI应用开发基础
📖 概述
本文全面介绍如何使用Node.js开发AI应用,从环境搭建到实际项目实现。涵盖主流AI API集成、本地模型部署、数据处理、性能优化等核心技术。通过丰富的代码示例和实战项目,帮助开发者快速掌握Node.js AI应用开发的最佳实践。
🚀 Node.js AI开发环境搭建
基础环境配置
| 组件 | 版本要求 | 安装方式 | 用途说明 |
|---|---|---|---|
| Node.js | >=18.0.0 | 官网下载/nvm | JavaScript运行环境 |
| npm/yarn | 最新版本 | 随Node.js安装 | 包管理工具 |
| TypeScript | >=4.5.0 | npm install -g typescript | 类型安全开发 |
| Python | >=3.8.0 | 官网下载/pyenv | 机器学习库支持 |
核心依赖包
| 包名 | 版本 | 功能描述 | 使用场景 |
|---|---|---|---|
| @anthropic-ai/sdk | ^0.24.0 | Claude API客户端 | 对话和文本生成 |
| openai | ^4.0.0 | OpenAI API客户端 | GPT模型调用 |
| @google/generative-ai | ^0.15.0 | Gemini API客户端 | Google AI服务 |
| langchain | ^0.2.0 | AI应用开发框架 | 复杂AI工作流 |
| @tensorflow/tfjs-node | ^4.0.0 | TensorFlow.js | 本地模型推理 |
| sharp | ^0.33.0 | 图像处理 | 图像预处理 |
| pdf-parse | ^1.1.1 | PDF解析 | 文档处理 |
项目初始化
// package.json 配置示例
{
"name": "nodejs-ai-app",
"version": "1.0.0",
"description": "Node.js AI应用开发基础项目",
"main": "dist/index.js",
"scripts": {
"dev": "tsx watch src/index.ts",
"build": "tsc",
"start": "node dist/index.js",
"test": "jest",
"lint": "eslint src/**/*.ts",
"format": "prettier --write src/**/*.ts"
},
"dependencies": {
"@anthropic-ai/sdk": "^0.24.0",
"openai": "^4.0.0",
"@google/generative-ai": "^0.15.0",
"langchain": "^0.2.0",
"express": "^4.18.0",
"cors": "^2.8.5",
"helmet": "^7.0.0",
"dotenv": "^16.0.0",
"winston": "^3.8.0",
"joi": "^17.9.0",
"rate-limiter-flexible": "^3.0.0"
},
"devDependencies": {
"@types/node": "^20.0.0",
"@types/express": "^4.17.0",
"typescript": "^5.0.0",
"tsx": "^4.0.0",
"jest": "^29.0.0",
"@types/jest": "^29.0.0",
"eslint": "^8.0.0",
"prettier": "^3.0.0"
},
"engines": {
"node": ">=18.0.0"
}
}
🔧 AI API集成实战
统一AI客户端管理器
const fs = require('fs');
const path = require('path');
const { Anthropic } = require('@anthropic-ai/sdk');
const { OpenAI } = require('openai');
const { GoogleGenerativeAI } = require('@google/generative-ai');
const winston = require('winston');
// Application-wide winston logger: structured JSON (with timestamps and
// error stacks) goes to the log files, a simplified text format to the console.
const structuredLogFormat = winston.format.combine(
  winston.format.timestamp(),
  winston.format.errors({ stack: true }),
  winston.format.json()
);
const logger = winston.createLogger({
  level: 'info',
  format: structuredLogFormat,
  transports: [
    new winston.transports.File({ filename: 'logs/error.log', level: 'error' }),
    new winston.transports.File({ filename: 'logs/combined.log' }),
    new winston.transports.Console({ format: winston.format.simple() })
  ]
});
/**
 * Unified client manager for multiple AI providers (OpenAI, Anthropic,
 * Google). Initializes only the providers whose API-key environment
 * variables are present, caches non-streaming text responses with a TTL,
 * tracks request statistics and offers a basic per-provider health check.
 */
class AIClientManager {
  /**
   * @param {object} [config] - Partial configuration merged over the defaults.
   * @param {string} [config.defaultProvider='openai'] - Provider used when none is given.
   * @param {number} [config.timeout=30000] - Per-request timeout in ms (passed to SDKs).
   * @param {number} [config.maxRetries=3] - SDK-level retry count.
   * @param {boolean} [config.enableCache=true] - Toggle response caching.
   * @param {number} [config.cacheExpiry=3600000] - Cache TTL in ms (1 hour).
   */
  constructor(config = {}) {
    this.config = {
      defaultProvider: 'openai',
      timeout: 30000,
      maxRetries: 3,
      retryDelay: 1000, // NOTE(review): not read anywhere in this class; kept for config compatibility
      enableCache: true,
      cacheExpiry: 3600000, // 1 hour
      ...config
    };
    this.clients = new Map(); // provider name -> SDK client instance
    this.cache = new Map();   // cache key -> { result, timestamp }
    this.requestStats = {
      total: 0,
      success: 0,
      errors: 0,
      cacheHits: 0
    };
    this.initializeClients();
    this.setupCacheCleanup();
  }

  /**
   * Creates one SDK client per provider whose API key is configured via
   * OPENAI_API_KEY / ANTHROPIC_API_KEY / GOOGLE_AI_API_KEY.
   */
  initializeClients() {
    console.log('🔧 初始化AI客户端...');
    try {
      // OpenAI client
      if (process.env.OPENAI_API_KEY) {
        this.clients.set('openai', new OpenAI({
          apiKey: process.env.OPENAI_API_KEY,
          timeout: this.config.timeout,
          maxRetries: this.config.maxRetries
        }));
        logger.info('OpenAI客户端初始化成功');
      }
      // Anthropic client
      if (process.env.ANTHROPIC_API_KEY) {
        this.clients.set('anthropic', new Anthropic({
          apiKey: process.env.ANTHROPIC_API_KEY,
          timeout: this.config.timeout,
          maxRetries: this.config.maxRetries
        }));
        logger.info('Anthropic客户端初始化成功');
      }
      // Google AI client — this SDK takes the key directly, not an options object.
      if (process.env.GOOGLE_AI_API_KEY) {
        this.clients.set('google', new GoogleGenerativeAI(
          process.env.GOOGLE_AI_API_KEY
        ));
        logger.info('Google AI客户端初始化成功');
      }
      console.log(`✅ 已初始化 ${this.clients.size} 个AI客户端`);
    } catch (error) {
      logger.error('AI客户端初始化失败:', error);
      throw error;
    }
  }

  /**
   * Generates text via the selected provider, with optional response caching.
   * @param {string} prompt - The user prompt.
   * @param {object} [options] - provider / model / maxTokens / temperature /
   *   useCache plus provider-specific extras (e.g. `stream` for OpenAI).
   * @returns {Promise<object>} provider result plus provider, model,
   *   responseTime and fromCache fields.
   * @throws {Error} when the provider is unknown, uninitialized, or the API call fails.
   */
  async generateText(prompt, options = {}) {
    const startTime = Date.now();
    this.requestStats.total++;
    try {
      const {
        provider = this.config.defaultProvider,
        model = 'gpt-3.5-turbo',
        maxTokens = 1000,
        temperature = 0.7,
        useCache = this.config.enableCache,
        ...otherOptions
      } = options;
      // Bug fix: streaming responses are one-shot async iterators — they can
      // be neither stored nor replayed, so caching is disabled for them.
      const cacheable = useCache && !otherOptions.stream;
      const cacheKey = this.generateCacheKey(prompt, options);
      if (cacheable && this.cache.has(cacheKey)) {
        const cached = this.cache.get(cacheKey);
        if (Date.now() - cached.timestamp < this.config.cacheExpiry) {
          this.requestStats.cacheHits++;
          logger.info(`缓存命中: ${cacheKey}`);
          return {
            ...cached.result,
            fromCache: true,
            responseTime: Date.now() - startTime
          };
        }
        this.cache.delete(cacheKey); // expired entry
      }
      console.log(`\n🤖 调用${provider}生成文本...`);
      console.log(`模型: ${model}`);
      console.log(`提示长度: ${prompt.length} 字符`);
      let result;
      switch (provider) {
        case 'openai':
          result = await this.callOpenAI(prompt, { model, maxTokens, temperature, ...otherOptions });
          break;
        case 'anthropic':
          result = await this.callAnthropic(prompt, { model, maxTokens, temperature, ...otherOptions });
          break;
        case 'google':
          result = await this.callGoogle(prompt, { model, maxTokens, temperature, ...otherOptions });
          break;
        default:
          throw new Error(`不支持的AI提供商: ${provider}`);
      }
      if (cacheable) {
        this.cache.set(cacheKey, {
          result,
          timestamp: Date.now()
        });
      }
      this.requestStats.success++;
      const responseTime = Date.now() - startTime;
      logger.info(`AI请求成功 - 提供商: ${provider}, 响应时间: ${responseTime}ms`);
      return {
        ...result,
        provider,
        model,
        responseTime,
        fromCache: false
      };
    } catch (error) {
      this.requestStats.errors++;
      logger.error('AI文本生成失败:', {
        error: error.message,
        provider: options.provider,
        promptLength: prompt.length
      });
      throw new Error(`AI文本生成失败: ${error.message}`);
    }
  }

  /**
   * Calls the OpenAI chat completions API.
   * @returns {Promise<object>} { text, usage, finishReason }, or { stream } when streaming.
   */
  async callOpenAI(prompt, options) {
    const client = this.clients.get('openai');
    if (!client) throw new Error('OpenAI客户端未初始化');
    const { model, maxTokens, temperature, stream = false } = options;
    const response = await client.chat.completions.create({
      model,
      messages: [{ role: 'user', content: prompt }],
      max_tokens: maxTokens,
      temperature,
      stream
    });
    if (stream) {
      // Caller is responsible for consuming the async iterator.
      return { stream: response };
    }
    return {
      text: response.choices[0].message.content,
      usage: response.usage,
      finishReason: response.choices[0].finish_reason
    };
  }

  /**
   * Calls the Anthropic messages API.
   * @returns {Promise<object>} { text, usage, stopReason }
   */
  async callAnthropic(prompt, options) {
    const client = this.clients.get('anthropic');
    if (!client) throw new Error('Anthropic客户端未初始化');
    const { model = 'claude-3-sonnet-20240229', maxTokens, temperature } = options;
    const response = await client.messages.create({
      model,
      max_tokens: maxTokens,
      temperature,
      messages: [{ role: 'user', content: prompt }]
    });
    return {
      text: response.content[0].text,
      usage: response.usage,
      stopReason: response.stop_reason
    };
  }

  /**
   * Calls the Google Generative AI (Gemini) API.
   * @returns {Promise<object>} { text, usage, finishReason }
   */
  async callGoogle(prompt, options) {
    const client = this.clients.get('google');
    if (!client) throw new Error('Google AI客户端未初始化');
    const { model = 'gemini-pro', temperature } = options;
    const genModel = client.getGenerativeModel({ model });
    const result = await genModel.generateContent({
      contents: [{ parts: [{ text: prompt }] }],
      generationConfig: {
        temperature,
        maxOutputTokens: options.maxTokens
      }
    });
    const response = await result.response;
    return {
      text: response.text(),
      usage: response.usageMetadata,
      finishReason: response.candidates[0].finishReason
    };
  }

  /**
   * Generates one or more images (currently OpenAI DALL·E only).
   * @returns {Promise<object>} { images: [{ url, revisedPrompt }], provider, model, responseTime }
   */
  async generateImage(prompt, options = {}) {
    const startTime = Date.now();
    try {
      const {
        provider = 'openai',
        model = 'dall-e-3',
        size = '1024x1024',
        quality = 'standard',
        style = 'vivid',
        n = 1
      } = options;
      console.log(`\n🎨 调用${provider}生成图像...`);
      console.log(`模型: ${model}, 尺寸: ${size}`);
      let result;
      switch (provider) {
        case 'openai': { // braces scope the lexical declarations to this case
          const client = this.clients.get('openai');
          if (!client) throw new Error('OpenAI客户端未初始化');
          const response = await client.images.generate({
            model,
            prompt,
            n,
            size,
            quality,
            style
          });
          result = {
            images: response.data.map(img => ({
              url: img.url,
              revisedPrompt: img.revised_prompt
            })),
            usage: response.usage // NOTE(review): images API responses may not carry usage — verify against SDK
          };
          break;
        }
        default:
          throw new Error(`不支持的图像生成提供商: ${provider}`);
      }
      const responseTime = Date.now() - startTime;
      logger.info(`图像生成成功 - 提供商: ${provider}, 响应时间: ${responseTime}ms`);
      return {
        ...result,
        provider,
        model,
        responseTime
      };
    } catch (error) {
      logger.error('图像生成失败:', error);
      throw new Error(`图像生成失败: ${error.message}`);
    }
  }

  /**
   * Analyzes an image with a vision-capable chat model (OpenAI only).
   * @param {string} imageUrl - Publicly reachable image URL (or data URL).
   * @param {string} prompt - Question/instruction about the image.
   * @returns {Promise<object>} { analysis, usage, provider, model, responseTime }
   */
  async analyzeImage(imageUrl, prompt, options = {}) {
    const startTime = Date.now();
    try {
      const {
        provider = 'openai',
        model = 'gpt-4-vision-preview',
        maxTokens = 1000
      } = options;
      console.log(`\n👁️ 调用${provider}分析图像...`);
      let result;
      switch (provider) {
        case 'openai': {
          const client = this.clients.get('openai');
          if (!client) throw new Error('OpenAI客户端未初始化');
          const response = await client.chat.completions.create({
            model,
            messages: [
              {
                role: 'user',
                content: [
                  { type: 'text', text: prompt },
                  { type: 'image_url', image_url: { url: imageUrl } }
                ]
              }
            ],
            max_tokens: maxTokens
          });
          result = {
            analysis: response.choices[0].message.content,
            usage: response.usage
          };
          break;
        }
        default:
          throw new Error(`不支持的图像分析提供商: ${provider}`);
      }
      const responseTime = Date.now() - startTime;
      logger.info(`图像分析成功 - 响应时间: ${responseTime}ms`);
      return {
        ...result,
        provider,
        model,
        responseTime
      };
    } catch (error) {
      logger.error('图像分析失败:', error);
      throw new Error(`图像分析失败: ${error.message}`);
    }
  }

  /**
   * Builds a deterministic cache key from the prompt and the options that
   * influence the output.
   * Bug fix: the previous implementation truncated the prompt to its first
   * 100 characters, so different prompts sharing a prefix collided and could
   * be served each other's cached responses. The full prompt is now hashed.
   */
  generateCacheKey(prompt, options) {
    const crypto = require('crypto');
    const keyData = {
      prompt,
      provider: options.provider,
      model: options.model,
      temperature: options.temperature,
      maxTokens: options.maxTokens
    };
    return crypto.createHash('sha256').update(JSON.stringify(keyData)).digest('base64');
  }

  /**
   * Starts an hourly sweep that evicts expired cache entries. The timer is
   * unref'd so it does not keep the Node.js process alive on its own.
   */
  setupCacheCleanup() {
    const timer = setInterval(() => {
      const now = Date.now();
      let cleanedCount = 0;
      for (const [key, value] of this.cache.entries()) {
        if (now - value.timestamp > this.config.cacheExpiry) {
          this.cache.delete(key);
          cleanedCount++;
        }
      }
      if (cleanedCount > 0) {
        logger.info(`清理了 ${cleanedCount} 个过期缓存项`);
      }
    }, 3600000); // hourly
    if (typeof timer.unref === 'function') {
      timer.unref(); // bug fix: the interval previously kept the process alive forever
    }
  }

  /**
   * Returns a snapshot of request counters, cache size and available providers.
   */
  getStats() {
    return {
      ...this.requestStats,
      cacheSize: this.cache.size,
      availableProviders: Array.from(this.clients.keys()),
      cacheHitRate: this.requestStats.total > 0 ?
        (this.requestStats.cacheHits / this.requestStats.total * 100).toFixed(2) + '%' : '0%'
    };
  }

  /** Empties the response cache. */
  clearCache() {
    this.cache.clear();
    logger.info('缓存已清空');
  }

  /**
   * Probes every initialized provider with a tiny uncached request.
   * @returns {Promise<object>} map of provider -> { status, responseTime | error }
   */
  async healthCheck() {
    const results = {};
    for (const provider of this.clients.keys()) {
      try {
        const startTime = Date.now();
        // Minimal probe request; bypass the cache so it actually hits the API.
        await this.generateText('Hello', {
          provider,
          maxTokens: 10,
          useCache: false
        });
        results[provider] = {
          status: 'healthy',
          responseTime: Date.now() - startTime
        };
      } catch (error) {
        results[provider] = {
          status: 'unhealthy',
          error: error.message
        };
      }
    }
    return results;
  }

  /**
   * Writes current statistics (plus cache entry ages) to a timestamped file.
   * @param {string} [format='json'] - Only 'json' actually writes a file.
   * @returns {string} the generated file name.
   */
  exportStats(format = 'json') {
    const stats = {
      timestamp: new Date().toISOString(),
      ...this.getStats(),
      cacheEntries: Array.from(this.cache.entries()).map(([key, value]) => ({
        key,
        timestamp: new Date(value.timestamp).toISOString(),
        age: Date.now() - value.timestamp
      }))
    };
    const filename = `ai_client_stats_${Date.now()}.${format}`;
    if (format === 'json') {
      fs.writeFileSync(filename, JSON.stringify(stats, null, 2));
    }
    // Bug fix: the original logged the literal text "$(unknown)" here instead
    // of interpolating the generated file name.
    console.log(`📊 统计数据已导出到: ${filename}`);
    return filename;
  }
}
// 使用示例
// Shared manager instance used by every demo function in this section.
const aiManagerOptions = {
  defaultProvider: 'openai',
  enableCache: true,
  timeout: 30000
};
const aiManager = new AIClientManager(aiManagerOptions);
// 文本生成示例
/**
 * Demo: generate text with OpenAI, then (only if an Anthropic key is
 * configured) with Claude, and finally print aggregate client statistics.
 */
async function textGenerationDemo() {
  console.log('🚀 AI文本生成演示\n');
  // Shared printer so both replies are formatted identically.
  const printReply = (label, reply) => {
    console.log(`${label} 回复:`);
    console.log(reply.text);
    console.log(`响应时间: ${reply.responseTime}ms\n`);
  };
  try {
    const gptResult = await aiManager.generateText(
      '请用Node.js编写一个简单的HTTP服务器',
      { provider: 'openai', model: 'gpt-3.5-turbo', maxTokens: 500, temperature: 0.7 }
    );
    printReply('GPT-3.5', gptResult);
    if (process.env.ANTHROPIC_API_KEY) {
      const claudeResult = await aiManager.generateText(
        '解释一下什么是RESTful API',
        { provider: 'anthropic', model: 'claude-3-sonnet-20240229', maxTokens: 300, temperature: 0.5 }
      );
      printReply('Claude', claudeResult);
    }
    const { total, success, cacheHitRate, availableProviders } = aiManager.getStats();
    console.log('📊 AI客户端统计:');
    console.log(`总请求数: ${total}`);
    console.log(`成功请求: ${success}`);
    console.log(`缓存命中率: ${cacheHitRate}`);
    console.log(`可用提供商: ${availableProviders.join(', ')}`);
  } catch (error) {
    console.error('演示失败:', error.message);
  }
}
// 图像生成示例
/** Demo: generate one DALL·E 3 image and print its URL and revised prompt. */
async function imageGenerationDemo() {
  console.log('\n🎨 AI图像生成演示\n');
  const prompt = 'A beautiful sunset over a mountain landscape, digital art style';
  const requestOptions = {
    provider: 'openai',
    model: 'dall-e-3',
    size: '1024x1024',
    quality: 'standard'
  };
  try {
    const { images, responseTime } = await aiManager.generateImage(prompt, requestOptions);
    const [firstImage] = images;
    console.log('图像生成成功:');
    console.log(`图像URL: ${firstImage.url}`);
    console.log(`修订提示: ${firstImage.revisedPrompt}`);
    console.log(`响应时间: ${responseTime}ms`);
  } catch (error) {
    console.error('图像生成失败:', error.message);
  }
}
// 健康检查示例
/** Demo: probe every initialized provider and print each outcome. */
async function healthCheckDemo() {
  console.log('\n🏥 AI服务健康检查\n');
  const healthStatus = await aiManager.healthCheck();
  for (const [provider, status] of Object.entries(healthStatus)) {
    console.log(`${provider}: ${status.status}`);
    if (status.responseTime) {
      console.log(` 响应时间: ${status.responseTime}ms`);
    }
    if (status.error) {
      console.log(` 错误: ${status.error}`);
    }
  }
}
// 运行演示
// When executed directly (not required as a module), run every demo in
// sequence and finish by exporting the collected statistics.
if (require.main === module) {
  const runAllDemos = async () => {
    await textGenerationDemo();
    await imageGenerationDemo();
    await healthCheckDemo();
    aiManager.exportStats();
  };
  runAllDemos().catch(console.error);
}
module.exports = { AIClientManager };
🔄 流式响应处理
实时流式文本生成
const EventEmitter = require('events');
/**
 * EventEmitter wrapper around AIClientManager for streaming text generation.
 * Emits 'chunk' for every content delta, 'complete' once the stream ends and
 * 'error' on failure; optional onChunk/onComplete/onError callbacks mirror
 * the events.
 */
class StreamingAIClient extends EventEmitter {
  /** @param {object} aiManager - manager exposing generateText({ stream: true }). */
  constructor(aiManager) {
    super();
    this.aiManager = aiManager;
  }

  /**
   * Streams a text generation, echoing content deltas to stdout as they arrive.
   * @param {string} prompt
   * @param {object} [options] - generateText options plus onChunk/onComplete/onError callbacks.
   * @returns {Promise<object>} { fullText, chunks, finishReason, totalChunks }
   * @throws {Error} when the provider fails or does not support streaming.
   */
  async streamText(prompt, options = {}) {
    const { onChunk = null, onComplete = null, onError = null } = options;
    console.log(`\n🌊 开始流式文本生成...`);
    try {
      // Bug fix: explicitly disable caching — a consumed stream can never be
      // replayed, so a cached stream result would be unusable.
      const result = await this.aiManager.generateText(prompt, {
        ...options,
        stream: true,
        useCache: false
      });
      if (!result.stream) {
        // Only providers that return a `stream` handle support streaming.
        throw new Error(`当前提供商不支持流式响应`);
      }
      let fullText = '';
      let finishReason = null;
      const chunks = [];
      for await (const chunk of result.stream) {
        const content = chunk.choices[0]?.delta?.content || '';
        if (content) {
          fullText += content;
          chunks.push({
            content,
            timestamp: Date.now(),
            index: chunks.length
          });
          this.emit('chunk', {
            content,
            fullText,
            chunkIndex: chunks.length - 1
          });
          if (onChunk) {
            onChunk(content, fullText);
          }
          // Echo to the terminal in real time.
          process.stdout.write(content);
        }
        if (chunk.choices[0]?.finish_reason) {
          finishReason = chunk.choices[0].finish_reason;
        }
      }
      // Bug fix: completion used to be handled only when the final chunk
      // carried finish_reason; a stream ending without one returned undefined
      // and never emitted 'complete'. Completion now always runs after the loop.
      console.log('\n\n✅ 流式生成完成');
      const completeResult = {
        fullText,
        chunks,
        finishReason,
        totalChunks: chunks.length
      };
      this.emit('complete', completeResult);
      if (onComplete) {
        onComplete(completeResult);
      }
      return completeResult;
    } catch (error) {
      console.error('\n❌ 流式生成失败:', error.message);
      this.emit('error', error);
      if (onError) {
        onError(error);
      }
      throw error;
    }
  }

  /**
   * Streams a generation directly into a file on disk; the write stream is
   * destroyed on any failure so no half-open handle is leaked.
   */
  async streamToFile(prompt, filePath, options = {}) {
    const fs = require('fs');
    const writeStream = fs.createWriteStream(filePath);
    console.log(`\n📝 流式写入文件: ${filePath}`);
    try {
      await this.streamText(prompt, {
        ...options,
        onChunk: (content) => {
          writeStream.write(content);
        },
        onComplete: () => {
          writeStream.end();
          console.log(`✅ 文件写入完成: ${filePath}`);
        },
        onError: (error) => {
          writeStream.destroy();
          console.error(`❌ 文件写入失败: ${error.message}`);
        }
      });
    } catch (error) {
      writeStream.destroy();
      throw error;
    }
  }
}
// 使用示例
/** Demo: wire a StreamingAIClient to the shared manager and stream one answer. */
async function streamingDemo() {
  const streamClient = new StreamingAIClient(aiManager);
  // Per-chunk and completion reporting via events.
  streamClient
    .on('chunk', ({ chunkIndex, content }) => {
      console.log(`\n[Chunk ${chunkIndex}] 长度: ${content.length}`);
    })
    .on('complete', ({ fullText, totalChunks, finishReason }) => {
      console.log(`\n📊 生成统计:`);
      console.log(`总长度: ${fullText.length} 字符`);
      console.log(`总块数: ${totalChunks}`);
      console.log(`完成原因: ${finishReason}`);
    });
  try {
    await streamClient.streamText(
      '请详细解释Node.js的事件循环机制,包括各个阶段的作用',
      { provider: 'openai', model: 'gpt-3.5-turbo', maxTokens: 1000, temperature: 0.7 }
    );
  } catch (error) {
    console.error('流式演示失败:', error.message);
  }
}
📊 数据处理与预处理
文档处理工具
const fs = require('fs');
const path = require('path');
const pdfParse = require('pdf-parse');
const sharp = require('sharp');
/**
 * Normalized processor for text, PDF and image documents. Produces a common
 * result envelope per file, supports batched directory processing and can
 * build a simple overlapping-chunk search index from processed documents.
 */
class DocumentProcessor {
  constructor() {
    // Extension whitelist per category. Office formats are listed for
    // discovery purposes but have no handler yet, so processDocument
    // rejects them with an "unsupported format" error.
    this.supportedFormats = {
      text: ['.txt', '.md', '.json'],
      pdf: ['.pdf'],
      image: ['.jpg', '.jpeg', '.png', '.webp', '.gif'],
      office: ['.docx', '.xlsx', '.pptx']
    };
  }

  /**
   * Processes one file, dispatching on its extension. Never throws for
   * handler failures: those are reported via the returned `error` field.
   * @param {string} filePath
   * @param {object} [options] - forwarded to the format-specific handler.
   * @returns {Promise<object>} envelope with fileName/fileSize/content/error.
   */
  async processDocument(filePath, options = {}) {
    const ext = path.extname(filePath).toLowerCase();
    const stats = fs.statSync(filePath);
    console.log(`\n📄 处理文档: ${path.basename(filePath)}`);
    console.log(`文件大小: ${(stats.size / 1024).toFixed(2)} KB`);
    const result = {
      filePath,
      fileName: path.basename(filePath),
      fileSize: stats.size,
      extension: ext,
      processedAt: new Date().toISOString(),
      content: null,
      metadata: {},
      error: null
    };
    try {
      if (this.supportedFormats.text.includes(ext)) {
        result.content = await this.processTextFile(filePath, options);
      } else if (this.supportedFormats.pdf.includes(ext)) {
        result.content = await this.processPDFFile(filePath, options);
      } else if (this.supportedFormats.image.includes(ext)) {
        result.content = await this.processImageFile(filePath, options);
      } else {
        throw new Error(`不支持的文件格式: ${ext}`);
      }
      console.log(`✅ 文档处理完成`);
      console.log(`内容长度: ${result.content?.text?.length || 0} 字符`);
    } catch (error) {
      result.error = error.message;
      console.error(`❌ 文档处理失败: ${error.message}`);
    }
    return result;
  }

  /**
   * Reads a plain-text file and returns it with simple statistics.
   * @throws {Error} when the file exceeds maxSize bytes (default 10 MB).
   */
  async processTextFile(filePath, options = {}) {
    const { encoding = 'utf8', maxSize = 10 * 1024 * 1024 } = options;
    // Bug fix: check the size BEFORE reading so oversized files are rejected
    // without first being loaded into memory.
    const { size } = fs.statSync(filePath);
    if (size > maxSize) {
      throw new Error(`文件过大: ${size} > ${maxSize}`);
    }
    const content = fs.readFileSync(filePath, encoding);
    return {
      text: content,
      wordCount: content.split(/\s+/).length,
      lineCount: content.split('\n').length,
      encoding
    };
  }

  /**
   * Extracts text and metadata from a PDF via pdf-parse.
   * @throws {Error} when the page count exceeds maxPages (default 100).
   */
  async processPDFFile(filePath, options = {}) {
    const { maxPages = 100 } = options;
    const dataBuffer = fs.readFileSync(filePath);
    const pdfData = await pdfParse(dataBuffer);
    if (pdfData.numpages > maxPages) {
      throw new Error(`PDF页数过多: ${pdfData.numpages} > ${maxPages}`);
    }
    return {
      text: pdfData.text,
      pageCount: pdfData.numpages,
      wordCount: pdfData.text.split(/\s+/).length,
      metadata: pdfData.metadata,
      info: pdfData.info
    };
  }

  /**
   * Loads an image via sharp, optionally resizes/re-encodes it, and returns
   * it as a base64 data URL plus source metadata.
   */
  async processImageFile(filePath, options = {}) {
    const {
      resize = null,
      format = 'jpeg',
      quality = 80,
      extractText = false
    } = options;
    const image = sharp(filePath);
    const metadata = await image.metadata();
    let processedImage = image;
    // Optional downscale; never enlarges past the source dimensions.
    if (resize) {
      processedImage = processedImage.resize(resize.width, resize.height, {
        fit: 'inside',
        withoutEnlargement: true
      });
    }
    // Re-encode in the requested format/quality.
    if (format === 'jpeg') {
      processedImage = processedImage.jpeg({ quality });
    } else if (format === 'png') {
      processedImage = processedImage.png({ quality });
    }
    const buffer = await processedImage.toBuffer();
    const base64 = buffer.toString('base64');
    const result = {
      base64: `data:image/${format};base64,${base64}`,
      metadata: {
        width: metadata.width,
        height: metadata.height,
        format: metadata.format,
        size: metadata.size, // NOTE(review): sharp may leave this undefined for some inputs — verify
        density: metadata.density
      },
      processedSize: buffer.length
    };
    // OCR extraction would need an extra library (e.g. Tesseract.js);
    // kept as a placeholder so the result shape is stable.
    if (extractText) {
      result.extractedText = '';
    }
    return result;
  }

  /**
   * Processes every supported file under a directory in fixed-size parallel
   * batches.
   * @returns {Promise<object>} { totalFiles, successCount, errorCount, results, errors }
   */
  async batchProcess(directory, options = {}) {
    const {
      recursive = false,
      filePattern = null,
      maxFiles = 100,
      parallel = 3
    } = options;
    console.log(`\n📁 批量处理目录: ${directory}`);
    const files = this.findFiles(directory, { recursive, filePattern, maxFiles });
    console.log(`找到 ${files.length} 个文件`);
    const results = [];
    const errors = [];
    for (let i = 0; i < files.length; i += parallel) {
      const batch = files.slice(i, i + parallel);
      const batchResults = await Promise.all(
        batch.map((file) => this.processDocument(file, options))
      );
      // Bug fix: processDocument reports failures via result.error instead of
      // throwing, so failed documents previously counted as successes and the
      // errors list always stayed empty.
      for (const result of batchResults) {
        if (result.error) {
          errors.push({ file: result.filePath, error: result.error });
        } else {
          results.push(result);
        }
      }
      console.log(`已处理 ${Math.min(i + parallel, files.length)}/${files.length} 个文件`);
    }
    return {
      totalFiles: files.length,
      successCount: results.length,
      errorCount: errors.length,
      results,
      errors
    };
  }

  /**
   * Collects up to maxFiles supported files from a directory (optionally
   * recursive, optionally filtered by a filename pattern).
   * @returns {string[]} absolute/relative file paths in discovery order.
   */
  findFiles(directory, options = {}) {
    const { recursive = false, filePattern = null, maxFiles = 100 } = options;
    const files = [];
    const scanDirectory = (dir) => {
      if (files.length >= maxFiles) return;
      const items = fs.readdirSync(dir);
      for (const item of items) {
        if (files.length >= maxFiles) break;
        const fullPath = path.join(dir, item);
        const stat = fs.statSync(fullPath);
        if (stat.isDirectory() && recursive) {
          scanDirectory(fullPath);
        } else if (stat.isFile()) {
          const ext = path.extname(item).toLowerCase();
          const isSupported = Object.values(this.supportedFormats)
            .flat()
            .includes(ext);
          if (isSupported && (!filePattern || item.match(filePattern))) {
            files.push(fullPath);
          }
        }
      }
    };
    scanDirectory(directory);
    return files;
  }

  /**
   * Builds a flat search index: one entry per document plus one entry per
   * overlapping text chunk.
   * @param {object[]} documents - results from processDocument/batchProcess.
   * @returns {Promise<object>} { documents, chunks, metadata }
   */
  async createSearchIndex(documents, options = {}) {
    const {
      chunkSize = 1000,
      overlap = 200,
      minChunkSize = 100
    } = options;
    console.log(`\n🔍 创建搜索索引...`);
    const index = {
      documents: [],
      chunks: [],
      metadata: {
        createdAt: new Date().toISOString(),
        totalDocuments: documents.length,
        totalChunks: 0,
        options
      }
    };
    for (const doc of documents) {
      // Documents without extracted text (e.g. images) are skipped.
      if (!doc.content?.text) continue;
      const docIndex = {
        id: doc.fileName,
        filePath: doc.filePath,
        fileSize: doc.fileSize,
        processedAt: doc.processedAt,
        chunkCount: 0
      };
      const chunks = this.chunkText(doc.content.text, {
        chunkSize,
        overlap,
        minChunkSize
      });
      chunks.forEach((chunk, chunkIndex) => {
        index.chunks.push({
          id: `${doc.fileName}_${chunkIndex}`,
          documentId: doc.fileName,
          chunkIndex,
          text: chunk,
          length: chunk.length,
          wordCount: chunk.split(/\s+/).length
        });
      });
      docIndex.chunkCount = chunks.length;
      index.documents.push(docIndex);
    }
    index.metadata.totalChunks = index.chunks.length;
    console.log(`✅ 索引创建完成`);
    console.log(`文档数: ${index.documents.length}`);
    console.log(`块数: ${index.chunks.length}`);
    return index;
  }

  /**
   * Splits text into overlapping chunks, preferring sentence ('.') or
   * paragraph ('\n') boundaries.
   * Bug fix: the original rewound `start = end - overlap` unconditionally;
   * when a boundary landed within `overlap` characters of the chunk start,
   * `start` moved backwards (even negative) and the loop never terminated.
   * The rewind is now applied only when it still moves forward.
   */
  chunkText(text, options = {}) {
    const { chunkSize = 1000, overlap = 200, minChunkSize = 100 } = options;
    const chunks = [];
    let start = 0;
    while (start < text.length) {
      let end = start + chunkSize;
      // For non-final chunks, try to end on a sentence/paragraph boundary.
      if (end < text.length) {
        const sentenceEnd = text.lastIndexOf('.', end);
        const paragraphEnd = text.lastIndexOf('\n', end);
        const boundary = Math.max(sentenceEnd, paragraphEnd);
        if (boundary > start + minChunkSize) {
          end = boundary + 1;
        }
      }
      const chunk = text.slice(start, end).trim();
      if (chunk.length >= minChunkSize) {
        chunks.push(chunk);
      }
      // Rewind by `overlap` only when that still makes forward progress;
      // otherwise continue from `end` to guarantee termination.
      const next = end - overlap;
      start = next > start ? next : end;
    }
    return chunks;
  }

  /** Serializes an index to disk as pretty-printed JSON. */
  async saveIndex(index, filePath) {
    fs.writeFileSync(filePath, JSON.stringify(index, null, 2));
    console.log(`💾 索引已保存到: ${filePath}`);
  }

  /** Loads a previously saved index from disk. */
  async loadIndex(filePath) {
    const data = fs.readFileSync(filePath, 'utf8');
    const index = JSON.parse(data);
    console.log(`📖 索引已加载: ${index.metadata.totalDocuments} 文档, ${index.metadata.totalChunks} 块`);
    return index;
  }
}
// 使用示例
// Shared processor instance for the demo below.
const docProcessor = new DocumentProcessor();

/** Demo: single-file processing, batch processing, then index building. */
async function documentProcessingDemo() {
  console.log('📚 文档处理演示\n');
  try {
    // Single document.
    const single = await docProcessor.processDocument('./example.pdf', { maxPages: 50 });
    if (single.content) {
      console.log(`文档内容预览: ${single.content.text.substring(0, 200)}...`);
    }
    // Whole directory in parallel batches.
    const { successCount, errorCount, results } = await docProcessor.batchProcess('./documents', {
      recursive: true,
      maxFiles: 10,
      parallel: 2
    });
    console.log(`\n批量处理结果:`);
    console.log(`成功: ${successCount}`);
    console.log(`失败: ${errorCount}`);
    // Build and persist a search index when anything succeeded.
    if (results.length > 0) {
      const index = await docProcessor.createSearchIndex(results, {
        chunkSize: 800,
        overlap: 100
      });
      await docProcessor.saveIndex(index, './search_index.json');
    }
  } catch (error) {
    console.error('文档处理演示失败:', error.message);
  }
}
module.exports = { DocumentProcessor };
🎯 学习检验
理论理解检验
- 环境配置:能否正确配置Node.js AI开发环境?
- API集成:能否理解不同AI服务的API调用方式?
- 数据处理:能否实现文档的预处理和格式转换?
- 性能优化:能否理解缓存、并发等优化策略?
实践能力检验
- 客户端开发:能否开发统一的AI客户端管理器?
- 流式处理:能否实现实时的流式响应处理?
- 文档处理:能否构建完整的文档处理工具?
- 错误处理:能否实现健壮的错误处理和重试机制?
🚀 实践项目建议
基础实战项目
- AI聊天应用:构建支持多个AI模型的聊天应用
- 文档问答系统:基于文档内容的智能问答系统
- 代码生成工具:AI驱动的代码生成和优化工具
- 内容创作助手:多功能的AI内容创作工具
高级综合项目
- 企业AI平台:企业级的AI服务集成平台
- 多模态AI应用:支持文本、图像、语音的综合AI应用
- AI开发框架:可复用的Node.js AI开发框架
- 智能数据分析:AI驱动的数据分析和可视化平台
📚 延伸阅读
技术文档
- "Node.js AI Development Guide" - Node.js AI开发指南
- "OpenAI API Documentation" - OpenAI API官方文档
- "Building AI Applications with JavaScript" - JavaScript AI应用开发
- "LangChain.js Documentation" - LangChain JavaScript版本文档
开源项目
- LangChain.js - JavaScript AI应用开发框架
- Vercel AI SDK - AI应用开发工具包
- OpenAI Node.js Library - OpenAI官方Node.js库
- Hugging Face Transformers.js - 浏览器端AI模型库
💡 学习提示:Node.js AI应用开发需要掌握异步编程、API集成、数据处理等多项技能。建议从简单的API调用开始,逐步构建复杂的AI应用。重视错误处理和性能优化,关注AI服务的成本控制。实践中要注意API密钥的安全管理,合理使用缓存和批处理来提高效率。