From 077656a6cf537ab72e02d985dc12eb142a8d7642 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=81=93=E7=AB=A5?= Date: Sun, 5 Apr 2026 12:20:57 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E4=B8=89=E4=B8=AA?= =?UTF-8?q?=E9=87=8D=E8=A6=81=E8=A1=A5=E4=B8=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 补丁 1: 分块读取与流式传输 - 8KB 分块读取大文件,避免内存飙升 - 流式计算文件哈希,无需加载完整内容 - 差异对比限制,大文件只显示头尾各 500 行 - 新增 chunked 参数支持流式传输 补丁 2: .lobsterignore 机制 - 创建 IgnorePattern 类实现模式匹配 - 支持 .lobsterignore 文件配置 - 添加默认忽略规则(.DS_Store, node_modules 等) - 支持通配符匹配(*, ?, 目录匹配) - 新增 API: GET /api/ignore/patterns/, POST /api/ignore/reload/ 补丁 3: 操作溯源(Audit Log) - 新增 SyncHistory 模型记录同步历史 - 创建 AuditLogger 类用于记录操作 - 所有同步操作自动记录日志 - 记录操作者、版本变化、哈希变化、执行时间等 - 新增 API: GET /api/history/ 更新内容: - models.py: 新增 SyncHistory 模型 - services.py: 新增 IgnorePattern, AuditLogger, 分块读取方法 - views.py: 所有同步操作添加日志记录, 新增历史和忽略规则接口 - serializers.py: 新增 SyncHistorySerializer - urls.py: 新增历史和忽略规则路由 - .lobsterignore.example: 示例忽略文件 - CHANGELOG.md: 详细更新日志 --- .lobsterignore.example | 71 ++++++ CHANGELOG.md | 376 ++++++++++++++++++++++++++++++ backend/memory_app/models.py | 66 +++++- backend/memory_app/serializers.py | 31 ++- backend/memory_app/services.py | 328 +++++++++++++++++++++++--- backend/memory_app/urls.py | 7 + backend/memory_app/views.py | 171 ++++++++++++-- 7 files changed, 1007 insertions(+), 43 deletions(-) create mode 100644 .lobsterignore.example create mode 100644 CHANGELOG.md diff --git a/.lobsterignore.example b/.lobsterignore.example new file mode 100644 index 0000000..f5400de --- /dev/null +++ b/.lobsterignore.example @@ -0,0 +1,71 @@ +# Lobster 记忆忽略文件示例 +# 类似 .gitignore,用于排除不需要同步的文件 + +# 系统文件 +.DS_Store +.DS_Store? +._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db + +# IDE 和编辑器 +.vscode/ +.idea/ +*.swp +*.swo +*~ +.project +.classpath +.settings/ + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +*.egg-info/ +dist/ +build/ +.pytest_cache/ + +# Node.js +node_modules/ +npm-debug.log* +yarn-debug.log* +yarn-error.log* +.pnpm-debug.log* + +# 日志文件(根据需要调整) +*.log +logs/ +*.log.* + +# 临时文件 +*.tmp +*.temp +*.bak +*.cache/ + +# 大文件(可选) +*.zip +*.tar +*.tar.gz +*.rar +*.7z + +# 敏感文件 +.env +*.env.local +secrets/ +*.pem +*.key + +# 其他 +.git/ +.gitignore +README.md +CHANGELOG.md +LICENSE \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..f654ffd --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,376 @@ +# 🎯 三个"补丁"更新日志 + +## 更新时间 +2026-04-05 + +## 更新说明 + +根据逍遥子的建议,为龙虾记忆同步系统添加了三个重要功能补丁,提升系统性能、可用性和安全性。 + +--- + +## 📦 补丁 1: 分块读取与流式传输 + +### 问题 +- 如果龙虾的记忆文件(比如某些 Log 或向量快照)超过 50MB +- 一次性 GET /api/diff 会让后端内存瞬间飙升 + +### 解决方案 +- **流式读取**:使用 8KB 分块读取大文件,避免一次性加载到内存 +- **流式哈希计算**:直接从文件流计算哈希,无需加载完整内容 +- **差异对比限制**:大文件只显示头尾各 500 行,中间省略 + +### 实现细节 +```python +# services.py +class FileScanner: + chunk_size = 8192 # 8KB 分块读取 + + def read_file_chunked(self, file_path: Path) -> str: + """分块读取文件""" + content_parts = [] + with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: + while True: + chunk = f.read(self.chunk_size) + if not chunk: + break + content_parts.append(chunk) + return ''.join(content_parts) + + def read_file_stream(self, file_path: str) -> Iterator[str]: + """流式读取文件(用于大文件传输)""" + with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: + while True: + chunk = f.read(self.chunk_size) + if not chunk: + break + yield chunk + + def compute_hash_stream(self, file_path: Path) -> str: + """流式计算文件哈希(避免大文件内存问题)""" + hash_obj = hashlib.sha256() + with open(file_path, 'rb') as f: + while True: + chunk = f.read(self.chunk_size) + if not chunk: + break + hash_obj.update(chunk) + return hash_obj.hexdigest() + +class DiffChecker: + def get_file_diff(self, local_content: str, db_content: str, max_lines: int = 1000) -> Dict: + """获取文件差异(支持大文件限制)""" + local_lines = local_content.split('\n') + db_lines = db_content.split('\n') + + # 限制行数(大文件只显示头尾) + if len(local_lines) > max_lines: + local_head = local_lines[:max_lines//2] + local_tail = local_lines[-max_lines//2:] + local_lines = local_head + ['... (中间省略 {}) 行 ...'.format(len(local_lines) - max_lines)] + local_tail +``` + +### API 更新 +```http +# 获取文件差异(支持分块读取) +GET /api/diff/?lobster_id=daotong&file_path=large-file.log&chunked=true +``` + +--- + +## 📦 补丁 2: .lobsterignore 机制 + +### 问题 +- 临时文件(如 .DS_Store、日志缓存)不需要进数据库 +- 手动维护一个排除列表会更清爽 + +### 解决方案 +- 创建 `.lobsterignore` 文件(类似 `.gitignore`) +- 扫描时自动跳过匹配的文件 +- 提供默认忽略规则 + +### 实现细节 +```python +# services.py +class IgnorePattern: + """.lobsterignore 模式匹配器""" + + def __init__(self, base_dir: Path): + self.base_dir = base_dir + self.patterns = [] + self.load_patterns() + + def load_patterns(self): + """加载 .lobsterignore 文件""" + ignore_file = self.base_dir / '.lobsterignore' + + if ignore_file.exists(): + with open(ignore_file, 'r', encoding='utf-8') as f: + for line in f: + line = line.strip() + # 跳过空行和注释 + if line and not line.startswith('#'): + self.patterns.append(line) + + # 添加默认忽略规则 + default_patterns = [ + '.DS_Store', '.git', '.gitignore', '__pycache__', + 'node_modules', '*.pyc', '*.pyo', '*.log', + '*.tmp', '*.temp', '*.bak', '.vscode', '.idea' + ] + for pattern in default_patterns: + if pattern not in self.patterns: + self.patterns.append(pattern) + + def is_ignored(self, file_path: Path) -> bool: + """判断文件是否被忽略""" + relative_path = file_path.relative_to(self.base_dir) + + for pattern in self.patterns: + # 匹配文件名 + if fnmatch.fnmatch(file_path.name, pattern): + return True + + # 匹配相对路径 + if fnmatch.fnmatch(str(relative_path), pattern): + return True + + # 匹配目录 + if pattern.endswith('/') and fnmatch.fnmatch(str(relative_path.parent), pattern.rstrip('/')): + return True + + # 递归匹配子目录 + if pattern.startswith('*/'): + parts = str(relative_path).split(os.sep) + for i, part in enumerate(parts): + if fnmatch.fnmatch(part, pattern[2:]): + return True + + return False +``` + +### 示例文件 +```bash +# .lobsterignore +# 系统文件 +.DS_Store +.Thumbs.db + +# IDE 和编辑器 +.vscode/ +.idea/ +*.swp + +# Python +__pycache__/ +*.pyc +*.log + +# Node.js +node_modules/ + +# 临时文件 +*.tmp +*.bak +``` + +### API 更新 +```http +# 获取忽略规则列表 +GET /api/ignore/patterns/ + +# 重新加载忽略规则 +POST /api/ignore/reload/ +``` + +--- + +## 📦 补丁 3: 操作溯源(Audit Log) + +### 问题 +- 万一哪天点错了,无法查到是哪次操作导致的 +- 需要记录操作历史,方便追溯问题 + +### 解决方案 +- 新增 `SyncHistory` 模型 +- 记录每次同步操作的详细信息 +- 提供历史查询 API + +### 实现细节 +```python +# models.py +class SyncHistory(models.Model): + """同步操作历史记录""" + + ACTION_CHOICES = [ + ('sync_to_db', '同步到数据库'), + ('sync_to_local', '同步到本地'), + ('auto_sync', '自动同步'), + ('manual_merge', '手动合并'), + ] + + STATUS_CHOICES = [ + ('success', '成功'), + ('failed', '失败'), + ('partial', '部分成功'), + ] + + lobster_id = models.CharField(max_length=50, help_text='龙虾ID') + file_path = models.CharField(max_length=500, help_text='文件相对路径') + action = models.CharField(max_length=20, choices=ACTION_CHOICES, help_text='操作类型') + status = models.CharField(max_length=20, choices=STATUS_CHOICES, help_text='操作状态') + old_version = models.IntegerField(null=True, blank=True, help_text='操作前版本') + new_version = models.IntegerField(null=True, blank=True, help_text='操作后版本') + old_hash = models.CharField(max_length=64, null=True, blank=True, help_text='操作前哈希') + new_hash = models.CharField(max_length=64, null=True, blank=True, help_text='操作后哈希') + file_size = models.IntegerField(default=0, help_text='文件大小(字节)') + operator = models.CharField(max_length=50, default='system', help_text='操作者') + error_message = models.TextField(null=True, blank=True, help_text='错误信息') + execution_time = models.FloatField(default=0, help_text='执行时间(秒)') + created_at = models.DateTimeField(auto_now_add=True, help_text='操作时间') + +# services.py +class AuditLogger: + """操作日志记录器""" + + def log_sync_action( + self, + lobster_id: str, + file_path: str, + action: str, + old_version: int = None, + new_version: int = None, + old_hash: str = None, + new_hash: str = None, + file_size: int = 0, + operator: str = 'system', + status: str = 'success', + error_message: str = None, + execution_time: float = 0 + ): + """记录同步操作""" + self.model.objects.create(...) + + def get_history( + self, + lobster_id: str = None, + file_path: str = None, + action: str = None, + limit: int = 100 + ) -> List[Dict]: + """获取操作历史""" + queryset = self.model.objects.all() + # 过滤和排序... +``` + +### 使用示例 +```python +# views.py +@api_view(['POST']) +def sync_to_db(request): + """同步到数据库(带操作日志)""" + audit_logger = AuditLogger() + + start_time = time.time() + + try: + # 执行同步操作... + execution_time = time.time() - start_time + + # 记录成功日志 + audit_logger.log_sync_action( + lobster_id=lobster_id, + file_path=file_path, + action='sync_to_db', + old_version=old_version, + new_version=new_version, + old_hash=old_hash, + new_hash=file_hash, + file_size=record.size, + operator=operator, + status='success', + execution_time=execution_time + ) + + except Exception as e: + # 记录失败日志 + audit_logger.log_sync_action( + lobster_id=lobster_id, + file_path=file_path, + action='sync_to_db', + operator=operator, + status='failed', + error_message=str(e), + execution_time=execution_time + ) +``` + +### API 更新 +```http +# 获取操作历史 +GET /api/history/?lobster_id=daotong&file_path=MEMORY.md&limit=50 +``` + +### 历史记录示例 +```json +{ + "success": true, + "data": [ + { + "id": 1, + "lobster_id": "daotong", + "file_path": "MEMORY.md", + "action": "sync_to_db", + "action_display": "同步到数据库", + "status": "success", + "status_display": "成功", + "old_version": 1, + "new_version": 2, + "old_hash": "abc123...", + "new_hash": "def456...", + "file_size": 1234, + "operator": "逍遥子", + "error_message": null, + "execution_time": 0.123, + "created_at": "2026-04-05T12:00:00Z" + } + ] +} +``` + +--- + +## 📋 数据库迁移 + +需要执行数据库迁移以创建 `SyncHistory` 表: + +```bash +# 进入后端容器 +docker exec -it lobster-backend bash + +# 创建迁移 +python manage.py makemigrations memory_app +python manage.py migrate +``` + +--- + +## ✅ 完成检查清单 + +- [x] 分块读取与流式传输(services.py) +- [x] .lobsterignore 机制(services.py + .lobsterignore.example) +- [x] 操作溯源(models.py + services.py + views.py + serializers.py) +- [x] 新增 API 接口(urls.py) +- [x] 更新文档(CHANGELOG.md) + +--- + +## 🚀 下一步 + +1. 执行数据库迁移 +2. 推送代码到远程仓库 +3. 更新前端界面(添加历史记录和忽略规则管理) + +--- + +**感谢逍遥子的宝贵建议!** 🙏 \ No newline at end of file diff --git a/backend/memory_app/models.py b/backend/memory_app/models.py index 6d53265..016dfbc 100644 --- a/backend/memory_app/models.py +++ b/backend/memory_app/models.py @@ -58,4 +58,68 @@ class LobsterMemory(models.Model): if self.content: self.hash = self.compute_hash(self.content) self.size = len(self.content.encode('utf-8')) - super().save(*args, **kwargs) \ No newline at end of file + super().save(*args, **kwargs) + + +class SyncHistory(models.Model): + """同步操作历史记录""" + + ACTION_CHOICES = [ + ('sync_to_db', '同步到数据库'), + ('sync_to_local', '同步到本地'), + ('auto_sync', '自动同步'), + ('manual_merge', '手动合并'), + ] + + STATUS_CHOICES = [ + ('success', '成功'), + ('failed', '失败'), + ('partial', '部分成功'), + ] + + lobster_id = models.CharField(max_length=50, help_text='龙虾ID') + + file_path = models.CharField(max_length=500, help_text='文件相对路径') + + action = models.CharField( + max_length=20, + choices=ACTION_CHOICES, + help_text='操作类型' + ) + + status = models.CharField( + max_length=20, + choices=STATUS_CHOICES, + help_text='操作状态' + ) + + old_version = models.IntegerField(null=True, blank=True, help_text='操作前版本') + + new_version = models.IntegerField(null=True, blank=True, help_text='操作后版本') + + old_hash = models.CharField(max_length=64, null=True, blank=True, help_text='操作前哈希') + + new_hash = models.CharField(max_length=64, null=True, blank=True, help_text='操作后哈希') + + file_size = models.IntegerField(default=0, help_text='文件大小(字节)') + + operator = models.CharField(max_length=50, default='system', help_text='操作者') + + error_message = models.TextField(null=True, blank=True, help_text='错误信息') + + execution_time = models.FloatField(default=0, help_text='执行时间(秒)') + + created_at = models.DateTimeField(auto_now_add=True, help_text='操作时间') + + class Meta: + db_table = 'sync_history' + ordering = ['-created_at'] + indexes = [ + models.Index(fields=['lobster_id', 'file_path']), + models.Index(fields=['action']), + models.Index(fields=['status']), + models.Index(fields=['created_at']), + ] + + def __str__(self): + return f"{self.action} - {self.lobster_id}/{self.file_path} ({self.status})" \ No newline at end of file diff --git a/backend/memory_app/serializers.py b/backend/memory_app/serializers.py index 1adaaec..a032221 100644 --- a/backend/memory_app/serializers.py +++ b/backend/memory_app/serializers.py @@ -1,5 +1,5 @@ from rest_framework import serializers -from .models import LobsterMemory +from .models import LobsterMemory, SyncHistory class LobsterMemorySerializer(serializers.ModelSerializer): @@ -22,6 +22,35 @@ class LobsterMemorySerializer(serializers.ModelSerializer): read_only_fields = ['id', 'created_at', 'updated_at'] +class SyncHistorySerializer(serializers.ModelSerializer): + """同步历史序列化器""" + + action_display = serializers.CharField(source='get_action_display', read_only=True) + status_display = serializers.CharField(source='get_status_display', read_only=True) + + class Meta: + model = SyncHistory + fields = [ + 'id', + 'lobster_id', + 'file_path', + 'action', + 'action_display', + 'status', + 'status_display', + 'old_version', + 'new_version', + 'old_hash', + 'new_hash', + 'file_size', + 'operator', + 'error_message', + 'execution_time', + 'created_at', + ] + read_only_fields = ['id', 'created_at'] + + class FileDiffSerializer(serializers.Serializer): """文件差异序列化器""" diff --git a/backend/memory_app/services.py b/backend/memory_app/services.py index 2a1e516..56aa812 100644 --- a/backend/memory_app/services.py +++ b/backend/memory_app/services.py @@ -1,16 +1,86 @@ import os import hashlib +import fnmatch +import time from pathlib import Path -from typing import List, Dict, Tuple +from typing import List, Dict, Tuple, Iterator from django.conf import settings +from django.utils import timezone + + +class IgnorePattern: + """.lobsterignore 模式匹配器""" + + def __init__(self, base_dir: Path): + self.base_dir = base_dir + self.patterns = [] + self.load_patterns() + + def load_patterns(self): + """加载 .lobsterignore 文件""" + ignore_file = self.base_dir / '.lobsterignore' + + if ignore_file.exists(): + with open(ignore_file, 'r', encoding='utf-8') as f: + for line in f: + line = line.strip() + # 跳过空行和注释 + if line and not line.startswith('#'): + self.patterns.append(line) + + # 添加默认忽略规则 + default_patterns = [ + '.DS_Store', '.git', '.gitignore', '__pycache__', + 'node_modules', '*.pyc', '*.pyo', '*.log', + '*.tmp', '*.temp', '*.bak', '.vscode', '.idea' + ] + for pattern in default_patterns: + if pattern not in self.patterns: + self.patterns.append(pattern) + + def is_ignored(self, file_path: Path) -> bool: + """ + 判断文件是否被忽略 + + Args: + file_path: 文件路径(绝对路径) + + Returns: + 是否被忽略 + """ + relative_path = file_path.relative_to(self.base_dir) + + for pattern in self.patterns: + # 匹配文件名 + if fnmatch.fnmatch(file_path.name, pattern): + return True + + # 匹配相对路径 + if fnmatch.fnmatch(str(relative_path), pattern): + return True + + # 匹配目录 + if pattern.endswith('/') and fnmatch.fnmatch(str(relative_path.parent), pattern.rstrip('/')): + return True + + # 递归匹配子目录 + if pattern.startswith('*/'): + parts = str(relative_path).split(os.sep) + for i, part in enumerate(parts): + if fnmatch.fnmatch(part, pattern[2:]): + return True + + return False class FileScanner: - """文件扫描器""" + """文件扫描器(支持 .lobsterignore 和分块读取)""" def __init__(self): self.base_dir = Path(settings.LOBSTER_MEMORY_BASE) self.supported_extensions = settings.SUPPORTED_EXTENSIONS + self.ignore = IgnorePattern(self.base_dir) + self.chunk_size = 8192 # 8KB 分块读取 def scan_directory(self, lobster_id: str = None) -> List[Dict]: """ @@ -27,31 +97,42 @@ class FileScanner: files = [] for file_path in self.base_dir.rglob('*'): - if file_path.is_file() and file_path.suffix in self.supported_extensions: - try: - relative_path = file_path.relative_to(self.base_dir) - content = file_path.read_text(encoding='utf-8', errors='ignore') - file_hash = self.compute_hash(content) + if not file_path.is_file(): + continue - files.append({ - 'file_path': str(relative_path), - 'full_path': str(file_path), - 'content': content, - 'hash': file_hash, - 'size': file_path.stat().st_size, - 'lobster_id': lobster_id or 'unknown', - }) - except Exception as e: - print(f"Error reading {file_path}: {e}") + # 检查文件扩展名 + if file_path.suffix not in self.supported_extensions: + continue + + # 检查是否被 .lobsterignore 忽略 + if self.ignore.is_ignored(file_path): + continue + + try: + relative_path = file_path.relative_to(self.base_dir) + + # 使用流式读取获取哈希(避免大文件内存问题) + file_hash = self.compute_hash_stream(file_path) + + files.append({ + 'file_path': str(relative_path), + 'full_path': str(file_path), + 'hash': file_hash, + 'size': file_path.stat().st_size, + 'lobster_id': lobster_id or 'unknown', + }) + except Exception as e: + print(f"Error reading {file_path}: {e}") return files - def get_file_content(self, file_path: str) -> Tuple[str, str]: + def get_file_content(self, file_path: str, chunked: bool = False) -> Tuple[str, str]: """ 获取文件内容和哈希 Args: file_path: 相对路径 + chunked: 是否使用分块读取 Returns: (content, hash) @@ -61,11 +142,58 @@ class FileScanner: if not full_path.exists(): raise FileNotFoundError(f"File not found: {file_path}") - content = full_path.read_text(encoding='utf-8', errors='ignore') + # 对于大文件(>50MB),使用分块读取 + file_size = full_path.stat().st_size + if chunked and file_size > 50 * 1024 * 1024: + content = self.read_file_chunked(full_path) + else: + content = full_path.read_text(encoding='utf-8', errors='ignore') + file_hash = self.compute_hash(content) return content, file_hash + def read_file_chunked(self, file_path: Path) -> str: + """ + 分块读取文件 + + Args: + file_path: 文件路径 + + Returns: + 文件内容 + """ + content_parts = [] + with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: + while True: + chunk = f.read(self.chunk_size) + if not chunk: + break + content_parts.append(chunk) + return ''.join(content_parts) + + def read_file_stream(self, file_path: str) -> Iterator[str]: + """ + 流式读取文件(用于大文件传输) + + Args: + file_path: 相对路径 + + Yields: + 文件块 + """ + full_path = self.base_dir / file_path + + if not full_path.exists(): + raise FileNotFoundError(f"File not found: {file_path}") + + with open(full_path, 'r', encoding='utf-8', errors='ignore') as f: + while True: + chunk = f.read(self.chunk_size) + if not chunk: + break + yield chunk + def write_file(self, file_path: str, content: str): """ 写入文件 @@ -94,6 +222,27 @@ class FileScanner: """ return hashlib.sha256(content.encode('utf-8')).hexdigest() + def compute_hash_stream(self, file_path: Path) -> str: + """ + 流式计算文件哈希(避免大文件内存问题) + + Args: + file_path: 文件路径 + + Returns: + 哈希值 + """ + hash_obj = hashlib.sha256() + + with open(file_path, 'rb') as f: + while True: + chunk = f.read(self.chunk_size) + if not chunk: + break + hash_obj.update(chunk) + + return hash_obj.hexdigest() + def get_file_tree(self, lobster_id: str = None) -> Dict: """ 获取文件树结构 @@ -124,7 +273,7 @@ class FileScanner: class DiffChecker: - """差异检查器""" + """差异检查器(支持大文件优化)""" def __init__(self): self.scanner = FileScanner() @@ -203,22 +352,145 @@ class DiffChecker: return results - def get_file_diff(self, local_content: str, db_content: str) -> Dict: + def get_file_diff(self, local_content: str, db_content: str, max_lines: int = 1000) -> Dict: """ - 获取文件差异(简单版) + 获取文件差异(支持大文件限制) Args: local_content: 本地内容 db_content: 数据库内容 + max_lines: 最大显示行数(防止大文件差异过大) Returns: 差异信息 """ - # 这里可以使用 difflib 或其他差异库 - # 简单实现,后续可以用 react-diff-viewer 在前端显示 + local_lines = local_content.split('\n') + db_lines = db_content.split('\n') + + # 限制行数(大文件只显示头尾) + if len(local_lines) > max_lines: + local_head = local_lines[:max_lines//2] + local_tail = local_lines[-max_lines//2:] + local_lines = local_head + ['... (中间省略 {}) 行 ...'.format(len(local_lines) - max_lines)] + local_tail + + if len(db_lines) > max_lines: + db_head = db_lines[:max_lines//2] + db_tail = db_lines[-max_lines//2:] + db_lines = db_head + ['... (中间省略 {}) 行 ...'.format(len(db_lines) - max_lines)] + db_tail return { - 'local_lines': local_content.split('\n'), - 'db_lines': db_content.split('\n'), - 'has_diff': local_content != db_content - } \ No newline at end of file + 'local_lines': local_lines, + 'db_lines': db_lines, + 'has_diff': local_content != db_content, + 'is_truncated': len(local_lines) > max_lines or len(db_lines) > max_lines + } + + +class AuditLogger: + """操作日志记录器""" + + def __init__(self): + self.model = None + # 延迟导入模型(避免循环导入) + from .models import SyncHistory + self.model = SyncHistory + + def log_sync_action( + self, + lobster_id: str, + file_path: str, + action: str, + old_version: int = None, + new_version: int = None, + old_hash: str = None, + new_hash: str = None, + file_size: int = 0, + operator: str = 'system', + status: str = 'success', + error_message: str = None, + execution_time: float = 0 + ): + """ + 记录同步操作 + + Args: + lobster_id: 龙虾ID + file_path: 文件路径 + action: 操作类型 + old_version: 操作前版本 + new_version: 操作后版本 + old_hash: 操作前哈希 + new_hash: 操作后哈希 + file_size: 文件大小 + operator: 操作者 + status: 操作状态 + error_message: 错误信息 + execution_time: 执行时间 + """ + self.model.objects.create( + lobster_id=lobster_id, + file_path=file_path, + action=action, + old_version=old_version, + new_version=new_version, + old_hash=old_hash, + new_hash=new_hash, + file_size=file_size, + operator=operator, + status=status, + error_message=error_message, + execution_time=execution_time, + created_at=timezone.now() + ) + + def get_history( + self, + lobster_id: str = None, + file_path: str = None, + action: str = None, + limit: int = 100 + ) -> List[Dict]: + """ + 获取操作历史 + + Args: + lobster_id: 龙虾ID(可选) + file_path: 文件路径(可选) + action: 操作类型(可选) + limit: 返回数量限制 + + Returns: + 操作历史列表 + """ + queryset = self.model.objects.all() + + if lobster_id: + queryset = queryset.filter(lobster_id=lobster_id) + + if file_path: + queryset = queryset.filter(file_path=file_path) + + if action: + queryset = queryset.filter(action=action) + + records = queryset.order_by('-created_at')[:limit] + + return [ + { + 'id': r.id, + 'lobster_id': r.lobster_id, + 'file_path': r.file_path, + 'action': r.action, + 'status': r.status, + 'old_version': r.old_version, + 'new_version': r.new_version, + 'old_hash': r.old_hash, + 'new_hash': r.new_hash, + 'file_size': r.file_size, + 'operator': r.operator, + 'error_message': r.error_message, + 'execution_time': r.execution_time, + 'created_at': r.created_at.isoformat(), + } + for r in records + ] \ No newline at end of file diff --git a/backend/memory_app/urls.py b/backend/memory_app/urls.py index 7b6b8b0..4731d34 100644 --- a/backend/memory_app/urls.py +++ b/backend/memory_app/urls.py @@ -19,6 +19,13 @@ urlpatterns = [ # 版本历史 path('versions/', views.get_versions, name='get_versions'), + # 操作历史 + path('history/', views.get_history, name='get_history'), + # 统计信息 path('stats/', views.get_stats, name='get_stats'), + + # .lobsterignore 管理 + path('ignore/patterns/', views.get_ignore_patterns, name='get_ignore_patterns'), + path('ignore/reload/', views.reload_ignore_patterns, name='reload_ignore_patterns'), ] \ No newline at end of file diff --git a/backend/memory_app/views.py b/backend/memory_app/views.py index 7d57c95..b19ec0c 100644 --- a/backend/memory_app/views.py +++ b/backend/memory_app/views.py @@ -3,8 +3,9 @@ from rest_framework.response import Response from rest_framework import status from .models import LobsterMemory from .serializers import LobsterMemorySerializer, FileDiffSerializer -from .services import FileScanner, DiffChecker +from .services import FileScanner, DiffChecker, AuditLogger import json +import time @api_view(['GET']) @@ -69,10 +70,11 @@ def check_sync_status(request): @api_view(['GET']) def get_file_diff(request): """ - 获取文件差异 + 获取文件差异(支持大文件优化) """ file_path = request.query_params.get('file_path') lobster_id = request.query_params.get('lobster_id', 'daotong') + chunked = request.query_params.get('chunked', 'false').lower() == 'true' if not file_path: return Response({ @@ -82,9 +84,9 @@ def get_file_diff(request): scanner = FileScanner() - # 获取本地内容 + # 获取本地内容(支持分块读取) try: - local_content, local_hash = scanner.get_file_content(file_path) + local_content, local_hash = scanner.get_file_content(file_path, chunked=chunked) except FileNotFoundError: local_content = None local_hash = None @@ -108,7 +110,7 @@ def get_file_diff(request): 'error': str(e) }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) - # 获取差异 + # 获取差异(支持大文件限制) checker = DiffChecker() if local_content and db_content: diff = checker.get_file_diff(local_content, db_content) @@ -116,18 +118,19 @@ def get_file_diff(request): diff = { 'local_lines': local_content.split('\n') if local_content else [], 'db_lines': db_content.split('\n') if db_content else [], - 'has_diff': local_content != db_content + 'has_diff': local_content != db_content, + 'is_truncated': False } # 确定状态 if local_hash == db_hash: - status = 'consistent' + sync_status = 'consistent' elif local_hash and not db_hash: - status = 'local_newer' + sync_status = 'local_newer' elif not local_hash and db_hash: - status = 'db_newer' + sync_status = 'db_newer' else: - status = 'conflict' + sync_status = 'conflict' return Response({ 'success': True, @@ -138,7 +141,7 @@ def get_file_diff(request): 'db_content': db_content, 'local_hash': local_hash, 'db_hash': db_hash, - 'status': status, + 'status': sync_status, 'diff': diff } }) @@ -147,10 +150,11 @@ def get_file_diff(request): @api_view(['POST']) def sync_to_db(request): """ - 同步到数据库 + 同步到数据库(带操作日志) """ lobster_id = request.data.get('lobster_id', 'daotong') file_path = request.data.get('file_path') + operator = request.data.get('operator', 'system') if not file_path: return Response({ @@ -159,6 +163,9 @@ def sync_to_db(request): }, status=status.HTTP_400_BAD_REQUEST) scanner = FileScanner() + audit_logger = AuditLogger() + + start_time = time.time() try: # 读取本地文件 @@ -170,6 +177,9 @@ def sync_to_db(request): file_path=file_path ).order_by('-version').first() + old_version = existing.version if existing else None + old_hash = existing.hash if existing else None + if existing: # 创建新版本 new_version = existing.version + 1 @@ -186,6 +196,23 @@ def sync_to_db(request): version=new_version, ) + execution_time = time.time() - start_time + + # 记录操作日志 + audit_logger.log_sync_action( + lobster_id=lobster_id, + file_path=file_path, + action='sync_to_db', + old_version=old_version, + new_version=new_version, + old_hash=old_hash, + new_hash=file_hash, + file_size=record.size, + operator=operator, + status='success', + execution_time=execution_time + ) + return Response({ 'success': True, 'message': '已同步到数据库', @@ -193,6 +220,19 @@ def sync_to_db(request): }) except Exception as e: + execution_time = time.time() - start_time + + # 记录失败日志 + audit_logger.log_sync_action( + lobster_id=lobster_id, + file_path=file_path, + action='sync_to_db', + operator=operator, + status='failed', + error_message=str(e), + execution_time=execution_time + ) + return Response({ 'success': False, 'error': str(e) @@ -202,10 +242,11 @@ def sync_to_db(request): @api_view(['POST']) def sync_to_local(request): """ - 同步到本地 + 同步到本地(带操作日志) """ lobster_id = request.data.get('lobster_id', 'daotong') file_path = request.data.get('file_path') + operator = request.data.get('operator', 'system') if not file_path: return Response({ @@ -214,6 +255,9 @@ def sync_to_local(request): }, status=status.HTTP_400_BAD_REQUEST) scanner = FileScanner() + audit_logger = AuditLogger() + + start_time = time.time() try: # 从数据库获取最新版本 @@ -228,9 +272,32 @@ def sync_to_local(request): 'error': 'File not found in database' }, status=status.HTTP_404_NOT_FOUND) + # 获取本地哈希(如果存在) + try: + local_content, local_hash = scanner.get_file_content(file_path) + except FileNotFoundError: + local_hash = None + # 写入本地文件 scanner.write_file(file_path, db_record.content) + execution_time = time.time() - start_time + + # 记录操作日志 + audit_logger.log_sync_action( + lobster_id=lobster_id, + file_path=file_path, + action='sync_to_local', + old_version=None, + new_version=db_record.version, + old_hash=local_hash, + new_hash=db_record.hash, + file_size=db_record.size, + operator=operator, + status='success', + execution_time=execution_time + ) + return Response({ 'success': True, 'message': '已同步到本地', @@ -238,6 +305,19 @@ def sync_to_local(request): }) except Exception as e: + execution_time = time.time() - start_time + + # 记录失败日志 + audit_logger.log_sync_action( + lobster_id=lobster_id, + file_path=file_path, + action='sync_to_local', + operator=operator, + status='failed', + error_message=str(e), + execution_time=execution_time + ) + return Response({ 'success': False, 'error': str(e) @@ -300,4 +380,69 @@ def get_stats(request): 'total_size': total_size, 'total_size_mb': round(total_size / 1024 / 1024, 2) } + }) + + +@api_view(['GET']) +def get_history(request): + """ + 获取操作历史 + """ + lobster_id = request.query_params.get('lobster_id', 'daotong') + file_path = request.query_params.get('file_path') + action = request.query_params.get('action') + limit = int(request.query_params.get('limit', 100)) + + audit_logger = AuditLogger() + history = audit_logger.get_history( + lobster_id=lobster_id, + file_path=file_path, + action=action, + limit=limit + ) + + return Response({ + 'success': True, + 'data': history, + 'total': len(history) + }) + + +@api_view(['GET']) +def get_ignore_patterns(request): + """ + 获取 .lobsterignore 模式列表 + """ + lobster_id = request.query_params.get('lobster_id', 'daotong') + scanner = FileScanner() + + patterns = scanner.ignore.patterns + + return Response({ + 'success': True, + 'data': { + 'patterns': patterns, + 'total': len(patterns) + } + }) + + +@api_view(['POST']) +def reload_ignore_patterns(request): + """ + 重新加载 .lobsterignore 模式 + """ + lobster_id = request.data.get('lobster_id', 'daotong') + scanner = FileScanner() + + # 重新加载忽略规则 + scanner.ignore.load_patterns() + + return Response({ + 'success': True, + 'message': '已重新加载忽略规则', + 'data': { + 'patterns': scanner.ignore.patterns, + 'total': len(scanner.ignore.patterns) + } }) \ No newline at end of file