feat: add three major patches

Patch 1: chunked reads and streaming
- Read large files in 8KB chunks to avoid memory spikes
- Compute file hashes as a stream, without loading the full content
- Cap diff output: large files show only the first and last 500 lines
- New chunked parameter enables streaming reads (see the sketch below)
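A minimal sketch (not part of this commit) of how the streaming read could back a download endpoint; the view and route are hypothetical, only FileScanner.read_file_stream comes from the patch:

from django.http import StreamingHttpResponse
from .services import FileScanner

def stream_file(request):
    """Hypothetical endpoint: streams a memory file in 8KB chunks."""
    file_path = request.GET.get('file_path')
    scanner = FileScanner()
    # read_file_stream yields text chunks lazily, so memory use stays flat
    response = StreamingHttpResponse(
        scanner.read_file_stream(file_path),
        content_type='text/plain; charset=utf-8',
    )
    response['Content-Disposition'] = f'attachment; filename="{file_path}"'
    return response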

Patch 2: .lobsterignore mechanism
- New IgnorePattern class implements the pattern matching
- Ignore rules are configured via a .lobsterignore file
- Default ignore rules included (.DS_Store, node_modules, etc.)
- Wildcard matching supported (*, ?, directory patterns)
- New APIs: GET /api/ignore/patterns/, POST /api/ignore/reload/ (sample file below)
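For illustration, a .lobsterignore in the spirit of the bundled .lobsterignore.example (entries here are made up):

# one fnmatch-style pattern per line; '#' starts a comment
*.swp
secret-*.md
drafts/
archive/2023-*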

Patch 3: operation audit trail (Audit Log)
- New SyncHistory model records sync history
- New AuditLogger class writes the log entries
- Every sync operation is logged automatically
- Records the operator, version change, hash change, execution time, and more
- New API: GET /api/history/ (query example below)
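For illustration, querying the new endpoint with the requests library (host and /api/ prefix assumed from this message; adjust for the deployment):

import requests

resp = requests.get(
    'http://localhost:8000/api/history/',
    params={'lobster_id': 'daotong', 'action': 'sync_to_db', 'limit': 20},
)
for entry in resp.json()['data']:
    print(entry['created_at'], entry['action'], entry['status'], entry['file_path'])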

Changes:
- models.py: new SyncHistory model
- services.py: new IgnorePattern, AuditLogger, and chunked-read methods
- views.py: audit logging on all sync operations; new history and ignore-rule endpoints
- serializers.py: new SyncHistorySerializer
- urls.py: new history and ignore-rule routes
- .lobsterignore.example: sample ignore file
- CHANGELOG.md: detailed changelog
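Note: the seven changed files do not include a database migration, so the new SyncHistory table still has to be generated before the history endpoints can record anything:

python manage.py makemigrations
python manage.py migrate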
道童
2026-04-05 12:20:57 +00:00
parent d9420b6cc6
commit 077656a6cf
7 changed files with 1007 additions and 43 deletions

models.py

@@ -58,4 +58,68 @@ class LobsterMemory(models.Model):
        if self.content:
            self.hash = self.compute_hash(self.content)
            self.size = len(self.content.encode('utf-8'))
        super().save(*args, **kwargs)


class SyncHistory(models.Model):
    """Sync operation history record"""
    ACTION_CHOICES = [
        ('sync_to_db', 'Sync to database'),
        ('sync_to_local', 'Sync to local'),
        ('auto_sync', 'Auto sync'),
        ('manual_merge', 'Manual merge'),
    ]
    STATUS_CHOICES = [
        ('success', 'Success'),
        ('failed', 'Failed'),
        ('partial', 'Partially successful'),
    ]
    lobster_id = models.CharField(max_length=50, help_text='Lobster ID')
    file_path = models.CharField(max_length=500, help_text='Relative file path')
    action = models.CharField(
        max_length=20,
        choices=ACTION_CHOICES,
        help_text='Operation type'
    )
    status = models.CharField(
        max_length=20,
        choices=STATUS_CHOICES,
        help_text='Operation status'
    )
    old_version = models.IntegerField(null=True, blank=True, help_text='Version before the operation')
    new_version = models.IntegerField(null=True, blank=True, help_text='Version after the operation')
    old_hash = models.CharField(max_length=64, null=True, blank=True, help_text='Hash before the operation')
    new_hash = models.CharField(max_length=64, null=True, blank=True, help_text='Hash after the operation')
    file_size = models.IntegerField(default=0, help_text='File size in bytes')
    operator = models.CharField(max_length=50, default='system', help_text='Operator')
    error_message = models.TextField(null=True, blank=True, help_text='Error message')
    execution_time = models.FloatField(default=0, help_text='Execution time in seconds')
    created_at = models.DateTimeField(auto_now_add=True, help_text='Operation time')

    class Meta:
        db_table = 'sync_history'
        ordering = ['-created_at']
        indexes = [
            models.Index(fields=['lobster_id', 'file_path']),
            models.Index(fields=['action']),
            models.Index(fields=['status']),
            models.Index(fields=['created_at']),
        ]

    def __str__(self):
        return f"{self.action} - {self.lobster_id}/{self.file_path} ({self.status})"

serializers.py

@@ -1,5 +1,5 @@
from rest_framework import serializers
from .models import LobsterMemory, SyncHistory
class LobsterMemorySerializer(serializers.ModelSerializer):
@@ -22,6 +22,35 @@ class LobsterMemorySerializer(serializers.ModelSerializer):
        read_only_fields = ['id', 'created_at', 'updated_at']


class SyncHistorySerializer(serializers.ModelSerializer):
    """Serializer for sync history records"""
    action_display = serializers.CharField(source='get_action_display', read_only=True)
    status_display = serializers.CharField(source='get_status_display', read_only=True)

    class Meta:
        model = SyncHistory
        fields = [
            'id',
            'lobster_id',
            'file_path',
            'action',
            'action_display',
            'status',
            'status_display',
            'old_version',
            'new_version',
            'old_hash',
            'new_hash',
            'file_size',
            'operator',
            'error_message',
            'execution_time',
            'created_at',
        ]
        read_only_fields = ['id', 'created_at']
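# Illustrative only: serializing records for the /api/history/ style response;
# many=True yields a list of dicts including the *_display labels.
#
#   records = SyncHistory.objects.all()[:20]
#   data = SyncHistorySerializer(records, many=True).data
#   data[0]['action_display']  # e.g. 'Sync to database'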


class FileDiffSerializer(serializers.Serializer):
    """Serializer for file diffs"""

services.py

@@ -1,16 +1,86 @@
import os
import hashlib
import fnmatch
import time
from pathlib import Path
from typing import List, Dict, Tuple, Iterator
from django.conf import settings
from django.utils import timezone
class IgnorePattern:
    """Pattern matcher for .lobsterignore"""

    def __init__(self, base_dir: Path):
        self.base_dir = base_dir
        self.patterns = []
        self.load_patterns()

    def load_patterns(self):
        """Load the .lobsterignore file"""
        # Reset first so reloading does not accumulate duplicate patterns
        self.patterns = []
        ignore_file = self.base_dir / '.lobsterignore'
        if ignore_file.exists():
            with open(ignore_file, 'r', encoding='utf-8') as f:
                for line in f:
                    line = line.strip()
                    # Skip blank lines and comments
                    if line and not line.startswith('#'):
                        self.patterns.append(line)
        # Append the default ignore rules
        default_patterns = [
            '.DS_Store', '.git', '.gitignore', '__pycache__',
            'node_modules', '*.pyc', '*.pyo', '*.log',
            '*.tmp', '*.temp', '*.bak', '.vscode', '.idea'
        ]
        for pattern in default_patterns:
            if pattern not in self.patterns:
                self.patterns.append(pattern)

    def is_ignored(self, file_path: Path) -> bool:
        """
        Check whether a file is ignored
        Args:
            file_path: file path (absolute)
        Returns:
            True if the file matches an ignore pattern
        """
        relative_path = file_path.relative_to(self.base_dir)
        for pattern in self.patterns:
            # Match against the file name
            if fnmatch.fnmatch(file_path.name, pattern):
                return True
            # Match against the relative path
            if fnmatch.fnmatch(str(relative_path), pattern):
                return True
            # Match directory patterns (trailing '/')
            if pattern.endswith('/') and fnmatch.fnmatch(str(relative_path.parent), pattern.rstrip('/')):
                return True
            # Match '*/...' patterns against each path component
            if pattern.startswith('*/'):
                for part in relative_path.parts:
                    if fnmatch.fnmatch(part, pattern[2:]):
                        return True
        return False
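# Illustrative checks (not in this commit), assuming LOBSTER_MEMORY_BASE points
# at the memory directory:
#
#   ignore = IgnorePattern(Path(settings.LOBSTER_MEMORY_BASE))
#   ignore.is_ignored(ignore.base_dir / 'debug.log')           # True: '*.log' default
#   ignore.is_ignored(ignore.base_dir / 'notes' / 'today.md')  # False, unless listed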
class FileScanner:
    """File scanner (supports .lobsterignore and chunked reads)"""

    def __init__(self):
        self.base_dir = Path(settings.LOBSTER_MEMORY_BASE)
        self.supported_extensions = settings.SUPPORTED_EXTENSIONS
        self.ignore = IgnorePattern(self.base_dir)
        self.chunk_size = 8192  # read in 8KB chunks
    def scan_directory(self, lobster_id: str = None) -> List[Dict]:
        """
@@ -27,31 +97,42 @@ class FileScanner:
        files = []
        for file_path in self.base_dir.rglob('*'):
            if not file_path.is_file():
                continue

            # Check the file extension
            if file_path.suffix not in self.supported_extensions:
                continue

            # Skip files matched by .lobsterignore
            if self.ignore.is_ignored(file_path):
                continue

            try:
                relative_path = file_path.relative_to(self.base_dir)
                # Hash via streaming read (avoids loading large files into memory)
                file_hash = self.compute_hash_stream(file_path)
                files.append({
                    'file_path': str(relative_path),
                    'full_path': str(file_path),
                    'hash': file_hash,
                    'size': file_path.stat().st_size,
                    'lobster_id': lobster_id or 'unknown',
                })
            except Exception as e:
                print(f"Error reading {file_path}: {e}")
        return files
    def get_file_content(self, file_path: str, chunked: bool = False) -> Tuple[str, str]:
        """
        Get a file's content and hash
        Args:
            file_path: relative path
            chunked: whether to use chunked reads
        Returns:
            (content, hash)
@@ -61,11 +142,58 @@ class FileScanner:
        if not full_path.exists():
            raise FileNotFoundError(f"File not found: {file_path}")

        # For large files (>50MB), read in chunks
        file_size = full_path.stat().st_size
        if chunked and file_size > 50 * 1024 * 1024:
            content = self.read_file_chunked(full_path)
        else:
            content = full_path.read_text(encoding='utf-8', errors='ignore')

        file_hash = self.compute_hash(content)
        return content, file_hash
    def read_file_chunked(self, file_path: Path) -> str:
        """
        Read a file in chunks
        Args:
            file_path: file path
        Returns:
            file content
        """
        content_parts = []
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            while True:
                chunk = f.read(self.chunk_size)
                if not chunk:
                    break
                content_parts.append(chunk)
        return ''.join(content_parts)
    def read_file_stream(self, file_path: str) -> Iterator[str]:
        """
        Stream a file (for transferring large files)
        Args:
            file_path: relative path
        Yields:
            file chunks
        """
        full_path = self.base_dir / file_path
        if not full_path.exists():
            raise FileNotFoundError(f"File not found: {file_path}")
        with open(full_path, 'r', encoding='utf-8', errors='ignore') as f:
            while True:
                chunk = f.read(self.chunk_size)
                if not chunk:
                    break
                yield chunk
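    # Illustrative consumer (not in this commit); the relative path is hypothetical:
    #
    #   scanner = FileScanner()
    #   with open('/tmp/copy.md', 'w', encoding='utf-8') as out:
    #       for chunk in scanner.read_file_stream('notes/today.md'):
    #           out.write(chunk)  # at most ~8KB of text held per chunk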
    def write_file(self, file_path: str, content: str):
        """
        Write a file
@@ -94,6 +222,27 @@ class FileScanner:
"""
return hashlib.sha256(content.encode('utf-8')).hexdigest()
    def compute_hash_stream(self, file_path: Path) -> str:
        """
        Compute a file's hash as a stream (avoids loading large files into memory)
        Args:
            file_path: file path
        Returns:
            hash value
        """
        # Note: this hashes the raw bytes on disk, while compute_hash hashes a
        # decoded-then-re-encoded string; the two can differ for files that are
        # not clean UTF-8.
        hash_obj = hashlib.sha256()
        with open(file_path, 'rb') as f:
            while True:
                chunk = f.read(self.chunk_size)
                if not chunk:
                    break
                hash_obj.update(chunk)
        return hash_obj.hexdigest()
    def get_file_tree(self, lobster_id: str = None) -> Dict:
        """
        Get the file tree structure
@@ -124,7 +273,7 @@ class FileScanner:
class DiffChecker:
    """Diff checker (optimized for large files)"""

    def __init__(self):
        self.scanner = FileScanner()
@@ -203,22 +352,145 @@ class DiffChecker:
        return results
    def get_file_diff(self, local_content: str, db_content: str, max_lines: int = 1000) -> Dict:
        """
        Get the diff between two file versions (with a cap for large files)
        Args:
            local_content: local content
            db_content: database content
            max_lines: maximum number of lines to return (keeps large diffs bounded)
        Returns:
            diff information
        """
        # difflib or another diff library could be used here; this simple form
        # is rendered with react-diff-viewer on the frontend
        local_lines = local_content.split('\n')
        db_lines = db_content.split('\n')

        # Cap the line count (large files keep only the head and tail)
        local_truncated = len(local_lines) > max_lines
        if local_truncated:
            omitted = len(local_lines) - max_lines
            local_lines = (local_lines[:max_lines // 2]
                           + ['... ({} lines omitted) ...'.format(omitted)]
                           + local_lines[-(max_lines // 2):])
        db_truncated = len(db_lines) > max_lines
        if db_truncated:
            omitted = len(db_lines) - max_lines
            db_lines = (db_lines[:max_lines // 2]
                        + ['... ({} lines omitted) ...'.format(omitted)]
                        + db_lines[-(max_lines // 2):])

        return {
            'local_lines': local_lines,
            'db_lines': db_lines,
            'has_diff': local_content != db_content,
            'is_truncated': local_truncated or db_truncated
        }
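# A possible refinement (not in this commit): the comment above leaves room for
# a real diff library, e.g. difflib from the standard library:
#
#   import difflib
#   delta = difflib.unified_diff(local_lines, db_lines,
#                                fromfile='local', tofile='db', lineterm='')
#   unified = '\n'.join(delta)  # unified-diff text the frontend could render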
class AuditLogger:
    """Operation audit logger"""

    def __init__(self):
        # Import the model lazily to avoid a circular import
        from .models import SyncHistory
        self.model = SyncHistory

    def log_sync_action(
        self,
        lobster_id: str,
        file_path: str,
        action: str,
        old_version: int = None,
        new_version: int = None,
        old_hash: str = None,
        new_hash: str = None,
        file_size: int = 0,
        operator: str = 'system',
        status: str = 'success',
        error_message: str = None,
        execution_time: float = 0
    ):
        """
        Record a sync operation
        Args:
            lobster_id: lobster ID
            file_path: file path
            action: operation type
            old_version: version before the operation
            new_version: version after the operation
            old_hash: hash before the operation
            new_hash: hash after the operation
            file_size: file size
            operator: operator
            status: operation status
            error_message: error message
            execution_time: execution time
        """
        # created_at is filled automatically (auto_now_add), so it is not passed here
        self.model.objects.create(
            lobster_id=lobster_id,
            file_path=file_path,
            action=action,
            old_version=old_version,
            new_version=new_version,
            old_hash=old_hash,
            new_hash=new_hash,
            file_size=file_size,
            operator=operator,
            status=status,
            error_message=error_message,
            execution_time=execution_time,
        )
    def get_history(
        self,
        lobster_id: str = None,
        file_path: str = None,
        action: str = None,
        limit: int = 100
    ) -> List[Dict]:
        """
        Get the operation history
        Args:
            lobster_id: lobster ID (optional)
            file_path: file path (optional)
            action: operation type (optional)
            limit: maximum number of records to return
        Returns:
            list of history records
        """
        queryset = self.model.objects.all()
        if lobster_id:
            queryset = queryset.filter(lobster_id=lobster_id)
        if file_path:
            queryset = queryset.filter(file_path=file_path)
        if action:
            queryset = queryset.filter(action=action)
        records = queryset.order_by('-created_at')[:limit]
        return [
            {
                'id': r.id,
                'lobster_id': r.lobster_id,
                'file_path': r.file_path,
                'action': r.action,
                'status': r.status,
                'old_version': r.old_version,
                'new_version': r.new_version,
                'old_hash': r.old_hash,
                'new_hash': r.new_hash,
                'file_size': r.file_size,
                'operator': r.operator,
                'error_message': r.error_message,
                'execution_time': r.execution_time,
                'created_at': r.created_at.isoformat(),
            }
            for r in records
        ]

urls.py

@@ -19,6 +19,13 @@ urlpatterns = [
    # Version history
    path('versions/', views.get_versions, name='get_versions'),

    # Operation history
    path('history/', views.get_history, name='get_history'),

    # Statistics
    path('stats/', views.get_stats, name='get_stats'),

    # .lobsterignore management
    path('ignore/patterns/', views.get_ignore_patterns, name='get_ignore_patterns'),
    path('ignore/reload/', views.reload_ignore_patterns, name='reload_ignore_patterns'),
]
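# For reference, the new routes assuming the /api/ prefix named in the commit
# message (host illustrative):
#
#   GET  http://localhost:8000/api/history/?action=sync_to_db&limit=20
#   GET  http://localhost:8000/api/ignore/patterns/
#   POST http://localhost:8000/api/ignore/reload/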

views.py

@@ -3,8 +3,9 @@ from rest_framework.response import Response
from rest_framework import status
from .models import LobsterMemory
from .serializers import LobsterMemorySerializer, FileDiffSerializer
from .services import FileScanner, DiffChecker, AuditLogger
import json
import time
@api_view(['GET'])
@@ -69,10 +70,11 @@ def check_sync_status(request):
@api_view(['GET'])
def get_file_diff(request):
    """
    Get a file diff (optimized for large files)
    """
    file_path = request.query_params.get('file_path')
    lobster_id = request.query_params.get('lobster_id', 'daotong')
    chunked = request.query_params.get('chunked', 'false').lower() == 'true'

    if not file_path:
        return Response({
@@ -82,9 +84,9 @@ def get_file_diff(request):
    scanner = FileScanner()

    # Get the local content (chunked reads supported)
    try:
        local_content, local_hash = scanner.get_file_content(file_path, chunked=chunked)
    except FileNotFoundError:
        local_content = None
        local_hash = None
@@ -108,7 +110,7 @@ def get_file_diff(request):
            'error': str(e)
        }, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
    # Get the diff (large-file cap applies)
    checker = DiffChecker()
    if local_content and db_content:
        diff = checker.get_file_diff(local_content, db_content)
@@ -116,18 +118,19 @@ def get_file_diff(request):
        diff = {
            'local_lines': local_content.split('\n') if local_content else [],
            'db_lines': db_content.split('\n') if db_content else [],
            'has_diff': local_content != db_content,
            'is_truncated': False
        }

    # Determine the sync status (the local name avoids shadowing DRF's status import)
    if local_hash == db_hash:
        sync_status = 'consistent'
    elif local_hash and not db_hash:
        sync_status = 'local_newer'
    elif not local_hash and db_hash:
        sync_status = 'db_newer'
    else:
        sync_status = 'conflict'

    return Response({
        'success': True,
@@ -138,7 +141,7 @@ def get_file_diff(request):
            'db_content': db_content,
            'local_hash': local_hash,
            'db_hash': db_hash,
            'status': sync_status,
            'diff': diff
        }
    })
@@ -147,10 +150,11 @@ def get_file_diff(request):
@api_view(['POST'])
def sync_to_db(request):
    """
    Sync to the database (with audit logging)
    """
    lobster_id = request.data.get('lobster_id', 'daotong')
    file_path = request.data.get('file_path')
    operator = request.data.get('operator', 'system')

    if not file_path:
        return Response({
@@ -159,6 +163,9 @@ def sync_to_db(request):
        }, status=status.HTTP_400_BAD_REQUEST)

    scanner = FileScanner()
    audit_logger = AuditLogger()
    start_time = time.time()

    try:
        # Read the local file
@@ -170,6 +177,9 @@ def sync_to_db(request):
            file_path=file_path
        ).order_by('-version').first()

        old_version = existing.version if existing else None
        old_hash = existing.hash if existing else None

        if existing:
            # Create a new version
            new_version = existing.version + 1
@@ -186,6 +196,23 @@ def sync_to_db(request):
            version=new_version,
        )

        execution_time = time.time() - start_time

        # Write the audit record
        audit_logger.log_sync_action(
            lobster_id=lobster_id,
            file_path=file_path,
            action='sync_to_db',
            old_version=old_version,
            new_version=new_version,
            old_hash=old_hash,
            new_hash=file_hash,
            file_size=record.size,
            operator=operator,
            status='success',
            execution_time=execution_time
        )

        return Response({
            'success': True,
            'message': 'Synced to database',
@@ -193,6 +220,19 @@ def sync_to_db(request):
        })
    except Exception as e:
        execution_time = time.time() - start_time

        # Write the failure record
        audit_logger.log_sync_action(
            lobster_id=lobster_id,
            file_path=file_path,
            action='sync_to_db',
            operator=operator,
            status='failed',
            error_message=str(e),
            execution_time=execution_time
        )

        return Response({
            'success': False,
            'error': str(e)
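# Illustrative client call (not in this commit); the route name is hypothetical,
# only the request body fields come from the view above:
#
#   import requests
#   requests.post('http://localhost:8000/api/sync-to-db/', json={
#       'lobster_id': 'daotong',
#       'file_path': 'notes/today.md',
#       'operator': 'alice',  # recorded in the audit log instead of 'system'
#   })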
@@ -202,10 +242,11 @@ def sync_to_db(request):
@api_view(['POST'])
def sync_to_local(request):
    """
    Sync to local (with audit logging)
    """
    lobster_id = request.data.get('lobster_id', 'daotong')
    file_path = request.data.get('file_path')
    operator = request.data.get('operator', 'system')

    if not file_path:
        return Response({
@@ -214,6 +255,9 @@ def sync_to_local(request):
        }, status=status.HTTP_400_BAD_REQUEST)

    scanner = FileScanner()
    audit_logger = AuditLogger()
    start_time = time.time()

    try:
        # Fetch the latest version from the database
@@ -228,9 +272,32 @@ def sync_to_local(request):
                'error': 'File not found in database'
            }, status=status.HTTP_404_NOT_FOUND)

        # Get the local hash (if the file exists)
        try:
            local_content, local_hash = scanner.get_file_content(file_path)
        except FileNotFoundError:
            local_hash = None

        # Write the local file
        scanner.write_file(file_path, db_record.content)

        execution_time = time.time() - start_time

        # Write the audit record
        audit_logger.log_sync_action(
            lobster_id=lobster_id,
            file_path=file_path,
            action='sync_to_local',
            old_version=None,
            new_version=db_record.version,
            old_hash=local_hash,
            new_hash=db_record.hash,
            file_size=db_record.size,
            operator=operator,
            status='success',
            execution_time=execution_time
        )

        return Response({
            'success': True,
            'message': 'Synced to local',
@@ -238,6 +305,19 @@ def sync_to_local(request):
        })
    except Exception as e:
        execution_time = time.time() - start_time

        # Write the failure record
        audit_logger.log_sync_action(
            lobster_id=lobster_id,
            file_path=file_path,
            action='sync_to_local',
            operator=operator,
            status='failed',
            error_message=str(e),
            execution_time=execution_time
        )

        return Response({
            'success': False,
            'error': str(e)
@@ -300,4 +380,69 @@ def get_stats(request):
            'total_size': total_size,
            'total_size_mb': round(total_size / 1024 / 1024, 2)
        }
    })
@api_view(['GET'])
def get_history(request):
    """
    Get the operation history
    """
    lobster_id = request.query_params.get('lobster_id', 'daotong')
    file_path = request.query_params.get('file_path')
    action = request.query_params.get('action')
    limit = int(request.query_params.get('limit', 100))

    audit_logger = AuditLogger()
    history = audit_logger.get_history(
        lobster_id=lobster_id,
        file_path=file_path,
        action=action,
        limit=limit
    )

    return Response({
        'success': True,
        'data': history,
        'total': len(history)
    })


@api_view(['GET'])
def get_ignore_patterns(request):
    """
    Get the .lobsterignore pattern list
    """
    lobster_id = request.query_params.get('lobster_id', 'daotong')
    scanner = FileScanner()
    patterns = scanner.ignore.patterns

    return Response({
        'success': True,
        'data': {
            'patterns': patterns,
            'total': len(patterns)
        }
    })


@api_view(['POST'])
def reload_ignore_patterns(request):
    """
    Reload the .lobsterignore patterns
    """
    lobster_id = request.data.get('lobster_id', 'daotong')
    scanner = FileScanner()

    # Re-read the ignore rules
    scanner.ignore.load_patterns()

    return Response({
        'success': True,
        'message': 'Ignore rules reloaded',
        'data': {
            'patterns': scanner.ignore.patterns,
            'total': len(scanner.ignore.patterns)
        }
    })
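# Illustrative roundtrip (not in this commit), assuming the /api/ prefix:
#
#   import requests
#   requests.post('http://localhost:8000/api/ignore/reload/')  # re-read .lobsterignore
#   r = requests.get('http://localhost:8000/api/ignore/patterns/')
#   print(r.json()['data']['total'], 'patterns active')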