- Delete data-entry-app/ (1.6GB), reports-app/ (447MB), .auto-build-data/
- Saved ~1.4GB disk space (64% reduction: 2.2GB → 845MB)
Updated references across 38 files:
- .claude/rules/ paths: backend/modules/, src/modules/
- .claude/commands/validate.md: all validation paths
- docs/ (13 files): data-entry, telegram, README, CLAUDE.md
- scripts/ (3 files): backup-secrets, restore-secrets, test-docker
- security/ (2 files): git_cleanup, SECURITY_PROCEDURES
- deployment/ & shared/: updated all stale comments
All paths now reflect ultrathin monolith architecture:
- Backend: backend/modules/{reports,data_entry,telegram}/
- Frontend: src/modules/{reports,data-entry}/
- Shared: shared/{auth,database,routes}/
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
368 lines
14 KiB
Python
368 lines
14 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
🧹 ROA2WEB Git History Cleanup Tool
|
|
Safely removes secrets from git history using git filter-branch.
|
|
|
|
⚠️ WARNING: This tool rewrites git history. Make sure to:
|
|
1. Create a complete backup of your repository
|
|
2. Coordinate with all team members
|
|
3. Force-push to all remotes after cleanup
|
|
4. Regenerate all compromised credentials
|
|
|
|
Usage:
|
|
python security/git_cleanup.py --backup --scan --cleanup [--force]
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import subprocess
|
|
import argparse
|
|
import shutil
|
|
import json
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
from typing import List, Dict
|
|
|
|
class GitHistoryCleanup:
    """Git history cleanup and secrets removal tool.

    Drives ``git filter-branch`` to drop sensitive files and replace secret
    strings in every ref of the repository, then expires reflogs and runs
    garbage collection so the old objects become unreachable.
    """

    # Suffix for the in-place backups sed creates during the tree filter.
    # It is deliberately unusual so that removing the backups afterwards
    # cannot delete a legitimately tracked ``*.bak`` file.
    _SED_BACKUP_SUFFIX = ".roa2web-sedbak"

    def __init__(self, repo_path: str = "."):
        """Set up the cleaner for the repository at *repo_path*.

        Args:
            repo_path: Path to the working tree of the repository to clean.
        """
        self.repo_path = Path(repo_path).resolve()
        self.backup_path = None
        self.cleanup_log = []

        # Files and patterns to remove from history
        self.FILES_TO_REMOVE = [
            "app/.env",
            "roa2web/backend/.env",
            "roa2web/.env",
            "roa2web/.env.development",
            "roa2web/.env.production",
            "roa2web/ssh-tunnel/roa_oracle_server"
        ]

        # Text patterns to replace in history
        self.SECRETS_TO_REPLACE = {
            "ACTUAL_ORACLE_PASS": "***REMOVED***",
            "ACTUAL_USER_PASS": "***REMOVED***",
            "DB_PASSWORD=ACTUAL_ORACLE_PASS": "DB_PASSWORD=***REMOVED***",
            '"marius": "ACTUAL_USER_PASS"': '"marius": "***REMOVED***"',
            '"eli": "eli"': '"eli": "***REMOVED***"'
        }

    def log_action(self, action: str, details: str = "") -> None:
        """Record an action in ``self.cleanup_log`` and echo it to stdout.

        Args:
            action: Short description of what happened.
            details: Optional extra information printed on a second line.
        """
        timestamp = datetime.now().isoformat()
        log_entry = {
            "timestamp": timestamp,
            "action": action,
            "details": details
        }
        self.cleanup_log.append(log_entry)
        print(f"📝 {timestamp}: {action}")
        if details:
            print(f" Details: {details}")

    def check_prerequisites(self) -> bool:
        """Check that git is installed and ``repo_path`` is a git repository.

        Returns:
            True when both ``git --version`` and ``git status`` succeed.
        """
        try:
            # Check git
            subprocess.run(['git', '--version'], check=True, capture_output=True)

            # Check if we're in a git repo
            subprocess.run(['git', 'status'], cwd=self.repo_path, check=True, capture_output=True)

            self.log_action("Prerequisites check passed")
            return True

        except (subprocess.CalledProcessError, FileNotFoundError):
            print("❌ Error: Git not available or not in a git repository")
            return False

    def create_backup(self) -> bool:
        """Create a timestamped ``git clone --mirror`` next to the repository.

        The destination is stored in ``self.backup_path``.

        Returns:
            True when the clone succeeded, False otherwise.
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        backup_name = f"roa2web_backup_{timestamp}"
        self.backup_path = self.repo_path.parent / backup_name

        try:
            print(f"💾 Creating backup at: {self.backup_path}")

            # Use git clone to create a complete backup with all history
            subprocess.run([
                'git', 'clone', '--mirror',
                str(self.repo_path),
                str(self.backup_path)
            ], check=True)

            self.log_action("Backup created", str(self.backup_path))
            print(f"✅ Backup created successfully: {self.backup_path}")
            return True

        except subprocess.CalledProcessError as e:
            print(f"❌ Backup failed: {e}")
            return False

    def _git_log(self, *args: str) -> List[str]:
        """Run ``git log --all --oneline`` with extra *args* and return its lines.

        ``--all`` is used because the rewrite itself operates on every ref;
        looking only at the current branch would miss secrets elsewhere.

        Raises:
            subprocess.CalledProcessError: if git exits with a non-zero status.
        """
        result = subprocess.run(
            ['git', 'log', '--all', '--oneline', *args],
            cwd=self.repo_path, capture_output=True, text=True, check=True
        )
        return result.stdout.strip().splitlines()

    def scan_for_secrets(self) -> Dict:
        """Scan the history of all refs for the configured files and patterns.

        Returns:
            A dict with the keys ``files_with_secrets`` (paths found in
            history), ``commits_with_secrets`` (``--oneline`` entries) and
            ``patterns_found`` (pattern -> number of matching commits). If
            git fails part-way, the results gathered so far are returned.
        """
        print("🔍 Scanning for secrets in git history...")

        secrets_found = {
            "files_with_secrets": [],
            "commits_with_secrets": [],
            "patterns_found": {}
        }

        try:
            # Check if files exist in git history
            for file_path in self.FILES_TO_REMOVE:
                if self._git_log('--', file_path):
                    secrets_found["files_with_secrets"].append(file_path)
                    print(f" 📄 Found in history: {file_path}")

            # Check for secret patterns in git log
            for secret_pattern in self.SECRETS_TO_REPLACE:
                # Joined -S<string> form: the pattern can never be mistaken
                # for a separate option, whatever characters it starts with.
                commits = self._git_log(f'-S{secret_pattern}')
                if commits:
                    secrets_found["patterns_found"][secret_pattern] = len(commits)
                    secrets_found["commits_with_secrets"].extend(commits)
                    print(f" 🔑 Pattern '{secret_pattern}' found in {len(commits)} commits")

            self.log_action("Secrets scan completed", json.dumps(secrets_found, indent=2))
            return secrets_found

        except subprocess.CalledProcessError as e:
            print(f"❌ Scan failed: {e}")
            return secrets_found

    def _build_index_filter(self) -> str:
        """Return the shell command used as ``--index-filter``.

        All configured paths are removed in one command and each path is
        shell-quoted, because filter-branch evaluates the filter with ``sh``.
        """
        import shlex

        quoted_paths = " ".join(shlex.quote(path) for path in self.FILES_TO_REMOVE)
        return f"git rm --cached --ignore-unmatch -- {quoted_paths}"

    def remove_files_from_history(self) -> bool:
        """Remove the configured sensitive files from the history of all refs.

        Uses a single ``git filter-branch --index-filter`` pass for every
        path instead of one full history rewrite per file.

        Returns:
            True on success (or when there is nothing to remove), False if
            filter-branch failed.
        """
        print("🧹 Removing sensitive files from git history...")

        if not self.FILES_TO_REMOVE:
            return True

        try:
            for file_path in self.FILES_TO_REMOVE:
                print(f" Removing: {file_path}")

            # Use git filter-branch to remove the files from history
            subprocess.run([
                'git', 'filter-branch', '--force', '--index-filter',
                self._build_index_filter(),
                '--prune-empty', '--tag-name-filter', 'cat', '--', '--all'
            ], cwd=self.repo_path, check=True)

            for file_path in self.FILES_TO_REMOVE:
                self.log_action("Removed file from history", file_path)

            return True

        except subprocess.CalledProcessError as e:
            print(f"❌ File removal failed: {e}")
            return False

    @staticmethod
    def _sed_escape_pattern(text: str) -> str:
        """Escape *text* so sed matches it literally as a basic regex.

        Only the characters that are special in a BRE, plus the ``/``
        delimiter, need a backslash.
        """
        return "".join("\\" + ch if ch in "\\/.*[^$" else ch for ch in text)

    @staticmethod
    def _sed_escape_replacement(text: str) -> str:
        """Escape *text* for use as the replacement part of ``s/…/…/``."""
        return "".join("\\" + ch if ch in "\\/&" else ch for ch in text)

    def _build_sed_script(self) -> str:
        """Return a sed script with one ``s///g`` rule per configured secret.

        Longer secrets come first so that a composite pattern is matched
        before one of its substrings rewrites part of it.
        NOTE: secrets containing a newline are not supported by this format.
        """
        ordered = sorted(
            self.SECRETS_TO_REPLACE.items(),
            key=lambda item: len(item[0]),
            reverse=True,
        )
        return "".join(
            "s/{}/{}/g\n".format(
                self._sed_escape_pattern(secret),
                self._sed_escape_replacement(replacement),
            )
            for secret, replacement in ordered
        )

    def _build_tree_filter(self, script_path) -> str:
        """Return the POSIX-sh command used as ``--tree-filter``.

        Args:
            script_path: Location of the sed script written by the caller.
        """
        import shlex

        suffix = self._SED_BACKUP_SUFFIX
        script = shlex.quote(str(script_path))
        # LC_ALL=C makes sed treat files as bytes, so binary or oddly encoded
        # files do not abort the run. The second find removes sed's backups;
        # otherwise --tree-filter would commit them. The commands are joined
        # with ';' so the per-commit edit stays best-effort (as before);
        # verify_cleanup() reports anything that was left behind.
        return (
            f"LC_ALL=C find . -type f -exec sed -i{suffix} -f {script} {{}} + ; "
            f"find . -type f -name '*{suffix}' -exec rm -f {{}} +"
        )

    def replace_secrets_in_history(self) -> bool:
        """Replace every configured secret string in the history of all refs.

        The sed rules are written to a private temporary file outside the
        working tree (so they are never picked up by the rewrite) and the
        file is removed again afterwards.

        Returns:
            True on success (or when there is nothing to replace), False if
            filter-branch failed.
        """
        import tempfile

        print("🔄 Replacing secrets in git history...")

        if not self.SECRETS_TO_REPLACE:
            return True

        # mkstemp creates the file readable by the current user only, which
        # matters because it contains the secrets in clear text.
        fd, script_name = tempfile.mkstemp(prefix="roa2web_secrets_", suffix=".sed")
        script_path = Path(script_name)

        try:
            with os.fdopen(fd, 'w', encoding='utf-8') as f:
                f.write(self._build_sed_script())

            # Use git filter-branch with replace text
            subprocess.run([
                'git', 'filter-branch', '--force', '--tree-filter',
                self._build_tree_filter(script_path),
                '--prune-empty', '--tag-name-filter', 'cat', '--', '--all'
            ], cwd=self.repo_path, check=True)

            self.log_action("Secrets replaced in history")
            return True

        except subprocess.CalledProcessError as e:
            print(f"❌ Secret replacement failed: {e}")
            return False

        finally:
            # Clean up temporary file
            if script_path.exists():
                script_path.unlink()

    def cleanup_git_refs(self) -> bool:
        """Delete filter-branch backup refs, expire reflogs and run ``git gc``.

        Returns:
            True when every git command succeeded, False otherwise.
        """
        print("🗑️ Cleaning up git references...")

        try:
            # List the backup refs created by filter-branch as "delete <ref>"
            # commands...
            backup_refs = subprocess.run([
                'git', 'for-each-ref', '--format=delete %(refname)', 'refs/original'
            ], cwd=self.repo_path, capture_output=True, text=True, check=True)

            # ...and actually apply them. Without this step refs/original
            # keeps the pre-rewrite commits reachable and gc prunes nothing.
            if backup_refs.stdout.strip():
                subprocess.run(
                    ['git', 'update-ref', '--stdin'],
                    cwd=self.repo_path, input=backup_refs.stdout,
                    text=True, check=True
                )

            # Expire reflog
            subprocess.run([
                'git', 'reflog', 'expire', '--expire=now', '--all'
            ], cwd=self.repo_path, check=True)

            # Garbage collect
            subprocess.run([
                'git', 'gc', '--prune=now', '--aggressive'
            ], cwd=self.repo_path, check=True)

            self.log_action("Git cleanup completed")
            return True

        except subprocess.CalledProcessError as e:
            print(f"❌ Git cleanup failed: {e}")
            return False

    def verify_cleanup(self) -> bool:
        """Verify that no configured file or secret remains in any ref.

        Returns:
            True when nothing was found; False when something is still
            present or when git itself failed (a failed check is never
            reported as a clean history).
        """
        print("🔍 Verifying cleanup...")

        try:
            # Check if files are still in history
            files_still_present = [
                file_path for file_path in self.FILES_TO_REMOVE
                if self._git_log('--', file_path)
            ]

            # Check if secrets are still in history
            secrets_still_present = [
                secret_pattern for secret_pattern in self.SECRETS_TO_REPLACE
                if self._git_log(f'-S{secret_pattern}')
            ]

        except subprocess.CalledProcessError as e:
            print(f"❌ Verification failed: {e}")
            return False

        if not files_still_present and not secrets_still_present:
            print("✅ Cleanup verification passed!")
            self.log_action("Cleanup verification passed")
            return True

        print("⚠️ Cleanup verification failed:")
        if files_still_present:
            print(f" Files still present: {files_still_present}")
        if secrets_still_present:
            print(f" Secrets still present: {secrets_still_present}")
        return False

    def save_cleanup_log(self) -> None:
        """Write the collected actions to a timestamped JSON file in the repo."""
        log_file = self.repo_path / f"security_cleanup_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"

        # NOTE(review): "secrets_replaced" (and the scan details stored in
        # cleanup_log) contain the secret strings in clear text, and this
        # file lands inside the working tree. Make sure it is never
        # committed, or consider redacting these fields.
        with open(log_file, 'w') as f:
            json.dump({
                "cleanup_timestamp": datetime.now().isoformat(),
                "repository_path": str(self.repo_path),
                "backup_path": str(self.backup_path) if self.backup_path else None,
                "files_removed": self.FILES_TO_REMOVE,
                "secrets_replaced": self.SECRETS_TO_REPLACE,
                "actions": self.cleanup_log
            }, f, indent=2)

        print(f"📝 Cleanup log saved: {log_file}")

    def run_full_cleanup(self, force: bool = False) -> bool:
        """Run the complete cleanup: backup, scan, rewrite, gc, verify, log.

        Args:
            force: Skip the interactive confirmation prompt when True.

        Returns:
            False when the user cancels or a mandatory step fails; True when
            the process ran to the end or no secrets were found. A failed
            verification only prints a warning and does not change the result.
        """
        print("🚀 Starting ROA2WEB Git History Cleanup")
        print("="*60)

        if not force:
            print("\n⚠️ WARNING: This will rewrite git history!")
            print("Make sure you have:")
            print("1. ✅ Created a backup")
            print("2. ✅ Coordinated with team members")
            print("3. ✅ Are ready to regenerate credentials")

            confirm = input("\nProceed with cleanup? (yes/NO): ")
            if confirm.lower() != 'yes':
                print("❌ Cleanup cancelled")
                return False

        # Check prerequisites
        if not self.check_prerequisites():
            return False

        # Create backup
        if not self.create_backup():
            return False

        # Scan for secrets
        secrets_found = self.scan_for_secrets()
        if not secrets_found["files_with_secrets"] and not secrets_found["patterns_found"]:
            print("✅ No secrets found in git history")
            return True

        # Remove files from history
        if not self.remove_files_from_history():
            return False

        # Replace secrets in history
        if not self.replace_secrets_in_history():
            return False

        # Cleanup git references
        if not self.cleanup_git_refs():
            return False

        # Verify cleanup
        if not self.verify_cleanup():
            print("⚠️ Cleanup may not be complete. Check manually.")

        # Save log
        self.save_cleanup_log()

        print("\n✅ Git history cleanup completed!")
        print("\n🔧 NEXT STEPS:")
        print("1. 🔑 Regenerate all compromised credentials")
        print("2. 🚀 Force push to all remotes: git push --force-with-lease --all")
        print("3. 📢 Notify team members to re-clone repository")
        print("4. 🗑️ Delete old backup when confident: rm -rf", self.backup_path)

        return True
|
|
|
|
def main():
    """Command-line entry point for the git history cleanup tool.

    ``--cleanup`` runs the full process, which already includes the backup
    and the scan, so it takes precedence when combined with the other
    flags (as in the documented ``--backup --scan --cleanup`` invocation).
    Without ``--cleanup``, ``--backup`` and ``--scan`` may be given alone or
    together and are executed in that order. With no action flag the help
    text is printed.

    Exits with status 1 when the cleanup or the backup fails.
    """
    parser = argparse.ArgumentParser(description="ROA2WEB Git History Cleanup")
    parser.add_argument('--backup', action='store_true', help='Create backup only')
    parser.add_argument('--scan', action='store_true', help='Scan for secrets only')
    parser.add_argument('--cleanup', action='store_true', help='Run full cleanup')
    parser.add_argument('--force', action='store_true', help='Skip confirmation prompts')
    parser.add_argument('--repo-path', default='.', help='Repository path')

    args = parser.parse_args()

    cleaner = GitHistoryCleanup(args.repo_path)

    if args.cleanup:
        success = cleaner.run_full_cleanup(args.force)
        sys.exit(0 if success else 1)

    if not (args.backup or args.scan):
        parser.print_help()
        return

    # A failed backup must be visible to calling scripts via the exit code.
    if args.backup and not cleaner.create_backup():
        sys.exit(1)

    if args.scan:
        cleaner.scan_for_secrets()


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()