# app/utils.py
import datetime
import datetime as dt
import glob
import hashlib
import json
import math
import mimetypes
import os
import pathlib
import shutil
import struct
from functools import lru_cache

import pefile
import psutil
from flask import render_template
from oletools.olevba import VBA_Parser
from werkzeug.utils import secure_filename

from .analyzers.static.lnk_parser import LnkForensics

# Known runtime-support imports per compiler toolchain. analyze_pe_imports consults
# this table so that imports pulled in by the Go or Rust runtime are flagged as
# expected noise rather than attacker-chosen APIs.
RUNTIME_IMPORTS = {
'go': {
'kernel32.dll': {
'addvectoredexceptionhandler',
'closehandle',
'createeventa',
'createfilea',
'createiocompletionport',
'createthread',
'createwaitabletimerexw',
'duplicatehandle',
'exitprocess',
'freeenvironmentstringsw',
'getconsolemode',
'getenvironmentstringsw',
'getprocaddress',
'getprocessaffinitymask',
'getqueuedcompletionstatusex',
'getstdhandle',
'getsystemdirectorya',
'getsysteminfo',
'getthreadcontext',
'loadlibrarya',
'loadlibraryw',
'postqueuedcompletionstatus',
'resumethread',
'setconsolectrlhandler',
'seterrormode',
'setevent',
'setprocesspriorityboost',
'setthreadcontext',
'setunhandledexceptionfilter',
'setwaitabletimer',
'suspendthread',
'switchtothread',
'virtualalloc',
'virtualfree',
'virtualquery',
'waitformultipleobjects',
'waitforsingleobject',
'writeconsolew',
'writefile'
}
},
'rust': {
'kernel32.dll': {
'addvectoredexceptionhandler',
'closehandle',
'createmutexa',
'formatmessagew',
'getconsolemode',
'getcurrentdirectoryw',
'getcurrentprocess',
'getcurrentprocessid',
'getcurrentthread',
'getcurrentthreadid',
'getenvironmentvariablew',
'getlasterror',
'getmodulehandlea',
'getmodulehandlew',
'getprocaddress',
'getprocessheap',
'getstdhandle',
'getsystemtimeasfiletime',
'heapalloc',
'heapfree',
'heaprealloc',
'initializeslisthead',
'isdebuggerpresent',
'isprocessorfeaturepresent',
            'loadlibrarya',
            'lstrlenw',
            'multibytetowidechar',
            'queryperformancecounter',
            'releasemutex',
            'rtlcapturecontext',
            'rtllookupfunctionentry',
            'rtlvirtualunwind',
            'setlasterror',
            'setthreadstackguarantee',
            'setunhandledexceptionfilter',
            'unhandledexceptionfilter',
            'waitforsingleobject',
            'waitforsingleobjectex',
            'widechartomultibyte',
            'writeconsolew',
},
'ntdll.dll': {
'ntwritefile',
'rtlntstatustodoserror'
}
}
}
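
# A minimal sketch (illustrative, not executed) of how this table is meant to be
# consulted: an import only counts as runtime noise when both the DLL and the
# lowercase function name appear under the detected toolchain.
#
#     runtime_dlls = RUNTIME_IMPORTS.get('go', {})
#     'virtualalloc' in runtime_dlls.get('kernel32.dll', set())        # -> True
#     'createremotethread' in runtime_dlls.get('kernel32.dll', set())  # -> False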
class FileTypeDetector:
"""Centralized file type detection with magic bytes and structure analysis"""
# Magic byte signatures
MZ = b"MZ" # PE files
CFBF = b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" # Compound File (old Office)
ZIP_PK = b"PK\x03\x04" # ZIP (OOXML, ODT, etc.)
    LNK_HEADER = b"\x4C\x00\x00\x00"  # LNK files (76-byte header: first DWORD is HeaderSize = 0x4C)
# PE machines (architectures)
PE_MACHINES = {0x14c: "x86", 0x8664: "x64", 0x1c0: "ARM", 0xaa64: "ARM64"}
@classmethod
def detect_file_type(cls, filepath):
"""Detect file type based on magic bytes and internal structure"""
try:
p = pathlib.Path(filepath)
with p.open('rb') as fp:
header = fp.read(20)
if header.startswith(cls.MZ):
return cls._detect_pe_type(p)
elif header.startswith(cls.CFBF):
return cls._detect_ole_type(filepath)
elif header.startswith(cls.ZIP_PK):
return cls._detect_zip_type(filepath)
elif header.startswith(cls.LNK_HEADER):
return cls._detect_lnk_type(filepath)
return {"family": "unknown", "type": "unknown"}
except Exception as e:
return {"family": "error", "type": str(e)}
@classmethod
def _detect_lnk_type(cls, filepath):
"""Detect LNK file type"""
try:
with open(filepath, 'rb') as f:
header = f.read(76)
# Verify LNK GUID at offset 4
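            # The bytes below are the ShellLink CLSID {00021401-0000-0000-C000-000000000046}
            # in its on-disk (little-endian) layout, per MS-SHLLINK.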
lnk_guid = b"\x01\x14\x02\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46"
if len(header) >= 20 and header[4:20] == lnk_guid:
return {"family": "lnk", "type": "windows_shortcut"}
else:
return {"family": "lnk", "type": "invalid"}
except Exception:
return {"family": "lnk", "type": "error"}
@classmethod
def _detect_pe_type(cls, path):
"""Detect PE file type and architecture"""
try:
with path.open('rb') as fp:
fp.seek(0x3C)
pe_offset = struct.unpack('<I', fp.read(4))[0]
fp.seek(pe_offset)
if fp.read(4) != b'PE\x00\x00':
return {"family": "pe", "type": "corrupted"}
machine, _, _, _, _, opt_header_size, characteristics = struct.unpack('<HHIIIHH', fp.read(20))
opt_header = fp.read(opt_header_size)
if len(opt_header) < 70:
return {"family": "pe", "type": "corrupted"}
subsystem = struct.unpack_from('<H', opt_header, 68)[0]
is_dll = bool(characteristics & 0x2000) # IMAGE_FILE_DLL
is_system = bool(characteristics & 0x1000) # IMAGE_FILE_SYSTEM
                is_driver = is_system or subsystem in (1, 11, 12)  # NATIVE, EFI_BOOT_SERVICE_DRIVER, EFI_RUNTIME_DRIVER
arch = cls.PE_MACHINES.get(machine, f"0x{machine:x}")
if is_driver:
return {"family": "pe", "type": "sys", "arch": arch}
elif is_dll:
return {"family": "pe", "type": "dll", "arch": arch}
else:
return {"family": "pe", "type": "exe", "arch": arch}
except Exception:
return {"family": "pe", "type": "corrupted"}
@classmethod
def _detect_ole_type(cls, filepath):
"""Detect OLE/Compound File type"""
try:
import olefile
if not olefile.isOleFile(filepath):
return {"family": "office", "type": "invalid"}
with olefile.OleFileIO(filepath) as ole:
streams = {entry[0].lower() for entry in ole.listdir()}
office_types = {
"worddocument": "doc",
"workbook": "xls",
"book": "xls",
"powerpoint document": "ppt",
"visio document": "vsd",
"outlinecache": "one"
}
for stream, file_type in office_types.items():
if stream in streams:
return {"family": "office", "type": file_type}
return {"family": "office", "type": "ole-unknown"}
except ImportError:
return {"family": "office", "type": "ole-storage"}
except Exception:
return {"family": "office", "type": "corrupted"}
@classmethod
def _detect_zip_type(cls, filepath):
"""Detect ZIP-based file types"""
try:
import zipfile
with zipfile.ZipFile(filepath) as z:
names = {n.lower() for n in z.namelist()}
# Office Open XML formats
if "[content_types].xml" in names:
ooxml_types = {
"word/document.xml": "docx",
"xl/workbook.xml": "xlsx",
"ppt/presentation.xml": "pptx",
"visio/document.xml": "vsdx"
}
for path, file_type in ooxml_types.items():
if path in names:
return {"family": "office", "type": file_type}
return {"family": "office", "type": "ooxml-unknown"}
# OpenDocument formats
if "mimetype" in names:
try:
with z.open("mimetype") as f:
mimetype = f.read().decode('utf-8').strip()
odt_types = {
"opendocument.text": "odt",
"opendocument.spreadsheet": "ods",
"opendocument.presentation": "odp"
}
for mime_part, file_type in odt_types.items():
if mime_part in mimetype:
return {"family": "office", "type": file_type}
                    except Exception:
                        pass
return {"family": "zip", "type": "zip"}
except zipfile.BadZipFile:
return {"family": "zip", "type": "corrupted"}
except Exception:
return {"family": "zip", "type": "error"}
class SecurityAnalyzer:
"""Centralized security analysis for PE files and Office documents"""
def __init__(self, malapi_path):
self.malapi_data = self._load_malapi_data(malapi_path)
self.dll_function_map = self._build_function_map()
def _load_malapi_data(self, malapi_path):
"""Load MalAPI data with error handling"""
try:
with open(malapi_path, "r", encoding="utf-8") as f:
                return json.load(f)
except Exception as e:
print(f"Error loading MalAPI database: {e}")
return {}
def _build_function_map(self):
"""Build optimized lookup dictionary for API functions"""
dll_function_map = {}
for category, functions in self.malapi_data.items():
for function_name, function_info in functions.items():
if isinstance(function_info, dict):
description = function_info.get("description", "")
dll_name = function_info.get("dll", "Unknown").lower()
else:
description = function_info
dll_name = "unknown"
if dll_name not in dll_function_map:
dll_function_map[dll_name] = {}
dll_function_map[dll_name][function_name.lower()] = (category, description)
if "unknown" not in dll_function_map:
dll_function_map["unknown"] = {}
dll_function_map["unknown"][function_name.lower()] = (category, description)
return dll_function_map
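    # Resulting shape (illustrative entries; actual content comes from the MalAPI
    # database, and every function is mirrored under 'unknown' as a fallback):
    #   {'kernel32.dll': {'createremotethread': ('Injection', '...'), ...},
    #    'unknown':      {'createremotethread': ('Injection', '...'), ...}}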
def _detect_runtime_type(self, pe):
"""Detect if PE is built with Go, Rust, or neither - returns runtime type"""
try:
# Check for Rust indicators first
rust_indicators = [
b'rustc',
b'rust_begin_unwind',
b'rust_panic',
b'rust_oom',
b'__rust_',
b'.rustc_info',
b'cargo',
b'rustup'
]
            # Scan all section data for any Rust indicator
            rust_found = False
for section in pe.sections:
try:
section_data = section.get_data()
for rust_indicator in rust_indicators:
if rust_indicator in section_data:
rust_found = True
break
if rust_found:
break
except Exception:
continue
if rust_found:
return "rust"
# Check for Go indicators
# Go-specific section names (highest confidence)
go_sections = ['.go.buildinfo', '.go.plt']
for section in pe.sections:
                section_name = section.Name.decode(errors='ignore').rstrip('\x00')
if section_name in go_sections:
return "go"
# Look for highly Go-specific strings
high_confidence_indicators = [
b'go.buildinfo', # Go build info section content
b'runtime.main', # Go's main runtime function
b'runtime.goexit', # Go's goroutine exit function
b'runtime.newproc', # Go's process creation
b'runtime.mallocgc', # Go's garbage collector malloc
b'go.string.', # Go string type prefix
b'go.func.', # Go function type prefix
b'go.itab.', # Go interface table prefix
b'go.mod', # Go module information
b'runtime.systemstack', # Go system stack function
b'go:linkname', # Go linkname directive
b'go:nosplit', # Go nosplit directive
b'go:noescape', # Go noescape directive
b'runtime.schedt', # Go scheduler type
b'runtime.g', # Go goroutine type
b'runtime.m' # Go machine type
]
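            # Note: short prefixes such as b'runtime.g' and b'runtime.m' also match
            # longer names like runtime.goexit or runtime.mallocgc, so a single Go
            # binary typically trips several indicators at once.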
# Count how many highly specific Go indicators we find
go_indicator_count = 0
for section in pe.sections:
try:
section_data = section.get_data()
for indicator in high_confidence_indicators:
if indicator in section_data:
go_indicator_count += 1
# If we find multiple highly specific indicators, it's very likely Go
if go_indicator_count >= 2:
return "go"
except Exception:
continue
# No runtime detected
return None
except Exception:
return None
def analyze_pe_imports(self, pe):
"""Analyze PE imports for suspicious behavior - using global RUNTIME_IMPORTS"""
suspicious_imports = []
build_with = self._detect_runtime_type(pe)
if not hasattr(pe, 'DIRECTORY_ENTRY_IMPORT'):
return suspicious_imports, build_with
        for entry in pe.DIRECTORY_ENTRY_IMPORT:
            if not entry.dll:
                continue
            dll_name = entry.dll.decode().lower()
for imp in entry.imports:
if not imp.name:
continue
func_name = imp.name.decode().lower()
# Check specific DLL first, then fallback to unknown
for lookup_dll in [dll_name, "unknown"]:
if lookup_dll in self.dll_function_map and func_name in self.dll_function_map[lookup_dll]:
category, description = self.dll_function_map[lookup_dll][func_name]
# Extract hint value with runtime-specific logic
hint_value = None
if hasattr(imp, 'import_by_ordinal') and imp.import_by_ordinal:
hint_value = imp.ordinal if hasattr(imp, 'ordinal') and imp.ordinal is not None else None
else:
if hasattr(imp, 'hint') and imp.hint is not None:
if build_with in ['go', 'rust'] and imp.hint == 0:
hint_value = None
else:
hint_value = imp.hint
# Determine if this is actually a runtime import using global RUNTIME_IMPORTS
is_runtime_import = False
if build_with and build_with in RUNTIME_IMPORTS:
runtime_dlls = RUNTIME_IMPORTS[build_with]
is_runtime_import = (
dll_name in runtime_dlls and
func_name in runtime_dlls[dll_name]
)
suspicious_imports.append({
'dll': dll_name,
'function': func_name,
'category': category,
'note': description,
'hint': hint_value,
'is_runtime_import': is_runtime_import,
'runtime_type': build_with if is_runtime_import else None
})
break
return suspicious_imports, build_with
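    # Returned tuple (illustrative): the second element is 'go', 'rust', or None,
    # and each suspicious-import entry looks like:
    #   {'dll': 'kernel32.dll', 'function': 'virtualalloc', 'category': '...',
    #    'note': '...', 'hint': 12, 'is_runtime_import': True, 'runtime_type': 'go'}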
def analyze_pe_sections(self, pe, entropy_calculator):
"""Analyze PE sections with entropy and detection notes"""
sections_info = []
standard_sections = ['.text', '.data', '.bss', '.rdata', '.edata', '.idata', '.pdata', '.reloc', '.rsrc', '.tls', '.debug']
for section in pe.sections:
            section_name = section.Name.decode(errors='ignore').rstrip('\x00')
section_data = section.get_data()
section_entropy = entropy_calculator(section_data)
is_standard = section_name in standard_sections
detection_notes = []
if section_entropy > 7.2:
detection_notes.append('High entropy may trigger detection')
if section_name == '.text' and section_entropy > 7.0:
detection_notes.append('Unusual entropy for code section')
if not is_standard:
detection_notes.append('Non-standard section name - may trigger detection')
sections_info.append({
'name': section_name,
'entropy': section_entropy,
'size': len(section_data),
'characteristics': section.Characteristics,
'is_standard': is_standard,
'detection_notes': detection_notes
})
return sections_info
    def analyze_office_macros(self, filepath):
        """Analyze Office document macros for threats"""
        try:
            vbaparser = VBA_Parser(filepath)
            try:
                detection_notes = []
                has_macros = vbaparser.detect_vba_macros()
                info = {
                    'file_type': 'Microsoft Office Document',
                    'has_macros': has_macros,
                    'macro_info': None,
                    'detection_notes': detection_notes
                }
                if has_macros:
                    macro_analysis = vbaparser.analyze_macros()
                    info['macro_info'] = macro_analysis
                    macro_text = str(macro_analysis).lower()
                    detection_patterns = {
                        'shell': 'Shell command execution detected',
                        'wscript': 'WScript execution detected',
                        'powershell': 'PowerShell execution detected',
                        'http': 'Network communication detected',
                        'auto': 'Auto-execution mechanism detected',
                        'document_open': 'Document open auto-execution',
                        'windowshide': 'Hidden window execution',
                        'createobject': 'COM object creation detected'
                    }
                    for pattern, note in detection_patterns.items():
                        if pattern in macro_text:
                            detection_notes.append(note)
            finally:
                # Always release the parser, even if macro analysis raises
                vbaparser.close()
            return {'office_info': info}
        except Exception as e:
            print(f"Error analyzing Office file: {e}")
            return {'office_info': None}
class RiskCalculator:
"""Centralized risk calculation for both file and process analysis"""
SEVERITY_WEIGHTS = {
'CRITICAL': 100,
'HIGH': 80,
'MEDIUM': 50,
'LOW': 20,
'INFO': 5
}
NUMERIC_SEVERITY_MAP = {
100: 'CRITICAL',
80: 'HIGH',
50: 'MEDIUM',
20: 'LOW',
5: 'INFO'
}
@classmethod
def calculate_yara_risk(cls, matches):
"""Calculate risk based on YARA matches considering severity levels"""
        if not matches:
            return 0, []
max_severity_score = 0
severity_counts = {level: 0 for level in cls.SEVERITY_WEIGHTS}
for match in matches:
meta = match.get('metadata', {})
severity = meta.get('severity', 'MEDIUM')
if isinstance(severity, int):
severity = cls.NUMERIC_SEVERITY_MAP.get(severity, 'MEDIUM')
severity = severity.upper()
if severity in cls.SEVERITY_WEIGHTS:
severity_counts[severity] += 1
max_severity_score = max(max_severity_score, cls.SEVERITY_WEIGHTS[severity])
total_score = 0
risk_factors = []
for severity, count in severity_counts.items():
if count > 0:
severity_score = cls.SEVERITY_WEIGHTS[severity]
if count > 1:
additional_score = sum(severity_score * (0.5 ** i) for i in range(1, count))
total_score += severity_score + additional_score
else:
total_score += severity_score
risk_factors.append(f"Found {count} {severity.lower()} severity YARA match{'es' if count > 1 else ''}")
normalized_score = min(100, total_score / 2)
return normalized_score, risk_factors
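    # Worked example: three HIGH matches score 80 + 80*0.5 + 80*0.25 = 140,
    # normalized to min(100, 140 / 2) = 70.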
@classmethod
def calculate_pe_risk(cls, pe_info):
"""Calculate risk from PE information - updated for multi-runtime support"""
pe_risk = 0
risk_factors = []
# Enhanced entropy detection
high_entropy_sections = 0
very_high_entropy_sections = 0
for section in pe_info.get('sections', []):
entropy = section.get('entropy', 0)
if entropy > 7.5:
very_high_entropy_sections += 1
risk_factors.append(f"Critical entropy in section {section.get('name', 'UNKNOWN')}: {entropy:.2f}")
elif entropy > 7.0:
high_entropy_sections += 1
risk_factors.append(f"High entropy in section {section.get('name', 'UNKNOWN')}: {entropy:.2f}")
pe_risk += min(high_entropy_sections * 10 + very_high_entropy_sections * 20, 40)
# Enhanced import analysis
suspicious_imports = pe_info.get('suspicious_imports', [])
if suspicious_imports:
critical_functions = {
'createremotethread', 'virtualallocex', 'writeprocessmemory',
'ntmapviewofsection', 'zwmapviewofsection'
}
high_risk_functions = {
'loadlibrarya', 'loadlibraryw', 'getprocaddress',
'openprocess', 'virtualallocexnuma'
}
critical_imports = sum(1 for imp in suspicious_imports
if imp.get('function', '').lower() in critical_functions)
high_risk_imports = sum(1 for imp in suspicious_imports
if imp.get('function', '').lower() in high_risk_functions)
pe_risk += min(critical_imports * 15 + high_risk_imports * 8, 30)
if critical_imports > 0 or high_risk_imports > 0:
risk_factors.append(f"Found {critical_imports} critical process manipulation and {high_risk_imports} high-risk dynamic loading imports")
# Enhanced checksum analysis - updated for multi-runtime support
if pe_info.get('checksum_info'):
checksum = pe_info['checksum_info']
if checksum.get('stored_checksum') != checksum.get('calculated_checksum'):
# Don't penalize runtime binaries for checksum mismatches
build_with = checksum.get('build_with')
if build_with not in ['go', 'rust']:
pe_risk += 25
risk_factors.append("PE checksum mismatch detected")
return pe_risk, risk_factors
class Utils:
def __init__(self, config):
self.config = config
self.security_analyzer = SecurityAnalyzer(config['utils']['malapi_path'])
self.file_detector = FileTypeDetector()
@lru_cache(maxsize=128)
def allowed_file(self, filename):
"""Check if the uploaded file has an allowed extension with caching"""
return ('.' in filename and
filename.rsplit('.', 1)[1].lower() in self.config['utils']['allowed_extensions'])
def calculate_entropy(self, data):
"""Calculate Shannon entropy of data with detection insights"""
if len(data) == 0:
return 0
if isinstance(data, str):
data = data.encode()
byte_counts = {}
for byte in data:
byte_counts[byte] = byte_counts.get(byte, 0) + 1
entropy = 0
for count in byte_counts.values():
p_x = count / len(data)
entropy += -p_x * math.log2(p_x)
return round(entropy, 2)
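    # Worked example: calculate_entropy(b'aabb') returns 1.0 (two symbols, equally
    # likely); uniformly random bytes approach the 8.0 maximum.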
def get_pe_info(self, filepath):
"""Enhanced PE file analysis - updated for multi-runtime support"""
try:
pe = pefile.PE(filepath)
suspicious_imports, build_with = self.security_analyzer.analyze_pe_imports(pe)
sections_info = self.security_analyzer.analyze_pe_sections(pe, self.calculate_entropy)
# Check PE Checksum
is_valid_checksum = pe.verify_checksum()
calculated_checksum = pe.generate_checksum()
stored_checksum = pe.OPTIONAL_HEADER.CheckSum
# Create malware category summary
malware_categories = {}
if suspicious_imports:
for imp in suspicious_imports:
category = imp.get('category', 'Unknown')
malware_categories[category] = malware_categories.get(category, 0) + 1
info = {
'file_type': 'PE32+ executable' if pe.PE_TYPE == pefile.OPTIONAL_HEADER_MAGIC_PE_PLUS else 'PE32 executable',
'machine_type': pefile.MACHINE_TYPE.get(pe.FILE_HEADER.Machine, f"UNKNOWN ({pe.FILE_HEADER.Machine})").replace('IMAGE_FILE_MACHINE_', ''),
'compile_time': datetime.datetime.fromtimestamp(pe.FILE_HEADER.TimeDateStamp).strftime('%Y-%m-%d %H:%M:%S'),
'subsystem': pefile.SUBSYSTEM_TYPE.get(pe.OPTIONAL_HEADER.Subsystem, f"UNKNOWN ({pe.OPTIONAL_HEADER.Subsystem})").replace('IMAGE_SUBSYSTEM_', ''),
'entry_point': hex(pe.OPTIONAL_HEADER.AddressOfEntryPoint),
'sections': sections_info,
'imports': list(set(entry.dll.decode() for entry in getattr(pe, 'DIRECTORY_ENTRY_IMPORT', []))),
'suspicious_imports': suspicious_imports,
'malware_categories': malware_categories,
'detection_notes': self._build_pe_detection_notes(is_valid_checksum, suspicious_imports, malware_categories, sections_info, build_with),
                'build_with': build_with,
'checksum_info': {
'is_valid': is_valid_checksum,
'stored_checksum': hex(stored_checksum),
'calculated_checksum': hex(calculated_checksum),
'needs_update': calculated_checksum != stored_checksum,
                    'build_with': build_with
}
}
pe.close()
return {'pe_info': info}
except Exception as e:
print(f"Error analyzing PE file: {e}")
return {'pe_info': None}
def _build_pe_detection_notes(self, is_valid_checksum, suspicious_imports, malware_categories, sections_info, build_with=None):
"""Build detection notes for PE analysis - updated for multi-runtime support"""
detection_notes = []
if not is_valid_checksum:
if build_with == 'go':
detection_notes.append('Go binary with non-standard PE checksum - This is normal for Go binaries')
elif build_with == 'rust':
detection_notes.append('Rust binary with non-standard PE checksum - This is normal for Rust binaries')
else:
detection_notes.append('Invalid PE checksum - Common in modified/packed files (~83% correlation with malware)')
if suspicious_imports:
if build_with == 'go':
detection_notes.append(f'Go binary detected: {len(suspicious_imports)} imports found are typically part of Go runtime - Not necessarily malicious')
elif build_with == 'rust':
detection_notes.append(f'Rust binary detected: {len(suspicious_imports)} imports found are typically part of Rust runtime - Not necessarily malicious')
else:
detection_notes.append(f'Found {len(suspicious_imports)} suspicious API imports - Review import analysis')
for category, count in malware_categories.items():
if build_with == 'go':
detection_notes.append(f'Found {count} imports in category "{category}" (Go runtime related)')
elif build_with == 'rust':
detection_notes.append(f'Found {count} imports in category "{category}" (Rust runtime related)')
else:
detection_notes.append(f'Found {count} suspicious imports in category "{category}"')
# Special detection notes for high-risk categories (only for non-runtime binaries)
if not build_with:
high_risk_categories = {
'Injection': 'WARNING: Process injection capabilities detected',
'Ransomware': 'WARNING: File encryption/ransomware capabilities detected',
'Anti-Debugging': 'WARNING: Anti-analysis techniques detected'
}
for category, warning in high_risk_categories.items():
if category in malware_categories:
detection_notes.append(warning)
if any(section['entropy'] > 7.2 for section in sections_info):
detection_notes.append('High entropy sections detected - Consider entropy reduction techniques')
text_sections = [s for s in sections_info if s['name'] == '.text']
if text_sections and text_sections[0]['entropy'] > 7.0:
detection_notes.append('Packed/encrypted code section may trigger heuristics')
if any(not section['is_standard'] for section in sections_info):
detection_notes.append('Non-standard PE sections detected - May trigger static analysis')
return detection_notes
def get_office_info(self, filepath):
"""Enhanced Office document analysis with detection insights"""
return self.security_analyzer.analyze_office_macros(filepath)
def save_uploaded_file(self, file):
"""Save uploaded file and generate comprehensive file information - updated for multi-runtime"""
file_content = file.read()
file.close()
# Calculate hashes
md5_hash = hashlib.md5(file_content).hexdigest()
sha256_hash = hashlib.sha256(file_content).hexdigest()
# Prepare file paths
        original_filename = secure_filename(file.filename)
        filename = f"{md5_hash}_{original_filename}"
upload_folder = self.config['utils']['upload_folder']
result_folder = self.config['utils']['result_folder']
# Create directories
os.makedirs(upload_folder, exist_ok=True)
filepath = os.path.join(upload_folder, filename)
os.makedirs(result_folder, exist_ok=True)
os.makedirs(os.path.join(result_folder, filename), exist_ok=True)
# Save file
with open(filepath, 'wb') as f:
f.write(file_content)
# Calculate entropy and detect file type
entropy_value = self.calculate_entropy(file_content)
file_type_info = self.file_detector.detect_file_type(filepath)
# Build basic file info
file_info = {
'original_name': original_filename,
'md5': md5_hash,
'sha256': sha256_hash,
'size': len(file_content),
            'extension': file_type_info['type'],  # detected type, not the filename suffix
'mime_type': mimetypes.guess_type(original_filename)[0] or 'application/octet-stream',
'upload_time': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
'entropy': entropy_value,
'entropy_analysis': self._build_entropy_analysis(entropy_value),
'detected_type': file_type_info
}
# Add specific file type information
if file_type_info['family'] == 'pe':
file_info.update(self.get_pe_info(filepath))
# Add risk assessment with new build_with support
if file_info.get('pe_info'):
pe_info = file_info['pe_info']
build_with = pe_info.get('build_with')
# Calculate risk score
risk_score = 0
risk_factors = []
if build_with in ['go', 'rust']:
# Lower risk for runtime binaries
risk_score = 15
risk_factors.append(f"Binary built with {build_with.upper()} - Runtime imports expected")
else:
# Use normal risk calculation for other binaries
pe_risk, pe_factors = RiskCalculator.calculate_pe_risk(pe_info)
risk_score = pe_risk
risk_factors.extend(pe_factors)
                # Determine risk level from the unified thresholds
                risk_level = self.get_risk_level(risk_score)
file_info['risk_assessment'] = {
'score': risk_score,
'level': risk_level,
'factors': risk_factors
}
elif file_type_info['family'] == 'office':
office_result = self.get_office_info(filepath)
if 'error' not in office_result:
file_info.update(office_result)
elif file_type_info['family'] == 'lnk':
lnk_result = self.get_lnk_info(filepath)
if 'error' not in lnk_result:
file_info.update(lnk_result)
# Save file info
with open(os.path.join(result_folder, filename, 'file_info.json'), 'w') as f:
json.dump(file_info, f)
return file_info
def get_lnk_info(self, filepath):
"""Analyze LNK file using LnkForensics module"""
try:
lnk = LnkForensics(filepath)
if not lnk.is_valid():
return {'lnk_info': None}
forensic_data = lnk.get_forensic_data()
return {'lnk_info': forensic_data}
except Exception as e:
print(f"Error analyzing LNK file: {e}")
return {'lnk_info': None}
    def _build_entropy_analysis(self, entropy_value):
        """Build entropy analysis with detection risk assessment"""
analysis = {
'value': entropy_value,
'detection_risk': 'High' if entropy_value > 7.2 else 'Medium' if entropy_value > 6.8 else 'Low',
'notes': []
}
if entropy_value > 7.2:
analysis['notes'].append('High entropy indicates encryption/packing - consider entropy reduction')
elif entropy_value > 6.8:
analysis['notes'].append('Moderate entropy - may trigger basic detection')
return analysis
def detect_file_type(self, filepath):
"""Detect file type based on magic bytes and internal structure"""
return self.file_detector.detect_file_type(filepath)
def find_file_by_hash(self, file_hash, search_folder):
"""Find a file in the specified folder by its hash"""
try:
for filename in os.listdir(search_folder):
if filename.startswith(file_hash):
return os.path.join(search_folder, filename)
except FileNotFoundError:
pass
return None
def check_tool(self, tool_path):
"""Check if a tool is accessible and executable"""
return os.path.isfile(tool_path) and os.access(tool_path, os.X_OK)
def validate_pid(self, pid):
"""Validate if a PID exists and is accessible"""
try:
pid = int(pid)
if pid <= 0:
return False, "Invalid PID: must be a positive integer"
if not psutil.pid_exists(pid):
return False, f"Process with PID {pid} does not exist"
try:
process = psutil.Process(pid)
process.name() # Try to access process name to verify permissions
except (psutil.NoSuchProcess, psutil.AccessDenied) as e:
return False, f"Cannot access process {pid}: {str(e)}"
return True, None
except ValueError:
return False, "Invalid PID: must be a number"
except Exception as e:
return False, f"Error validating PID: {str(e)}"
def get_entropy_risk_level(self, entropy):
"""Determine the risk level based on entropy value"""
if entropy > 7.2:
return 'High'
elif entropy > 6.8:
return 'Medium'
return 'Low'
def format_hex(self, value):
"""Format a value as a hexadecimal string"""
if isinstance(value, str) and value.startswith('0x'):
return value.lower()
try:
return f"0x{int(value):x}"
except (ValueError, TypeError):
return str(value)
def calculate_yara_risk(self, matches):
"""Calculate risk based on YARA matches considering severity levels"""
return RiskCalculator.calculate_yara_risk(matches)
def calculate_risk(self, analysis_type='process', file_info=None, static_results=None, dynamic_results=None, byovd_results=None):
"""Unified risk calculation function that handles file, process, and driver analysis"""
risk_score = 0
risk_factors = []
        # Driver analysis is scored solely from BYOVD results, so handle it first
if analysis_type == 'driver':
if byovd_results:
byovd_risk, byovd_factors = self._calculate_byovd_risk(byovd_results)
# For drivers, use BYOVD score directly (no weighting needed)
risk_factors.extend([f"BYOVD: {factor}" for factor in byovd_factors])
final_score = round(min(max(byovd_risk, 0), 100), 2)
return final_score, risk_factors
else:
return 0, ["No BYOVD analysis available"]
# Define weights for non-driver analysis types
weights = {
'file': {'pe_info': 0.10, 'static': 0.50, 'dynamic': 0.40},
'process': {'dynamic': 1.0}
}[analysis_type]
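        # Worked example (file analysis): pe_risk=40, static_risk=60, dynamic_risk=50
        # combine to 0.10*40 + 0.50*60 + 0.40*50 = 54 before normalization.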
# PE Information Risk Calculation (file analysis only)
if analysis_type == 'file' and file_info and file_info.get('pe_info'):
pe_risk, pe_factors = RiskCalculator.calculate_pe_risk(file_info['pe_info'])
risk_factors.extend(pe_factors)
risk_score += (pe_risk / 100) * weights['pe_info'] * 100
# Static Analysis Risk Calculation (file analysis only)
if analysis_type == 'file' and static_results:
static_risk, static_factors = self._calculate_static_risk(static_results)
risk_factors.extend([f"Static: {factor}" for factor in static_factors])
risk_score += (static_risk / 100) * weights['static'] * 100
# Dynamic Analysis Risk Calculation (file and process analysis)
if analysis_type in ['file', 'process'] and dynamic_results:
dynamic_risk, dynamic_factors = self._calculate_dynamic_risk(dynamic_results, analysis_type)
risk_factors.extend([f"Dynamic: {factor}" for factor in dynamic_factors])
risk_score += (dynamic_risk / 100) * weights['dynamic'] * 100
# Final normalization and scaling
risk_score = self._normalize_risk_score(risk_score, analysis_type, dynamic_results, risk_factors)
return round(min(max(risk_score, 0), 100), 2), risk_factors
def _calculate_byovd_risk(self, byovd_results):
"""Calculate risk based on BYOVD (Bring Your Own Vulnerable Driver) analysis results"""
risk_score = 0
risk_factors = []
if not byovd_results:
return 0, []
# Extract data from BYOVD results structure
findings = byovd_results.get('findings', {})
summary = findings.get('summary', {})
detailed = findings.get('detailed_analysis', {})
# Extract key indicators (matching frontend logic)
is_lol = summary.get('is_loldriver', False)
win10_blocked = summary.get('is_win10_blocked', False)
win11_blocked = summary.get('is_win11_blocked', False)
# Calculate "hasDanger" equivalent from frontend - FIXED to match exactly
critical_imports = detailed.get('critical_imports', '')
has_terminate_process = detailed.get('has_terminate_process', False)
has_communication = detailed.get('has_communication', False)
has_dangerous_imports = detailed.get('has_dangerous_imports', False)
has_danger = bool(
has_dangerous_imports or
(isinstance(critical_imports, str) and critical_imports.strip()) or
has_terminate_process or
has_communication
)
# Apply frontend scoring logic exactly
# Frontend: if (win11 && win10) return 0;
if win11_blocked and win10_blocked:
risk_factors.append("Blocked on both Windows 10 and 11 - minimal exploitation potential")
return 0, risk_factors
        # Mirror the frontend scoring logic:
        # if (hasDanger) score += 55;
if has_danger:
risk_score += 55
danger_factors = []
if has_dangerous_imports:
danger_factors.append("dangerous imports detected")
            if isinstance(critical_imports, str) and critical_imports.strip():
danger_factors.append("critical imports detected")
if has_terminate_process:
danger_factors.append("process termination capability")
if has_communication:
danger_factors.append("communication mechanisms")
if danger_factors:
risk_factors.append(f"Dangerous capabilities: {', '.join(danger_factors)}")
# if (!win11) score += 25; else score -= 50;
if not win11_blocked:
risk_score += 25
risk_factors.append("Not blocked on Windows 11")
else:
risk_score -= 50
risk_factors.append("Blocked on Windows 11")
# if (!win10) score += 20; else score -= 20;
if not win10_blocked:
risk_score += 20
risk_factors.append("Not blocked on Windows 10")
else:
risk_score -= 20
risk_factors.append("Blocked on Windows 10")
# if (!isLol) score += 10; else score -= 5;
if not is_lol:
risk_score += 10
risk_factors.append("Not listed in LOLDrivers database")
else:
risk_score -= 5
risk_factors.append("Listed in LOLDrivers database")
# Frontend: return Math.max(0, Math.min(100, score));
final_score = max(0, min(100, risk_score))
# Add additional context from BYOVD analysis
if detailed.get('win10_block_reason'):
risk_factors.append(f"Win10 block reason: {detailed['win10_block_reason']}")
if detailed.get('win11_block_reason'):
risk_factors.append(f"Win11 block reason: {detailed['win11_block_reason']}")
return final_score, risk_factors
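    # Worked example: dangerous capabilities present, driver unblocked on both
    # Windows versions, and absent from LOLDrivers: 55 + 25 + 20 + 10 = 110,
    # clamped to 100.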
def _calculate_static_risk(self, static_results):
"""Calculate risk from static analysis results"""
static_risk = 0
risk_factors = []
# YARA detection scoring
yara_matches = static_results.get('yara', {}).get('matches', [])
yara_score, yara_factors = self.calculate_yara_risk(yara_matches)
if yara_score > 0:
match_multiplier = min(len(yara_matches) * 0.15 + 1, 1.5)
static_risk += yara_score * match_multiplier
risk_factors.extend(yara_factors)
# CheckPLZ analysis
checkplz_findings = static_results.get('checkplz', {}).get('findings', {})
if checkplz_findings:
threat_score = 0
if checkplz_findings.get('initial_threat'):
threat_score += 50
risk_factors.append("Critical: CheckPLZ detected initial threat indicators")
indicators = checkplz_findings.get('threat_indicators', [])
if indicators:
indicator_score = min(len(indicators) * 15, 40)
threat_score += indicator_score
risk_factors.append(f"Found {len(indicators)} additional threat indicators")
static_risk += threat_score
# File entropy analysis
if static_results.get('file_entropy'):
entropy = static_results['file_entropy']
if entropy > 7.5:
static_risk += 30
risk_factors.append(f"Critical overall file entropy: {entropy:.2f}")
elif entropy > 7.0:
static_risk += 20
risk_factors.append(f"High overall file entropy: {entropy:.2f}")
return static_risk, risk_factors
def _calculate_dynamic_risk(self, dynamic_results, analysis_type):
"""Calculate risk from dynamic analysis results"""
dynamic_risk = 0
risk_factors = []
# YARA dynamic detections
yara_matches = dynamic_results.get('yara', {}).get('matches', [])
yara_score, yara_factors = self.calculate_yara_risk(yara_matches)
if yara_score > 0:
dynamic_risk += yara_score
risk_factors.extend(yara_factors)
# PE-Sieve scoring
pesieve_findings = dynamic_results.get('pe_sieve', {}).get('findings', {})
pesieve_suspicious = int(pesieve_findings.get('total_suspicious', 0))
if pesieve_suspicious > 0:
severity_multiplier = 1.5 if pesieve_findings.get('severity') == 'critical' else 1.0
pe_sieve_score = min(pesieve_suspicious * (20 if analysis_type == 'file' else 15) * severity_multiplier,
45 if analysis_type == 'file' else 30)
dynamic_risk += pe_sieve_score
risk_factors.append(f"PE-Sieve found {pesieve_suspicious} suspicious indicators")
# Memory anomaly detection
dynamic_risk += self._calculate_memory_anomaly_risk(dynamic_results, analysis_type, risk_factors)
# Behavior analysis
dynamic_risk += self._calculate_behavior_risk(dynamic_results, analysis_type, risk_factors)
# HSB detection
dynamic_risk += self._calculate_hsb_risk(dynamic_results, analysis_type, risk_factors)
return dynamic_risk, risk_factors
def _calculate_memory_anomaly_risk(self, dynamic_results, analysis_type, risk_factors):
"""Calculate risk from memory anomalies"""
moneta_findings = dynamic_results.get('moneta', {}).get('findings', {})
if not moneta_findings:
return 0
memory_scores = {
'total_private_rwx': 15 if analysis_type == 'file' else 10,
'total_modified_code': 12 if analysis_type == 'file' else 10,
'total_heap_executable': 10,
'total_modified_pe_header': 10,
'total_private_rx': 8,
'total_inconsistent_x': 8,
'total_missing_peb': 5,
'total_mismatching_peb': 5
}
total_score = 0
anomaly_count = 0
for key, weight in memory_scores.items():
count = int(moneta_findings.get(key, 0) or 0)
if count > 0:
total_score += min(count * weight, weight * 2)
anomaly_count += count
if anomaly_count > 0:
risk_factors.append(f"Found {anomaly_count} weighted memory anomalies")
return min(total_score, 40 if analysis_type == 'file' else 30)
return 0
def _calculate_behavior_risk(self, dynamic_results, analysis_type, risk_factors):
"""Calculate risk from behavioral analysis"""
patriot_findings = dynamic_results.get('patriot', {}).get('findings', {})
if not patriot_findings:
return 0
behaviors = patriot_findings.get('findings', [])
behavior_count = len(behaviors)
if behavior_count == 0:
return 0
severity_scores = {
'critical': 25 if analysis_type == 'file' else 20,
'high': 15,
'medium': 10,
'low': 5
}
behavior_score = 0
for behavior in behaviors:
severity = behavior.get('severity', 'low')
behavior_score += severity_scores.get(severity, 5)
risk_factors.append(f"Found {behavior_count} weighted suspicious behaviors")
return min(behavior_score, 35)
def _calculate_hsb_risk(self, dynamic_results, analysis_type, risk_factors):
"""Calculate risk from HSB detection"""
hsb_findings = dynamic_results.get('hsb', {}).get('findings', {})
if not (hsb_findings and hsb_findings.get('detections')):
return 0
total_hsb_score = 0
for detection in hsb_findings['detections']:
if not detection.get('findings'):
continue
count = len(detection['findings'])
severity = detection.get('max_severity', 0)
if analysis_type == 'file':
severity_multiplier = 1 + (severity * 0.5)
detection_score = min(count * 15 * severity_multiplier, 40)
else:
severity_scores = {0: 10, 1: 15, 2: 20} # LOW, MID, HIGH
max_scores = {0: 20, 1: 25, 2: 35}
detection_score = min(count * severity_scores.get(severity, 10), max_scores.get(severity, 20))
total_hsb_score += detection_score
severity_text = ["LOW", "MID", "HIGH"][min(severity, 2)]
if severity >= 2:
risk_factors.append(f"Critical: Found {count} high-severity memory operations")
else:
risk_factors.append(f"Found {count} {severity_text} severity memory operations")
return min(total_hsb_score, 45 if analysis_type == 'file' else 35)
def _normalize_risk_score(self, risk_score, analysis_type, dynamic_results, risk_factors):
"""Normalize and apply final scaling to risk score"""
if analysis_type == 'file':
base_score = min(max(risk_score, 0), 100)
if base_score > 75:
risk_score = min(base_score * 1.15, 100)
else: # process
yara_matches = dynamic_results.get('yara', {}).get('matches', []) if dynamic_results else []
pesieve_findings = dynamic_results.get('pe_sieve', {}).get('findings', {}) if dynamic_results else {}
pesieve_suspicious = int(pesieve_findings.get('total_suspicious', 0))
if len(yara_matches) == 0 and pesieve_suspicious <= 1:
risk_score = min(risk_score, 65)
if all(f.lower().find('high') == -1 for f in risk_factors):
risk_score = min(risk_score, 75)
return risk_score
def get_risk_level(self, risk_score):
"""Convert numerical risk score to categorical risk level"""
if risk_score >= 75:
return "Critical"
elif risk_score >= 50:
return "High"
elif risk_score >= 25:
return "Medium"
else:
return "Low"
def load_json_file(self, filepath):
"""Helper function to safely load JSON files"""
if not os.path.exists(filepath):
return None
try:
with open(filepath, 'r') as f:
return json.load(f)
except Exception as e:
print(f"Error loading JSON file {filepath}: {str(e)}")
return None
def extract_detection_counts(self, results):
"""Extract all detection counts from analysis results"""
counts = {'yara': 0, 'pesieve': 0, 'moneta': 0, 'patriot': 0, 'hsb': 0}
try:
# YARA
yara_matches = results.get('yara', {}).get('matches', [])
counts['yara'] = len({match.get('rule') for match in yara_matches if match.get('rule')}) if isinstance(yara_matches, list) else 0
# PE-sieve
pesieve_findings = results.get('pe_sieve', {}).get('findings', {})
counts['pesieve'] = int(pesieve_findings.get('total_suspicious', 0) or 0)
# Moneta - only count actual suspicious findings
moneta_findings = results.get('moneta', {}).get('findings', {})
non_detection_fields = ['total_regions', 'total_unsigned_modules', 'scan_duration']
counts['moneta'] = sum(
int(moneta_findings.get(key, 0) or 0)
for key in moneta_findings
if key.startswith('total_') and key not in non_detection_fields
)
# Patriot
patriot_findings = results.get('patriot', {}).get('findings', {}).get('findings', [])
counts['patriot'] = len(patriot_findings) if isinstance(patriot_findings, list) else 0
            # HSB - sum findings across every detection entry
            hsb_findings = results.get('hsb', {}).get('findings', {})
            if hsb_findings and hsb_findings.get('detections'):
                counts['hsb'] = sum(len(d.get('findings') or []) for d in hsb_findings['detections'])
except (TypeError, ValueError, IndexError):
pass
return counts
def generate_html_report(self, file_info=None, static_results=None, dynamic_results=None, pid=None):
"""Generate comprehensive HTML report using Jinja2 template"""
is_process_analysis = pid is not None and not file_info
analysis_type = 'process' if is_process_analysis else 'file'
risk_score, risk_factors = self.calculate_risk(
analysis_type=analysis_type,
file_info=file_info,
static_results=static_results,
dynamic_results=dynamic_results
)
risk_level = self.get_risk_level(risk_score)
detections = {}
if static_results or dynamic_results:
detections = self.extract_detection_counts(dynamic_results or static_results)
# Ensure dynamic_results has process_output for template compatibility
if dynamic_results and is_process_analysis:
if 'process_output' not in dynamic_results:
dynamic_results['process_output'] = {
'had_output': False,
'output': '',
'stdout': '',
'stderr': ''
}
return render_template(
"report.html",
generated_on=dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
is_process_analysis=is_process_analysis,
risk_score=risk_score,
risk_level=risk_level,
risk_factors=risk_factors,
detections=detections,
file_info=file_info,
static_results=static_results,
dynamic_results=dynamic_results,
pid=pid,
format_size=self._format_size
)
def _format_size(self, size_bytes):
"""Format file size to human-readable format"""
if size_bytes < 1024:
return f"{size_bytes} bytes"
elif size_bytes < 1024 * 1024:
return f"{size_bytes / 1024:.2f} KB"
elif size_bytes < 1024 * 1024 * 1024:
return f"{size_bytes / (1024 * 1024):.2f} MB"
else:
return f"{size_bytes / (1024 * 1024 * 1024):.2f} GB"