Add HTML smuggling analyzer, surface macro detail, split per-type modules
This commit is contained in:
@@ -98,6 +98,13 @@ All notable changes to this project will be documented in this file.
|
||||
- Elastic YARA rules synced to upstream `d131ea8` (2026-04-30, 686 rules — 684 upstream + Morpes/Torii retained locally after Elastic rotated them out)
|
||||
- YARA-Forge bumped to 0.9.1 (release `20260503`, 2026-05-03) — separate `YARAForge_Extended.yar` pack alongside the Elastic rules
|
||||
|
||||
### File-type analyzers
|
||||
- HTML smuggling analyzer (`app/utils/htmlsmuggle.py`) — pattern set + scoring model ported from RootUp/SmuggleShield. Runs at upload time on `.html` / `.htm` files. Catches in-page payload assembly (atob → Uint8Array → Blob → URL.createObjectURL → `<a download>` click), GWT smuggling artifacts, WebAssembly drop chains, dataset-driven payload tags, and ~80 other regex signatures. Output lands in `file_info.html_smuggle_info`.
|
||||
- Office macro detail surfacing — the existing olevba pipeline now exposes per-module VBA source, autoexec triggers, suspicious keyword hits, and IOCs as structured tables on the upload-result page (was previously only a one-line "5 auto-execution triggers detected" summary).
|
||||
- T1221 Remote Template Injection detection — `_scan_external_relationships` walks every OOXML container's `*.rels` files looking for external `attachedTemplate` / `oleObject` / `subDocument` / `frame` references. Catches Atomic Red Team's `Calculator.docx` (and the wider class) where `has_macros: false` but the malicious VBA lives in a remote `.dotm`.
|
||||
- File-type analyzers split into dedicated modules — `utils/office.py`, `utils/lnk.py`, `utils/htmlsmuggle.py`. `forensics.py` is now strictly PE / MalAPI / entropy. Re-exports preserved through `app/utils/__init__.py` so existing call sites keep working.
|
||||
- `allowed_extensions` expanded to cover macro-enabled Office (`docm`, `dotm`, `xlsm`, `xltm`), legacy formats (CFBF binaries `doc` / `xls`, plus `rtf`), and HTML (`html`, `htm`). Upload page now gates analysis tabs by file family: Office and HTML files only show Static (Dynamic / EDR aren't relevant for Office documents without an Office install on the target host, and HTML files are already covered by the upload-time smuggling analyzer); driver files keep the existing static-driver + HolyGrail flow.
|
||||
|
||||
### Notes
|
||||
- New runtime dependency: `requests==2.32.3`
|
||||
- Whiskers binary not committed — build via `cargo build --release` (see `Whiskers/BUILD.md`)
|
||||
|
||||
+16
-3
@@ -8,13 +8,26 @@ application:
|
||||
|
||||
utils:
|
||||
allowed_extensions:
|
||||
# Executables / loaders / drivers
|
||||
- exe
|
||||
- dll
|
||||
- bin
|
||||
- docx
|
||||
- xlsx
|
||||
- lnk
|
||||
- sys
|
||||
- lnk
|
||||
# Word (OOXML + legacy CFBF)
|
||||
- docx # no macros per spec, but still uploadable for T1221 template injection samples
|
||||
- docm # macro-enabled
|
||||
- dotm # template macro-enabled (T1221 target)
|
||||
- doc # Word 97-2003 -- frequently weaponised with VBA macros
|
||||
- rtf # Rich Text Format -- OLE-embedded payloads, T1203 patterns
|
||||
# Excel (OOXML + legacy CFBF)
|
||||
- xlsx # no macros per spec; still routed through olevba in case of XLM smuggling
|
||||
- xlsm # macro-enabled
|
||||
- xltm # template macro-enabled
|
||||
- xls # Excel 97-2003 -- carries VBA + Excel 4.0 / XLM macros
|
||||
# HTML / HTML Application -- routed through the SmuggleShield-derived static analyzer
|
||||
- html # plain HTML
|
||||
- htm # alternate HTML extension
|
||||
max_file_size: 104857600 # 100MB in bytes
|
||||
upload_folder: "Uploads"
|
||||
result_folder: "Results"
|
||||
|
||||
+322
-24
@@ -64,6 +64,10 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
officeInfo: document.getElementById('officeInfo'),
|
||||
macroStatus: document.getElementById('macroStatus'),
|
||||
macroDetectionNotes: document.getElementById('macroDetectionNotes'),
|
||||
htmlSmuggleInfo: document.getElementById('htmlSmuggleInfo'),
|
||||
smuggleStatus: document.getElementById('smuggleStatus'),
|
||||
smuggleDetectionNotes: document.getElementById('smuggleDetectionNotes'),
|
||||
smuggleInfo: document.getElementById('smuggleInfo'),
|
||||
checksumInfo: document.getElementById('checksumInfo'),
|
||||
checksumStatus: document.getElementById('checksumStatus'),
|
||||
storedChecksum: document.getElementById('storedChecksum'),
|
||||
@@ -193,19 +197,42 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
//
|
||||
// The analysis-mode selector is a single segmented control with one tab
|
||||
// per mode (Static / Dynamic / each EDR profile / HolyGrail). Each tab
|
||||
// is tagged data-family="regular" or "driver"; we only show the family
|
||||
// matching the uploaded file. The first visible tab becomes active.
|
||||
// is tagged with one or more `data-family` values (space-separated) and
|
||||
// only tabs matching the uploaded file's family are shown.
|
||||
//
|
||||
// Four families:
|
||||
// driver -- .sys (-> static-driver, holygrail)
|
||||
// office -- Word / Excel macro-bearing documents (-> static only;
|
||||
// dynamic / EDR don't make sense without an Office install
|
||||
// on the target host -- olevba is the relevant scanner)
|
||||
// html -- .html / .htm (-> static only; SmuggleShield-derived
|
||||
// pattern analyzer runs at upload time as html_smuggle_info)
|
||||
// regular -- everything else (-> all / static / dynamic / edr:*)
|
||||
const DRIVER_EXTS = new Set(['sys']);
|
||||
const OFFICE_EXTS = new Set([
|
||||
'docx', 'docm', 'dotm', 'doc', 'rtf',
|
||||
'xlsx', 'xlsm', 'xltm', 'xls',
|
||||
]);
|
||||
const HTML_EXTS = new Set(['html', 'htm']);
|
||||
|
||||
function updateAnalysisOptions(fileExtension) {
|
||||
isDriverFile = fileExtension.toLowerCase() === 'sys';
|
||||
const family = isDriverFile ? 'driver' : 'regular';
|
||||
const ext = (fileExtension || '').toLowerCase();
|
||||
isDriverFile = DRIVER_EXTS.has(ext);
|
||||
const family = isDriverFile ? 'driver'
|
||||
: OFFICE_EXTS.has(ext) ? 'office'
|
||||
: HTML_EXTS.has(ext) ? 'html'
|
||||
: 'regular';
|
||||
|
||||
const tabs = document.querySelectorAll('#modeTabs .lb-tab');
|
||||
const bodies = document.querySelectorAll('.lb-mode-body');
|
||||
|
||||
// Show only tabs for this file family; active state moves to first.
|
||||
// Show only tabs whose `data-family` list contains this file's family.
|
||||
// Multiple families are space-separated (e.g. `regular office` for the
|
||||
// Static tab, which serves both classes).
|
||||
let firstVisible = null;
|
||||
tabs.forEach(t => {
|
||||
const matches = t.dataset.family === family;
|
||||
const families = (t.dataset.family || '').split(/\s+/);
|
||||
const matches = families.includes(family);
|
||||
t.classList.toggle('hidden', !matches);
|
||||
t.classList.remove('active');
|
||||
if (matches && !firstVisible) firstVisible = t;
|
||||
@@ -245,6 +272,7 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
function renderFileTypeSpecificInfo(fileInfo) {
|
||||
elements.peInfo.classList.add('hidden');
|
||||
elements.officeInfo.classList.add('hidden');
|
||||
if (elements.htmlSmuggleInfo) elements.htmlSmuggleInfo.classList.add('hidden');
|
||||
elements.suspiciousImports.classList.add('hidden');
|
||||
|
||||
if (fileInfo.entropy_analysis) {
|
||||
@@ -359,24 +387,7 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
}
|
||||
else if (fileInfo.office_info) {
|
||||
elements.officeInfo.classList.remove('hidden');
|
||||
const office = fileInfo.office_info;
|
||||
|
||||
elements.macroStatus.className = `px-3 py-1 text-sm rounded-full ${
|
||||
office.has_macros ? 'bg-red-500/8 text-red-300 border border-red-500/22' : 'bg-green-500/8 text-green-300 border border-green-500/22'
|
||||
}`;
|
||||
elements.macroStatus.textContent = office.has_macros ? 'Macros Present' : 'No Macros';
|
||||
|
||||
if (office.detection_notes && office.detection_notes.length > 0) {
|
||||
elements.macroDetectionNotes.innerHTML = office.detection_notes.map(note => `
|
||||
<div class="flex items-center space-x-2">
|
||||
<svg class="w-4 h-4 text-yellow-300" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
|
||||
d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z"/>
|
||||
</svg>
|
||||
<span>${note}</span>
|
||||
</div>
|
||||
`).join('');
|
||||
}
|
||||
renderOfficeInfo(fileInfo.office_info);
|
||||
}
|
||||
else if (fileInfo.lnk_info) {
|
||||
// Show LNK-specific information section
|
||||
@@ -384,6 +395,293 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
lnkInfoSection.classList.remove('hidden');
|
||||
renderLnkInfo(fileInfo.lnk_info);
|
||||
}
|
||||
else if (fileInfo.html_smuggle_info) {
|
||||
const htmlSection = document.getElementById('htmlSmuggleInfo');
|
||||
if (htmlSection) htmlSection.classList.remove('hidden');
|
||||
renderHtmlSmuggleInfo(fileInfo.html_smuggle_info);
|
||||
}
|
||||
}
|
||||
|
||||
// -- Office macro / template-injection rendering --------------------
|
||||
//
|
||||
// Surfaces every non-empty piece of the `office_info` structure:
|
||||
// * Status pill: Macros Present / No Macros
|
||||
// * Detection notes (one-line summaries)
|
||||
// * Autoexec triggers (table: keyword + description)
|
||||
// * Suspicious keywords (table: keyword + description)
|
||||
// * IOCs (table: type + value)
|
||||
// * External refs (table: relationship + target -- T1221 etc.)
|
||||
// * Per-module VBA source code (collapsible <details>)
|
||||
// * Hex / Base64 / VBA strings (collapsible)
|
||||
//
|
||||
// The DOM container (#officeInfo) already exists in upload.html; this
|
||||
// function rewrites #macroDetectionNotes (status notes) and #macroInfo
|
||||
// (detail blocks) every time it runs.
|
||||
// Escape a value for safe interpolation into HTML text or a quoted
// attribute value. `null` / `undefined` become the empty string; any other
// value is stringified first. All five HTML-significant characters are
// replaced with their entity form.
function escapeHtml(s) {
    const entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;',
    };
    return String(s ?? '').replace(/[&<>"']/g, c => entities[c]);
}
|
||||
|
||||
// Map an `office_info` object to a severity bucket for the status pill:
//   'critical' -- macros are present, or any external reference is an
//                 attachedTemplate relationship
//   'medium'   -- some other external reference exists
//   'low'      -- neither of the above
function macroSeverityClass(office) {
    if (office.has_macros) return 'critical';

    const refs = office.external_refs || [];
    const hasRemoteTemplate = refs.some(ref => ref.relationship === 'attachedTemplate');
    if (hasRemoteTemplate) return 'critical';

    return refs.length > 0 ? 'medium' : 'low';
}
|
||||
|
||||
// Build an HTML <table> string.
//   headers -- array of column titles; each one is HTML-escaped here.
//   rows    -- array of rows, each an array of cell strings. Cells are
//              inserted verbatim, so callers must pass already-escaped HTML.
// Returns '' when there are no rows, so empty tables are never emitted.
function renderTable(headers, rows) {
    if (!rows.length) return '';

    const headerCells = headers.map(title => {
        return `<th style="text-align:left;padding:4px 8px;border-bottom:1px solid var(--lb-border);font-size:11px;color:var(--lb-text-dim);text-transform:uppercase;letter-spacing:0.5px;">${escapeHtml(title)}</th>`;
    });

    const bodyRows = rows.map(cells => {
        const tds = cells.map(cell => {
            return `<td style="padding:4px 8px;font-size:12px;vertical-align:top;border-bottom:1px solid rgba(255,255,255,0.04);">${cell}</td>`;
        });
        return `<tr>${tds.join('')}</tr>`;
    });

    return `<table style="width:100%;border-collapse:collapse;margin:6px 0 12px 0;"><thead><tr>${headerCells.join('')}</tr></thead><tbody>${bodyRows.join('')}</tbody></table>`;
}
|
||||
|
||||
// Wrap a pre-rendered HTML body in a titled section.
//   title -- section heading; HTML-escaped here.
//   body  -- HTML string inserted verbatim; a falsy body yields ''.
//   opts  -- optional: { collapsible, open, summary }. When `collapsible`
//            is truthy the body is placed inside a <details> element whose
//            <summary> text is `summary` (default 'show') and which starts
//            expanded only when `open` is truthy.
function renderSection(title, body, opts) {
    if (!body) return '';

    const { collapsible, open, summary } = opts || {};
    const heading = `<div style="font-size:11px;color:var(--lb-text-dim);text-transform:uppercase;letter-spacing:0.5px;margin:14px 0 4px 0;">${escapeHtml(title)}</div>`;

    if (!collapsible) {
        return `${heading}${body}`;
    }

    const openAttr = open ? 'open' : '';
    const summaryText = escapeHtml(summary || 'show');
    return `${heading}<details ${openAttr} style="border:1px solid var(--lb-border);padding:8px;border-radius:3px;background:rgba(255,255,255,0.02);"><summary style="cursor:pointer;font-size:12px;color:var(--lb-text);">${summaryText}</summary>${body}</details>`;
}
|
||||
|
||||
// Render an `office_info` object into the Office panel.
//
// Writes three things:
//   * elements.macroStatus         -- severity-coloured status pill
//   * elements.macroDetectionNotes -- one-line detection notes
//   * #macroInfo                   -- detail sections (external refs,
//                                     autoexec triggers, suspicious keywords,
//                                     IOCs, decoded strings, VBA source)
// If #macroInfo is missing from the page only the pill and notes are updated.
//
// Every value taken from `office` is attacker-influenced (it comes from the
// uploaded document), so everything is passed through escapeHtml before
// being placed into innerHTML.
function renderOfficeInfo(office) {
    const refs = office.external_refs || [];
    const analysis = office.analysis || {};

    // External-reference targets come straight from the document's .rels
    // files. Escaping alone does not neutralise `javascript:` / `data:` style
    // URLs inside an href, so only plain http(s) targets are rendered as
    // clickable links; anything else is shown as inert text.
    const renderTarget = target => {
        const safe = escapeHtml(target);
        if (!/^https?:\/\//i.test(String(target ?? ''))) return safe;
        return `<a href="${safe}" target="_blank" rel="noopener noreferrer" style="color:var(--lb-accent-soft);">${safe}</a>`;
    };

    // Status pill
    const sev = macroSeverityClass(office);
    const sevClassMap = {
        critical: 'bg-red-500/8 text-red-300 border border-red-500/22',
        medium: 'bg-yellow-500/8 text-yellow-300 border border-yellow-500/22',
        low: 'bg-green-500/8 text-green-300 border border-green-500/22',
    };
    elements.macroStatus.className = `px-3 py-1 text-sm rounded-full ${sevClassMap[sev]}`;
    elements.macroStatus.textContent = office.has_macros
        ? 'Macros Present'
        : (refs.length > 0 ? 'External Refs' : 'No Macros');

    // Top-level detection notes (one-line summaries)
    const notes = office.detection_notes || [];
    elements.macroDetectionNotes.innerHTML = notes.map(note => `
        <div style="display:flex;align-items:flex-start;gap:6px;margin-bottom:3px;">
            <span style="color:var(--lb-warn);">⚠</span>
            <span>${escapeHtml(note)}</span>
        </div>
    `).join('');

    // Detailed sections
    const macroInfo = document.getElementById('macroInfo');
    if (!macroInfo) return;
    const parts = [];

    // External references (T1221 etc.) -- shown FIRST when present since
    // they're often the only signal for documents that have no VBA.
    if (refs.length > 0) {
        const rows = refs.map(r => [
            `<span class="lb-tag ${r.relationship === 'attachedTemplate' ? 'critical' : 'medium'}">${escapeHtml(r.relationship)}</span>`,
            `<span class="lb-mono" style="word-break:break-all;font-size:11px;">${renderTarget(r.target)}</span>`,
            `<span class="lb-mono" style="font-size:11px;color:var(--lb-text-dim);">${escapeHtml(r.rels_file)}</span>`,
        ]);
        parts.push(renderSection('External References (Remote Targets)', renderTable(['Relationship', 'Target', 'In .rels'], rows)));
    }

    // Autoexec triggers
    const autoexec = analysis.autoexec || [];
    if (autoexec.length > 0) {
        const rows = autoexec.map(e => [
            `<span class="lb-tag critical">${escapeHtml(e.keyword || '?')}</span>`,
            `<span style="font-size:12px;">${escapeHtml(e.description || '')}</span>`,
        ]);
        parts.push(renderSection(`Auto-Execution Triggers (${autoexec.length})`, renderTable(['Keyword', 'Description'], rows)));
    }

    // Suspicious keywords
    const suspicious = analysis.suspicious || [];
    if (suspicious.length > 0) {
        const rows = suspicious.map(e => [
            `<span class="lb-tag medium">${escapeHtml(e.keyword || '?')}</span>`,
            `<span style="font-size:12px;">${escapeHtml(e.description || '')}</span>`,
        ]);
        parts.push(renderSection(`Suspicious Keywords (${suspicious.length})`, renderTable(['Keyword', 'Description'], rows)));
    }

    // IOCs (URLs, IPs, EXEs, etc. that olevba pulled out of the macro body)
    const iocs = analysis.iocs || [];
    if (iocs.length > 0) {
        const rows = iocs.map(ioc => [
            `<span class="lb-tag info">${escapeHtml(ioc.type || '?')}</span>`,
            `<span class="lb-mono" style="word-break:break-all;font-size:11px;">${escapeHtml(ioc.value || '')}</span>`,
        ]);
        parts.push(renderSection(`IOCs Extracted from Macro (${iocs.length})`, renderTable(['Type', 'Value'], rows)));
    }

    // Hex / Base64 / VBA-encoded strings (decoded by olevba)
    const stringSets = [
        ['Hex Strings', analysis.hex_strings || []],
        ['Base64 Strings', analysis.base64_strings || []],
        ['VBA-Encoded Strings', analysis.vba_strings || []],
    ];
    for (const [label, items] of stringSets) {
        if (items.length === 0) continue;
        const body = items.map(e => `<div class="lb-mono" style="word-break:break-all;font-size:11px;padding:3px 0;border-bottom:1px solid rgba(255,255,255,0.04);"><strong>${escapeHtml(e.keyword || '')}:</strong> ${escapeHtml(e.description || '')}</div>`).join('');
        parts.push(renderSection(`${label} (${items.length})`, body, { collapsible: true, summary: `${items.length} item(s) -- click to expand` }));
    }

    // Per-module VBA source code -- collapsible
    const modules = office.modules || [];
    if (modules.length > 0) {
        const body = modules.map(m => `
            <div style="margin-top:8px;">
                <div style="font-size:12px;color:var(--lb-text);margin-bottom:4px;">
                    <span class="lb-mono" style="color:var(--lb-accent-soft);">${escapeHtml(m.vba_filename || '?')}</span>
                    <span class="lb-muted" style="font-size:11px;"> -- ${escapeHtml(m.stream || '')}</span>
                </div>
                <pre style="background:rgba(0,0,0,0.3);padding:8px;border:1px solid var(--lb-border);font-size:11px;overflow-x:auto;max-height:240px;overflow-y:auto;white-space:pre-wrap;color:var(--lb-text);">${escapeHtml(m.code || '')}</pre>
            </div>
        `).join('');
        parts.push(renderSection(`VBA Source (${modules.length} module${modules.length !== 1 ? 's' : ''})`, body, { collapsible: true, summary: `${modules.length} module(s) -- click to view source code` }));
    }

    macroInfo.innerHTML = parts.join('');
}
|
||||
|
||||
// -- HTML smuggling rendering --------------------------------------
|
||||
//
|
||||
// Surfaces every non-empty piece of the `html_smuggle_info` structure
|
||||
// produced by app/utils/htmlsmuggle.py:
|
||||
// * Status pill: SMUGGLING / SUSPICIOUS / CLEAN with score
|
||||
// * Detection notes (one-line summaries)
|
||||
// * Score bar + matched-categories pill row
|
||||
// * Matched patterns (table: name + category + weight)
|
||||
// * Surface features (table: feature + value)
|
||||
// * IOCs (download filenames, dataset blobs, largest base64 preview)
|
||||
//
|
||||
// Reuses the renderTable / renderSection / escapeHtml helpers defined
|
||||
// for the office macro renderer.
|
||||
// Map an `html_smuggle_info` object to a severity bucket for the status
// pill: 'critical' when the analyzer flagged smuggling, 'medium' when the
// score is positive but the file was not flagged, otherwise 'low'.
function smuggleSeverityClass(h) {
    if (h.is_smuggling) return 'critical';

    const score = h.score || 0;
    return score > 0 ? 'medium' : 'low';
}
|
||||
|
||||
// Render an `html_smuggle_info` object into the HTML-smuggling panel.
//
// Writes three things (each is skipped if its element is missing):
//   * elements.smuggleStatus         -- SMUGGLING / SUSPICIOUS / CLEAN pill
//   * elements.smuggleDetectionNotes -- one-line detection notes
//   * elements.smuggleInfo           -- detail sections (pattern categories,
//                                       matched patterns, surface features,
//                                       IOCs, truncation notice)
//
// The structure is derived from an uploaded (untrusted) HTML file, so every
// value interpolated into innerHTML -- including counts, weights and
// lengths that are expected to be numeric -- goes through escapeHtml.
function renderHtmlSmuggleInfo(h) {
    // Status pill
    const sev = smuggleSeverityClass(h);
    const sevClassMap = {
        critical: 'bg-red-500/8 text-red-300 border border-red-500/22',
        medium: 'bg-yellow-500/8 text-yellow-300 border border-yellow-500/22',
        low: 'bg-green-500/8 text-green-300 border border-green-500/22',
    };
    if (elements.smuggleStatus) {
        elements.smuggleStatus.className = `px-3 py-1 text-sm rounded-full ${sevClassMap[sev]}`;
        const label = h.is_smuggling
            ? `SMUGGLING (score ${h.score}/${h.threshold})`
            : (h.score > 0 ? `SUSPICIOUS (score ${h.score}/${h.threshold})` : 'CLEAN');
        // textContent, not innerHTML -- no escaping needed here.
        elements.smuggleStatus.textContent = label;
    }

    // Detection notes
    const notes = h.detection_notes || [];
    if (elements.smuggleDetectionNotes) {
        elements.smuggleDetectionNotes.innerHTML = notes.map(note => `
            <div style="display:flex;align-items:flex-start;gap:6px;margin-bottom:3px;">
                <span style="color:var(--lb-warn);">⚠</span>
                <span>${escapeHtml(note)}</span>
            </div>
        `).join('');
    }

    // Detail blocks
    const host = elements.smuggleInfo;
    if (!host) return;
    const parts = [];

    // Matched-category pills
    const cats = h.matched_categories || {};
    if (Object.keys(cats).length > 0) {
        const pills = Object.entries(cats).map(([cat, count]) =>
            `<span class="lb-tag medium" style="margin-right:4px;">${escapeHtml(cat)} × ${escapeHtml(count)}</span>`
        ).join(' ');
        parts.push(renderSection('Pattern Categories', `<div style="padding:4px 0;">${pills}</div>`));
    }

    // Matched patterns -- the actual signatures that fired
    const matches = h.matched_patterns || [];
    if (matches.length > 0) {
        const rows = matches.map(m => [
            `<span class="lb-mono" style="font-size:11px;">${escapeHtml(m.name)}</span>`,
            `<span class="lb-tag info">${escapeHtml(m.category || '?')}</span>`,
            `<span class="lb-mono" style="font-size:11px;">+${escapeHtml(m.weight || 0)}</span>`,
        ]);
        parts.push(renderSection(`Matched Patterns (${matches.length})`, renderTable(['Pattern', 'Category', 'Weight'], rows)));
    }

    // Surface features -- rows whose value is missing, false or 0 are
    // dropped so only features actually present in the file are listed.
    const f = h.features || {};
    if (Object.keys(f).length > 0) {
        const featureRows = [
            ['File size (bytes)', f.file_size],
            ['Script tags', f.script_tags],
            ['iframe tags', f.iframe_tags],
            ['embed tags', f.embed_tags],
            ['Base64 blob count (>=50 chars)', f.base64_blob_count],
            ['Largest base64 blob (chars)', f.largest_base64_chars],
            ['Has blob()', f.has_blob],
            ['Has atob()', f.has_atob],
            ['Has Uint8Array', f.has_uint8array],
            ['Has URL.createObjectURL', f.has_createobjecturl],
            ['Has <a download="...">', f.has_download_attr],
            ['Has String.fromCharCode', f.has_fromcharcode],
        ].filter(([, v]) => v !== undefined && v !== null && v !== false && v !== 0)
         .map(([label, v]) => [
            `<span style="font-size:12px;">${escapeHtml(label)}</span>`,
            `<span class="lb-mono" style="font-size:12px;">${escapeHtml(String(v))}</span>`,
        ]);
        if (featureRows.length > 0) {
            parts.push(renderSection('Surface Features', renderTable(['Feature', 'Value'], featureRows)));
        }
    }

    // IOCs
    const iocs = h.iocs || {};
    const iocBits = [];
    if ((iocs.download_filenames || []).length > 0) {
        const rows = iocs.download_filenames.map(name => [
            `<span class="lb-tag medium">download=</span>`,
            `<span class="lb-mono" style="word-break:break-all;font-size:11px;">${escapeHtml(name)}</span>`,
        ]);
        iocBits.push(renderTable(['Type', 'Value'], rows));
    }
    if ((iocs.data_file_attrs || []).length > 0) {
        const rows = iocs.data_file_attrs.map(d => [
            `<span class="lb-tag medium">data-file=</span>`,
            `<span class="lb-mono" style="word-break:break-all;font-size:11px;">${escapeHtml(d)}</span>`,
        ]);
        iocBits.push(renderTable(['Type', 'Value (truncated)'], rows));
    }
    if (iocs.largest_base64_blob && iocs.largest_base64_blob.length > 0) {
        const b = iocs.largest_base64_blob;
        iocBits.push(`
            <div class="lb-mono" style="font-size:11px;padding:4px 0;">
                <div><strong>Largest base64 blob:</strong> ${escapeHtml(b.length)} chars</div>
                <div style="margin-top:4px;color:var(--lb-text-dim);">First 120: <span style="color:var(--lb-text);word-break:break-all;">${escapeHtml(b.preview_first_120)}</span></div>
                ${b.preview_last_120 ? `<div style="margin-top:4px;color:var(--lb-text-dim);">Last 120: <span style="color:var(--lb-text);word-break:break-all;">${escapeHtml(b.preview_last_120)}</span></div>` : ''}
            </div>
        `);
    }
    if (iocBits.length > 0) {
        parts.push(renderSection('IOCs', iocBits.join('')));
    }

    if (h.truncated) {
        parts.push(`<div class="lb-muted" style="font-size:11px;margin-top:8px;">⚠ Scan was truncated -- file exceeds the 5 MiB cap.</div>`);
    }

    host.innerHTML = parts.join('');
}
|
||||
|
||||
function getRuntimeConfig(buildWith) {
|
||||
|
||||
@@ -75,7 +75,7 @@
|
||||
<svg fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5" d="M4 6h16M4 12h16M4 18h7"/><path stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5" d="M14 16l3 3 5-5"/></svg>
|
||||
All
|
||||
</button>
|
||||
<button type="button" class="lb-tab" data-mode="static" data-family="regular">
|
||||
<button type="button" class="lb-tab" data-mode="static" data-family="regular office html">
|
||||
<svg fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5" d="M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2"/></svg>
|
||||
Static
|
||||
</button>
|
||||
@@ -363,6 +363,16 @@
|
||||
<div id="macroInfo" style="font-size: 12px; color: var(--lb-text-dim);"></div>
|
||||
</div>
|
||||
|
||||
<!-- HTML smuggling info -->
|
||||
<div id="htmlSmuggleInfo" class="hidden" style="border: 1px solid var(--lb-border); padding: 12px; margin-bottom: 12px;">
|
||||
<div style="display: flex; align-items: center; justify-content: space-between; margin-bottom: 8px;">
|
||||
<span class="lb-eyebrow">HTML Smuggling Analysis</span>
|
||||
<span id="smuggleStatus" class="lb-tag muted"></span>
|
||||
</div>
|
||||
<div id="smuggleDetectionNotes" style="font-size: 12px; color: var(--lb-text-dim); margin-bottom: 6px;"></div>
|
||||
<div id="smuggleInfo" style="font-size: 12px; color: var(--lb-text-dim);"></div>
|
||||
</div>
|
||||
|
||||
<!-- File-specific info -->
|
||||
<div id="fileSpecificInfo" style="font-size: 13px; color: var(--lb-text-dim);"></div>
|
||||
</div>
|
||||
|
||||
@@ -8,8 +8,6 @@ Prefer importing directly from submodules in new code:
|
||||
from .file_io import (
|
||||
FileTypeDetector,
|
||||
detect_file_type,
|
||||
get_lnk_info,
|
||||
get_office_info,
|
||||
get_pe_info,
|
||||
save_uploaded_file,
|
||||
)
|
||||
@@ -19,6 +17,9 @@ from .forensics import (
|
||||
calculate_entropy,
|
||||
get_security_analyzer,
|
||||
)
|
||||
from .htmlsmuggle import get_html_smuggle_info
|
||||
from .lnk import get_lnk_info
|
||||
from .office import get_office_info
|
||||
from .json_helpers import (
|
||||
extract_detection_counts,
|
||||
format_hex,
|
||||
@@ -41,7 +42,7 @@ __all__ = [
|
||||
'allowed_file', 'calculate_entropy', 'calculate_risk', 'calculate_yara_risk',
|
||||
'check_tool', 'detect_file_type', 'extract_detection_counts',
|
||||
'find_file_by_hash', 'format_hex', 'format_size', 'generate_html_report',
|
||||
'get_entropy_risk_level', 'get_lnk_info', 'get_office_info', 'get_pe_info',
|
||||
'get_risk_level', 'get_security_analyzer', 'load_json_file',
|
||||
'save_uploaded_file', 'validate_pid',
|
||||
'get_entropy_risk_level', 'get_html_smuggle_info', 'get_lnk_info',
|
||||
'get_office_info', 'get_pe_info', 'get_risk_level',
|
||||
'get_security_analyzer', 'load_json_file', 'save_uploaded_file', 'validate_pid',
|
||||
]
|
||||
|
||||
+40
-20
@@ -1,5 +1,15 @@
|
||||
# app/utils/file_io.py
|
||||
"""File ingestion: type detection, PE/Office/LNK metadata, upload handling."""
|
||||
"""File ingestion: type detection, upload handling.
|
||||
|
||||
Per-file-type inspectors (PE / Office / LNK / HTML-smuggling) are dispatched
|
||||
from `save_uploaded_file` based on the detected family. Each inspector lives
|
||||
in its own module:
|
||||
|
||||
* PE -- get_pe_info (this module, uses forensics.SecurityAnalyzer)
|
||||
* Office -- utils/office.py (get_office_info)
|
||||
* LNK -- utils/lnk.py (get_lnk_info)
|
||||
* HTML smuggling -- utils/htmlsmuggle.py (get_html_smuggle_info)
|
||||
"""
|
||||
import datetime
|
||||
import hashlib
|
||||
import json
|
||||
@@ -11,8 +21,10 @@ import struct
|
||||
import pefile
|
||||
from werkzeug.utils import secure_filename
|
||||
|
||||
from ..analyzers.static.lnk_parser import LnkForensics
|
||||
from .forensics import calculate_entropy, get_security_analyzer
|
||||
from .htmlsmuggle import get_html_smuggle_info
|
||||
from .lnk import get_lnk_info
|
||||
from .office import get_office_info
|
||||
from .risk_analyzer import RiskCalculator
|
||||
|
||||
|
||||
@@ -42,6 +54,12 @@ class FileTypeDetector:
|
||||
elif header.startswith(cls.LNK_HEADER):
|
||||
return cls._detect_lnk_type(filepath)
|
||||
|
||||
# HTML / HTM detection -- file-extension based since HTML has no
|
||||
# consistent magic. Cheap to check after the binary-header tests
|
||||
# already missed.
|
||||
if p.suffix.lower() in ('.html', '.htm'):
|
||||
return {"family": "html", "type": p.suffix.lower().lstrip('.')}
|
||||
|
||||
return {"family": "unknown", "type": "unknown"}
|
||||
|
||||
except Exception as e:
|
||||
@@ -142,8 +160,20 @@ class FileTypeDetector:
|
||||
"visio/document.xml": "vsdx",
|
||||
}
|
||||
|
||||
# Flag macro-enabled OOXML by presence of vbaProject.bin --
|
||||
# promotes docx/xlsx/pptx -> docm/xlsm/pptm so the dashboard
|
||||
# Type field reflects what's actually in the container.
|
||||
has_vba = any(n.endswith("vbaproject.bin") for n in names)
|
||||
macro_enabled_map = {
|
||||
"docx": "docm",
|
||||
"xlsx": "xlsm",
|
||||
"pptx": "pptm",
|
||||
}
|
||||
|
||||
for path, file_type in ooxml_types.items():
|
||||
if path in names:
|
||||
if has_vba and file_type in macro_enabled_map:
|
||||
file_type = macro_enabled_map[file_type]
|
||||
return {"family": "office", "type": file_type}
|
||||
|
||||
return {"family": "office", "type": "ooxml-unknown"}
|
||||
@@ -331,24 +361,9 @@ def _build_pe_detection_notes(is_valid_checksum, suspicious_imports,
|
||||
return detection_notes
|
||||
|
||||
|
||||
def get_office_info(filepath, malapi_path):
|
||||
"""Analyze Office macros (delegates to SecurityAnalyzer)."""
|
||||
return get_security_analyzer(malapi_path).analyze_office_macros(filepath)
|
||||
|
||||
|
||||
def get_lnk_info(filepath):
|
||||
"""Analyze a Windows .LNK shortcut for forensic data."""
|
||||
try:
|
||||
lnk = LnkForensics(filepath)
|
||||
if not lnk.is_valid():
|
||||
return {'lnk_info': None}
|
||||
|
||||
forensic_data = lnk.get_forensic_data()
|
||||
return {'lnk_info': forensic_data}
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error analyzing LNK file: {e}")
|
||||
return {'lnk_info': None}
|
||||
# Office / LNK / HTML-smuggling inspectors live in their own modules
|
||||
# (imported at the top of this file). PE inspection stays here because it's
|
||||
# tightly coupled to the SecurityAnalyzer cache (MalAPI lookup + entropy).
|
||||
|
||||
|
||||
def _build_entropy_analysis(entropy_value):
|
||||
@@ -456,6 +471,11 @@ def save_uploaded_file(file, config):
|
||||
if 'error' not in lnk_result:
|
||||
file_info.update(lnk_result)
|
||||
|
||||
elif file_type_info['family'] == 'html':
|
||||
# Always update -- get_html_smuggle_info returns a usable dict even
|
||||
# for clean files (just with is_smuggling=false / score=0).
|
||||
file_info.update(get_html_smuggle_info(filepath))
|
||||
|
||||
with open(os.path.join(result_folder, filename, 'file_info.json'), 'w') as f:
|
||||
json.dump(file_info, f)
|
||||
|
||||
|
||||
+7
-43
@@ -1,11 +1,15 @@
|
||||
# app/utils/forensics.py
|
||||
"""PE/Office forensic analysis: entropy, runtime detection, MalAPI lookup."""
|
||||
"""PE forensic analysis: entropy, runtime detection, MalAPI lookup.
|
||||
|
||||
Office / LNK / HTML-smuggling analyzers live in their own modules
|
||||
(`utils/office.py`, `utils/lnk.py`, `utils/htmlsmuggle.py`) so each file-type
|
||||
inspector is self-contained and easy to maintain. This module is now strictly
|
||||
PE-focused.
|
||||
"""
|
||||
import json
|
||||
import math
|
||||
from collections import Counter
|
||||
|
||||
from oletools.olevba import VBA_Parser
|
||||
|
||||
|
||||
# Known runtime imports for compiled languages — used to flag PE imports as
|
||||
# benign-runtime rather than suspicious.
|
||||
@@ -251,46 +255,6 @@ class SecurityAnalyzer:
|
||||
|
||||
return sections_info
|
||||
|
||||
def analyze_office_macros(self, filepath):
    """Inspect Office VBA macros for suspicious patterns.

    Returns ``{'office_info': info}`` where ``info`` carries ``file_type``,
    ``has_macros``, ``macro_info`` (raw olevba analysis, or None when the
    document has no macros) and ``detection_notes`` (one human-readable
    note per keyword found in the stringified analysis).

    Any failure is reported on stdout and yields ``{'office_info': None}``.
    """
    vbaparser = None
    try:
        vbaparser = VBA_Parser(filepath)
        # Query once and reuse -- the original asked the parser twice.
        has_macros = vbaparser.detect_vba_macros()
        detection_notes = []

        info = {
            'file_type': 'Microsoft Office Document',
            'has_macros': has_macros,
            'macro_info': None,
            'detection_notes': detection_notes,
        }

        if has_macros:
            macro_analysis = vbaparser.analyze_macros()
            info['macro_info'] = macro_analysis

            # Plain substring search over the lower-cased repr of the
            # analysis results; each hit adds one note.
            macro_text = str(macro_analysis).lower()
            detection_patterns = {
                'shell': 'Shell command execution detected',
                'wscript': 'WScript execution detected',
                'powershell': 'PowerShell execution detected',
                'http': 'Network communication detected',
                'auto': 'Auto-execution mechanism detected',
                'document_open': 'Document open auto-execution',
                'windowshide': 'Hidden window execution',
                'createobject': 'COM object creation detected',
            }
            detection_notes.extend(
                note
                for pattern, note in detection_patterns.items()
                if pattern in macro_text
            )

        return {'office_info': info}
    except Exception as e:
        print(f"Error analyzing Office file: {e}")
        return {'office_info': None}
    finally:
        # Always release the parser, including on the error path (the
        # original skipped close() whenever analysis raised).
        if vbaparser is not None:
            try:
                vbaparser.close()
            except Exception:
                # Best-effort cleanup: a failing close() must not replace
                # the result already being returned.
                pass
|
||||
|
||||
|
||||
_security_analyzer_cache = {}
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,282 @@
|
||||
# app/utils/htmlsmuggle.py
|
||||
"""HTML-smuggling pattern scanner.
|
||||
|
||||
Runs at upload time on `.html` / `.htm` files (alongside `get_pe_info`,
|
||||
`get_office_info`, `get_lnk_info`). Output lands in `file_info.html_smuggle_info`
|
||||
and is rendered on the upload-result page the same way office_info is.
|
||||
|
||||
Pattern set + scoring model ported from SmuggleShield's `content.js`
|
||||
(https://github.com/RootUp/SmuggleShield). The browser extension catches
|
||||
runtime behaviour (DOM mutation, blob URL revoke, programmatic <a download>
|
||||
click); we catch the file-on-disk equivalent by regex-scanning the raw
|
||||
HTML source.
|
||||
|
||||
Scoring (mirrors SmuggleShield):
|
||||
- Each pattern carries a weight (2-4).
|
||||
- High-weight (>=3) patterns scanned first; early-return when the score
|
||||
crosses the threshold.
|
||||
- Low-weight (<3) patterns scanned only when high-weight pass landed
|
||||
within 2 points of the threshold (`threshold - 2 <= score < threshold`).
|
||||
- A cheap pre-filter (`atob | blob | base64 | createobjecturl | ...`)
|
||||
skips files that obviously aren't smuggling.
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
from typing import Dict, List
|
||||
|
||||
|
||||
# (weight, pattern, name, category)
|
||||
_PATTERNS = [
|
||||
# --- Direct base64 -> binary -> blob path ----------------------------
|
||||
(3, r'atob\s*\([^)]+\).*new\s+uint8array', 'atob_to_uint8array', 'encoding'),
|
||||
(3, r"atob\s*\(\s*['\"]([A-Za-z0-9+/=]{100,})['\"].*\)", 'large_base64_atob', 'encoding'),
|
||||
(3, r'new\s+blob\s*\(\s*\[\s*(?:data|atob\s*\()', 'blob_from_atob_data', 'blob'),
|
||||
(4, r"let\s+arrayBuffer\s*=\s*\['0x[0-9a-f]{2}'(?:\s*,\s*'0x[0-9a-f]{2}')+\]", 'hex_array_buffer', 'encoding'),
|
||||
|
||||
# --- Reversed-string fromCharCode obfuscation ------------------------
|
||||
(4, r'\["edoCrahCmorf"(?:\s*\[\s*"split"\s*\]\s*\(\s*""\s*\)\s*\[\s*"reverse"\s*\]\s*\(\s*\)\s*\[\s*"join"\s*\]\s*\(\s*""\s*\))', 'reversed_fromcharcode_obf', 'obfuscation'),
|
||||
(4, r'setTimeout\s*\(\s*\[.*?\]\.map\s*\(\s*.*?=>.*?(?:fromCharCode|edoCrahCmorf).*?\/\s*\d+\s*\)', 'settimeout_fromcharcode', 'obfuscation'),
|
||||
(3, r'String\s*\[\s*(?:"edoCrahCmorf"|[\'"][^\'\"]+[\'"]\.split\([\'"][\'"]\)\.reverse\(\)\.join\([\'"][\'"]\))\s*\]', 'string_reverse_index', 'obfuscation'),
|
||||
|
||||
# --- Blob -> object URL -> download chain ----------------------------
|
||||
(3, r'url\.createobjecturl\s*\(\s*(?:my)?blob\s*\)', 'createobjecturl_from_blob', 'blob'),
|
||||
(3, r'location(?:\s*\[\s*[\'"]href[\'"]\s*\])?\s*=\s*url', 'location_href_assign', 'writer'),
|
||||
(2, r'url\.revokeobjecturl\s*\(\s*url\s*\)', 'revokeobjecturl', 'blob'),
|
||||
(3, r'\.style\s*=\s*[\'"]display:\s*none[\'"].*\.href\s*=.*\.download\s*=', 'hidden_anchor_download', 'writer'),
|
||||
(3, r'\.click\s*\(\s*\).*url\.revokeobjecturl', 'auto_click_then_revoke', 'writer'),
|
||||
(3, r'href\s*=\s*["\']data:(?:application/octet-stream|image/svg\+xml);base64,', 'data_url_octet_stream', 'writer'),
|
||||
|
||||
# --- Bracket-string property access (window["a"+"to"+"b"] etc.) ------
|
||||
(3, r'window\s*\[\s*(?:["\']\w+["\']\s*\+\s*)+["\']\w+["\']\s*\]', 'window_bracket_concat', 'obfuscation'),
|
||||
(4, r'document\s*\[\s*(?:["\']\w+["\']\s*\+\s*)+["\']\w+["\']\s*\]\s*\(\s*window\s*\[\s*(?:[\'"]at[\'"].*[\'"]o[\'"].*[\'"]b[\'"]\s*\]|\s*(?:["\']\w+["\']\s*\+\s*)+["\']\w+["\']\s*\])\s*\([\'"][A-Za-z0-9+/=]+[\'"]\)\s*\)', 'document_bracket_atob', 'obfuscation'),
|
||||
(4, r'var\s+\w+=\w+;?\s*\(function\(\w+,\w+\)\{.*while\(!!\[\]\)\{try\{.*parseint.*\}catch\(\w+\)\{.*\}\}\(.*\)\);?', 'parseint_obfuscator', 'obfuscation'),
|
||||
|
||||
# --- Blob mime-type signatures + writer chain ------------------------
|
||||
(3, r'blob\s*\(\s*\[[^\]]+\]\s*,\s*\{\s*type\s*:\s*[\'"](?:application/octet-stream|text/html|octet/stream)[\'"](?:\s*,\s*encoding\s*:\s*[\'"]base64[\'"])?\s*\}\s*\)', 'blob_with_octet_type', 'blob'),
|
||||
|
||||
# --- WebAssembly / Go runtime smuggling ------------------------------
|
||||
(3, r'webassembly\s*\.\s*(?:instantiate(?:streaming)?|instance)', 'webassembly_instantiate', 'wasm'),
|
||||
(2, r'navigator\.serviceworker\.register', 'service_worker_register', 'wasm'),
|
||||
(2, r'wasm[_-]?exec\.js', 'wasm_exec_js', 'wasm'),
|
||||
(3, r'\.wasm\b', 'wasm_extension_ref', 'wasm'),
|
||||
(3, r'new\s+go\s*\(\s*\)', 'go_runtime_new', 'wasm'),
|
||||
(3, r'go\s*\.\s*run\s*\(', 'go_runtime_run', 'wasm'),
|
||||
|
||||
# --- Embedded srcdoc / iframe + script -------------------------------
|
||||
(3, r'srcdoc\s*=\s*["\'][^"\']*<script', 'srcdoc_with_script', 'writer'),
|
||||
(3, r'<embed[^>]*base64', 'embed_with_base64', 'writer'),
|
||||
|
||||
# --- Decoder helpers + legacy IE save -------------------------------
|
||||
(3, r'function\s+(?:b64toarray|xor|base64toarraybuffer)\s*\([^)]*\)\s*\{[\s\S]*?return\s+(?:bytes\.buffer|result);?\}', 'decoder_helper_func', 'encoding'),
|
||||
(3, r'document\.createelement\([\'"]embed[\'"]\)', 'createelement_embed', 'writer'),
|
||||
(2, r'\.setattribute\([\'"]src[\'"]\s*,\s*.*\)', 'setattribute_src', 'writer'),
|
||||
(3, r'window\.navigator\.mssaveoropenblob\s*\(\s*blob\s*,\s*filename\s*\)', 'mssaveoropenblob', 'writer'),
|
||||
(2, r'(?:window\.)?url\.createobjecturl\s*\(\s*(?:blob|[^)]+)\s*\)', 'generic_createobjecturl', 'blob'),
|
||||
(2, r'(?:a|element)\.download\s*=\s*(?:filename|[\'"][^\'"]+[\'"])', 'anchor_download_attr', 'writer'),
|
||||
(2, r'string\.fromcharcode\(.*\)', 'string_fromcharcode', 'encoding'),
|
||||
(2, r'\.charcodeat\(.*\)', 'charcodeat', 'encoding'),
|
||||
(3, r'document\.getelementbyid\([\'"]passwordid[\'"]\)\.value', 'password_field_lookup', 'writer'),
|
||||
(3, r'import\s*\(\s*url\.createobjecturl\s*\(', 'dynamic_import_objurl', 'wasm'),
|
||||
(3, r'\w+\s*\(\s*\w+\s*\(\s*[\'"][A-Za-z0-9+/=]{50,}[\'"]\s*\)\s*\)', 'nested_call_long_b64', 'encoding'),
|
||||
(2, r'(?:window\.)?atob\s*\(', 'atob_call', 'encoding'),
|
||||
(2, r'uint8[aA]rray\s*\(\s*(?:(?!len)[^)])*\)', 'uint8array_constructor', 'encoding'),
|
||||
(3, r'mssaveoropenblob|mssaveblob', 'mssave_alias', 'writer'),
|
||||
(3, r'base64toarraybuffer', 'b64_to_arraybuffer_helper', 'encoding'),
|
||||
(3, r'xmlhttprequest\(\).*\.responsetype\s*=\s*[\'"]arraybuffer[\'"]', 'xhr_arraybuffer_response', 'encoding'),
|
||||
(3, r'new\s+dataview\(.*\).*\.getuint8\(.*\).*\.setuint8\(', 'dataview_getset_uint8', 'encoding'),
|
||||
(2, r'[^\w](\w+)\s*=\s*(\w+)\s*\^\s*(\w+)', 'xor_operation', 'encoding'),
|
||||
(2, r'\.slice\(\s*\w+\s*-\s*\d+\s*,\s*\w+\s*-\s*\d+\s*\)', 'string_slice_offset', 'obfuscation'),
|
||||
(3, r'for\s*\([^)]+\)\s*\{[^}]*string\.fromcharcode\([^)]+\)', 'loop_fromcharcode', 'encoding'),
|
||||
|
||||
# --- GWT (Google Web Toolkit) smuggling artefacts --------------------
|
||||
(4, r'\$wnd\s*=\s*window;\s*\$doc\s*=\s*\$wnd\.document', 'gwt_wnd_doc', 'gwt'),
|
||||
(4, r'__gwt_(?:isKnownPropertyValue|getMetaProperty|marker|stylesLoaded|scriptsLoaded)', 'gwt_internals', 'gwt'),
|
||||
(3, r'\$strongName\s*=\s*[\'"][0-9A-F]{32}[\'"]', 'gwt_strong_name', 'gwt'),
|
||||
(3, r'\$gwt_version\s*=\s*[\'"][0-9.]+[\'"]', 'gwt_version', 'gwt'),
|
||||
(4, r'(?:function|var)\s+[a-zA-Z$_]+\s*=\s*\{\s*[a-zA-Z$_]+:\s*window,\s*[a-zA-Z$_]+:\s*document\s*\}', 'gwt_window_doc_pair', 'gwt'),
|
||||
(3, r'\b(?:gwtOnLoad|__gwtStatsEvent|gwtOnLoadFunc)\b', 'gwt_onload', 'gwt'),
|
||||
(3, r'\.setAttribute\([\'"]__gwt_property[\'"]', 'gwt_property_attr', 'gwt'),
|
||||
(4, r'document\.createElement\([\'"]script[\'"]\).*?\.src\s*=.*?\.cache\.js', 'gwt_cache_js', 'gwt'),
|
||||
|
||||
# --- Mouse/event-triggered drop chains -------------------------------
|
||||
(4, r'(?:document|window)\.on(?:mousemove|load|mouseover)\s*=\s*function\s*\(\s*\)\s*\{[^}]*?data:application/[^}]*?\.click\(\)[^}]*?(?:removeChild|remove)\(', 'mouse_event_drop', 'writer'),
|
||||
(4, r'(?:window|var|let)\.\w+Triggered\s*=\s*(?:true|false).*?(?:navigator|platform).*?data:application/[^;]+;base64,.*?\.(?:download|click)', 'triggered_flag_drop', 'writer'),
|
||||
(4, r'navigator\[?["\']platform["\']\]?.*?(?:document|window)\.on\w+.*?data:application/', 'platform_event_drop', 'writer'),
|
||||
|
||||
# --- Generic split/concat/reverse obfuscation ------------------------
|
||||
(3, r'\[[\'"][^\'\"]+[\'"]\s*\+\s*[\'"][^\'\"]+[\'"]\]', 'string_concat_index', 'obfuscation'),
|
||||
(3, r"\[\'[a-z]+\'\s*\+\s*\'[a-z]+\'\]", 'concat_lower_index', 'obfuscation'),
|
||||
(3, r"\[\s*(?:[\'\"]\w?[\'\"](?:\s*,\s*)?){4,}\s*\]\.join\s*\(\s*[\'\"]*\s*\)", 'array_join_join', 'obfuscation'),
|
||||
(3, r'const\s+\w+\s*=\s*\[\s*(?:[\'"]\w?[\'"](?:\s*,\s*)?){4,}', 'const_char_array', 'obfuscation'),
|
||||
(4, r'(\[(?:\][^(]*|\[\])[^(]*|\w+\.)constructor\s*\(\s*([\'"])return\s*\w+\2\s*\)', 'constructor_return', 'obfuscation'),
|
||||
(4, r'Function\s*\(\s*[\'"]return\s+\w+[\'"](?:\s*\)\s*\(\s*\)|\(\))', 'function_return', 'obfuscation'),
|
||||
(3, r'\w+\.split\s*\(\s*[\'"][\'\"]?\s*\)\.reverse\s*\(\s*\)\.join\s*\(', 'split_reverse_join', 'obfuscation'),
|
||||
(3, r'\[\s*\w+\.split\s*\(\s*[\'"][\'"]\s*\)\.reverse\s*\(\s*\)', 'array_split_reverse', 'obfuscation'),
|
||||
(3, r'setTimeout\s*\(\s*(?:function|\(\)|[^,]+)\s*(?:=>)?\s*\{[\s\S]{10,}?setTimeout\s*\(', 'nested_settimeout', 'obfuscation'),
|
||||
(4, r'setTimeout\s*\([^{)]*\{[^{}]*setTimeout\s*\([^{)]*\{[^{}]*\}', 'double_settimeout', 'obfuscation'),
|
||||
(4, r'new\s*\([^)]*\[\s*(?:[\'"][^\'\"]+[\'"]\.split|[\'"]\w+[\'"]\.split)', 'new_with_split_index', 'obfuscation'),
|
||||
(3, r'\[[^\]]*(?:join|reverse)[^\]]*\]\s*\(\s*(?:\w+|[\'"][^\'"]*[\'"])\s*\)', 'index_join_reverse', 'obfuscation'),
|
||||
(3, r'\[\s*(?:urlMethod|parts\.join\(\)|[\'"]\w+[\'"]\s*\+)', 'partsjoin_index', 'obfuscation'),
|
||||
(4, r'\w+\s*\[\s*(?:[\'"][^\'\"]+[\'"](?:\s*\+\s*)?)+\s*\]\s*\(\s*\w+\s*\)', 'concat_call', 'obfuscation'),
|
||||
|
||||
# --- "down" + "load" decomposition (extremely common) ----------------
|
||||
(3, r'[\'"]?down[\'"]?\s*\+\s*[\'"]?load[\'"]?', 'down_plus_load', 'obfuscation'),
|
||||
(4, r"\['down' \+ 'load'\]", 'down_load_bracket_exact', 'obfuscation'),
|
||||
(4, r'createElement\s*\(\s*[\'"]a[\'"]\s*\)[^}]*?\[\s*[\'"]\w+[\'"]\s*\+\s*[\'"]\w+[\'"]\s*\]', 'createanchor_concat_attr', 'writer'),
|
||||
(3, r"\['style'\]\['visi' \+ 'bility'\]", 'visibility_concat', 'obfuscation'),
|
||||
|
||||
# --- Chunked-substr + dataset-based payload chains -------------------
|
||||
(3, r'function\s+\w+Chunks\s*\([^)]*\)\s*\{[^{}]*for\s*\([^{}]*\)\s*\{[^{}]*substr', 'chunk_substr_loop', 'encoding'),
|
||||
(2, r'\.substr\s*\(\s*\w+\s*,\s*\w+Size\s*\)', 'substr_size_param', 'encoding'),
|
||||
(4, r'\(async\s*\(\s*\)\s*=>\s*\{\s*(?:let|var|const)\s+d\s*=.*?(?:document\.getElementById|document\.querySelector).*?dataset.*?\.href\s*=\s*d.*?\.download\s*=.*?\.click\s*\(\s*\)', 'async_dataset_click', 'writer'),
|
||||
(4, r'\bdocument\.getElementById\s*\(\s*[\'"]data[\'"]\s*\).*?\.dataset\.file.*?createElement\s*\(\s*[\'"]a[\'"]\s*\).*?\.download\s*=', 'data_div_dataset_anchor', 'writer'),
|
||||
(3, r'<div[^>]*id\s*=\s*["\']data["\'][^>]*data-file\s*=\s*["\'][A-Za-z0-9+/=]{50,}["\'][^>]*>', 'data_div_with_b64', 'writer'),
|
||||
(4, r'<script>\s*\(\s*async\s*\(\s*\)\s*=>\s*\{[^}]*createElement\s*\(\s*[\'"]a[\'"]\s*\)[^}]*\.click\s*\(\s*\)[^}]*\.remove\s*\(\s*\)', 'inline_async_click_remove', 'writer'),
|
||||
(4, r'\b(?:atob|decodeURIComponent)\s*\([^)]*(?:dataset|getAttribute)\s*\.[^)]*\)[^;]*\.href\s*=[^;]*\.download\s*=[^;]*\.click\s*\(\s*\)', 'decode_dataset_click', 'writer'),
|
||||
(4, r'\bdocument\.body\.appendChild\s*\([^)]+\)[^;]*\.click\s*\(\s*\)[^;]*\.remove\s*\(\s*\)', 'append_click_remove', 'writer'),
|
||||
]
|
||||
|
||||
# Quick-reject filter -- skip the full regex pass on obviously-clean HTML.
|
||||
# Case-insensitive keyword alternation; a file with none of these keywords
# skips the pattern scan entirely and is reported with score 0.
_QUICK_CHECK = re.compile(
    r'blob|atob|download|base64|arraybuffer|uint8array|createobjecturl|fromcharcode',
    re.IGNORECASE,
)
# Summed pattern weight at or above which a file is reported as smuggling.
_THRESHOLD = 4
_MAX_BYTES = 5 * 1024 * 1024  # 5 MiB cap on what we read for the scan

# Pre-compile patterns once at import time.
# All patterns are case-insensitive, and DOTALL lets `.` match across newlines.
_RE_FLAGS = re.IGNORECASE | re.DOTALL
# Same 4-tuples as _PATTERNS, with the pattern string replaced by its compiled regex.
_COMPILED = [(w, re.compile(p, _RE_FLAGS), n, c) for w, p, n, c in _PATTERNS]
# Split by weight: >= 3 is the high-weight set, < 3 the low-weight set.
_HIGH = [t for t in _COMPILED if t[0] >= 3]
_LOW = [t for t in _COMPILED if t[0] < 3]
|
||||
|
||||
|
||||
def get_html_smuggle_info(filepath: str) -> Dict:
    """Scan an HTML file for smuggling patterns.

    Always returns a dict with the single key ``html_smuggle_info`` so the
    caller can pass it straight to ``file_info.update(...)``:

    - on success the value is the report assembled by ``_build``;
    - when the file cannot be read (``OSError``) the value is
      ``{'error': 'read failed: <reason>'}``.

    At most ``_MAX_BYTES`` are read; ``truncated`` in the report says
    whether the file on disk was larger than what was scanned.
    """
    try:
        bytes_on_disk = os.path.getsize(filepath)
        with open(filepath, 'rb') as handle:
            data = handle.read(_MAX_BYTES)
    except OSError as e:
        return {'html_smuggle_info': {'error': f'read failed: {e}'}}

    # Undecodable bytes are replaced, so decoding itself cannot fail.
    content = data.decode('utf-8', errors='replace')
    truncated = bytes_on_disk > len(data)

    # Features and IOCs are reported even for files that skip the scan.
    features = _features(content)
    iocs = _iocs(content)

    if _QUICK_CHECK.search(content) is None:
        report = _build(False, 0, [], features, iocs, truncated)
        return {'html_smuggle_info': report}

    score, matches = _scan(content, _HIGH, _THRESHOLD)

    # Low-weight patterns only get a say when the high-weight pass ended
    # just short of the threshold.
    near_miss_floor = max(0, _THRESHOLD - 2)
    if near_miss_floor <= score < _THRESHOLD:
        low_score, low_matches = _scan(content, _LOW, _THRESHOLD - score)
        score += low_score
        matches.extend(low_matches)

    report = _build(score >= _THRESHOLD, score, matches, features, iocs, truncated)
    return {'html_smuggle_info': report}
|
||||
|
||||
|
||||
def _scan(content: str, patterns, max_score: int):
|
||||
score = 0
|
||||
matches: List[Dict] = []
|
||||
for weight, rx, name, category in patterns:
|
||||
if rx.search(content):
|
||||
score += weight
|
||||
matches.append({'name': name, 'category': category, 'weight': weight})
|
||||
if score >= max_score:
|
||||
break
|
||||
return score, matches
|
||||
|
||||
|
||||
def _features(content: str) -> Dict:
|
||||
"""Surface-level counts -- mirror SmuggleShield's MLDetector feature set."""
|
||||
base64_lengths = [
|
||||
len(m.group(0))
|
||||
for m in re.finditer(r'[A-Za-z0-9+/=]{50,}', content)
|
||||
]
|
||||
return {
|
||||
'file_size': len(content),
|
||||
'has_blob': bool(re.search(r'\bblob\s*\(', content, re.IGNORECASE)),
|
||||
'has_atob': bool(re.search(r'\batob\s*\(', content, re.IGNORECASE)),
|
||||
'has_uint8array': bool(re.search(r'\buint8array\b', content, re.IGNORECASE)),
|
||||
'has_createobjecturl': bool(re.search(r'createobjecturl', content, re.IGNORECASE)),
|
||||
'has_download_attr': bool(re.search(r'\bdownload\s*=\s*[\'"][^\'"]+[\'"]', content, re.IGNORECASE)),
|
||||
'has_fromcharcode': bool(re.search(r'fromcharcode', content, re.IGNORECASE)),
|
||||
'script_tags': len(re.findall(r'<script\b', content, re.IGNORECASE)),
|
||||
'iframe_tags': len(re.findall(r'<iframe\b', content, re.IGNORECASE)),
|
||||
'embed_tags': len(re.findall(r'<embed\b', content, re.IGNORECASE)),
|
||||
'base64_blob_count': len(base64_lengths),
|
||||
'largest_base64_chars': max(base64_lengths) if base64_lengths else 0,
|
||||
}
|
||||
|
||||
|
||||
def _iocs(content: str) -> Dict:
|
||||
"""Pull operator-readable artifacts -- attempted download filenames,
|
||||
the largest embedded base64 blob, dataset-based payload tags."""
|
||||
download_names = list({
|
||||
m.group(1)
|
||||
for m in re.finditer(r'\bdownload\s*=\s*[\'"]([^\'"]{1,100})[\'"]', content, re.IGNORECASE)
|
||||
})[:20]
|
||||
|
||||
largest_b64 = ''
|
||||
for m in re.finditer(r'[A-Za-z0-9+/=]{200,}', content):
|
||||
blob = m.group(0)
|
||||
if len(blob) > len(largest_b64):
|
||||
largest_b64 = blob
|
||||
if len(largest_b64) > 50000:
|
||||
break
|
||||
|
||||
data_file_attrs = list({
|
||||
m.group(1)[:200]
|
||||
for m in re.finditer(r'\bdata-file\s*=\s*[\'"]([A-Za-z0-9+/=]{20,})[\'"]', content, re.IGNORECASE)
|
||||
})[:10]
|
||||
|
||||
return {
|
||||
'download_filenames': download_names,
|
||||
'data_file_attrs': data_file_attrs,
|
||||
'largest_base64_blob': {
|
||||
'length': len(largest_b64),
|
||||
'preview_first_120': largest_b64[:120],
|
||||
'preview_last_120': largest_b64[-120:] if len(largest_b64) > 120 else '',
|
||||
} if largest_b64 else None,
|
||||
}
|
||||
|
||||
|
||||
def _build(is_smuggling: bool, score: int, matches, features, iocs, truncated: bool) -> Dict:
    """Assemble the `html_smuggle_info` report.

    Counts matched patterns per category, derives the human-readable
    `detection_notes` from the verdict and the feature flags, and returns
    everything in one dict alongside the inputs.
    """
    category_counts: Dict[str, int] = {}
    for hit in matches:
        category = hit['category']
        category_counts[category] = category_counts.get(category, 0) + 1

    notes: List[str] = []

    # Verdict line: either the detection itself or a below-threshold hint.
    if is_smuggling:
        fired = len(matches)
        notes.append(
            f"HTML smuggling detected -- pattern score {score} >= threshold {_THRESHOLD} "
            f"({fired} pattern{'s' if fired != 1 else ''} fired)"
        )
    elif score > 0:
        notes.append(f"Suspicious patterns present but below threshold ({score}/{_THRESHOLD})")

    # Feature-driven notes are added regardless of the verdict.
    if features.get('largest_base64_chars', 0) >= 1000:
        notes.append(
            f"Large base64 blob present ({features['largest_base64_chars']} chars) "
            f"-- typical of smuggled binary payload"
        )

    writer_chain = features.get('has_download_attr') and features.get('has_blob')
    if writer_chain:
        notes.append("Combination of <a download> + Blob -- classic smuggling-writer chain")

    decode_chain = features.get('has_atob') and features.get('has_uint8array')
    if decode_chain:
        notes.append("atob() + Uint8Array decode chain present")

    report = {
        'is_smuggling': is_smuggling,
        'score': score,
        'threshold': _THRESHOLD,
        'matched_patterns': matches,
        'matched_categories': category_counts,
        'features': features,
        'iocs': iocs,
        'truncated': truncated,
        'detection_notes': notes,
    }
    return report
|
||||
@@ -0,0 +1,29 @@
|
||||
# app/utils/lnk.py
|
||||
"""Windows shortcut (.lnk) analyzer.
|
||||
|
||||
Runs at upload time on .lnk files (alongside `get_pe_info`, `get_office_info`,
|
||||
`get_html_smuggle_info`). Output lands in `file_info.lnk_info`.
|
||||
|
||||
Heavy lifting is in `app.analyzers.static.lnk_parser.LnkForensics`; this
|
||||
module is a thin wrapper that adapts the parser to the file_io drop-in
|
||||
contract (returns `{lnk_info: {...}}` ready for `file_info.update(...)`).
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict
|
||||
|
||||
from ..analyzers.static.lnk_parser import LnkForensics
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_lnk_info(filepath: str) -> Dict:
    """Analyze a `.lnk` file and return `{'lnk_info': <dict or None>}`.

    The value is the parser's forensic data for a valid shortcut, and None
    when the file is not a valid LNK or when parsing raises (the failure is
    logged as a warning).
    """
    try:
        parser = LnkForensics(filepath)
        forensic_data = parser.get_forensic_data() if parser.is_valid() else None
    except Exception as e:
        logger.warning(f"LNK analysis failed on {filepath}: {e}")
        forensic_data = None
    return {'lnk_info': forensic_data}
|
||||
@@ -0,0 +1,220 @@
|
||||
# app/utils/office.py
|
||||
"""Office document analyzer.
|
||||
|
||||
Runs at upload time on Word / Excel / RTF / legacy CFBF binaries (alongside
|
||||
`get_pe_info`, `get_lnk_info`, `get_html_smuggle_info`). Output lands in
|
||||
`file_info.office_info`.
|
||||
|
||||
Two analysis branches:
|
||||
|
||||
1. olevba -- VBA / XLM macros embedded in the file. Pulls per-module
|
||||
source, autoexec triggers, suspicious keyword hits, IOCs.
|
||||
|
||||
2. OOXML rels inspection -- external `attachedTemplate` / `oleObject` /
|
||||
`subDocument` / `frame` references. Catches T1221 (Remote Template
|
||||
Injection) which is invisible to olevba because the malicious VBA
|
||||
lives in a remote .dotm, not in the file itself. Atomic Red Team's
|
||||
`Calculator.docx` is the canonical example.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import xml.etree.ElementTree as ET
|
||||
import zipfile
|
||||
from typing import Dict, List
|
||||
|
||||
from oletools.olevba import VBA_Parser
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Relationship Types we care about when they target an external (HTTP/UNC)
|
||||
# resource. `attachedTemplate` is the T1221 vector. The others pull remote
|
||||
# content the same way; less common but the same class of risk.
|
||||
_INTERESTING_RELS = (
|
||||
'attachedTemplate',
|
||||
'oleObject',
|
||||
'subDocument',
|
||||
'frame',
|
||||
'image', # rare but seen in malicious docs that fetch tracking pixels
|
||||
'hyperlink',
|
||||
)
|
||||
|
||||
|
||||
def get_office_info(filepath: str, malapi_path=None) -> Dict:
    """Analyze an Office document and return `{'office_info': {...}}`.

    Starts from an empty report and lets each analysis branch fill in its
    part in place: first the olevba macro branch, then the external
    relationship branch. The report is always returned, even when a branch
    found nothing.

    `malapi_path` is accepted for backward compatibility with older call
    sites and is not used.
    """
    # Buckets filled by the olevba branch.
    analysis = {
        'autoexec': [],        # [{keyword, description}] auto-execution triggers
        'suspicious': [],      # [{keyword, description}] suspicious keyword hits
        'iocs': [],            # [{type, value}] extracted URLs / IPs / EXEs / etc.
        'hex_strings': [],
        'base64_strings': [],
        'vba_strings': [],
    }
    info = {
        'file_type': 'Microsoft Office Document',
        'has_macros': False,
        'modules': [],         # [{stream, vba_filename, code}]
        'analysis': analysis,
        'external_refs': [],   # external relationship targets (T1221 etc.)
        'detection_notes': [],
    }

    for branch in (_run_olevba, _run_external_rels):
        branch(filepath, info)

    return {'office_info': info}
|
||||
|
||||
|
||||
def _run_olevba(filepath: str, info: Dict) -> None:
    """Branch 1 -- VBA / XLM macro analysis via oletools.olevba.

    Mutates `info` in place: sets `has_macros`, appends per-module source
    to `modules`, sorts olevba's analysis rows into the `analysis` buckets
    and appends summary lines to `detection_notes`. Failures are logged as
    warnings and leave `info` with whatever was collected so far.
    """
    try:
        vbaparser = VBA_Parser(filepath)
    except Exception as e:
        logger.warning(f"olevba init failed on {filepath}: {e}")
        return

    a = info['analysis']
    # Lower-cased olevba row type -> bucket for plain {keyword, description} rows.
    buckets = {
        'autoexec': a['autoexec'],
        'suspicious': a['suspicious'],
        'hex string': a['hex_strings'],
        'base64 string': a['base64_strings'],
        'vba string': a['vba_strings'],
        'vba_string': a['vba_strings'],
    }

    try:
        if not vbaparser.detect_vba_macros():
            return

        info['has_macros'] = True

        # Per-module source code: (filename, stream_path, vba_filename, vba_code)
        for _, stream, vba_fname, vba_code in vbaparser.extract_macros():
            if vba_code:
                info['modules'].append({
                    'stream': stream,
                    'vba_filename': vba_fname,
                    'code': vba_code,
                })

        # Structured analysis -- olevba returns (kw_type, keyword, description)
        for kw_type, keyword, description in vbaparser.analyze_macros():
            kt = (kw_type or '').lower()
            if kt in ('ioc', 'iocs'):
                # olevba labels these rows 'IOC' and puts the extracted
                # value in the keyword slot and the pattern type (URL,
                # IPv4 address, ...) in the description slot. The previous
                # check for 'iocs' never matched, so IOCs were dropped.
                a['iocs'].append({'type': description, 'value': keyword})
            elif kt in buckets:
                buckets[kt].append({'keyword': keyword, 'description': description})

        if a['autoexec']:
            info['detection_notes'].append(
                f"{len(a['autoexec'])} auto-execution trigger"
                f"{'s' if len(a['autoexec']) != 1 else ''} detected"
            )
        if a['suspicious']:
            info['detection_notes'].append(
                f"{len(a['suspicious'])} suspicious keyword"
                f"{'s' if len(a['suspicious']) != 1 else ''} in macro body"
            )
        if a['iocs']:
            info['detection_notes'].append(
                f"{len(a['iocs'])} IOC"
                f"{'s' if len(a['iocs']) != 1 else ''} extracted from macro"
            )
    except Exception as e:
        logger.warning(f"olevba analysis failed on {filepath}: {e}")
    finally:
        # Best-effort cleanup: a failing close() must not mask the result.
        try:
            vbaparser.close()
        except Exception:
            pass
|
||||
|
||||
|
||||
def _run_external_rels(filepath: str, info: Dict) -> None:
    """Branch 2 -- T1221 / external-relationship inspection.

    Stores every external relationship found in `info['external_refs']`
    and appends one detection note each for `attachedTemplate`,
    `oleObject` and `subDocument` references. Leaves `info` untouched when
    the scan fails (logged as a warning) or finds nothing.
    """
    try:
        external = _scan_external_relationships(filepath)
    except Exception as e:
        logger.warning(f"External-rels scan failed on {filepath}: {e}")
        return

    if not external:
        return

    info['external_refs'] = external
    notes = info['detection_notes']

    def count_of(kind: str) -> int:
        return sum(1 for ref in external if ref['relationship'] == kind)

    templates = count_of('attachedTemplate')
    if templates:
        notes.append(
            f"MITRE T1221: Remote Template Injection -- {templates} "
            f"external `attachedTemplate` reference"
            f"{'s' if templates != 1 else ''}. "
            f"Malicious VBA likely lives in the remote target, not in this file."
        )

    ole_objects = count_of('oleObject')
    if ole_objects:
        notes.append(
            f"{ole_objects} external OLE-object reference"
            f"{'s' if ole_objects != 1 else ''} -- remote-fetched embedded payload"
        )

    subdocs = count_of('subDocument')
    if subdocs:
        notes.append(
            f"{subdocs} external subDocument reference"
            f"{'s' if subdocs != 1 else ''}"
        )
|
||||
|
||||
|
||||
def _scan_external_relationships(filepath: str) -> List[Dict]:
|
||||
"""Walk every `*.rels` file inside an OOXML container and return the list
|
||||
of relationships whose `TargetMode` is `External` AND whose Type is one
|
||||
of `_INTERESTING_RELS`. Returns `[]` for non-zip files (legacy CFBF
|
||||
.doc/.xls binaries).
|
||||
"""
|
||||
if not zipfile.is_zipfile(filepath):
|
||||
return []
|
||||
|
||||
findings: List[Dict] = []
|
||||
try:
|
||||
with zipfile.ZipFile(filepath) as z:
|
||||
rels_files = [n for n in z.namelist() if n.endswith('.rels')]
|
||||
for rels_name in rels_files:
|
||||
try:
|
||||
data = z.read(rels_name)
|
||||
except Exception:
|
||||
continue
|
||||
try:
|
||||
root = ET.fromstring(data)
|
||||
except ET.ParseError:
|
||||
continue
|
||||
|
||||
for rel in root.iter():
|
||||
tag = rel.tag.rsplit('}', 1)[-1] if '}' in rel.tag else rel.tag
|
||||
if tag != 'Relationship':
|
||||
continue
|
||||
if rel.attrib.get('TargetMode', '').lower() != 'external':
|
||||
continue
|
||||
rel_type = rel.attrib.get('Type', '')
|
||||
target = rel.attrib.get('Target', '')
|
||||
rel_name = rel_type.rsplit('/', 1)[-1] if '/' in rel_type else rel_type
|
||||
if rel_name not in _INTERESTING_RELS:
|
||||
continue
|
||||
findings.append({
|
||||
'rels_file': rels_name,
|
||||
'relationship': rel_name,
|
||||
'target': target,
|
||||
'target_mode': 'External',
|
||||
'full_type': rel_type,
|
||||
})
|
||||
except zipfile.BadZipFile:
|
||||
pass
|
||||
|
||||
return findings
|
||||
Reference in New Issue
Block a user