Commit efff934e authored by allen.wang

fix:优化报表效果

parent 1027cb42
{
"browser": {
"launchOptions": {
"args": ["--no-proxy-server"]
}
}
}
# Re-crops an already-downloaded Tableau capture for a single slide and renders
# the result into a PowerPoint file.
#
# Parameters:
#   -Slide         (mandatory) slide code, forwarded to recrop_from_raw.py as --slide.
#   -ReportMonth   (mandatory) forwarded as --report-month.
#   -ConfigPath    optional; when empty, config.yaml under the project root is used.
#   -ReportYear    optional; forwarded as --report-year only when greater than 0.
#   -CompareYear   optional; forwarded as --compare-year only when greater than 0.
#   -MonthMode     "cumulative" (default) or "single"; forwarded as --month-mode.
#   -TemplatePath  optional; when empty, Report.pptx under the project root is used.
#   -OutputPath    optional; when empty, a name under output\ is derived from
#                  the slide code and report month.
param(
[Parameter(Mandatory = $true)]
[string]$Slide,
[Parameter(Mandatory = $true)]
[string]$ReportMonth,
[string]$ConfigPath = "",
[int]$ReportYear = 0,
[int]$CompareYear = 0,
[ValidateSet("cumulative", "single")]
[string]$MonthMode = "cumulative",
[string]$TemplatePath = "",
[string]$OutputPath = ""
)
# Turn non-terminating PowerShell errors into terminating ones.
$ErrorActionPreference = "Stop"
# Project root is the parent of the directory that contains this script.
$root = Split-Path -Parent $PSScriptRoot
function Resolve-ProjectPath {
    # Converts a project-relative path into an absolute one anchored at $root.
    # Empty values and already-rooted paths are returned unchanged.
    param([string]$PathValue)

    if ((-not $PathValue) -or [System.IO.Path]::IsPathRooted($PathValue)) {
        return $PathValue
    }

    $combined = Join-Path $root $PathValue
    return [System.IO.Path]::GetFullPath($combined)
}
function Resolve-PythonCommand {
    # Returns the command prefix used to start Python as an array:
    # ("py", "-3") when the py launcher is available, otherwise ("python").
    # Throws when neither command can be found.
    #
    # The unary comma stops PowerShell from unrolling the array on return.
    # Without it the single-element result reaches the caller as the plain
    # string "python", and indexing that string with [0] yields the
    # character 'p' instead of the command name.
    if (Get-Command py -ErrorAction SilentlyContinue) {
        return ,@("py", "-3")
    }
    if (Get-Command python -ErrorAction SilentlyContinue) {
        return ,@("python")
    }
    throw "Python launcher not found. Please install py or python."
}
# Fill in project-relative defaults for every path the caller left empty.
if (-not $ConfigPath) {
    $ConfigPath = "config.yaml"
}
if (-not $TemplatePath) {
    $TemplatePath = "Report.pptx"
}
if (-not $OutputPath) {
    $OutputPath = "output\generated-$($Slide.ToLower())-$ReportMonth-recrop.pptx"
}

# Anchor relative paths at the project root so the script works from any
# current directory.
$ConfigPath = Resolve-ProjectPath $ConfigPath
$TemplatePath = Resolve-ProjectPath $TemplatePath
$OutputPath = Resolve-ProjectPath $OutputPath

if (-not (Test-Path -LiteralPath $ConfigPath)) {
    throw "ConfigPath not found: $ConfigPath"
}
if (-not (Test-Path -LiteralPath $TemplatePath)) {
    throw "TemplatePath not found: $TemplatePath"
}

# Command line for the Python re-crop step; the year overrides are optional.
$pythonArgs = @(
    "$root\scripts\recrop_from_raw.py",
    "--config", "$ConfigPath",
    "--slide", "$Slide",
    "--report-month", "$ReportMonth",
    "--month-mode", "$MonthMode"
)
if ($ReportYear -gt 0) {
    $pythonArgs += @("--report-year", "$ReportYear")
}
if ($CompareYear -gt 0) {
    $pythonArgs += @("--compare-year", "$CompareYear")
}

# Flatten whatever Resolve-PythonCommand emits into a string array. A function
# that returns a one-element array hands the caller a bare string, and
# indexing that string with [0] would pick a single character.
$pythonCommand = @(Resolve-PythonCommand | ForEach-Object { $_ })
$pythonExe = $pythonCommand[0]
$pythonPrefix = @($pythonCommand | Select-Object -Skip 1)

# stderr captured through 2>&1 arrives as ErrorRecord objects. In Windows
# PowerShell (and PowerShell before 7.2) the "Stop" preference turns the first
# such record into a terminating error, which would skip the exit-code check
# below. Relax the preference only for the native call.
$previousErrorPreference = $ErrorActionPreference
$ErrorActionPreference = "Continue"
try {
    $opsOutput = @(& $pythonExe @pythonPrefix @pythonArgs 2>&1)
} finally {
    $ErrorActionPreference = $previousErrorPreference
}
if ($LASTEXITCODE -ne 0) {
    $details = ($opsOutput | ForEach-Object { "$_" }) -join [Environment]::NewLine
    throw "recrop_from_raw.py failed with exit code $LASTEXITCODE`n$details"
}

# The Python script prints the operations file path as its last stdout line.
# Ignore stderr records and blank lines so warnings cannot be mistaken for the
# path, and so empty output reaches the explicit check instead of failing on
# a method call against $null.
$stdoutLines = @(
    $opsOutput |
        Where-Object { $_ -isnot [System.Management.Automation.ErrorRecord] } |
        ForEach-Object { "$_".Trim() } |
        Where-Object { $_ }
)
$opsPath = ""
if ($stdoutLines.Count -gt 0) {
    $opsPath = $stdoutLines[-1]
}
if (-not $opsPath) {
    throw "recrop_from_raw.py did not return an operations path."
}

# Render the operations into the template; the renderer's own output is discarded.
powershell -ExecutionPolicy Bypass -File "$root\bin\vip-report-render.ps1" -TemplatePath "$TemplatePath" -OutputPath "$OutputPath" -OperationsPath "$opsPath" | Out-Null
if ($LASTEXITCODE -ne 0) {
    throw "vip-report-render.ps1 failed with exit code $LASTEXITCODE"
}
Write-Output $OutputPath
Pillow
PyMySQL
PyYAML
lxml
matplotlib
numpy
from __future__ import annotations
import argparse
import json
import sys
from pathlib import Path
from typing import Any
import yaml
from tableau_export import normalize_image_size
# Slide codes grouped by the sync module that produces their assets.
MONTHLY_SALES_SLIDES = {"S02", "S03"}
INVENTORY_MONTHLY_SLIDES = {"S04", "S05", "S06", "S07", "S08"}
TOP_PRODUCTS_SLIDES = {"S09", "S10"}
WAREHOUSE_SLIDES = {"S13"}
# Every slide this script accepts: the union of the groups above.
SUPPORTED_SLIDES = (
    MONTHLY_SALES_SLIDES
    | INVENTORY_MONTHLY_SLIDES
    | TOP_PRODUCTS_SLIDES
    | WAREHOUSE_SLIDES
)
def parse_args() -> argparse.Namespace:
    """Parse the process command line for a single-slide re-crop run.

    Returns:
        Namespace with ``config``, ``slide``, ``report_month``,
        ``report_year``, ``compare_year`` and ``month_mode``.
    """
    # Default config lives one directory above the folder holding this file.
    default_config = Path(__file__).resolve().parents[1] / "config.yaml"

    cli = argparse.ArgumentParser(
        description="Re-crop already-downloaded Tableau images and render one slide operation set."
    )
    cli.add_argument("--config", default=str(default_config), help="Path to config.yaml")

    required_text_options = (
        ("--slide", "Single slide code, for example S02"),
        ("--report-month", "Report month, for example 三月 / 3 / 03"),
    )
    for flag, hint in required_text_options:
        cli.add_argument(flag, required=True, help=hint)

    # 0 means "no override"; the year is then taken from the config file.
    year_overrides = (
        ("--report-year", "Report year override"),
        ("--compare-year", "Compare year override"),
    )
    for flag, hint in year_overrides:
        cli.add_argument(flag, type=int, default=0, help=hint)

    cli.add_argument(
        "--month-mode",
        choices=["cumulative", "single"],
        default="cumulative",
        help="Monthly-sales month mode, default cumulative",
    )
    return cli.parse_args()
def load_config(config_path: Path) -> dict[str, Any]:
    """Load the YAML configuration file as a dictionary.

    Args:
        config_path: Location of the YAML file.

    Returns:
        The parsed top-level mapping.

    Raises:
        FileNotFoundError: If ``config_path`` does not exist.
        ValueError: If the file parses to anything other than a mapping.
    """
    if not config_path.exists():
        raise FileNotFoundError(f"Config not found: {config_path}")
    loaded = yaml.safe_load(config_path.read_text(encoding="utf-8"))
    # safe_load yields None for an empty document and a list/scalar for
    # non-mapping content; fail here rather than with an AttributeError later.
    if not isinstance(loaded, dict):
        raise ValueError(f"Config must be a YAML mapping: {config_path}")
    return loaded
def normalize_slide(slide: str) -> str:
    """Normalize a user-supplied slide code to the canonical ``Snn`` form.

    Both bare numbers ("2", "02") and S-prefixed codes ("S02", "s2") are
    accepted; the numeric part is zero-padded to two digits.

    Args:
        slide: Slide code as typed by the user.

    Returns:
        The upper-case, zero-padded slide code.

    Raises:
        ValueError: If the code is not numeric or is not a supported slide.
    """
    text = slide.strip().upper()
    digits = text[1:] if text.startswith("S") else text
    try:
        candidate = f"S{int(digits):02d}"
    except ValueError:
        # Non-numeric input: fall through to the uniform "Unsupported slide"
        # error instead of leaking int()'s parsing message.
        candidate = text
    if candidate not in SUPPORTED_SLIDES:
        raise ValueError(f"Unsupported slide: {slide}. Supported: {','.join(sorted(SUPPORTED_SLIDES))}")
    return candidate
def resolve_years(args: argparse.Namespace, config: dict[str, Any]) -> tuple[int, int]:
    """Pick the report year and comparison year.

    A non-zero command-line override wins; otherwise the ``report`` section of
    the config is consulted, falling back to 2026 for the report year and to
    the year before the report year for the comparison year.

    Returns:
        ``(report_year, compare_year)`` as integers.
    """
    report_section = config.get("report", {})

    if args.report_year:
        report_year = int(args.report_year)
    else:
        report_year = int(report_section.get("year", 2026))

    if args.compare_year:
        compare_year = int(args.compare_year)
    else:
        compare_year = int(report_section.get("compare_year", report_year - 1))

    return report_year, compare_year
def load_manifest(manifest_path: Path) -> dict[str, Any]:
    """Read a manifest JSON file, treating a missing file as an empty manifest.

    Args:
        manifest_path: Location of the JSON manifest.

    Returns:
        The decoded JSON content, or ``{}`` when the file does not exist.
    """
    if manifest_path.exists():
        raw_text = manifest_path.read_text(encoding="utf-8")
        return json.loads(raw_text)
    return {}
def build_raw_index(manifest: dict[str, Any]) -> dict[str, Path]:
    """Map capture ids to raw screenshot files recorded in a manifest.

    Entries are read from ``manifest["source"]["captures"]`` first and then
    from ``manifest["assets"]``; asset entries overwrite capture entries that
    share an id. Only screenshots that currently exist on disk are indexed.

    Args:
        manifest: Decoded manifest; may be empty.

    Returns:
        Dictionary of capture id to existing raw screenshot path.
    """
    index: dict[str, Path] = {}

    def _register(raw_id: Any, raw_path: Any) -> None:
        # ``or ""`` keeps a null id from being indexed under the literal
        # string "None"; both sections now share this normalisation.
        capture_id = str(raw_id or "").strip()
        if not capture_id or not raw_path:
            return
        candidate = Path(raw_path)
        if candidate.exists():
            index[capture_id] = candidate

    # ``or`` guards against sections that are present but null in the JSON.
    source = manifest.get("source") or {}
    for capture in source.get("captures") or []:
        _register(capture.get("capture_id"), capture.get("raw_screenshot"))
    for asset in manifest.get("assets") or []:
        _register(
            asset.get("source_capture_id") or asset.get("capture_id"),
            asset.get("raw_screenshot"),
        )
    return index
def resolve_raw_path(
    *,
    capture_id: str,
    captures_by_id: dict[str, dict[str, Any]],
    raw_by_capture: dict[str, Path],
    workdir: Path,
) -> Path:
    """Find the raw screenshot file for a capture.

    The manifest-derived index is consulted first. Failing that, the capture
    spec's ``raw_screenshot_name`` is looked up inside ``workdir`` and, when
    found, cached back into ``raw_by_capture``.

    Raises:
        FileNotFoundError: If neither location yields an existing file.
    """
    if capture_id in raw_by_capture:
        indexed = raw_by_capture[capture_id]
        if indexed.exists():
            return indexed

    spec = captures_by_id.get(capture_id)
    if spec:
        fallback = (workdir / spec["raw_screenshot_name"]).resolve()
        if fallback.exists():
            # Remember the hit so later assets of the same capture skip the probe.
            raw_by_capture[capture_id] = fallback
            return fallback

    raise FileNotFoundError(
        f"Raw screenshot not found for capture_id={capture_id}. "
        "Please run Tableau sync for this slide once first."
    )
def normalize_s04_overall_raw_if_needed(
    *,
    slide: str,
    capture_id: str,
    source_path: Path,
    capture_spec: dict[str, Any] | None,
) -> Path:
    """Resize the S04 "overall" raw screenshot in place when its spec asks for it.

    Only the combination slide ``S04`` / capture ``overall`` with a truthy
    ``normalize_download_size`` entry in the capture spec triggers a call to
    ``normalize_image_size``; every other input is a no-op.

    Returns:
        ``source_path`` unchanged (the path is never replaced).
    """
    is_target = slide == "S04" and capture_id == "overall" and bool(capture_spec)
    if is_target:
        wanted_size = capture_spec.get("normalize_download_size")
        if wanted_size:
            normalize_image_size(source_path, wanted_size)
    return source_path
def append_replace_image(
    operations: dict[str, list[Any]],
    *,
    slide: int,
    shape_id: int,
    shape_name: str,
    image_path: Path,
) -> None:
    """Queue one image replacement in ``operations["replace_images"]``.

    The image path is stored as a string so the operations dictionary stays
    JSON-serialisable.
    """
    queue = operations["replace_images"]
    entry = {
        "slide": slide,
        "shape_id": shape_id,
        "shape_name": shape_name,
        "image_path": str(image_path),
    }
    queue.append(entry)
def recrop_monthly_sales(
    *,
    slide: str,
    report_month: str,
    report_year: int,
    compare_year: int,
    month_mode: str,
    workdir: Path,
    raw_by_capture: dict[str, Path],
    operations: dict[str, list[Any]],
) -> None:
    """Regenerate the monthly-sales images of one slide from raw screenshots.

    Asset and capture specs come from ``sync_monthly_sales_assets.build_specs``.
    Each asset whose ``slide_code`` equals ``slide`` is written to
    ``<workdir>/assets/monthly-sales/<asset_name>.png`` — composed from panels
    when the asset has a ``composite`` entry, cropped otherwise — and a
    matching replace-image operation is queued.
    """
    import sync_monthly_sales_assets as monthly_sales

    specs = monthly_sales.build_specs(report_month, report_year, compare_year, month_mode)

    capture_lookup = {}
    for capture in specs["captures"]:
        capture_lookup[capture["capture_id"]] = capture
    slide_assets = [entry for entry in specs["assets"] if entry["slide_code"] == slide]

    output_dir = workdir / "assets" / "monthly-sales"
    output_dir.mkdir(parents=True, exist_ok=True)

    for asset in slide_assets:
        raw_image = resolve_raw_path(
            capture_id=asset["capture_id"],
            captures_by_id=capture_lookup,
            raw_by_capture=raw_by_capture,
            workdir=workdir,
        )
        destination = output_dir / f"{asset['asset_name']}.png"

        if "composite" not in asset:
            monthly_sales.crop_image(
                raw_image,
                destination,
                asset["crop"],
                resize_to=asset.get("resize_to"),
            )
        else:
            monthly_sales.compose_panels(raw_image, destination, asset["composite"])

        append_replace_image(
            operations,
            slide=asset["slide"],
            shape_id=asset["shape_id"],
            shape_name=asset["shape_name"],
            image_path=destination,
        )
def recrop_inventory_monthly(
    *,
    slide: str,
    report_month: str,
    report_year: int,
    compare_year: int,
    workdir: Path,
    raw_by_capture: dict[str, Path],
    operations: dict[str, list[Any]],
) -> None:
    """Regenerate the inventory-monthly images of one slide from raw screenshots.

    Asset and capture specs come from
    ``sync_inventory_monthly_assets.build_specs``. Each asset whose
    ``slide_code`` equals ``slide`` is cropped into
    ``<workdir>/assets/inventory-monthly/<asset_name>.png`` and a matching
    replace-image operation is queued. The raw screenshot is passed through
    ``normalize_s04_overall_raw_if_needed`` before cropping.
    """
    import sync_inventory_monthly_assets as inventory

    specs = inventory.build_specs(report_month, report_year, compare_year)

    capture_lookup = {}
    for capture in specs["captures"]:
        capture_lookup[capture["capture_id"]] = capture
    slide_assets = [entry for entry in specs["assets"] if entry["slide_code"] == slide]

    output_dir = workdir / "assets" / "inventory-monthly"
    output_dir.mkdir(parents=True, exist_ok=True)

    for asset in slide_assets:
        located = resolve_raw_path(
            capture_id=asset["capture_id"],
            captures_by_id=capture_lookup,
            raw_by_capture=raw_by_capture,
            workdir=workdir,
        )
        raw_image = normalize_s04_overall_raw_if_needed(
            slide=slide,
            capture_id=asset["capture_id"],
            source_path=located,
            capture_spec=capture_lookup.get(asset["capture_id"]),
        )
        destination = output_dir / f"{asset['asset_name']}.png"
        inventory.crop_image(
            raw_image,
            destination,
            asset["crop"],
            resize_to=asset.get("resize_to"),
        )
        append_replace_image(
            operations,
            slide=asset["slide"],
            shape_id=asset["shape_id"],
            shape_name=asset["shape_name"],
            image_path=destination,
        )
def recrop_warehouse(
    *,
    slide: str,
    report_month: str,
    report_year: int,
    compare_year: int,
    workdir: Path,
    raw_by_capture: dict[str, Path],
    operations: dict[str, list[Any]],
) -> None:
    """Regenerate the warehouse images of one slide from raw screenshots.

    The report month is first normalised to a label and a month number by the
    ``sync_warehouse_100060_assets`` helpers, which also supply the specs.
    Each asset whose ``slide_code`` equals ``slide`` is cropped into
    ``<workdir>/assets/warehouse-100060/<asset_name>.png`` and a matching
    replace-image operation is queued.
    """
    import sync_warehouse_100060_assets as warehouse

    label = warehouse.normalize_month_label(report_month)
    number = warehouse.month_label_to_number(label)
    specs = warehouse.build_specs(label, number, report_year, compare_year)

    capture_lookup = {}
    for capture in specs["captures"]:
        capture_lookup[capture["capture_id"]] = capture
    slide_assets = [entry for entry in specs["assets"] if entry["slide_code"] == slide]

    output_dir = workdir / "assets" / "warehouse-100060"
    output_dir.mkdir(parents=True, exist_ok=True)

    for asset in slide_assets:
        raw_image = resolve_raw_path(
            capture_id=asset["capture_id"],
            captures_by_id=capture_lookup,
            raw_by_capture=raw_by_capture,
            workdir=workdir,
        )
        destination = output_dir / f"{asset['asset_name']}.png"
        warehouse.crop_image(
            raw_image,
            destination,
            asset["crop"],
            resize_to=asset.get("resize_to"),
        )
        append_replace_image(
            operations,
            slide=asset["slide"],
            shape_id=asset["shape_id"],
            shape_name=asset["shape_name"],
            image_path=destination,
        )
def recrop_top_products(
    *,
    slide: str,
    workdir: Path,
    manifest: dict[str, Any],
    raw_by_capture: dict[str, Path],
    operations: dict[str, list[Any]],
) -> None:
    """Regenerate the top-products chart of one slide and re-queue all its images.

    Unlike the other families, the asset list comes from the existing
    manifest rather than from freshly built specs. Only the asset named in
    ``sync_top_products_assets.SLIDE_SPECS[slide]`` is re-cropped; every other
    manifest asset of the slide must already exist on disk.

    Raises:
        FileNotFoundError: If the manifest has no assets for ``slide``, the
            raw chart screenshot cannot be located, or a dependent asset
            file is missing.
    """
    import sync_top_products_assets as top_products

    slide_assets = []
    for entry in manifest.get("assets", []):
        if str(entry.get("slide_code", "")).upper() == slide:
            slide_assets.append(entry)
    if not slide_assets:
        raise FileNotFoundError(
            f"No existing top-products manifest assets for {slide}. "
            "Please run top-products Tableau sync once first."
        )

    chart = top_products.SLIDE_SPECS[slide]
    chart_capture_id = chart["capture_id"]
    # Minimal capture spec so resolve_raw_path can fall back to the workdir file.
    capture_lookup = {chart_capture_id: {"raw_screenshot_name": chart["raw_screenshot_name"]}}
    chart_raw = resolve_raw_path(
        capture_id=chart_capture_id,
        captures_by_id=capture_lookup,
        raw_by_capture=raw_by_capture,
        workdir=workdir,
    )

    for asset in slide_assets:
        destination = Path(asset["asset_path"]).resolve()
        destination.parent.mkdir(parents=True, exist_ok=True)

        is_chart = asset.get("asset_name") == chart["asset_name"]
        if is_chart:
            top_products.crop_image(
                chart_raw,
                destination,
                chart["crop"],
                resize_to=chart.get("resize_to"),
            )
        elif not destination.exists():
            raise FileNotFoundError(
                f"Top-products dependent asset is missing: {destination}. "
                "Please run top-products Tableau sync once first."
            )

        append_replace_image(
            operations,
            slide=int(asset["slide"]),
            shape_id=int(asset["shape_id"]),
            shape_name=str(asset["shape_name"]),
            image_path=destination,
        )
def main() -> None:
    """Re-crop one slide's images and write the render operations file.

    Steps: parse arguments, make the sibling sync modules importable, load
    the config, resolve the working directory, load the slide family's
    manifest, run the family-specific re-crop, then write
    ``render-ops.recrop.<slide>.json`` into the working directory and print
    its path on stdout.
    """
    args = parse_args()
    slide = normalize_slide(args.slide)

    # The recrop_* helpers import sync modules that live next to this script.
    scripts_dir = Path(__file__).resolve().parent
    if str(scripts_dir) not in sys.path:
        sys.path.insert(0, str(scripts_dir))

    config_path = Path(args.config).resolve()
    config = load_config(config_path)
    report_year, compare_year = resolve_years(args, config)

    # A relative workdir is interpreted relative to the config file.
    workdir = Path(config["paths"]["workdir"])
    if not workdir.is_absolute():
        workdir = (config_path.parent / workdir).resolve()

    if slide in MONTHLY_SALES_SLIDES:
        family = "monthly-sales"
    elif slide in INVENTORY_MONTHLY_SLIDES:
        family = "inventory-monthly"
    elif slide in TOP_PRODUCTS_SLIDES:
        family = "top-products"
    elif slide in WAREHOUSE_SLIDES:
        family = "warehouse-100060"
    else:
        raise ValueError(f"Unsupported slide: {slide}")

    manifest = load_manifest(workdir / "data" / family / f"{family}-assets.live.json")
    raw_by_capture = build_raw_index(manifest)

    operations: dict[str, list[Any]] = {
        key: []
        for key in ("replace_text", "replace_tables", "replace_charts", "replace_images")
    }

    shared = {
        "slide": slide,
        "workdir": workdir,
        "raw_by_capture": raw_by_capture,
        "operations": operations,
    }
    period = {
        "report_month": args.report_month,
        "report_year": report_year,
        "compare_year": compare_year,
    }
    if family == "monthly-sales":
        recrop_monthly_sales(**shared, **period, month_mode=args.month_mode)
    elif family == "inventory-monthly":
        recrop_inventory_monthly(**shared, **period)
    elif family == "top-products":
        recrop_top_products(**shared, manifest=manifest)
    else:
        recrop_warehouse(**shared, **period)

    output_ops_path = workdir / f"render-ops.recrop.{slide.lower()}.json"
    serialized = json.dumps(operations, ensure_ascii=False, indent=2)
    output_ops_path.write_text(serialized, encoding="utf-8")
    # The calling script reads this line to find the operations file.
    print(str(output_ops_path))


if __name__ == "__main__":
    main()
from __future__ import annotations
import json
from pathlib import Path
from PIL import Image
# CSS selectors for dialog elements that the generated export script removes
# from the viz frame before it opens the image-export dialog. Used whenever
# build_export_image_js is called without its own dialog_selectors.
DEFAULT_STALE_DIALOG_SELECTORS = [
    '[data-tb-test-id="FileDownload-Dlg-Dialog-Glass-Root"]',
    '[data-tb-test-id="FileDownload-Dlg-Dialog-Floater-Root"]',
    '[data-tb-test-id="detailedErrorDialog-Dialog-Glass-Root"]',
    '[data-tb-test-id="detailedErrorDialog-Dialog-Glass"]',
]
def build_export_image_js(
    inner_frame_fragment: str,
    output_path: str,
    *,
    viewport: dict[str, int] | None = None,
    tableau_wait_ms: int = 90000,
    download_wait_ms: int = 60000,
    dialog_selectors: list[str] | None = None,
) -> str:
    """Build Playwright code that exports the currently filtered Tableau view as an image.

    The returned text is the source of a JavaScript ``async function(page)``.
    When run, that function optionally resizes the viewport, polls the page's
    frames until one reports at least one Tableau viz, removes stale dialog
    elements, opens the viz's export-image dialog, clicks the download button
    and saves the download to ``output_path``.

    Args:
        inner_frame_fragment: Substring of a frame URL; frames whose URL
            contains it are inspected first.
        output_path: File path the download is saved to.
        viewport: Optional ``{"width": ..., "height": ...}``; applied only
            when both values are truthy.
        tableau_wait_ms: How long to keep polling for a frame with a viz.
        download_wait_ms: How long to wait for the browser download event.
        dialog_selectors: CSS selectors removed before exporting. ``None`` or
            an empty list falls back to ``DEFAULT_STALE_DIALOG_SELECTORS``.

    Returns:
        JavaScript source text. The function it defines resolves to an object
        with ``frameUrl``, ``activeSheet``, ``pageUrl``, ``outputPath``,
        ``suggestedFilename`` and ``frameSnapshots``. It throws when the page
        is still on sign-in, shows a ``chrome-error://`` URL, no viz appears
        in time, or no download button becomes visible.
    """
    # All inputs travel as one JSON object that is embedded as a JS literal;
    # ensure_ascii=False leaves non-ASCII characters unescaped.
    payload = json.dumps(
        {
            "inner_frame_fragment": inner_frame_fragment,
            "output_path": output_path,
            "viewport": viewport or {},
            "tableau_wait_ms": tableau_wait_ms,
            "download_wait_ms": download_wait_ms,
            "dialog_selectors": dialog_selectors or DEFAULT_STALE_DIALOG_SELECTORS,
        },
        ensure_ascii=False,
    )
    # f-string template: literal JS braces are doubled, and {payload} is the
    # only interpolated value.
    return f"""async function(page) {{
const spec = {payload};
if (spec.viewport && spec.viewport.width && spec.viewport.height) {{
await page.setViewportSize({{
width: Number(spec.viewport.width),
height: Number(spec.viewport.height),
}});
}}
await page.waitForLoadState('domcontentloaded').catch(() => null);
async function inspectFrame(frame, preferredFragment) {{
const frameUrl = typeof frame.url === 'function' ? frame.url() : '';
try {{
const snapshot = await frame.evaluate(() => {{
const vizCount = window.tableau?.VizManager?.getVizs?.().length || 0;
return {{
url: location.href,
title: document.title,
readyState: document.readyState,
hasTableau: !!(window.tableau && window.tableau.VizManager),
vizCount,
}};
}});
return {{
...snapshot,
frameUrl,
matchesTargetFrame: !!(preferredFragment && frameUrl && frameUrl.includes(preferredFragment)),
}};
}} catch (error) {{
return {{
url: frameUrl,
title: null,
readyState: null,
hasTableau: false,
vizCount: 0,
frameUrl,
matchesTargetFrame: !!(preferredFragment && frameUrl && frameUrl.includes(preferredFragment)),
error: String(error && error.message || error),
}};
}}
}}
async function locateVizFrame(timeoutMs, preferredFragment) {{
const deadline = Date.now() + timeoutMs;
let lastSnapshots = [];
while (Date.now() < deadline) {{
const rankedFrames = page.frames()
.map((frame) => {{
const frameUrl = typeof frame.url === 'function' ? frame.url() : '';
const isPreferred = !!(preferredFragment && frameUrl.includes(preferredFragment));
return {{
frame,
rank: isPreferred ? 0 : (frame === page.mainFrame() ? 1 : 2),
}};
}})
.sort((left, right) => left.rank - right.rank);
lastSnapshots = [];
for (const item of rankedFrames) {{
const snapshot = await inspectFrame(item.frame, preferredFragment);
lastSnapshots.push(snapshot);
if (snapshot.vizCount > 0) {{
return {{ frame: item.frame, snapshots: lastSnapshots }};
}}
}}
const stillOnSignin =
(page.url() || '').includes('/#/signin') ||
lastSnapshots.some((snapshot) => (snapshot.url || '').includes('/#/signin'));
if (stillOnSignin) {{
throw new Error('Tableau page is still on sign-in: ' + JSON.stringify({{ pageUrl: page.url(), frames: lastSnapshots }}));
}}
const hasChromeError =
(page.url() || '').startsWith('chrome-error://') ||
lastSnapshots.some((snapshot) => (snapshot.url || '').startsWith('chrome-error://'));
if (hasChromeError) {{
throw new Error('Tableau page failed to load: ' + JSON.stringify({{ pageUrl: page.url(), frames: lastSnapshots }}));
}}
await page.waitForTimeout(1000);
}}
throw new Error('Timed out waiting for Tableau viz: ' + JSON.stringify({{ pageUrl: page.url(), frames: lastSnapshots }}));
}}
const located = await locateVizFrame(Number(spec.tableau_wait_ms || 90000), spec.inner_frame_fragment || '');
const targetFrame = located.frame;
await targetFrame.evaluate((selectors) => {{
for (const selector of selectors || []) {{
document.querySelector(selector)?.remove();
}}
}}, spec.dialog_selectors).catch(() => null);
const exportResult = await targetFrame.evaluate(async (config) => {{
const viz = window.tableau?.VizManager?.getVizs?.()?.[0];
if (!viz) {{
throw new Error('Tableau viz is not ready for export.');
}}
if (typeof viz.showExportImageDialog !== 'function') {{
throw new Error('Tableau export image dialog is not available.');
}}
const workbook = typeof viz.getWorkbook === 'function' ? viz.getWorkbook() : null;
const activeSheet = workbook && typeof workbook.getActiveSheet === 'function'
? workbook.getActiveSheet()
: null;
for (const selector of config.dialog_selectors || []) {{
document.querySelector(selector)?.remove();
}}
viz.showExportImageDialog();
return {{
frameUrl: location.href,
activeSheet: activeSheet && typeof activeSheet.getName === 'function'
? activeSheet.getName()
: null,
}};
}}, {{
dialog_selectors: spec.dialog_selectors,
}});
const downloadScopes = [
page,
targetFrame,
...page.frames().filter((frame) => frame !== targetFrame),
];
const linkCandidates = [
{{
selector: '[role="dialog"] a.tabDownloadFileButton, [role="dialog"] button.tabDownloadFileButton',
text: null,
}},
{{
selector: '[role="dialog"] button, [role="dialog"] a',
text: /^(下载|Download)$/i,
}},
{{
selector: 'a.tabDownloadFileButton, button.tabDownloadFileButton',
text: null,
}},
];
let link = null;
for (const scope of downloadScopes) {{
for (const candidate of linkCandidates) {{
const locator =
candidate.text
? scope.locator(candidate.selector).filter({{ hasText: candidate.text }}).first()
: scope.locator(candidate.selector).first();
try {{
await locator.waitFor({{ state: 'visible', timeout: 5000 }});
link = locator;
break;
}} catch (error) {{}}
}}
if (link) {{
break;
}}
}}
if (!link) {{
throw new Error('Export download button was not found after opening the Tableau image export dialog.');
}}
const [download] = await Promise.all([
page.waitForEvent('download', {{ timeout: Number(spec.download_wait_ms || 60000) }}),
link.click({{ force: true }}),
]);
await download.saveAs(spec.output_path);
return {{
...exportResult,
pageUrl: page.url(),
outputPath: spec.output_path,
suggestedFilename: download.suggestedFilename(),
frameSnapshots: located.snapshots,
}};
}}
"""
def normalize_image_size(path: Path, size: dict[str, int] | tuple[int, int]) -> None:
    """Resize the image at ``path`` in place to ``size`` unless it already matches.

    Args:
        path: Image file to check and, if necessary, overwrite.
        size: Target size, either ``{"width": w, "height": h}`` or a
            ``(w, h)`` sequence.
    """
    from_mapping = isinstance(size, dict)
    width = int(size["width"] if from_mapping else size[0])
    height = int(size["height"] if from_mapping else size[1])
    wanted = (width, height)

    with Image.open(path) as source:
        if source.size != wanted:
            rescaled = source.resize(wanted, Image.Resampling.LANCZOS)
            rescaled.save(path)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment