Ancestral ranges
A mammal phylogeny with ancestral ranges reconstructed by Fitch maximum parsimony. Branch width encodes subtended terminals; node pies show the count-weighted range distribution (final phase). Toggle the layout, link curve, and coordinate mode. Hover a node pie or a branch to read out that clade’s reconstructed regions as a table of occurrence counts and shares.
fps 0frame 0 ms
Layout
Links
Coords
Tips256
import { schemeCategory10 } from "d3-scale-chromatic";import { scaleOrdinal, scaleSqrt } from "d3-scale";import { link as d3link, linkRadial, curveLinear, curveStepBefore, curveBumpX, pointRadial } from "d3-shape";import type { HierarchyPointNode, HierarchyPointLink } from "d3-hierarchy";import { plot, h } from "@mapequation/d3gl/map";import { LabelLayer, type LabelAnchor } from "@mapequation/d3gl/labels";import type { ImperativeSetup } from "../types.js";import type { TreeNode } from "../shared/tree.js";import { layoutRectangular, layoutRadial, nodeXY, type LayoutMode } from "../shared/layout.js";import { makeMammalTree, assignBioregions, REGION_NAMES } from "../shared/mammals-data.js";import { calcMaximumParsimony, aggregateClusters, aggregateSpeciesCount } from "../shared/parsimony.js";
const LINE_MIN = 1, LINE_MAX = 22; // branch-width range when scaling by subtended terminals
type SizeMode = "world" | "screen";type CurveMode = "linear" | "step" | "bump";type PNode = HierarchyPointNode<TreeNode>;type PLink = HierarchyPointLink<TreeNode>;
const regionColor = scaleOrdinal<number, string>() .domain(REGION_NAMES.map((_, i) => i)) .range(schemeCategory10 as string[]);
interface Wedge { cx: number; cy: number; r: number; a0: number; a1: number; clusterId: number; count: number; single: boolean; node: PNode; }interface PieSpec { cx: number; cy: number; rBase: number; node: PNode; slices: { clusterId: number; count: number; a0: number; a1: number }[]; }
/** * A link-drawing closure for the chosen layout/curve. Radial layouts are origin-centred by * d3-shape's `pointRadial`/`linkRadial`; each radial closure does `ctx.translate(ox, oy)` once * (d3gl's path context supports the canonical canvas translate) to land the fan at the canvas * centre `center` ([ox, oy]). The view transform stays at IDENTITY, so the user's zoom survives * layout/curve toggles. Rectangular needs no offset (center is [0, 0]). */function makeLinkDraw(mode: LayoutMode, curve: CurveMode, center: [number, number]): (ctx: CanvasRenderingContext2D, l: PLink) => void { const [ox, oy] = center; if (mode === "rectangular") { const factory = curve === "linear" ? curveLinear : curve === "step" ? curveStepBefore : curveBumpX; const gen = d3link<PLink, PNode>(factory).x((d) => d.y).y((d) => d.x); return (ctx, l) => { gen.context(ctx); gen(l); }; } if (curve === "bump") { // linkRadial emits curves around (0,0); translate the context so the figure lands centred. const gen = linkRadial<PLink, PNode>().angle((d) => d.x).radius((d) => d.y); return (ctx, l) => { ctx.translate(ox, oy); gen.context(ctx); gen(l); }; } if (curve === "linear") { return (ctx, l) => { ctx.translate(ox, oy); const [sx, sy] = pointRadial(l.source.x, l.source.y); const [tx, ty] = pointRadial(l.target.x, l.target.y); ctx.moveTo(sx, sy); ctx.lineTo(tx, ty); }; } // radial "step": arc along the parent radius to the child angle, then a radial line out. return (ctx, l) => { ctx.translate(ox, oy); const r0 = l.source.y; const sa = l.source.x - Math.PI / 2; const ta = l.target.x - Math.PI / 2; ctx.moveTo(r0 * Math.cos(sa), r0 * Math.sin(sa)); ctx.arc(0, 0, r0, sa, ta, ta < sa); const [tx, ty] = pointRadial(l.target.x, l.target.y); ctx.lineTo(tx, ty); };}
/** The node's displayed distribution: the reconstructed range set (membership), each region * sized by its aggregated occurrence count (sorted by count). Falls back to equal slices if * every set region has zero count (a region reconstructed only from a sibling clade). */function pieSlices(node: PNode): { clusterId: number; count: number; a0: number; a1: number }[] { const counts = new Map((node.data.clusters?.clusters ?? []).map((r) => [r.clusterId, r.count])); const regs = (node.data.ranges?.clusters ?? []) .map((r) => ({ clusterId: r.clusterId, count: counts.get(r.clusterId) ?? 0 })) .sort((a, b) => b.count - a.count || a.clusterId - b.clusterId); if (regs.length === 0) return []; const tot = regs.reduce((s, r) => s + r.count, 0); let a = -Math.PI / 2; return regs.map((r) => { const frac = tot > 0 ? r.count / tot : 1 / regs.length; const a0 = a, a1 = a + frac * 2 * Math.PI; a = a1; return { clusterId: r.clusterId, count: r.count, a0, a1 }; });}
/** The dominant region of a node's DISPLAYED range — the highest-count slice within the * reconstructed range (so the branch color matches the pie, never a region outside it). */function topRegion(node: PNode): number | undefined { return pieSlices(node)[0]?.clusterId;}
/** Hover-tooltip body: a small table of the node's reconstructed regions, each row sized * exactly as its pie wedge (share = wedge angle / 2π) and labelled with its occurrence * count. The header names a tip species or, for an internal node, its subtended clade * size. Built with d3gl's `h` hyperscript so the engine's shared tooltip renders it. */function rangeTable(node: PNode): HTMLElement | null { const slices = pieSlices(node); if (slices.length === 0) return null; const header = node.children ? `${node.data.speciesCount ?? 1} species` : node.data.name; return h("div", null, [ h("div", { class: "mb-1 font-semibold" }, header), h("table", { class: "border-collapse" }, slices.map((s) => { const share = Math.round(((s.a1 - s.a0) / (2 * Math.PI)) * 100); return h("tr", null, [ h("td", { class: "pr-1.5" }, h("span", { class: "inline-block h-2.5 w-2.5 rounded-sm align-middle", style: `background:${regionColor(s.clusterId)}`, })), h("td", { class: "pr-2.5" }, REGION_NAMES[s.clusterId] ?? `#${s.clusterId}`), h("td", { class: "pr-1.5 text-right tabular-nums" }, s.count), h("td", { class: "text-right tabular-nums opacity-60" }, `${share}%`), ]); })), ]);}
/** Constant screen-px offset from the node: rightward (rectangular) or outward along the * radius (radial). Vertical centering for rectangular is folded in as -height/2. */function labelOffset(mode: LayoutMode, angle: number, gap: number, height: number): [number, number] { if (mode !== "radial") return [gap, -height / 2]; const a = angle - Math.PI / 2; // pointRadial's outward direction return [Math.cos(a) * gap, Math.sin(a) * gap];}
/** * A mammal phylogeny with ancestral ranges reconstructed by Fitch maximum * parsimony: branch width encodes subtended terminals, node pies show the * count-weighted range distribution (always on). Reads `layout` * (rectangular | radial), `curve` (linear | step | bump), and `coords` * (screen | world) from the harness options. Pure d3gl; the harness owns the * controls, backend, export, and zoom. * * Hovering a node pie or a branch shows a region tooltip (`rangeTable`) and a * highlight, driven by the engine's pick path (a per-layer hit index + a * `pick()` on each pointermove). */export const setup: ImperativeSetup = (host, { width, height, backend }) => { const W = width, H = height;
// HTML label overlay over the canvas (host is positioned `relative` by the harness). const labelEl = document.createElement("div"); labelEl.className = "absolute inset-0 pointer-events-none overflow-hidden text-[11px] leading-[14px] text-[#333]";
const chart = plot(host, { width: W, height: H, backend }); host.appendChild(labelEl); const labels = new LabelLayer(labelEl, (a) => a.text);
// Label anchors are rebuilt by `render`; the zoom handler keeps them aligned with the GPU // geometry. The transform stays at identity in both layouts (radial centering is baked into // the world coordinates below), so the user's zoom/pan is never reset by an option change. let anchors: LabelAnchor[] = []; let view: { k: number; x: number; y: number } = { k: 1, x: 0, y: 0 }; const updateLabels = (t = view): void => { view = t; labels.update(anchors, t, { width: W, height: H }); }; // Scroll to zoom, drag to pan; keep the HTML tip labels aligned with the GPU geometry. chart.enableZoom([0.5, 40], (t) => updateLabels(t));
return { engine: chart, // (Re)build all option-dependent layers on the existing chart. Never touches the transform, // so toggling layout/curve/coords or growing the tree preserves the current zoom/pan. render: (options) => { const tips = 2 ** ((options.tips as number) ?? 6); // 2^exp tips (exp 5..9 → 32..512) const layoutMode = (options.layout as LayoutMode) ?? "rectangular"; const curve = (options.curve as CurveMode) ?? "step"; const sizeMode = (options.coords as SizeMode) ?? "screen";
// Build the tree, occurrence-count distribution, and the final Fitch phase (unconditional). const tree = makeMammalTree(tips, 1); const cps = assignBioregions(tree, REGION_NAMES.length, 1); aggregateClusters(tree, cps); calcMaximumParsimony(tree, cps); aggregateSpeciesCount(tree);
const root = layoutMode === "rectangular" ? layoutRectangular(tree, W, H, "linear") // Radial is a half-circle "sunset" fan (Fig. 3a): leaves span π, centred on north. : layoutRadial(tree, W, H, "linear", 50, Math.PI, -Math.PI / 2); const links = root.links(); const tipNodes = root.leaves(); const totalSpecies = root.data.speciesCount ?? 1;
// Bake the radial centering into WORLD coordinates so the fan is centred at the IDENTITY // transform (no centring setTransform → zoom survives a layout toggle). Rectangular keeps // d3's origin (offset [0,0]). The half-circle fan's vertical centre is (H + R) / 2. const R = layoutMode === "radial" ? Math.max(...tipNodes.map((n) => n.y)) : 0; const center: [number, number] = layoutMode === "radial" ? [W / 2, (H + R) / 2] : [0, 0]; const [ox, oy] = center; const xy = (n: PNode): [number, number] => { const [x, y] = nodeXY(n, layoutMode); return [x + ox, y + oy]; };
const widthScale = scaleSqrt().domain([1, totalSpecies]).range([LINE_MIN, LINE_MAX]); const widthBase = (n: PNode): number => widthScale(n.data.speciesCount ?? 1); // World mode: pie diameter = the incoming branch width (scales with zoom). Screen mode: // a fixed pixel size so even small-clade nodes stay visible when zoomed in. const SCREEN_PIE_R = 8; const pieR = (n: PNode): number => (sizeMode === "screen" ? SCREEN_PIE_R : widthBase(n) / 2);
const pieSpecs: PieSpec[] = root.descendants().map((n) => { const [cx, cy] = xy(n); return { cx, cy, rBase: pieR(n), node: n, slices: pieSlices(n) }; }).filter((p) => p.slices.length > 0);
const GAP = 8; const radial = layoutMode === "radial"; anchors = tipNodes.map((n, i) => { const [px, py] = xy(n); const h = 14; return { id: `t${i}`, refX: px, refY: py, text: n.data.name, width: n.data.name.length * 6.2 + 6, height: h, priority: n.data.speciesCount ?? 1, offset: labelOffset(layoutMode, n.x, GAP, h), // Radial: declare the reading angle; d3gl derives the CSS transform AND the oriented // collision box from it, so rotated tip labels pack by their true on-screen footprint. ...(radial ? { rotation: n.x - Math.PI / 2, textAnchor: "start" as const, keepUpright: true } : {}), }; });
const drawLink = makeLinkDraw(layoutMode, curve, center);
chart.layer("links", links, { draw: (ctx, l) => drawLink(ctx, l), // Color each branch by the child clade's most-occurring bioregion. stroke: (l: PLink) => { const t = topRegion(l.target); return t == null ? "#777" : regionColor(t); }, lineWidth: (l: PLink) => widthBase(l.target), sizeMode, id: (_l, i) => i, // Hovering a branch darkens it and reads out its child clade's range distribution. hover: { stroke: "#111" }, tooltip: (l: PLink) => rangeTable(l.target), });
const wedges: Wedge[] = []; for (const p of pieSpecs) { const single = p.slices.length === 1; for (const s of p.slices) wedges.push({ cx: p.cx, cy: p.cy, r: p.rBase, a0: s.a0, a1: s.a1, clusterId: s.clusterId, count: s.count, single, node: p.node }); } chart.layer("pies", wedges, { draw: (ctx, w) => { // Single-region node: a full circle. closePath() so the subpath is closed and the // WebGL fill tessellator (which fills only closed subpaths) renders it like Canvas/SVG. if (w.single) { ctx.moveTo(w.cx + w.r, w.cy); ctx.arc(w.cx, w.cy, w.r, 0, 2 * Math.PI); ctx.closePath(); } else { ctx.moveTo(w.cx, w.cy); ctx.arc(w.cx, w.cy, w.r, w.a0, w.a1); ctx.closePath(); } }, fill: (w: Wedge) => regionColor(w.clusterId), stroke: "#ffffff", lineWidth: (w: Wedge) => (w.single ? 0 : Math.min(0.5, w.r * 0.16)), anchor: (w: Wedge) => [w.cx, w.cy], // pin the pie; screen mode keeps it constant-size sizeMode, // Screen mode: declutter overlapping fixed-size pies on zoom (bigger clades win). declutter: sizeMode === "screen" ? SCREEN_PIE_R * 2 + 2 : undefined, id: (_w, i) => i, // Hovering a pie outlines the wedge and reads out the whole node's range distribution. hover: { stroke: "#222", lineWidth: 1 }, tooltip: (w: Wedge) => rangeTable(w.node), });
chart.render(); updateLabels(); // re-place labels at the CURRENT transform (preserved across option changes) }, dispose: () => labels.destroy(), };};import type { RegionSet } from "./parsimony.js";
export interface TreeNode { name: string; group: number; /** Branch length = parent.time - this.time (time elapsed along the branch). */ length: number; /** Age before present, in [0, 1]. Tips are at 0 (the present); the root is oldest. */ time: number; children?: TreeNode[]; /** Reconstructed ancestral range *set* (membership): species' present regions at tips, * the most-parsimonious ancestral set at internal nodes. Set by calcMaximumParsimony(). */ ranges?: RegionSet; /** Occurrence-count *distribution*: leaf counts summed up the tree, sorted by descending * count. Sizes the pie wedges. Set by parsimony.aggregateClusters(). */ clusters?: RegionSet; /** Number of subtended terminals (Fig. 3 branch-thickness metric). Set by * parsimony.aggregateSpeciesCount(). */ speciesCount?: number;}
/** Deterministic LCG so trees are stable across renders (no Math.random in render path). */function lcg(seed: number) { let s = seed >>> 0; return () => (s = (s * 1664525 + 1013904223) >>> 0) / 0xffffffff; }
/** * Build a roughly-balanced bifurcating tree with exactly `tips` leaves by * recursively splitting the tip count in two (with a little jitter so it looks * natural rather than perfectly symmetric). Branch lengths are random — a * phylogram, not a cladogram. Deterministic for a given seed. */export function makeTree(tips: number, seed = 42): TreeNode { const rnd = lcg(seed); let leaf = 0; let groupCounter = 0; // Ultrametric ("dated") tree: every tip is at time 0 (the present); internal // nodes carry a divergence age younger than their parent's. `build` takes this // node's age and the parent's age (to compute the branch length). const build = (n: number, age: number, parentAge: number, group: number): TreeNode => { const length = parentAge - age; if (n <= 1) { leaf++; return { name: `tip_${leaf}`, group, length, time: 0 }; } const jitter = (rnd() - 0.5) * n * 0.4; const left = Math.max(1, Math.min(n - 1, Math.round(n / 2 + jitter))); // Occasionally start a new colour group so tips cluster by clade. const g1 = rnd() < 0.22 ? ++groupCounter % 10 : group; const g2 = rnd() < 0.22 ? ++groupCounter % 10 : group; // Children diverge at younger ages (closer to the present); leaves sit at 0. const ageL = left <= 1 ? 0 : age * (0.3 + rnd() * 0.5); const ageR = n - left <= 1 ? 0 : age * (0.3 + rnd() * 0.5); return { name: "node", group, length, time: age, children: [build(left, ageL, age, g1), build(n - left, ageR, age, g2)], }; }; return build(Math.max(2, tips), 1, 1, 0);}import { hierarchy, cluster, type HierarchyPointNode } from "d3-hierarchy";import { pointRadial } from "d3-shape";import { scaleLinear, scaleSymlog } from "d3-scale";import type { TreeNode } from "./tree.js";
export type LayoutMode = "rectangular" | "radial";export type TimeScaleKind = "linear" | "log";
/** * Map a node's age (time before present; tips = 0, root = maxAge) to a position. * `present` is the coordinate for age 0, `root` the coordinate for the oldest node. * `scaleSymlog` is used for "log" because it is defined at 0 (unlike scaleLog). */function timePosition(kind: TimeScaleKind, maxAge: number, present: number, root: number): (age: number) => number { if (kind === "log") { const s = scaleSymlog().domain([0, maxAge]).range([present, root]); return (age) => s(age); } const s = scaleLinear().domain([0, maxAge]).range([present, root]); return (age) => s(age);}
/** * Rectangular dated phylogram. Uses d3's HierarchyPointNode coordinates directly: * `x` = vertical leaf-spacing axis, `y` = horizontal time axis. Tips (age 0) align at * the right (the present); the root (oldest) sits at the left. */export function layoutRectangular( root: TreeNode, width: number, height: number, time: TimeScaleKind, pad = 40,): HierarchyPointNode<TreeNode> { const h = cluster<TreeNode>().size([height - 2 * pad, 1])(hierarchy(root, (d) => d.children)); const maxAge = h.data.time || 1; const pos = timePosition(time, maxAge, width - pad, pad); // age 0 → right, age max → left h.each((n) => { n.x += pad; n.y = pos(n.data.time); }); return h;}
/** * Radial dated phylogram. d3 convention: `x` = angle (0..2π), `y` = radius (= time). * Tips (age 0) sit on the outer rim, the root at the centre. Cartesian positions come * from `d3.pointRadial(x, y)` and are origin-centred — the caller centres the view with * a `translate(CX, CY)` transform (which also lets `d3.linkRadial()` work unmodified). */export function layoutRadial( root: TreeNode, width: number, height: number, time: TimeScaleKind, pad = 30, angleExtent = 2 * Math.PI, angleStart = 0,): HierarchyPointNode<TreeNode> { const h = cluster<TreeNode>().size([angleExtent, 1])(hierarchy(root, (d) => d.children)); const maxAge = h.data.time || 1; // A partial fan (e.g. a half-circle "sunset") can use a larger radius: the leaves span only // part of the disc, so it is bounded by half the width and the full height rather than the // inscribed circle. const half = angleExtent < 2 * Math.PI - 1e-9; const R = (half ? Math.min(width / 2, height) : Math.min(width, height) / 2) - pad; const pos = timePosition(time, maxAge, R, 0); // age 0 → R (rim), age max → 0 (centre) h.each((n) => { n.x += angleStart; // shift the fan's angular origin (e.g. centre a half-fan on "north") n.y = pos(n.data.time); }); return h;}
/** Cartesian world coordinates for a node, for points / labels / hit-testing. */export function nodeXY(n: HierarchyPointNode<TreeNode>, mode: LayoutMode): [number, number] { return mode === "radial" ? pointRadial(n.x, n.y) : [n.y, n.x];}import type { TreeNode } from "./tree.js";import type { ClustersPerSpecies } from "./parsimony.js";
/** * Synthetic data for the Infomap Bioregions example. A procedurally generated mammal * phylogeny (binomial names, dated branches) plus per-species bioregion distributions * that cluster phylogenetically — most species in one region, some spilling into a * second — so the Fitch reconstruction (and, later, the binned occurrence field) has * real structure. Everything is deterministic for a given seed. */
/** The six WWF-style biogeographic realms, used as default bioregions. */export const REGION_NAMES = ["Nearctic", "Neotropic", "Palearctic", "Afrotropic", "Indomalaya", "Australasia"];
/** mulberry32 — small, fast, seedable PRNG so trees/assignments are reproducible. */function mulberry32(seed: number): () => number { let a = seed >>> 0; return () => { a |= 0; a = (a + 0x6d2b79f5) | 0; let t = Math.imul(a ^ (a >>> 15), 1 | a); t = (t + Math.imul(t ^ (t >>> 7), 61 | t)) ^ t; return ((t ^ (t >>> 14)) >>> 0) / 4294967296; };}
const GENERA = [ "Petrogale", "Macropus", "Mus", "Rattus", "Sorex", "Myotis", "Canis", "Felis", "Bos", "Cervus", "Sciurus", "Lemur", "Pteropus", "Dasyurus", "Tupaia", "Echimys", "Crocidura", "Apodemus", "Microtus", "Tamias", "Vulpes", "Mustela", "Lepus", "Ovis", "Equus", "Marmota", "Galago", "Nycticebus", "Tarsius", "Bradypus",];const EPITHETS = [ "xanthopus", "robustus", "major", "minor", "orientalis", "borealis", "australis", "silvestris", "montanus", "fuscus", "albus", "niger", "rufus", "gracilis", "elegans", "domesticus", "campestris", "nivalis", "palustris", "arboreus", "littoralis", "insularis", "pictus", "vagans", "concolor", "sylvaticus",];
/** * Build a dated bifurcating tree with exactly `nTips` leaves (same age model as * tree.makeTree: tips at time 0, internal nodes older). Leaves get binomial names; a * clade tends to share a genus (switching with small probability), echoing real * taxonomy. Deterministic for `seed`. */export function makeMammalTree(nTips: number, seed = 1): TreeNode { const rnd = mulberry32(seed); const pick = <T>(arr: readonly T[]): T => arr[Math.floor(rnd() * arr.length)]!; const used = new Set<string>(); const uniqueName = (genus: string): string => { let name = `${genus} ${pick(EPITHETS)}`; while (used.has(name)) name = `${genus} ${pick(EPITHETS)}${used.size}`; used.add(name); return name; };
const build = (n: number, age: number, parentAge: number, genus: string, group: number): TreeNode => { const length = parentAge - age; if (n <= 1) return { name: uniqueName(genus), group, length, time: 0 }; const jitter = (rnd() - 0.5) * n * 0.4; const left = Math.max(1, Math.min(n - 1, Math.round(n / 2 + jitter))); const genusL = rnd() < 0.3 ? pick(GENERA) : genus; // occasionally start a new genus const genusR = rnd() < 0.3 ? pick(GENERA) : genus; const groupL = rnd() < 0.22 ? (group + 1) % 10 : group; const groupR = rnd() < 0.22 ? (group + 1) % 10 : group; const ageL = left <= 1 ? 0 : age * (0.3 + rnd() * 0.5); const ageR = n - left <= 1 ? 0 : age * (0.3 + rnd() * 0.5); return { name: "node", group, length, time: age, children: [build(left, ageL, age, genusL, groupL), build(n - left, ageR, age, genusR, groupR)], }; }; return build(Math.max(2, nTips), 1, 1, pick(GENERA), 0);}
/** * Assign each leaf a bioregion distribution that clusters phylogenetically: a clade * inherits a "home" region, shifting to a random region with small probability * (vicariance). ~18% of species also spill into a second region. `count` is a synthetic * number of occurrences — large in the home region, small in the spillover one — so the * aggregated distribution (and the pie wedges) vary in size. Returns the * `clustersPerSpecies` map consumed by `calcMaximumParsimony` / `aggregateClusters`. * Deterministic for `seed`. */export function assignBioregions(root: TreeNode, nRegions = REGION_NAMES.length, seed = 1): ClustersPerSpecies { const rnd = mulberry32((seed ^ 0x9e3779b9) >>> 0); const out: ClustersPerSpecies = {}; const walk = (node: TreeNode, home: number): void => { const h = rnd() < 0.15 ? Math.floor(rnd() * nRegions) : home; // vicariance shift if (!node.children) { const clusters = [{ clusterId: h, count: 5 + Math.floor(rnd() * 26) }]; // home: 5–30 occurrences if (rnd() < 0.18) { const other = (h + 1 + Math.floor(rnd() * (nRegions - 1))) % nRegions; // spillover: 1–6 clusters.push({ clusterId: other, count: 1 + Math.floor(rnd() * 6) }); } out[node.name] = { totCount: clusters.reduce((s, c) => s + c.count, 0), clusters }; return; } node.children.forEach((c) => walk(c, h)); }; walk(root, Math.floor(rnd() * nRegions)); return out;}import type { TreeNode } from "./tree.js";
/** * Fitch maximum-parsimony reconstruction of ancestral bioregion ranges (Fitch 1971), * plus occurrence-count aggregation. A region is `{clusterId, count}` — a bioregion id * and a count of species occurrences in it. Two distinct per-node products: * * - `node.ranges` — the reconstructed ancestral *range set* (membership): at a tip the * species' present regions, at an internal node the most-parsimonious * ancestral set. Counts here are presence (1); membership is the point. * - `node.clusters`— the occurrence-count *distribution*: leaf counts summed up the tree, * sorted by descending count. Used to size the pie wedges. * * Fitch's two-phase algorithm. Preliminary phase (post-order): a leaf's set is its * regions; an internal node's set is the intersection of its children's sets, or — when * that is empty — their union (flagged `byUnion`). Final phase (pre-order), applying the * classic rules to each non-root node given its already-final parent set: * * I. If the preliminary set contains all regions of the parent's final set, go to II, * else go to III. * II. (diminished ambiguity) Drop every region not in the parent's final set — i.e. keep * the intersection with the parent. Done. * III. If the preliminary set was formed by a union of its children's sets, go to IV, * else go to V. * IV. (expanded ambiguity) Add every parent-final region not already present — i.e. the * union with the parent. Done. * V. (encompassing ambiguity) Add any region not already present that is in BOTH the * parent's final set AND at least one child's preliminary set. Done. * * NOTE: this fixes a bug in the bioregions1 reference, whose final phase tests * `node.byUnion` (always undefined — the flag lives on `node.clusters.byUnion`), so Rule IV * never fires and Rule V adds nothing. That produces incorrect sets for Fitch figs 2d/2f * (e.g. fig 2d node 0 wrongly collapses to {A,C} instead of the correct {A,C,G}). We follow * the written rules above, verified against the most-parsimonious reconstructions. */export interface Region { clusterId: number; count: number; }export interface RegionSet { totCount: number; clusters: Region[]; byUnion?: boolean; }export type ClustersPerSpecies = Record<string, RegionSet>;
const idSet = (s: Region[]): Set<number> => new Set(s.map((r) => r.clusterId));/** Regions of `a` also present in `b` (by clusterId), preserving `a`'s order. */function intersectBy(a: Region[], b: Region[]): Region[] { const bi = idSet(b); return a.filter((r) => bi.has(r.clusterId)); }/** Regions of `a`, then regions of `b` not already in `a`. */function unionBy(a: Region[], b: Region[]): Region[] { const ai = idSet(a); return a.concat(b.filter((r) => !ai.has(r.clusterId))); }const set = (clusters: Region[], byUnion?: boolean): RegionSet => ({ totCount: clusters.length, clusters, byUnion });
function visitPostOrder(node: TreeNode, cb: (n: TreeNode) => void): void { node.children?.forEach((c) => visitPostOrder(c, cb)); cb(node);}function visitPreOrder(node: TreeNode, cb: (n: TreeNode, parent: TreeNode | null) => void, parent: TreeNode | null = null): void { cb(node, parent); node.children?.forEach((c) => visitPreOrder(c, cb, node));}
/** Bottom-up pass: leaves presence-count their regions; each internal node takes the * intersection of its children's sets, falling back to the union (flagged) if empty. */export function calcMaximumParsimonyPreliminaryPhase(root: TreeNode, clustersPerSpecies: ClustersPerSpecies): TreeNode { visitPostOrder(root, (node) => { if (!node.children) { const cl = clustersPerSpecies[node.name]; node.ranges = set(cl ? cl.clusters.map((r) => ({ clusterId: r.clusterId, count: 1 })) : []); return; } const childSets = node.children.map((c) => c.ranges!.clusters); let anc = childSets.reduce((acc, s) => intersectBy(acc, s)); const byUnion = anc.length === 0; if (byUnion) anc = childSets.reduce((acc, s) => unionBy(acc, s), [] as Region[]); node.ranges = set(anc, byUnion); }); return root;}
/** Top-down pass applying Fitch's rules I–V (see the module doc). Root and leaves are * already final. Children are visited after their parent, so a node's Rule-V lookup of * its children's *preliminary* sets reads them before they are overwritten. */export function calcMaximumParsimonyFinalPhase(root: TreeNode): TreeNode { visitPreOrder(root, (node, parent) => { if (!parent || !node.children) return; const prelim = node.ranges!.clusters; const pfinal = parent.ranges!.clusters; if (intersectBy(prelim, pfinal).length === pfinal.length) { node.ranges = set(intersectBy(prelim, pfinal)); // I → II (diminished) } else if (node.ranges!.byUnion) { node.ranges = set(unionBy(prelim, pfinal)); // III → IV (expanded) } else { // III → V (encompassing): add parent regions present in ≥1 child's preliminary set. const childrenUnion = node.children.map((c) => c.ranges!.clusters).reduce((a, b) => unionBy(a, b), [] as Region[]); node.ranges = set(unionBy(prelim, intersectBy(pfinal, childrenUnion))); } }); return root;}
export function calcMaximumParsimony(root: TreeNode, clustersPerSpecies: ClustersPerSpecies): TreeNode { calcMaximumParsimonyPreliminaryPhase(root, clustersPerSpecies); calcMaximumParsimonyFinalPhase(root); return root;}
/** * Sum occurrence counts up the tree into `node.clusters`, sorted by descending count (ties * broken by clusterId). A leaf's counts come from the data; an internal node's count for a * region is the sum over its descendants. Mirrors bioregions1 `_aggregateClusters`, minus * the fraction-threshold "rest" grouping. */export function aggregateClusters(root: TreeNode, clustersPerSpecies: ClustersPerSpecies): TreeNode { const sorted = (clusters: Region[], totCount: number): RegionSet => ({ totCount, clusters: [...clusters].sort((a, b) => b.count - a.count || a.clusterId - b.clusterId), }); visitPostOrder(root, (node) => { if (!node.children) { const cl = clustersPerSpecies[node.name]; const clusters = cl ? cl.clusters.map((r) => ({ clusterId: r.clusterId, count: r.count })) : []; node.clusters = sorted(clusters, clusters.reduce((s, r) => s + r.count, 0)); return; } const agg = new Map<number, number>(); let totCount = 0; for (const child of node.children) { for (const r of child.clusters!.clusters) { agg.set(r.clusterId, (agg.get(r.clusterId) ?? 0) + r.count); totCount += r.count; } } node.clusters = sorted([...agg].map(([clusterId, count]) => ({ clusterId, count })), totCount); }); return root;}
/** * Post-order aggregation of `speciesCount` = number of subtended terminals (the Fig. 3 * branch-thickness metric). With no `presence` map every leaf counts as one; with one, * a leaf counts only if present (absent species contribute 0). */export function aggregateSpeciesCount(root: TreeNode, presence?: Record<string, number>): TreeNode { visitPostOrder(root, (node) => { if (!node.children) { node.speciesCount = presence ? (presence[node.name] ? 1 : 0) : 1; return; } node.speciesCount = node.children.reduce((sum, c) => sum + (c.speciesCount ?? 0), 0); }); return root;}