Phylogenetic tree
The smallest tree example: lay out a 64-tip phylogram with d3-hierarchy’s cluster, then draw
the branches with a d3.link() generator straight into the d3gl context. Scroll to zoom, drag to
pan. The two variants below render the same tree but differ in how node and line sizes behave
under zoom — world coordinates vs. screen coordinates.
World-space nodes
Section titled “World-space nodes”Node radii and branch widths are in world units, so they scale with the view: zoom in and the dots and lines grow with everything else.
fps 0frame 0 ms
import { schemeCategory10 } from "d3-scale-chromatic";import { link as d3link, curveStepBefore } from "d3-shape";import type { HierarchyPointNode, HierarchyPointLink } from "d3-hierarchy";import { plot } from "@mapequation/d3gl/map";import type { ImperativeSetup } from "../types.js";import { makeTree, type TreeNode } from "../shared/tree.js";import { layoutRectangular, nodeXY } from "../shared/layout.js";
type PNode = HierarchyPointNode<TreeNode>;type PLink = HierarchyPointLink<TreeNode>;
/** * The smallest tree example: a 64-tip rectangular phylogram laid out with * `d3-hierarchy`'s cluster, with `d3.link(curveStepBefore)` branches and * `schemeCategory10` tip nodes. The `coords` option (`"world"` | `"screen"`) * selects the size mode — world units scale with zoom, screen pixels stay * constant. Pure d3gl; the harness owns backend, export, and the coords toggle. */export const setup: ImperativeSetup = (host, { width, height, backend, options }) => { const sizeMode = (options.coords as "world" | "screen") ?? "world"; const root = layoutRectangular(makeTree(64), width, height, "linear"); const links = root.links(); const tips = root.leaves();
// Rectangular step links: a d3-shape link generator drawing straight into the d3gl context. const gen = d3link<PLink, PNode>(curveStepBefore).x((d) => d.y).y((d) => d.x);
const chart = plot(host, { width, height, backend }); chart.layer("links", links, { draw: (ctx, l) => { gen.context(ctx); gen(l); }, stroke: "#555", lineWidth: 0.8, sizeMode, }); chart.points("nodes", tips, { x: (n) => nodeXY(n, "rectangular")[0], y: (n) => nodeXY(n, "rectangular")[1], radius: 2.6, fill: (n) => schemeCategory10[n.data.group % 10] ?? "#888", sizeMode, }); chart.enableZoom([0.5, 40]); chart.render();
return chart;};import type { RegionSet } from "./parsimony.js";
export interface TreeNode { name: string; group: number; /** Branch length = parent.time - this.time (time elapsed along the branch). */ length: number; /** Age before present, in [0, 1]. Tips are at 0 (the present); the root is oldest. */ time: number; children?: TreeNode[]; /** Reconstructed ancestral range *set* (membership): species' present regions at tips, * the most-parsimonious ancestral set at internal nodes. Set by calcMaximumParsimony(). */ ranges?: RegionSet; /** Occurrence-count *distribution*: leaf counts summed up the tree, sorted by descending * count. Sizes the pie wedges. Set by parsimony.aggregateClusters(). */ clusters?: RegionSet; /** Number of subtended terminals (Fig. 3 branch-thickness metric). Set by * parsimony.aggregateSpeciesCount(). */ speciesCount?: number;}
/** Deterministic LCG so trees are stable across renders (no Math.random in render path). */function lcg(seed: number) { let s = seed >>> 0; return () => (s = (s * 1664525 + 1013904223) >>> 0) / 0xffffffff; }
/** * Build a roughly-balanced bifurcating tree with exactly `tips` leaves by * recursively splitting the tip count in two (with a little jitter so it looks * natural rather than perfectly symmetric). Branch lengths are random — a * phylogram, not a cladogram. Deterministic for a given seed. */export function makeTree(tips: number, seed = 42): TreeNode { const rnd = lcg(seed); let leaf = 0; let groupCounter = 0; // Ultrametric ("dated") tree: every tip is at time 0 (the present); internal // nodes carry a divergence age younger than their parent's. `build` takes this // node's age and the parent's age (to compute the branch length). const build = (n: number, age: number, parentAge: number, group: number): TreeNode => { const length = parentAge - age; if (n <= 1) { leaf++; return { name: `tip_${leaf}`, group, length, time: 0 }; } const jitter = (rnd() - 0.5) * n * 0.4; const left = Math.max(1, Math.min(n - 1, Math.round(n / 2 + jitter))); // Occasionally start a new colour group so tips cluster by clade. const g1 = rnd() < 0.22 ? ++groupCounter % 10 : group; const g2 = rnd() < 0.22 ? ++groupCounter % 10 : group; // Children diverge at younger ages (closer to the present); leaves sit at 0. const ageL = left <= 1 ? 0 : age * (0.3 + rnd() * 0.5); const ageR = n - left <= 1 ? 0 : age * (0.3 + rnd() * 0.5); return { name: "node", group, length, time: age, children: [build(left, ageL, age, g1), build(n - left, ageR, age, g2)], }; }; return build(Math.max(2, tips), 1, 1, 0);}import { hierarchy, cluster, type HierarchyPointNode } from "d3-hierarchy";import { pointRadial } from "d3-shape";import { scaleLinear, scaleSymlog } from "d3-scale";import type { TreeNode } from "./tree.js";
export type LayoutMode = "rectangular" | "radial";export type TimeScaleKind = "linear" | "log";
/** * Map a node's age (time before present; tips = 0, root = maxAge) to a position. * `present` is the coordinate for age 0, `root` the coordinate for the oldest node. * `scaleSymlog` is used for "log" because it is defined at 0 (unlike scaleLog). */function timePosition(kind: TimeScaleKind, maxAge: number, present: number, root: number): (age: number) => number { if (kind === "log") { const s = scaleSymlog().domain([0, maxAge]).range([present, root]); return (age) => s(age); } const s = scaleLinear().domain([0, maxAge]).range([present, root]); return (age) => s(age);}
/** * Rectangular dated phylogram. Uses d3's HierarchyPointNode coordinates directly: * `x` = vertical leaf-spacing axis, `y` = horizontal time axis. Tips (age 0) align at * the right (the present); the root (oldest) sits at the left. */export function layoutRectangular( root: TreeNode, width: number, height: number, time: TimeScaleKind, pad = 40,): HierarchyPointNode<TreeNode> { const h = cluster<TreeNode>().size([height - 2 * pad, 1])(hierarchy(root, (d) => d.children)); const maxAge = h.data.time || 1; const pos = timePosition(time, maxAge, width - pad, pad); // age 0 → right, age max → left h.each((n) => { n.x += pad; n.y = pos(n.data.time); }); return h;}
/** * Radial dated phylogram. d3 convention: `x` = angle (0..2π), `y` = radius (= time). * Tips (age 0) sit on the outer rim, the root at the centre. Cartesian positions come * from `d3.pointRadial(x, y)` and are origin-centred — the caller centres the view with * a `translate(CX, CY)` transform (which also lets `d3.linkRadial()` work unmodified). */export function layoutRadial( root: TreeNode, width: number, height: number, time: TimeScaleKind, pad = 30, angleExtent = 2 * Math.PI, angleStart = 0,): HierarchyPointNode<TreeNode> { const h = cluster<TreeNode>().size([angleExtent, 1])(hierarchy(root, (d) => d.children)); const maxAge = h.data.time || 1; // A partial fan (e.g. a half-circle "sunset") can use a larger radius: the leaves span only // part of the disc, so it is bounded by half the width and the full height rather than the // inscribed circle. const half = angleExtent < 2 * Math.PI - 1e-9; const R = (half ? Math.min(width / 2, height) : Math.min(width, height) / 2) - pad; const pos = timePosition(time, maxAge, R, 0); // age 0 → R (rim), age max → 0 (centre) h.each((n) => { n.x += angleStart; // shift the fan's angular origin (e.g. centre a half-fan on "north") n.y = pos(n.data.time); }); return h;}
/** Cartesian world coordinates for a node, for points / labels / hit-testing. */export function nodeXY(n: HierarchyPointNode<TreeNode>, mode: LayoutMode): [number, number] { return mode === "radial" ? pointRadial(n.x, n.y) : [n.y, n.x];}import type { TreeNode } from "./tree.js";
/** * Fitch maximum-parsimony reconstruction of ancestral bioregion ranges (Fitch 1971), * plus occurrence-count aggregation. A region is `{clusterId, count}` — a bioregion id * and a count of species occurrences in it. Two distinct per-node products: * * - `node.ranges` — the reconstructed ancestral *range set* (membership): at a tip the * species' present regions, at an internal node the most-parsimonious * ancestral set. Counts here are presence (1); membership is the point. * - `node.clusters`— the occurrence-count *distribution*: leaf counts summed up the tree, * sorted by descending count. Used to size the pie wedges. * * Fitch's two-phase algorithm. Preliminary phase (post-order): a leaf's set is its * regions; an internal node's set is the intersection of its children's sets, or — when * that is empty — their union (flagged `byUnion`). Final phase (pre-order), applying the * classic rules to each non-root node given its already-final parent set: * * I. If the preliminary set contains all regions of the parent's final set, go to II, * else go to III. * II. (diminished ambiguity) Drop every region not in the parent's final set — i.e. keep * the intersection with the parent. Done. * III. If the preliminary set was formed by a union of its children's sets, go to IV, * else go to V. * IV. (expanded ambiguity) Add every parent-final region not already present — i.e. the * union with the parent. Done. * V. (encompassing ambiguity) Add any region not already present that is in BOTH the * parent's final set AND at least one child's preliminary set. Done. * * NOTE: this fixes a bug in the bioregions1 reference, whose final phase tests * `node.byUnion` (always undefined — the flag lives on `node.clusters.byUnion`), so Rule IV * never fires and Rule V adds nothing. That produces incorrect sets for Fitch figs 2d/2f * (e.g. fig 2d node 0 wrongly collapses to {A,C} instead of the correct {A,C,G}). We follow * the written rules above, verified against the most-parsimonious reconstructions. */export interface Region { clusterId: number; count: number; }export interface RegionSet { totCount: number; clusters: Region[]; byUnion?: boolean; }export type ClustersPerSpecies = Record<string, RegionSet>;
const idSet = (s: Region[]): Set<number> => new Set(s.map((r) => r.clusterId));/** Regions of `a` also present in `b` (by clusterId), preserving `a`'s order. */function intersectBy(a: Region[], b: Region[]): Region[] { const bi = idSet(b); return a.filter((r) => bi.has(r.clusterId)); }/** Regions of `a`, then regions of `b` not already in `a`. */function unionBy(a: Region[], b: Region[]): Region[] { const ai = idSet(a); return a.concat(b.filter((r) => !ai.has(r.clusterId))); }const set = (clusters: Region[], byUnion?: boolean): RegionSet => ({ totCount: clusters.length, clusters, byUnion });
function visitPostOrder(node: TreeNode, cb: (n: TreeNode) => void): void { node.children?.forEach((c) => visitPostOrder(c, cb)); cb(node);}function visitPreOrder(node: TreeNode, cb: (n: TreeNode, parent: TreeNode | null) => void, parent: TreeNode | null = null): void { cb(node, parent); node.children?.forEach((c) => visitPreOrder(c, cb, node));}
/** Bottom-up pass: leaves presence-count their regions; each internal node takes the * intersection of its children's sets, falling back to the union (flagged) if empty. */export function calcMaximumParsimonyPreliminaryPhase(root: TreeNode, clustersPerSpecies: ClustersPerSpecies): TreeNode { visitPostOrder(root, (node) => { if (!node.children) { const cl = clustersPerSpecies[node.name]; node.ranges = set(cl ? cl.clusters.map((r) => ({ clusterId: r.clusterId, count: 1 })) : []); return; } const childSets = node.children.map((c) => c.ranges!.clusters); let anc = childSets.reduce((acc, s) => intersectBy(acc, s)); const byUnion = anc.length === 0; if (byUnion) anc = childSets.reduce((acc, s) => unionBy(acc, s), [] as Region[]); node.ranges = set(anc, byUnion); }); return root;}
/** Top-down pass applying Fitch's rules I–V (see the module doc). Root and leaves are * already final. Children are visited after their parent, so a node's Rule-V lookup of * its children's *preliminary* sets reads them before they are overwritten. */export function calcMaximumParsimonyFinalPhase(root: TreeNode): TreeNode { visitPreOrder(root, (node, parent) => { if (!parent || !node.children) return; const prelim = node.ranges!.clusters; const pfinal = parent.ranges!.clusters; if (intersectBy(prelim, pfinal).length === pfinal.length) { node.ranges = set(intersectBy(prelim, pfinal)); // I → II (diminished) } else if (node.ranges!.byUnion) { node.ranges = set(unionBy(prelim, pfinal)); // III → IV (expanded) } else { // III → V (encompassing): add parent regions present in ≥1 child's preliminary set. const childrenUnion = node.children.map((c) => c.ranges!.clusters).reduce((a, b) => unionBy(a, b), [] as Region[]); node.ranges = set(unionBy(prelim, intersectBy(pfinal, childrenUnion))); } }); return root;}
export function calcMaximumParsimony(root: TreeNode, clustersPerSpecies: ClustersPerSpecies): TreeNode { calcMaximumParsimonyPreliminaryPhase(root, clustersPerSpecies); calcMaximumParsimonyFinalPhase(root); return root;}
/** * Sum occurrence counts up the tree into `node.clusters`, sorted by descending count (ties * broken by clusterId). A leaf's counts come from the data; an internal node's count for a * region is the sum over its descendants. Mirrors bioregions1 `_aggregateClusters`, minus * the fraction-threshold "rest" grouping. */export function aggregateClusters(root: TreeNode, clustersPerSpecies: ClustersPerSpecies): TreeNode { const sorted = (clusters: Region[], totCount: number): RegionSet => ({ totCount, clusters: [...clusters].sort((a, b) => b.count - a.count || a.clusterId - b.clusterId), }); visitPostOrder(root, (node) => { if (!node.children) { const cl = clustersPerSpecies[node.name]; const clusters = cl ? cl.clusters.map((r) => ({ clusterId: r.clusterId, count: r.count })) : []; node.clusters = sorted(clusters, clusters.reduce((s, r) => s + r.count, 0)); return; } const agg = new Map<number, number>(); let totCount = 0; for (const child of node.children) { for (const r of child.clusters!.clusters) { agg.set(r.clusterId, (agg.get(r.clusterId) ?? 0) + r.count); totCount += r.count; } } node.clusters = sorted([...agg].map(([clusterId, count]) => ({ clusterId, count })), totCount); }); return root;}
/** * Post-order aggregation of `speciesCount` = number of subtended terminals (the Fig. 3 * branch-thickness metric). With no `presence` map every leaf counts as one; with one, * a leaf counts only if present (absent species contribute 0). */export function aggregateSpeciesCount(root: TreeNode, presence?: Record<string, number>): TreeNode { visitPostOrder(root, (node) => { if (!node.children) { node.speciesCount = presence ? (presence[node.name] ? 1 : 0) : 1; return; } node.speciesCount = node.children.reduce((sum, c) => sum + (c.speciesCount ?? 0), 0); }); return root;}Screen-space nodes
Section titled “Screen-space nodes”The same module with coords: "screen". Node radii and line widths are now in screen pixels and
stay constant as you zoom — only their positions move with the view, so glyphs stay legible at any
zoom level (handy when zooming into a dense subtree).
fps 0frame 0 ms
import { schemeCategory10 } from "d3-scale-chromatic";import { link as d3link, curveStepBefore } from "d3-shape";import type { HierarchyPointNode, HierarchyPointLink } from "d3-hierarchy";import { plot } from "@mapequation/d3gl/map";import type { ImperativeSetup } from "../types.js";import { makeTree, type TreeNode } from "../shared/tree.js";import { layoutRectangular, nodeXY } from "../shared/layout.js";
type PNode = HierarchyPointNode<TreeNode>;type PLink = HierarchyPointLink<TreeNode>;
/** * The smallest tree example: a 64-tip rectangular phylogram laid out with * `d3-hierarchy`'s cluster, with `d3.link(curveStepBefore)` branches and * `schemeCategory10` tip nodes. The `coords` option (`"world"` | `"screen"`) * selects the size mode — world units scale with zoom, screen pixels stay * constant. Pure d3gl; the harness owns backend, export, and the coords toggle. */export const setup: ImperativeSetup = (host, { width, height, backend, options }) => { const sizeMode = (options.coords as "world" | "screen") ?? "world"; const root = layoutRectangular(makeTree(64), width, height, "linear"); const links = root.links(); const tips = root.leaves();
// Rectangular step links: a d3-shape link generator drawing straight into the d3gl context. const gen = d3link<PLink, PNode>(curveStepBefore).x((d) => d.y).y((d) => d.x);
const chart = plot(host, { width, height, backend }); chart.layer("links", links, { draw: (ctx, l) => { gen.context(ctx); gen(l); }, stroke: "#555", lineWidth: 0.8, sizeMode, }); chart.points("nodes", tips, { x: (n) => nodeXY(n, "rectangular")[0], y: (n) => nodeXY(n, "rectangular")[1], radius: 2.6, fill: (n) => schemeCategory10[n.data.group % 10] ?? "#888", sizeMode, }); chart.enableZoom([0.5, 40]); chart.render();
return chart;};import type { RegionSet } from "./parsimony.js";
export interface TreeNode { name: string; group: number; /** Branch length = parent.time - this.time (time elapsed along the branch). */ length: number; /** Age before present, in [0, 1]. Tips are at 0 (the present); the root is oldest. */ time: number; children?: TreeNode[]; /** Reconstructed ancestral range *set* (membership): species' present regions at tips, * the most-parsimonious ancestral set at internal nodes. Set by calcMaximumParsimony(). */ ranges?: RegionSet; /** Occurrence-count *distribution*: leaf counts summed up the tree, sorted by descending * count. Sizes the pie wedges. Set by parsimony.aggregateClusters(). */ clusters?: RegionSet; /** Number of subtended terminals (Fig. 3 branch-thickness metric). Set by * parsimony.aggregateSpeciesCount(). */ speciesCount?: number;}
/** Deterministic LCG so trees are stable across renders (no Math.random in render path). */function lcg(seed: number) { let s = seed >>> 0; return () => (s = (s * 1664525 + 1013904223) >>> 0) / 0xffffffff; }
/** * Build a roughly-balanced bifurcating tree with exactly `tips` leaves by * recursively splitting the tip count in two (with a little jitter so it looks * natural rather than perfectly symmetric). Branch lengths are random — a * phylogram, not a cladogram. Deterministic for a given seed. */export function makeTree(tips: number, seed = 42): TreeNode { const rnd = lcg(seed); let leaf = 0; let groupCounter = 0; // Ultrametric ("dated") tree: every tip is at time 0 (the present); internal // nodes carry a divergence age younger than their parent's. `build` takes this // node's age and the parent's age (to compute the branch length). const build = (n: number, age: number, parentAge: number, group: number): TreeNode => { const length = parentAge - age; if (n <= 1) { leaf++; return { name: `tip_${leaf}`, group, length, time: 0 }; } const jitter = (rnd() - 0.5) * n * 0.4; const left = Math.max(1, Math.min(n - 1, Math.round(n / 2 + jitter))); // Occasionally start a new colour group so tips cluster by clade. const g1 = rnd() < 0.22 ? ++groupCounter % 10 : group; const g2 = rnd() < 0.22 ? ++groupCounter % 10 : group; // Children diverge at younger ages (closer to the present); leaves sit at 0. const ageL = left <= 1 ? 0 : age * (0.3 + rnd() * 0.5); const ageR = n - left <= 1 ? 0 : age * (0.3 + rnd() * 0.5); return { name: "node", group, length, time: age, children: [build(left, ageL, age, g1), build(n - left, ageR, age, g2)], }; }; return build(Math.max(2, tips), 1, 1, 0);}import { hierarchy, cluster, type HierarchyPointNode } from "d3-hierarchy";import { pointRadial } from "d3-shape";import { scaleLinear, scaleSymlog } from "d3-scale";import type { TreeNode } from "./tree.js";
export type LayoutMode = "rectangular" | "radial";export type TimeScaleKind = "linear" | "log";
/** * Map a node's age (time before present; tips = 0, root = maxAge) to a position. * `present` is the coordinate for age 0, `root` the coordinate for the oldest node. * `scaleSymlog` is used for "log" because it is defined at 0 (unlike scaleLog). */function timePosition(kind: TimeScaleKind, maxAge: number, present: number, root: number): (age: number) => number { if (kind === "log") { const s = scaleSymlog().domain([0, maxAge]).range([present, root]); return (age) => s(age); } const s = scaleLinear().domain([0, maxAge]).range([present, root]); return (age) => s(age);}
/** * Rectangular dated phylogram. Uses d3's HierarchyPointNode coordinates directly: * `x` = vertical leaf-spacing axis, `y` = horizontal time axis. Tips (age 0) align at * the right (the present); the root (oldest) sits at the left. */export function layoutRectangular( root: TreeNode, width: number, height: number, time: TimeScaleKind, pad = 40,): HierarchyPointNode<TreeNode> { const h = cluster<TreeNode>().size([height - 2 * pad, 1])(hierarchy(root, (d) => d.children)); const maxAge = h.data.time || 1; const pos = timePosition(time, maxAge, width - pad, pad); // age 0 → right, age max → left h.each((n) => { n.x += pad; n.y = pos(n.data.time); }); return h;}
/** * Radial dated phylogram. d3 convention: `x` = angle (0..2π), `y` = radius (= time). * Tips (age 0) sit on the outer rim, the root at the centre. Cartesian positions come * from `d3.pointRadial(x, y)` and are origin-centred — the caller centres the view with * a `translate(CX, CY)` transform (which also lets `d3.linkRadial()` work unmodified). */export function layoutRadial( root: TreeNode, width: number, height: number, time: TimeScaleKind, pad = 30, angleExtent = 2 * Math.PI, angleStart = 0,): HierarchyPointNode<TreeNode> { const h = cluster<TreeNode>().size([angleExtent, 1])(hierarchy(root, (d) => d.children)); const maxAge = h.data.time || 1; // A partial fan (e.g. a half-circle "sunset") can use a larger radius: the leaves span only // part of the disc, so it is bounded by half the width and the full height rather than the // inscribed circle. const half = angleExtent < 2 * Math.PI - 1e-9; const R = (half ? Math.min(width / 2, height) : Math.min(width, height) / 2) - pad; const pos = timePosition(time, maxAge, R, 0); // age 0 → R (rim), age max → 0 (centre) h.each((n) => { n.x += angleStart; // shift the fan's angular origin (e.g. centre a half-fan on "north") n.y = pos(n.data.time); }); return h;}
/** Cartesian world coordinates for a node, for points / labels / hit-testing. */export function nodeXY(n: HierarchyPointNode<TreeNode>, mode: LayoutMode): [number, number] { return mode === "radial" ? pointRadial(n.x, n.y) : [n.y, n.x];}import type { TreeNode } from "./tree.js";
/** * Fitch maximum-parsimony reconstruction of ancestral bioregion ranges (Fitch 1971), * plus occurrence-count aggregation. A region is `{clusterId, count}` — a bioregion id * and a count of species occurrences in it. Two distinct per-node products: * * - `node.ranges` — the reconstructed ancestral *range set* (membership): at a tip the * species' present regions, at an internal node the most-parsimonious * ancestral set. Counts here are presence (1); membership is the point. * - `node.clusters`— the occurrence-count *distribution*: leaf counts summed up the tree, * sorted by descending count. Used to size the pie wedges. * * Fitch's two-phase algorithm. Preliminary phase (post-order): a leaf's set is its * regions; an internal node's set is the intersection of its children's sets, or — when * that is empty — their union (flagged `byUnion`). Final phase (pre-order), applying the * classic rules to each non-root node given its already-final parent set: * * I. If the preliminary set contains all regions of the parent's final set, go to II, * else go to III. * II. (diminished ambiguity) Drop every region not in the parent's final set — i.e. keep * the intersection with the parent. Done. * III. If the preliminary set was formed by a union of its children's sets, go to IV, * else go to V. * IV. (expanded ambiguity) Add every parent-final region not already present — i.e. the * union with the parent. Done. * V. (encompassing ambiguity) Add any region not already present that is in BOTH the * parent's final set AND at least one child's preliminary set. Done. * * NOTE: this fixes a bug in the bioregions1 reference, whose final phase tests * `node.byUnion` (always undefined — the flag lives on `node.clusters.byUnion`), so Rule IV * never fires and Rule V adds nothing. That produces incorrect sets for Fitch figs 2d/2f * (e.g. fig 2d node 0 wrongly collapses to {A,C} instead of the correct {A,C,G}). We follow * the written rules above, verified against the most-parsimonious reconstructions. */export interface Region { clusterId: number; count: number; }export interface RegionSet { totCount: number; clusters: Region[]; byUnion?: boolean; }export type ClustersPerSpecies = Record<string, RegionSet>;
const idSet = (s: Region[]): Set<number> => new Set(s.map((r) => r.clusterId));/** Regions of `a` also present in `b` (by clusterId), preserving `a`'s order. */function intersectBy(a: Region[], b: Region[]): Region[] { const bi = idSet(b); return a.filter((r) => bi.has(r.clusterId)); }/** Regions of `a`, then regions of `b` not already in `a`. */function unionBy(a: Region[], b: Region[]): Region[] { const ai = idSet(a); return a.concat(b.filter((r) => !ai.has(r.clusterId))); }const set = (clusters: Region[], byUnion?: boolean): RegionSet => ({ totCount: clusters.length, clusters, byUnion });
function visitPostOrder(node: TreeNode, cb: (n: TreeNode) => void): void { node.children?.forEach((c) => visitPostOrder(c, cb)); cb(node);}function visitPreOrder(node: TreeNode, cb: (n: TreeNode, parent: TreeNode | null) => void, parent: TreeNode | null = null): void { cb(node, parent); node.children?.forEach((c) => visitPreOrder(c, cb, node));}
/** Bottom-up pass: leaves presence-count their regions; each internal node takes the * intersection of its children's sets, falling back to the union (flagged) if empty. */export function calcMaximumParsimonyPreliminaryPhase(root: TreeNode, clustersPerSpecies: ClustersPerSpecies): TreeNode { visitPostOrder(root, (node) => { if (!node.children) { const cl = clustersPerSpecies[node.name]; node.ranges = set(cl ? cl.clusters.map((r) => ({ clusterId: r.clusterId, count: 1 })) : []); return; } const childSets = node.children.map((c) => c.ranges!.clusters); let anc = childSets.reduce((acc, s) => intersectBy(acc, s)); const byUnion = anc.length === 0; if (byUnion) anc = childSets.reduce((acc, s) => unionBy(acc, s), [] as Region[]); node.ranges = set(anc, byUnion); }); return root;}
/** Top-down pass applying Fitch's rules I–V (see the module doc). Root and leaves are * already final. Children are visited after their parent, so a node's Rule-V lookup of * its children's *preliminary* sets reads them before they are overwritten. */export function calcMaximumParsimonyFinalPhase(root: TreeNode): TreeNode { visitPreOrder(root, (node, parent) => { if (!parent || !node.children) return; const prelim = node.ranges!.clusters; const pfinal = parent.ranges!.clusters; if (intersectBy(prelim, pfinal).length === pfinal.length) { node.ranges = set(intersectBy(prelim, pfinal)); // I → II (diminished) } else if (node.ranges!.byUnion) { node.ranges = set(unionBy(prelim, pfinal)); // III → IV (expanded) } else { // III → V (encompassing): add parent regions present in ≥1 child's preliminary set. const childrenUnion = node.children.map((c) => c.ranges!.clusters).reduce((a, b) => unionBy(a, b), [] as Region[]); node.ranges = set(unionBy(prelim, intersectBy(pfinal, childrenUnion))); } }); return root;}
export function calcMaximumParsimony(root: TreeNode, clustersPerSpecies: ClustersPerSpecies): TreeNode { calcMaximumParsimonyPreliminaryPhase(root, clustersPerSpecies); calcMaximumParsimonyFinalPhase(root); return root;}
/** * Sum occurrence counts up the tree into `node.clusters`, sorted by descending count (ties * broken by clusterId). A leaf's counts come from the data; an internal node's count for a * region is the sum over its descendants. Mirrors bioregions1 `_aggregateClusters`, minus * the fraction-threshold "rest" grouping. */export function aggregateClusters(root: TreeNode, clustersPerSpecies: ClustersPerSpecies): TreeNode { const sorted = (clusters: Region[], totCount: number): RegionSet => ({ totCount, clusters: [...clusters].sort((a, b) => b.count - a.count || a.clusterId - b.clusterId), }); visitPostOrder(root, (node) => { if (!node.children) { const cl = clustersPerSpecies[node.name]; const clusters = cl ? cl.clusters.map((r) => ({ clusterId: r.clusterId, count: r.count })) : []; node.clusters = sorted(clusters, clusters.reduce((s, r) => s + r.count, 0)); return; } const agg = new Map<number, number>(); let totCount = 0; for (const child of node.children) { for (const r of child.clusters!.clusters) { agg.set(r.clusterId, (agg.get(r.clusterId) ?? 0) + r.count); totCount += r.count; } } node.clusters = sorted([...agg].map(([clusterId, count]) => ({ clusterId, count })), totCount); }); return root;}
/** * Post-order aggregation of `speciesCount` = number of subtended terminals (the Fig. 3 * branch-thickness metric). With no `presence` map every leaf counts as one; with one, * a leaf counts only if present (absent species contribute 0). */export function aggregateSpeciesCount(root: TreeNode, presence?: Record<string, number>): TreeNode { visitPostOrder(root, (node) => { if (!node.children) { node.speciesCount = presence ? (presence[node.name] ? 1 : 0) : 1; return; } node.speciesCount = node.children.reduce((sum, c) => sum + (c.speciesCount ?? 0), 0); }); return root;}