Run Infomap on GraphRAG-style tables
GraphRAG workflows commonly build hierarchical communities with Leiden. This notebook runs Infomap on tables with the same shape as GraphRAG's `entities.parquet` and `relationships.parquet`, writes GraphRAG-style community outputs, and compares the result with a small Leiden baseline when `igraph` is installed.
from pathlib import Path
import tempfile
import pandas as pd
from infomap.graphrag import read_graphrag, run_graphrag_communities
try:
    import igraph as ig
except ImportError:
    # igraph is optional: when it is missing, `ig` is None and the Leiden
    # comparison further down is skipped with a hint to install it.
    ig = None
# Fresh scratch workspace: the input tables are written under "input", and
# "infomap" is later handed to the Infomap run as its output directory.
work_dir = Path(tempfile.mkdtemp(prefix="infomap-graphrag-"))
input_dir, output_dir = work_dir / "input", work_dir / "infomap"
# Only the input directory is created here.
input_dir.mkdir()
# One (id, title) record per entity.
entities = pd.DataFrame(
    [
        ("a", "Alpha"),
        ("b", "Beta"),
        ("c", "Gamma"),
        ("d", "Delta"),
        ("e", "Epsilon"),
        ("f", "Zeta"),
    ],
    columns=["id", "title"],
)
# One (id, source, target, weight) record per relationship; endpoints are
# given by entity title. Two triangles (weights 2.0 and 3.0) are joined by a
# single weight-1.0 edge between Gamma and Delta.
relationships = pd.DataFrame(
    [
        ("ab", "Alpha", "Beta", 2.0),
        ("bc", "Beta", "Gamma", 2.0),
        ("ca", "Gamma", "Alpha", 2.0),
        ("de", "Delta", "Epsilon", 3.0),
        ("ef", "Epsilon", "Zeta", 3.0),
        ("fd", "Zeta", "Delta", 3.0),
        ("cd", "Gamma", "Delta", 1.0),
    ],
    columns=["id", "source", "target", "weight"],
)
# Persist both tables, then load the graph back from those same two files.
entities_path = input_dir / "entities.parquet"
relationships_path = input_dir / "relationships.parquet"
entities.to_parquet(entities_path)
relationships.to_parquet(relationships_path)
graph = read_graphrag(entities_path, relationships_path)
# Show each relationship alongside the graph's source and target node values.
relationships.assign(source_node=graph.sources, target_node=graph.targets)
| | id | source | target | weight | source_node | target_node |
|---|---|---|---|---|---|---|
| 0 | ab | Alpha | Beta | 2.0 | 1 | 2 |
| 1 | bc | Beta | Gamma | 2.0 | 2 | 3 |
| 2 | ca | Gamma | Alpha | 2.0 | 3 | 1 |
| 3 | de | Delta | Epsilon | 3.0 | 4 | 5 |
| 4 | ef | Epsilon | Zeta | 3.0 | 5 | 6 |
| 5 | fd | Zeta | Delta | 3.0 | 6 | 4 |
| 6 | cd | Gamma | Delta | 1.0 | 3 | 4 |
# Infomap settings are grouped so the run is easy to tweak; the seed is fixed.
infomap_options = {"silent": True, "seed": 123, "num_trials": 5}
result = run_graphrag_communities(
    input_dir=input_dir,
    output_dir=output_dir,
    **infomap_options,
)
# Codelength of the Infomap result, displayed as the cell output.
result.infomap.codelength
1.9831517459081185
# Node table from the run: one row per entity with its module assignment.
infomap_nodes = result.nodes
infomap_nodes
| | node_id | entity_id | entity_title | module_id | module_path | level | flow |
|---|---|---|---|---|---|---|---|
| 0 | 1 | a | Alpha | 1 | [1] | 1 | 0.12500 |
| 1 | 2 | b | Beta | 1 | [1] | 1 | 0.12500 |
| 2 | 3 | c | Gamma | 1 | [1] | 1 | 0.15625 |
| 3 | 4 | d | Delta | 2 | [2] | 1 | 0.21875 |
| 4 | 5 | e | Epsilon | 2 | [2] | 1 | 0.18750 |
| 5 | 6 | f | Zeta | 2 | [2] | 1 | 0.18750 |
# Community table from the run: one row per community, including its level,
# member entity ids and size (used by the comparison below).
infomap_communities = result.communities
infomap_communities
| | id | human_readable_id | community | parent | children | level | title | entity_ids | relationship_ids | text_unit_ids | period | size |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | infomap-1 | 1 | 1 | -1 | [] | 0 | Infomap community 1 | [a, b, c] | [ab, bc, ca] | [] | None | 3 |
| 1 | infomap-2 | 2 | 2 | -1 | [] | 0 | Infomap community 2 | [d, e, f] | [de, ef, fd] | [] | None | 3 |
def _top_level_groups(communities: pd.DataFrame) -> list:
    """Return the sorted member ids of every level-0 community.

    Args:
        communities: Community table with a ``level`` column and an
            ``entity_ids`` column holding one collection of ids per row.

    Returns:
        One sorted list of entity ids for each row whose ``level`` equals 0,
        in table order. Empty when no row is at level 0.
    """
    is_top_level = communities["level"] == 0
    return [
        sorted(entity_ids)
        for entity_ids in communities.loc[is_top_level, "entity_ids"]
    ]
# Summary of the Infomap partition; comparison_rows holds one dict per method.
infomap_summary = {
    "method": "Infomap",
    "number of communities": len(infomap_communities),
    "levels": int(infomap_communities["level"].nunique()),
    "largest community size": int(infomap_communities["size"].max()),
    "entity groups at top level": _top_level_groups(infomap_communities),
}
comparison_rows = [infomap_summary]
if ig is None:
    # igraph is optional; without it only the Infomap row is shown.
    print("Install python-igraph to run the Leiden comparison.")
else:
    # Build an undirected, weighted graph directly from the relationship rows.
    # Vertices are named after the "source"/"target" values (entity titles).
    leiden_graph = ig.Graph.TupleList(
        relationships[["source", "target", "weight"]].itertuples(
            index=False, name=None
        ),
        directed=False,
        edge_attrs=["weight"],
    )
    leiden_partition = leiden_graph.community_leiden(
        weights="weight",
        objective_function="modularity",
    )
    # NOTE: these groups list entity titles (the vertex names), whereas the
    # Infomap row lists entity ids, so compare memberships via the entities
    # table rather than by raw string equality.
    leiden_groups = [
        sorted(leiden_graph.vs[vertex]["name"] for vertex in community)
        for community in leiden_partition
    ]
    comparison_rows.append(
        {
            "method": "Leiden",
            "number of communities": len(leiden_partition),
            # A single flat partition is computed here, hence one level.
            "levels": 1,
            # default=0 keeps an empty relationship table from raising.
            "largest community size": max(
                (len(group) for group in leiden_groups), default=0
            ),
            "entity groups at top level": leiden_groups,
        }
    )

# One row per method, displayed as the cell output.
pd.DataFrame(comparison_rows)
| | method | number of communities | levels | largest community size | entity groups at top level |
|---|---|---|---|---|---|
| 0 | Infomap | 2 | 1 | 3 | [[a, b, c], [d, e, f]] |
| 1 | Leiden | 2 | 1 | 3 | [[Alpha, Beta, Gamma], [Delta, Epsilon, Zeta]] |