Tue, Mar 31, 2026

Block propagation - 2026-03-31

Analysis of block propagation timing relative to block size on the wire, with corrected timing that isolates network propagation from block building overhead.

Terminology:

  • First seen (raw): Time from slot start until the first sentry observes the block. Includes block building time + network latency.
  • Winning bid: Time when the MEV relay received the winning bid for the block. Marks when the block was "ready" to broadcast.
  • First seen (corrected): For MEV blocks with bid timing, computed as first_seen - winning_bid, isolating network propagation time. For local blocks and MEV blocks without bid timing, this equals the raw first seen.
  • Propagation spread: Time between when the first sentry saw the block and when the last sentry saw it.
  • Wire size: Block size after Snappy compression, as transmitted over libp2p gossipsub.
Show code
# Two datasets are combined in this notebook:
#   1. block_propagation_by_size  -- block sizes + propagation timing
#   2. block_production_timeline  -- MEV winning bid timing
display_sql("block_propagation_by_size", target_date)
View query
WITH
-- Slots whose payload was delivered by an MEV relay; used downstream to
-- classify each block as MEV-built vs locally built.
mev_slots AS (
    SELECT DISTINCT slot
    FROM mev_relay_proposer_payload_delivered FINAL
    WHERE meta_network_name = 'mainnet'
      AND slot_start_date_time >= '2026-03-31' AND slot_start_date_time < '2026-03-31'::date + INTERVAL 1 DAY
),

-- Canonical block metadata: proposer index plus block sizes, both raw SSZ
-- bytes and the compressed size actually sent on the wire.
block_meta AS (
    SELECT DISTINCT
        slot,
        block_root AS block,
        proposer_index,
        block_total_bytes,
        block_total_bytes_compressed
    FROM canonical_beacon_block FINAL
    WHERE meta_network_name = 'mainnet'
      AND slot_start_date_time >= '2026-03-31' AND slot_start_date_time < '2026-03-31'::date + INTERVAL 1 DAY
),

-- Validator index -> operator entity mapping, for proposer attribution
proposer_entity AS (
    SELECT index, entity
    FROM ethseer_validator_entity FINAL
    WHERE meta_network_name = 'mainnet'
),

-- Propagation timing aggregated across all sentries; observations more than
-- 12s after slot start are excluded as outliers.
propagation AS (
    SELECT
        slot,
        block,
        min(propagation_slot_start_diff) AS first_seen_ms,
        max(propagation_slot_start_diff) AS last_seen_ms,
        quantile(0.5)(propagation_slot_start_diff) AS median_ms,
        count() AS sentry_count
    FROM libp2p_gossipsub_beacon_block
    WHERE meta_network_name = 'mainnet'
      AND slot_start_date_time >= '2026-03-31' AND slot_start_date_time < '2026-03-31'::date + INTERVAL 1 DAY
      AND propagation_slot_start_diff < 12000
    GROUP BY slot, block
)

SELECT
    p.slot AS slot,
    bm.block_total_bytes AS uncompressed_bytes,
    bm.block_total_bytes_compressed AS compressed_bytes,
    bm.proposer_index,
    coalesce(pe.entity, 'Unknown') AS proposer_entity,
    -- Use IN for reliable MEV detection on distributed tables
    if(p.slot GLOBAL IN mev_slots, 'MEV', 'Local') AS builder_type,
    p.first_seen_ms AS first_seen_ms,
    p.last_seen_ms AS last_seen_ms,
    p.median_ms AS median_ms,
    p.sentry_count AS sentry_count
FROM propagation p
-- LEFT JOINs keep every observed block; rows without canonical metadata are
-- removed by the WHERE below so the size columns are always populated.
GLOBAL LEFT JOIN block_meta bm ON p.slot = bm.slot AND p.block = bm.block
GLOBAL LEFT JOIN proposer_entity pe ON bm.proposer_index = pe.index
WHERE bm.block_total_bytes IS NOT NULL
ORDER BY p.slot
Show code
# Load both datasets and join on slot
df_size = load_parquet("block_propagation_by_size", target_date)
df_timeline = load_parquet("block_production_timeline", target_date)

# Join: size/propagation data + winning bid timing from the timeline.
# Left join keeps blocks without bid timing (winning_bid_ms becomes NaN).
df = df_size.merge(
    df_timeline[["slot", "winning_bid_ms"]],
    on="slot",
    how="left"
)

# Add derived columns
df["spread_ms"] = df["last_seen_ms"] - df["first_seen_ms"]
df["compression_ratio"] = df["uncompressed_bytes"] / df["compressed_bytes"]
df["compressed_kib"] = df["compressed_bytes"] / 1024
df["uncompressed_kib"] = df["uncompressed_bytes"] / 1024

# Corrected first seen: for MEV blocks with bid timing, measure from the
# winning bid instead of slot start (isolates network propagation); all
# other blocks fall back to the raw first seen.
df["corrected_first_seen_ms"] = np.where(
    (df["builder_type"] == "MEV") & df["winning_bid_ms"].notna(),
    df["first_seen_ms"] - df["winning_bid_ms"],
    df["first_seen_ms"]
)

# Size buckets for binning (in KiB). Define the label order once and reuse
# it for pd.cut so the labels and the plotting order can never drift apart.
SIZE_ORDER = ["< 50 KiB", "50-100 KiB", "100-150 KiB", ">= 150 KiB"]
df["size_bucket"] = pd.cut(
    df["compressed_kib"],
    bins=[0, 50, 100, 150, float("inf")],
    labels=SIZE_ORDER
)

# Builder category with 3 levels, vectorized (np.select picks the first
# matching condition, mirroring the if/elif/else priority).
df["builder_category"] = np.select(
    [
        df["builder_type"] == "Local",
        df["winning_bid_ms"].notna(),
    ],
    ["Local", "MEV (with bid timing)"],
    default="MEV (no bid timing)",
)

# Category ordering and colors (excluding "MEV (no bid timing)" from plots)
CATEGORY_ORDER = ["Local", "MEV (with bid timing)"]
CATEGORY_COLORS = {
    "Local": "#3498db",
    "MEV (with bid timing)": "#9b59b6",
}

# Summary
total_blocks = len(df)
print(f"Total blocks: {total_blocks:,}")
for cat in CATEGORY_ORDER:
    count = (df["builder_category"] == cat).sum()
    # Guard against an empty dataframe (e.g. a day with no data yet)
    pct = count / total_blocks * 100 if total_blocks else 0.0
    print(f"  {cat}: {count:,} ({pct:.1f}%)")

# Info: MEV blocks without bid timing (excluded from category-based plots)
mev_no_bid = (df["builder_category"] == "MEV (no bid timing)").sum()
if mev_no_bid > 0:
    print(f"\nNote: {mev_no_bid:,} MEV blocks ({mev_no_bid/total_blocks*100:.1f}%) lack bid timing data and are excluded from builder category comparisons.")
Total blocks: 5,393
  Local: 385 (7.1%)
  MEV (with bid timing): 2,187 (40.6%)

Note: 2,821 MEV blocks (52.3%) lack bid timing data and are excluded from builder category comparisons.

Size distribution by builder type

Histogram comparing the block size distribution between MEV and local blocks. MEV blocks tend to be larger due to MEV extraction strategies.

Show code
# Overlaid wire-size histograms, one colour per builder type.
size_fig = px.histogram(
    df,
    x="compressed_kib",
    color="builder_type",
    color_discrete_map={"MEV": "#9b59b6", "Local": "#3498db"},
    nbins=50,
    barmode="overlay",
    opacity=0.7,
)
size_fig.update_layout(
    margin=dict(l=60, r=30, t=30, b=60),
    xaxis=dict(title="Block size on wire (KiB)"),
    yaxis=dict(title="Block count"),
    legend_title="Builder type",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    height=400,
)
size_fig.show(config={"responsive": True})

# Median/mean wire size per builder type
for builder in ("Local", "MEV"):
    sizes = df.loc[df["builder_type"] == builder, "compressed_kib"]
    print(f"{builder}: median size {sizes.median():.1f} KiB, "
          f"mean {sizes.mean():.1f} KiB")
Local: median size 50.8 KiB, mean 59.2 KiB
MEV: median size 83.5 KiB, mean 89.0 KiB

Compression ratio

Scatter plot showing the relationship between uncompressed SSZ block size and compressed (Snappy) wire size. The dashed line shows the linear regression; the dotted line shows 1:1 (no compression).

Show code
fig = go.Figure()

# One scatter trace per builder type so each gets its own colour/legend entry.
for builder, colour in (("Local", "#3498db"), ("MEV", "#9b59b6")):
    pts = df[df["builder_type"] == builder]
    fig.add_trace(go.Scatter(
        x=pts["uncompressed_kib"],
        y=pts["compressed_kib"],
        mode="markers",
        name=builder,
        marker=dict(color=colour, opacity=0.4, size=5),
        hovertemplate="<b>Slot %{text}</b><br>Uncompressed: %{x:.1f} KiB<br>Compressed: %{y:.1f} KiB<extra></extra>",
        text=pts["slot"],
    ))

# Linear fit of compressed size against uncompressed size (all blocks).
fit = stats.linregress(df["uncompressed_kib"], df["compressed_kib"])
slope, intercept, r_value = fit.slope, fit.intercept, fit.rvalue
x_range = np.array([df["uncompressed_kib"].min(), df["uncompressed_kib"].max()])
y_pred = slope * x_range + intercept

fig.add_trace(go.Scatter(
    x=x_range,
    y=y_pred,
    mode="lines",
    name=f"Regression (R\u00b2={r_value**2:.3f})",
    line=dict(color="#2ecc71", width=2, dash="dash"),
))

# Reference: output size equals input size (i.e. no compression at all).
fig.add_trace(go.Scatter(
    x=x_range,
    y=x_range,
    mode="lines",
    name="1:1 (no compression)",
    line=dict(color="gray", width=1, dash="dot"),
))

fig.update_layout(
    margin=dict(l=60, r=30, t=30, b=60),
    xaxis=dict(title="Uncompressed block size (KiB)"),
    yaxis=dict(title="Compressed block size (KiB, on wire)"),
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    height=500,
)
fig.show(config={"responsive": True})

# Compression summary
print(f"Compression ratio: mean {df['compression_ratio'].mean():.2f}x, "
      f"median {df['compression_ratio'].median():.2f}x")
print(f"Regression: compressed = {slope:.3f} \u00d7 uncompressed + {intercept:.1f}")
print(f"R\u00b2 = {r_value**2:.4f}")
Compression ratio: mean 2.12x, median 2.08x
Regression: compressed = 0.454 × uncompressed + 4.6
R² = 0.8809

MEV timing breakdown

For MEV blocks with bid timing data, we can decompose the raw first seen time into block building time (winning bid) and network propagation time.

Winning bid timing distribution

Distribution of winning bid timing (ms from slot start) for MEV blocks. This shows when blocks are "ready" to broadcast.

Show code
# MEV blocks where the relay recorded the winning bid time.
bid_blocks = df[df["builder_category"] == "MEV (with bid timing)"]

fig = px.histogram(
    bid_blocks,
    x="winning_bid_ms",
    nbins=50,
    color_discrete_sequence=["#9b59b6"],
)
fig.update_layout(
    margin=dict(l=60, r=30, t=30, b=60),
    xaxis=dict(title="Winning bid (ms from slot start)"),
    yaxis=dict(title="Block count"),
    height=400,
)
fig.show(config={"responsive": True})

bid_ms = bid_blocks["winning_bid_ms"]
print(f"Winning bid timing (n={len(bid_blocks):,}):")
print(f"  Median: {bid_ms.median():.0f}ms")
print(f"  P5-P95: {bid_ms.quantile(0.05):.0f}ms - {bid_ms.quantile(0.95):.0f}ms")
Winning bid timing (n=2,187):
  Median: 956ms
  P5-P95: 145ms - 2541ms

Block building vs network time

Stacked bar showing the breakdown of first seen into block building time (winning bid) and network propagation time (corrected first seen) for MEV blocks.

Show code
# MEV blocks where we know when the winning bid arrived.
mev_with_bid = df[df["builder_category"] == "MEV (with bid timing)"].copy()

# Median building vs network time per size bucket.
breakdown = mev_with_bid.groupby("size_bucket", observed=True).agg(
    building_time=("winning_bid_ms", "median"),
    network_time=("corrected_first_seen_ms", "median"),
    count=("slot", "count"),
).reset_index()

fig = go.Figure()
# One horizontal bar trace per timing component; barmode="stack" below
# stacks them into a single bar per size bucket.
for column, label, colour in (
    ("building_time", "Block building (winning bid)", "#e74c3c"),
    ("network_time", "Network propagation", "#2ecc71"),
):
    fig.add_trace(go.Bar(
        y=breakdown["size_bucket"],
        x=breakdown[column],
        name=label,
        orientation="h",
        marker_color=colour,
    ))

fig.update_layout(
    margin=dict(l=100, r=30, t=30, b=60),
    xaxis=dict(title="Time (ms, median)"),
    yaxis=dict(title="Block size on wire (KiB)", categoryorder="array", categoryarray=SIZE_ORDER[::-1]),
    barmode="stack",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    height=400,
)
fig.show(config={"responsive": True})

# Text breakdown mirroring the chart
print("Median timing breakdown (MEV blocks with bid timing):")
for _, row in breakdown.iterrows():
    total = row["building_time"] + row["network_time"]
    pct_building = row["building_time"] / total * 100
    print(f"  {row['size_bucket']}: {row['building_time']:.0f}ms building ({pct_building:.0f}%) + "
          f"{row['network_time']:.0f}ms network = {total:.0f}ms total (n={row['count']:,})")
Median timing breakdown (MEV blocks with bid timing):
  < 50 KiB: 962ms building (64%) + 530ms network = 1491ms total (n=270)
  50-100 KiB: 953ms building (62%) + 578ms network = 1531ms total (n=992)
  100-150 KiB: 974ms building (63%) + 576ms network = 1551ms total (n=660)
  >= 150 KiB: 943ms building (61%) + 602ms network = 1545ms total (n=265)

Raw vs corrected comparison

Comparison of raw first seen (from slot start) vs corrected first seen (from winning bid) for MEV blocks. The corrected metric isolates network propagation time.

Show code
# MEV blocks with known bid timing: compare raw vs corrected first seen.
mev_with_bid = df[df["builder_category"] == "MEV (with bid timing)"].copy()

fig = go.Figure()
# One box per metric; boxmean overlays the mean on each box.
for column, label, colour in (
    ("first_seen_ms", "Raw (from slot start)", "#9b59b6"),
    ("corrected_first_seen_ms", "Corrected (from winning bid)", "#2ecc71"),
):
    fig.add_trace(go.Box(
        y=mev_with_bid[column],
        name=label,
        marker_color=colour,
        boxmean=True,
    ))

fig.update_layout(
    margin=dict(l=60, r=30, t=30, b=60),
    yaxis=dict(title="First seen (ms)"),
    showlegend=True,
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    height=450,
)
fig.show(config={"responsive": True})

print(f"MEV blocks with bid timing (n={len(mev_with_bid):,}):")
print(f"  Raw first seen: median {mev_with_bid['first_seen_ms'].median():.0f}ms, P95 {mev_with_bid['first_seen_ms'].quantile(0.95):.0f}ms")
print(f"  Corrected: median {mev_with_bid['corrected_first_seen_ms'].median():.0f}ms, P95 {mev_with_bid['corrected_first_seen_ms'].quantile(0.95):.0f}ms")
print(f"  Winning bid: median {mev_with_bid['winning_bid_ms'].median():.0f}ms")
MEV blocks with bid timing (n=2,187):
  Raw first seen: median 1573ms, P95 3088ms
  Corrected: median 577ms, P95 1737ms
  Winning bid: median 956ms

Corrected first seen vs block size

Scatter plot using corrected first seen. For MEV blocks with bid timing, this shows pure network propagation time. For Local blocks and MEV blocks without bid timing, this equals raw first seen.

Show code
# Keep only the two plotted categories; render MEV rows first so the much
# smaller Local population is drawn on top and stays visible.
plot_rows = df[df["builder_category"].isin(CATEGORY_ORDER)].copy()
draw_rank = {"MEV (with bid timing)": 0, "Local": 1}
plot_rows = plot_rows.sort_values("builder_category", key=lambda s: s.map(draw_rank))

fig = px.scatter(
    plot_rows,
    x="corrected_first_seen_ms",
    y="compressed_kib",
    color="builder_category",
    category_orders={"builder_category": CATEGORY_ORDER},
    color_discrete_map=CATEGORY_COLORS,
    opacity=0.5,
    hover_data={"slot": True, "proposer_entity": True, "first_seen_ms": ":.0f", "corrected_first_seen_ms": ":.0f"},
)
fig.update_layout(
    margin=dict(l=60, r=30, t=30, b=60),
    xaxis=dict(title="Corrected first seen (ms)"),
    yaxis=dict(title="Block size on wire (KiB)"),
    legend_title="Builder category",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    height=500,
)
fig.show(config={"responsive": True})

Corrected first seen by size bucket

Box plot comparing corrected first seen between builder categories across size buckets.

Box: 25th-75th percentile. Line: median. Whiskers: min/max excluding outliers.

Show code
# Restrict to the two comparable builder categories.
box_data = df[df["builder_category"].isin(CATEGORY_ORDER)]

fig = px.box(
    box_data,
    y="size_bucket",
    x="corrected_first_seen_ms",
    color="builder_category",
    orientation="h",
    category_orders={"size_bucket": SIZE_ORDER[::-1], "builder_category": CATEGORY_ORDER},
    color_discrete_map=CATEGORY_COLORS,
)
fig.update_layout(
    margin=dict(l=100, r=30, t=30, b=60),
    xaxis=dict(title="Corrected first seen (ms)"),
    yaxis=dict(title="Block size on wire (KiB)"),
    legend_title="Builder category",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    height=500,
)
fig.show(config={"responsive": True})

# Per-category summary statistics
for category in CATEGORY_ORDER:
    rows = df[df["builder_category"] == category]
    if len(rows) > 0:
        print(f"{category}: median {rows['corrected_first_seen_ms'].median():.0f}ms, "
              f"P95 {rows['corrected_first_seen_ms'].quantile(0.95):.0f}ms, n={len(rows):,}")
Local: median 1434ms, P95 2700ms, n=385
MEV (with bid timing): median 577ms, P95 1737ms, n=2,187

Corrected first seen density by builder type

Density heatmaps showing the distribution of corrected first seen timing vs block size for Local and MEV blocks.

Show code
# Trim both axes at the 99th percentile so a handful of outliers does not
# compress the interesting part of the heatmap.
x_max = df["corrected_first_seen_ms"].quantile(0.99)
y_max = df["compressed_kib"].quantile(0.99)

# Categories with enough data for a meaningful density estimate.
df_heatmap = df[df["builder_category"].isin(["Local", "MEV (with bid timing)"])]

fig = px.density_heatmap(
    df_heatmap,
    x="corrected_first_seen_ms",
    y="compressed_kib",
    facet_col="builder_category",
    facet_col_spacing=0.08,
    category_orders={"builder_category": ["Local", "MEV (with bid timing)"]},
    nbinsx=40,
    nbinsy=40,
    range_x=[0, x_max],
    range_y=[0, y_max],
    color_continuous_scale="Plasma",
)
fig.update_layout(
    margin=dict(l=60, r=30, t=40, b=60),
    height=450,
    coloraxis_colorbar=dict(title="Count"),
)

def _clean_facet_title(annotation):
    # Strip the "builder_category=" prefix plotly puts in facet titles.
    annotation.update(text=annotation.text.replace("builder_category=", ""), font_size=12)

fig.for_each_annotation(_clean_facet_title)
fig.for_each_xaxis(lambda ax: ax.update(title="Corrected first seen (ms)"))
fig.for_each_yaxis(lambda ax: ax.update(title="Wire size (KiB)"))
fig.show(config={"responsive": True})

Regional propagation analysis

Comparison of block first-seen timing across geographic regions from two data sources:

  • Sentries: EthPandaOps libp2p gossipsub monitoring (~50-100 globally distributed nodes)
  • Contributoor: Community beacon API event collection (~875 nodes, primarily data centers)

Both sources capture when blocks are first observed by nodes in each region. Contributoor nodes tend to show faster times due to being primarily in well-connected data centers.

Show code
# Load regional propagation data from both sources; each source is optional
# and simply flagged as missing if its parquet file is absent.
has_sentries = True
try:
    df_region_sentries = load_parquet("block_propagation_by_region", target_date)
except FileNotFoundError:
    has_sentries = False
    print("Note: Sentries regional data not available")

has_contributoor = True
try:
    df_region_contributoor = load_parquet("block_propagation_by_region_contributoor", target_date)
except FileNotFoundError:
    has_contributoor = False
    print("Note: Contributoor regional data not available")

REGION_LABELS = {"EU": "Europe", "NA": "North America", "AS": "Asia", "OC": "Oceania"}
REGION_ORDER = ["EU", "NA", "AS", "OC"]

def add_region_derived_columns(df_in):
    """Add size bucket and corrected timing columns matching the main notebook."""
    enriched = df_in.copy()

    # Size buckets (same bins/labels as the slot-level dataframe)
    enriched["compressed_kib"] = enriched["compressed_bytes"] / 1024
    enriched["size_bucket"] = pd.cut(
        enriched["compressed_kib"],
        bins=[0, 50, 100, 150, float("inf")],
        labels=SIZE_ORDER
    )
    enriched["region_label"] = enriched["region"].map(REGION_LABELS)

    # Attach winning bid timing from the production timeline
    enriched = enriched.merge(
        df_timeline[["slot", "winning_bid_ms"]],
        on="slot",
        how="left"
    )

    # Corrected first seen: subtract winning bid time for MEV blocks with
    # bid timing; everything else keeps the raw first seen.
    enriched["corrected_first_seen_ms"] = np.where(
        (enriched["builder_type"] == "MEV") & enriched["winning_bid_ms"].notna(),
        enriched["first_seen_ms"] - enriched["winning_bid_ms"],
        enriched["first_seen_ms"]
    )

    return enriched

if has_sentries:
    df_region_sentries = add_region_derived_columns(df_region_sentries)
if has_contributoor:
    df_region_contributoor = add_region_derived_columns(df_region_contributoor)

# Per-source, per-region summary
available_sources = []
if has_sentries:
    available_sources.append(("Sentries", df_region_sentries))
if has_contributoor:
    available_sources.append(("Contributoor", df_region_contributoor))

for name, df_r in available_sources:
    print(f"\n{name}:")
    for region in REGION_ORDER:
        r = df_r[df_r["region"] == region]
        print(f"  {REGION_LABELS[region]}: {len(r):,} slot-regions, "
              f"median corrected first seen {r['corrected_first_seen_ms'].median():.0f}ms")
Sentries:
  Europe: 5,393 slot-regions, median corrected first seen 1358ms
  North America: 5,393 slot-regions, median corrected first seen 1477ms
  Asia: 5,393 slot-regions, median corrected first seen 1548ms
  Oceania: 5,393 slot-regions, median corrected first seen 1540ms

Contributoor:
  Europe: 11,418 slot-regions, median corrected first seen 1361ms
  North America: 11,418 slot-regions, median corrected first seen 1388ms
  Asia: 11,418 slot-regions, median corrected first seen 1468ms
  Oceania: 11,418 slot-regions, median corrected first seen 1516ms

Regional timing distribution (corrected)

Box plots showing corrected first seen timing distribution by region, comparing Sentries (libp2p) and Contributoor (beacon API) data sources. Faceted by builder type (MEV vs Local).

Box: 25th-75th percentile. Line: median. Whiskers: min/max excluding outliers.

Show code
if has_sentries or has_contributoor:
    # Stack whichever regional datasets are available, tagging each row
    # with its data source.
    labelled_frames = []
    if has_sentries:
        tagged = df_region_sentries.copy()
        tagged["source"] = "Sentries"
        labelled_frames.append(tagged)
    if has_contributoor:
        tagged = df_region_contributoor.copy()
        tagged["source"] = "Contributoor"
        labelled_frames.append(tagged)

    df_regional_combined = pd.concat(labelled_frames, ignore_index=True)
    df_regional_combined["region_label"] = df_regional_combined["region"].map(REGION_LABELS)

    fig = px.box(
        df_regional_combined,
        x="region_label",
        y="corrected_first_seen_ms",
        color="source",
        facet_col="builder_type",
        color_discrete_map={"Sentries": "#3498db", "Contributoor": "#2ecc71"},
        category_orders={
            "region_label": [REGION_LABELS[r] for r in REGION_ORDER],
            "builder_type": ["MEV", "Local"],
        },
    )
    fig.update_layout(
        margin=dict(l=60, r=30, t=40, b=60),
        xaxis_title="Region",
        yaxis_title="Corrected first seen (ms)",
        legend_title="Data source",
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        height=450,
    )
    fig.for_each_annotation(lambda a: a.update(text=a.text.replace("builder_type=", "")))
    fig.show(config={"responsive": True})

    # Text summary matching the chart facets
    print("Corrected first seen by region (ms):")
    for bt in ["MEV", "Local"]:
        print(f"\n  {bt}:")
        for region in REGION_ORDER:
            for source in ["Sentries", "Contributoor"]:
                mask = (
                    (df_regional_combined["region"] == region)
                    & (df_regional_combined["source"] == source)
                    & (df_regional_combined["builder_type"] == bt)
                )
                timings = df_regional_combined.loc[mask, "corrected_first_seen_ms"]
                if len(timings) > 0:
                    print(f"    {REGION_LABELS[region]} ({source}): "
                          f"P50={timings.median():.0f}ms, P95={timings.quantile(0.95):.0f}ms (n={len(timings):,})")
else:
    print("No regional data available")
Corrected first seen by region (ms):

  MEV:
    Europe (Sentries): P50=1350ms, P95=2844ms (n=5,008)
    Europe (Contributoor): P50=1353ms, P95=2845ms (n=10,597)
    North America (Sentries): P50=1471ms, P95=2962ms (n=5,008)
    North America (Contributoor): P50=1381ms, P95=2899ms (n=10,597)
    Asia (Sentries): P50=1544ms, P95=3050ms (n=5,008)
    Asia (Contributoor): P50=1465ms, P95=2966ms (n=10,597)
    Oceania (Sentries): P50=1535ms, P95=3040ms (n=5,008)
    Oceania (Contributoor): P50=1513ms, P95=3031ms (n=10,597)

  Local:
    Europe (Sentries): P50=1436ms, P95=2700ms (n=385)
    Europe (Contributoor): P50=1434ms, P95=2675ms (n=821)
    North America (Sentries): P50=1559ms, P95=2778ms (n=385)
    North America (Contributoor): P50=1489ms, P95=2723ms (n=821)
    Asia (Sentries): P50=1584ms, P95=2817ms (n=385)
    Asia (Contributoor): P50=1522ms, P95=2698ms (n=821)
    Oceania (Sentries): P50=1594ms, P95=2782ms (n=385)
    Oceania (Contributoor): P50=1585ms, P95=2732ms (n=821)

Regional CDF comparison by size (corrected timing)

Cumulative distribution functions (CDFs) showing corrected first seen timing by geographic region, faceted by block size bucket and builder type.

How to read these charts:

  • X-axis: Corrected first seen time in milliseconds. For MEV blocks, this is first_seen - winning_bid (isolating network propagation). For Local blocks, this equals raw first seen.
  • Y-axis: Percentile (0-100%). A point at (300ms, 50%) means 50% of blocks were seen within 300ms.
  • Steeper curves = faster, more consistent propagation. The curve climbing quickly to 100% indicates tight timing.
  • Right-shifted curves = slower propagation in that region.
  • P50 line (horizontal dotted): Where curves cross this line shows median timing per region.

Visual encoding:

  • Color = Geographic region (EU=blue, NA=green, AS=red, OC=orange)
  • Line style = Data source (solid=Sentries libp2p gossipsub, dashed=Contributoor beacon API)
  • Columns = Builder type (MEV vs Local)
  • Rows = Block size bucket (smallest at top, largest at bottom)

What to look for:

  • Do larger blocks show more spread between regions? (Size impact on propagation)
  • Does Contributoor (dashed) consistently show faster times than Sentries (solid)? (Data center vs diverse node placement)
  • Which regions lag behind as block size increases? (Geographic disadvantage for large blocks)
Show code
if has_sentries or has_contributoor:
    from plotly.subplots import make_subplots
    
    # Region colour palette shared by every subplot.
    REGION_COLORS = {
        "EU": "#3498db",  # Blue
        "NA": "#2ecc71",  # Green
        "AS": "#e74c3c",  # Red
        "OC": "#f39c12",  # Orange
    }
    
    # Line dash encodes the data source (solid vs dashed).
    SOURCE_DASH = {
        "Sentries": "solid",
        "Contributoor": "dash",
    }
    
    BUILDER_TYPES = ["MEV", "Local"]
    
    # Create 4x2 subplots: rows = size buckets, cols = builder types.
    # Only the top row gets column titles; the remaining title slots are
    # padded with empty strings so make_subplots leaves them blank.
    fig = make_subplots(
        rows=len(SIZE_ORDER), cols=len(BUILDER_TYPES),
        subplot_titles=[f"{bt}" for bt in BUILDER_TYPES] + [""] * (len(SIZE_ORDER) - 1) * 2,
        row_titles=SIZE_ORDER,
        horizontal_spacing=0.06,
        vertical_spacing=0.06,
    )
    
    # Evaluate the empirical CDF at every whole percentile 0..100.
    percentiles = np.arange(0, 101, 1)
    
    def add_cdf_traces(df_r, source_name, builder_type, size_bucket, row, col, show_legend):
        """Add one per-region CDF line to the subplot at (row, col).

        Filters df_r to the given builder type and size bucket, then plots
        corrected-first-seen percentiles per region. Mutates the enclosing
        `fig`; regions with fewer than 10 samples are skipped.
        """
        subset = df_r[
            (df_r["builder_type"] == builder_type) & 
            (df_r["size_bucket"] == size_bucket)
        ]
        for region in REGION_ORDER:
            region_data = subset[subset["region"] == region]["corrected_first_seen_ms"]
            if len(region_data) >= 10:  # Need enough data for meaningful CDF
                values = np.percentile(region_data, percentiles)
                fig.add_trace(
                    go.Scatter(
                        x=values,
                        y=percentiles,
                        mode="lines",
                        name=f"{REGION_LABELS[region]} ({source_name})",
                        line=dict(
                            color=REGION_COLORS[region], 
                            width=2,
                            dash=SOURCE_DASH[source_name],
                        ),
                        showlegend=show_legend,
                        # Group by region+source so one legend click toggles
                        # the same line across every subplot.
                        legendgroup=f"{region}_{source_name}",
                        hovertemplate=f"{REGION_LABELS[region]} ({source_name})<br>%{{x:.0f}}ms = P%{{y}}<extra></extra>",
                    ),
                    row=row, col=col,
                )
    
    # Add traces for each (size bucket, builder type) combination
    for row_idx, size_bucket in enumerate(SIZE_ORDER, 1):
        for col_idx, builder_type in enumerate(BUILDER_TYPES, 1):
            # Only show legend on first subplot
            show_legend = (row_idx == 1 and col_idx == 1)
            
            if has_sentries:
                add_cdf_traces(df_region_sentries, "Sentries", builder_type, size_bucket, row_idx, col_idx, show_legend)
            if has_contributoor:
                add_cdf_traces(df_region_contributoor, "Contributoor", builder_type, size_bucket, row_idx, col_idx, show_legend)
            
            # Add P50 (median) reference line
            fig.add_hline(y=50, line_dash="dot", line_color="gray", line_width=1, row=row_idx, col=col_idx)
    
    fig.update_layout(
        margin=dict(l=100, r=30, t=50, b=60),
        height=1100,
        legend=dict(
            orientation="h", 
            yanchor="bottom", 
            y=1.02, 
            xanchor="center", 
            x=0.5,
            font_size=10,
        ),
    )
    
    # Axis titles: x only on the bottom row, y only on the first column
    for col in [1, 2]:
        fig.update_xaxes(title_text="Corrected first seen (ms)", row=len(SIZE_ORDER), col=col)
    fig.update_yaxes(title_text="Percentile", col=1)
    
    fig.show(config={"responsive": True})
    
    # Print summary statistics (regional medians per bucket/builder/source)
    print("Line styles: solid = Sentries (libp2p), dashed = Contributoor (beacon API)")
    print("\nP50 (median) timing by size bucket and builder type:\n")
    for size_bucket in SIZE_ORDER:
        print(f"  {size_bucket}:")
        for bt in BUILDER_TYPES:
            for source, df_r, has_data in [
                ("Sentries", df_region_sentries if has_sentries else None, has_sentries),
                ("Contributoor", df_region_contributoor if has_contributoor else None, has_contributoor),
            ]:
                if has_data:
                    subset = df_r[(df_r["builder_type"] == bt) & (df_r["size_bucket"] == size_bucket)]
                    if len(subset) >= 10:
                        medians = []
                        for region in REGION_ORDER:
                            r = subset[subset["region"] == region]["corrected_first_seen_ms"]
                            if len(r) > 0:
                                medians.append(f"{REGION_LABELS[region][:2]}:{r.median():.0f}")
                        if medians:
                            print(f"    {bt} ({source}): {', '.join(medians)} ms")
else:
    print("No regional data available")
Line styles: solid = Sentries (libp2p), dashed = Contributoor (beacon API)

P50 (median) timing by size bucket and builder type:

  < 50 KiB:
    MEV (Sentries): Eu:1376, No:1448, As:1504, Oc:1506 ms
    MEV (Contributoor): Eu:1373, No:1383, As:1447, Oc:1483 ms
    Local (Sentries): Eu:1363, No:1444, As:1463, Oc:1458 ms
    Local (Contributoor): Eu:1364, No:1359, As:1417, Oc:1428 ms
  50-100 KiB:
    MEV (Sentries): Eu:1361, No:1476, As:1546, Oc:1537 ms
    MEV (Contributoor): Eu:1362, No:1387, As:1469, Oc:1515 ms
    Local (Sentries): Eu:1501, No:1628, As:1680, Oc:1720 ms
    Local (Contributoor): Eu:1498, No:1560, As:1617, Oc:1701 ms
  100-150 KiB:
    MEV (Sentries): Eu:1326, No:1474, As:1570, Oc:1550 ms
    MEV (Contributoor): Eu:1328, No:1372, As:1474, Oc:1532 ms
    Local (Sentries): Eu:1580, No:1733, As:1825, Oc:1842 ms
    Local (Contributoor): Eu:1580, No:1653, As:1734, Oc:1802 ms
  >= 150 KiB:
    MEV (Sentries): Eu:1265, No:1464, As:1535, Oc:1550 ms
    MEV (Contributoor): Eu:1279, No:1341, As:1443, Oc:1496 ms
    Local (Sentries): Eu:1742, No:1911, As:1946, Oc:1890 ms
    Local (Contributoor): Eu:1737, No:1813, As:1742, Oc:1840 ms

First-seen "winner" by region (corrected timing)

For each slot, which region observed the block first using corrected timing? This shows the percentage of slots where each region was the first to see the block after accounting for block building time.

Show code
if has_sentries or has_contributoor:
    def compute_winner_stats(df_r, source_name):
        """Compute which region saw each slot first using corrected timing."""
        # Order rows so each slot's fastest corrected observation comes
        # first, then keep only that fastest row per slot.
        ordered = df_r.reset_index(drop=True).sort_values(["slot", "corrected_first_seen_ms"])
        fastest_per_slot = ordered.drop_duplicates(subset="slot", keep="first")

        wins_by_region = fastest_per_slot["region"].value_counts()
        total_slots = fastest_per_slot["slot"].nunique()

        stats_rows = []
        for region in REGION_ORDER:
            wins = wins_by_region.get(region, 0)
            stats_rows.append({
                "source": source_name,
                "region": region,
                "region_label": REGION_LABELS[region],
                "win_count": wins,
                "win_pct": wins / total_slots * 100 if total_slots > 0 else 0,
            })
        return stats_rows

    winner_rows = []
    if has_sentries:
        winner_rows.extend(compute_winner_stats(df_region_sentries, "Sentries"))
    if has_contributoor:
        winner_rows.extend(compute_winner_stats(df_region_contributoor, "Contributoor"))

    df_winners = pd.DataFrame(winner_rows)

    fig = px.bar(
        df_winners,
        x="region_label",
        y="win_pct",
        color="source",
        barmode="group",
        color_discrete_map={"Sentries": "#3498db", "Contributoor": "#2ecc71"},
        category_orders={"region_label": [REGION_LABELS[r] for r in REGION_ORDER]},
        text=df_winners["win_pct"].apply(lambda pct: f"{pct:.1f}%"),
    )
    fig.update_traces(textposition="outside")
    fig.update_layout(
        margin=dict(l=60, r=30, t=30, b=60),
        xaxis_title="Region",
        yaxis_title="% of slots first seen (corrected)",
        legend_title="Data source",
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        height=400,
    )
    fig.show(config={"responsive": True})

    # Text summary, regions sorted by win share per source
    print("Region 'wins' (first to see block, corrected timing):")
    for source in ["Sentries", "Contributoor"]:
        source_rows = df_winners[df_winners["source"] == source]
        if len(source_rows) > 0:
            print(f"\n  {source}:")
            for _, row in source_rows.sort_values("win_pct", ascending=False).iterrows():
                print(f"    {row['region_label']}: {row['win_pct']:.1f}% ({row['win_count']:,} slots)")
else:
    print("No regional data available")
Region 'wins' (first to see block, corrected timing):

  Sentries:
    Europe: 95.2% (5,133 slots)
    Asia: 2.0% (108 slots)
    North America: 1.9% (100 slots)
    Oceania: 1.0% (52 slots)

  Contributoor:
    Europe: 65.7% (3,544 slots)
    North America: 26.6% (1,436 slots)
    Asia: 6.5% (351 slots)
    Oceania: 1.1% (62 slots)

Region × size interaction (corrected timing)

Median corrected first seen timing by region and block size bucket. Shows whether larger blocks disproportionately impact certain regions after accounting for block building time.

Show code
if has_sentries or has_contributoor:
    def compute_region_size_matrix(df_r):
        """Median corrected first seen (ms) as a region x size-bucket matrix.

        Cells are NaN when a region/bucket combination has no blocks.
        """
        return df_r.groupby(["region", "size_bucket"], observed=True)["corrected_first_seen_ms"].median().unstack()
    
    # Compute matrices for available sources
    matrices = {}
    if has_sentries:
        matrices["Sentries"] = compute_region_size_matrix(df_region_sentries)
    if has_contributoor:
        matrices["Contributoor"] = compute_region_size_matrix(df_region_contributoor)
    
    # Create side-by-side heatmaps
    n_sources = len(matrices)
    fig = make_subplots(
        rows=1, cols=n_sources,
        subplot_titles=list(matrices.keys()),
        horizontal_spacing=0.1,
    )
    
    # Shared color scale (P5-P95 across all finite values) so the subplots
    # are directly comparable between sources.
    all_values = np.concatenate([m.values.flatten() for m in matrices.values()])
    all_values = all_values[~np.isnan(all_values)]
    vmin, vmax = np.percentile(all_values, [5, 95])
    
    for i, (source, matrix) in enumerate(matrices.items(), 1):
        # Reorder rows to match REGION_ORDER
        matrix = matrix.reindex(REGION_ORDER)
        
        # Build cell labels as strings, leaving NaN cells blank.
        # BUG FIX: the previous round(0).astype(int) converted NaN to a
        # garbage sentinel integer that was then rendered on the heatmap.
        cell_text = [
            ["" if pd.isna(v) else f"{v:.0f}" for v in row_values]
            for row_values in matrix.values
        ]
        
        fig.add_trace(
            go.Heatmap(
                z=matrix.values,
                x=[str(c) for c in matrix.columns],
                y=[REGION_LABELS[r] for r in matrix.index],
                colorscale="Plasma",
                zmin=vmin,
                zmax=vmax,
                text=cell_text,
                texttemplate="%{text}",
                textfont={"size": 11},
                showscale=(i == n_sources),  # single colorbar on the last subplot only
                colorbar=dict(title="ms") if i == n_sources else None,
                hovertemplate="Region: %{y}<br>Size: %{x}<br>Median: %{z:.0f}ms<extra></extra>",
            ),
            row=1, col=i,
        )
    
    fig.update_layout(
        margin=dict(l=100, r=30, t=60, b=60),
        height=350,
    )
    fig.update_xaxes(title_text="Block size on wire", row=1)
    fig.update_yaxes(title_text="Region", col=1)
    fig.show(config={"responsive": True})
    
    # Print the data (NaN cells are skipped via pd.notna)
    print("Median corrected first seen (ms) by region and size bucket:\n")
    for source, matrix in matrices.items():
        print(f"{source}:")
        matrix = matrix.reindex(REGION_ORDER)
        for region in REGION_ORDER:
            row = matrix.loc[region]
            values = ", ".join([f"{row[c]:.0f}" for c in SIZE_ORDER if c in row.index and pd.notna(row[c])])
            print(f"  {REGION_LABELS[region]}: {values}")
        print()
else:
    print("No regional data available")
Median corrected first seen (ms) by region and size bucket:

Sentries:
  Europe: 1374, 1369, 1335, 1293
  North America: 1446, 1481, 1481, 1486
  Asia: 1494, 1552, 1583, 1564
  Oceania: 1495, 1543, 1562, 1584

Contributoor:
  Europe: 1372, 1369, 1343, 1293
  North America: 1379, 1395, 1390, 1356
  Asia: 1440, 1472, 1480, 1465
  Oceania: 1474, 1520, 1539, 1528

Propagation spread

Propagation spread is the time between when the first sentry saw the block and when the last sentry saw it. Larger blocks should take longer to propagate across all sentries.

Spread by size (box plot)

Box: 25th-75th percentile. Line: median. Whiskers: extend to the furthest points within 1.5× the interquartile range of the box; points beyond that are drawn as outliers.

Show code
# Distribution of propagation spread per size bucket, split by builder type.
builder_palette = {"MEV": "#9b59b6", "Local": "#3498db"}
bucket_order = {"size_bucket": SIZE_ORDER[::-1], "builder_type": ["MEV", "Local"]}

fig = px.box(
    df,
    x="spread_ms",
    y="size_bucket",
    color="builder_type",
    orientation="h",
    category_orders=bucket_order,
    color_discrete_map=builder_palette,
)
fig.update_layout(
    height=400,
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    legend_title="Builder type",
    margin=dict(l=100, r=30, t=30, b=60),
    xaxis=dict(title="Propagation spread (last seen - first seen, ms)"),
    yaxis=dict(title="Block size on wire (KiB)"),
)
fig.show(config={"responsive": True})

Spread vs size (scatter)

Scatter view showing individual blocks.

Show code
# One point per block: propagation spread vs wire size, colored by builder.
builder_palette = {"MEV": "#9b59b6", "Local": "#3498db"}
hover_fields = {"slot": True, "proposer_entity": True, "spread_ms": ":.0f", "corrected_first_seen_ms": ":.0f"}

fig = px.scatter(
    df,
    x="spread_ms",
    y="compressed_kib",
    color="builder_type",
    color_discrete_map=builder_palette,
    opacity=0.5,
    hover_data=hover_fields,
)
fig.update_layout(
    height=500,
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    legend_title="Builder type",
    margin=dict(l=60, r=30, t=30, b=60),
    xaxis=dict(title="Propagation spread (ms)"),
    yaxis=dict(title="Block size on wire (KiB)"),
)
fig.show(config={"responsive": True})

Corrected first seen by proposer entity

Top proposer entities ranked by median corrected first seen timing. Circle, square, and diamond markers show P75, P90, and P95 timing respectively.

Show code
# Per-entity timing percentiles (corrected first seen) plus MEV share.
entity_stats = df.groupby("proposer_entity").agg(
    block_count=("slot", "count"),
    p50_ms=("corrected_first_seen_ms", "median"),
    p75_ms=("corrected_first_seen_ms", lambda s: s.quantile(0.75)),
    p90_ms=("corrected_first_seen_ms", lambda s: s.quantile(0.90)),
    p95_ms=("corrected_first_seen_ms", lambda s: s.quantile(0.95)),
    mev_pct=("builder_type", lambda s: (s == "MEV").mean() * 100),
).reset_index()

# Require a meaningful sample (20+ blocks); chart the 20 biggest proposers,
# ordered by median timing.
eligible = entity_stats[entity_stats["block_count"] >= 20]
top_by_count = eligible.nlargest(20, "block_count").sort_values("p50_ms")

fig = go.Figure()

# Median as horizontal bars, labeled with each entity's block count.
fig.add_trace(go.Bar(
    y=top_by_count["proposer_entity"],
    x=top_by_count["p50_ms"],
    orientation="h",
    name="P50 (median)",
    marker_color="#3498db",
    text=top_by_count["block_count"].apply(lambda n: f"{n:,}"),
    textposition="outside",
    hovertemplate="<b>%{y}</b><br>P50: %{x:.0f}ms<br>Blocks: %{text}<extra></extra>",
))

# Upper-percentile markers overlaid on the bars (P75/P90/P95).
percentile_markers = [
    ("p75_ms", "P75", "#f39c12", "circle"),
    ("p90_ms", "P90", "#e67e22", "square"),
    ("p95_ms", "P95", "#e74c3c", "diamond"),
]
for column, label, color, symbol in percentile_markers:
    fig.add_trace(go.Scatter(
        y=top_by_count["proposer_entity"],
        x=top_by_count[column],
        mode="markers",
        name=label,
        marker=dict(color=color, size=8, symbol=symbol),
        hovertemplate="<b>%{y}</b><br>" + label + ": %{x:.0f}ms<extra></extra>",
    ))

fig.update_layout(
    height=600,
    barmode="overlay",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    margin=dict(l=150, r=60, t=30, b=60),
    xaxis=dict(title="Corrected first seen (ms)"),
    yaxis=dict(title=""),
)
fig.show(config={"responsive": True})

Top 10 proposer entities (density)

Corrected first seen vs block size density for MEV blocks, faceted by top 10 proposer entities (by block count, descending).

Show code
# MEV blocks only: corrected first seen vs wire size, one density facet per
# top proposer entity.
mev_df = df[df["builder_type"] == "MEV"].copy()

# Collapse missing/blank proposer entities into a single "(unknown)" label.
mev_df["proposer_entity"] = mev_df["proposer_entity"].fillna("(unknown)").replace("", "(unknown)")

# Top 10 entities by MEV block count (value_counts is already descending).
counts_by_entity = mev_df["proposer_entity"].value_counts().head(10)
top10 = counts_by_entity.index.tolist()
facet_df = mev_df[mev_df["proposer_entity"].isin(top10)].copy()

# Facet labels embed the block count; keep descending-count order.
label_by_entity = {entity: f"{entity} ({count:,})" for entity, count in counts_by_entity.items()}
facet_df["entity_label"] = facet_df["proposer_entity"].map(label_by_entity)
ordered_labels = [label_by_entity[e] for e in top10]

# Clip both axes at the 99th percentile so extreme stragglers don't
# flatten the density bins.
x_limit = facet_df["corrected_first_seen_ms"].quantile(0.99)
y_limit = facet_df["compressed_kib"].quantile(0.99)

fig = px.density_heatmap(
    facet_df,
    x="corrected_first_seen_ms",
    y="compressed_kib",
    facet_col="entity_label",
    facet_col_wrap=5,
    facet_col_spacing=0.04,
    facet_row_spacing=0.08,
    category_orders={"entity_label": ordered_labels},
    nbinsx=20,
    nbinsy=20,
    range_x=[0, x_limit],
    range_y=[0, y_limit],
    color_continuous_scale="Plasma",
)
fig.update_layout(
    height=500,
    coloraxis_colorbar=dict(title="Count"),
    margin=dict(l=60, r=30, t=40, b=60),
)
# Strip the "entity_label=" prefix plotly adds to facet titles.
fig.for_each_annotation(lambda a: a.update(
    text=a.text.replace("entity_label=", ""),
    font_size=10,
))
# Blank the per-facet axis titles; shared captions are added below instead.
fig.for_each_xaxis(lambda ax: ax.update(title=""))
fig.for_each_yaxis(lambda ax: ax.update(title=""))
fig.add_annotation(
    text="Corrected first seen (ms)",
    xref="paper", yref="paper",
    x=0.5, y=-0.08,
    showarrow=False,
    font_size=12,
)
fig.add_annotation(
    text="Wire size (KiB)",
    xref="paper", yref="paper",
    x=-0.04, y=0.5,
    showarrow=False,
    font_size=12,
    textangle=-90,
)
fig.show(config={"responsive": True})

Anomaly detection

The following charts help identify blocks that propagated slower than expected given their size, using corrected timing.

Corrected first seen residuals

Residual = actual corrected first seen - expected based on block size. Positive residuals indicate blocks that were slower than expected for their size. The regression line is fit per builder category.

Show code
# Fit a per-category linear model (corrected first seen ~ wire size) and
# compute residuals: positive means slower than expected for the size.
df["expected_corrected_first_seen"] = np.nan
df["residual_ms"] = np.nan

for cat in CATEGORY_ORDER:
    mask = df["builder_category"] == cat
    cat_rows = df[mask]
    if len(cat_rows) > 10:  # need enough points for a meaningful fit
        fit = stats.linregress(cat_rows["compressed_kib"], cat_rows["corrected_first_seen_ms"])
        expected = fit.slope * df.loc[mask, "compressed_kib"] + fit.intercept
        df.loc[mask, "expected_corrected_first_seen"] = expected
        df.loc[mask, "residual_ms"] = df.loc[mask, "corrected_first_seen_ms"] - expected

# Restrict to the plotted categories; render Local last so it sits on top.
df_plot = df[df["builder_category"].isin(CATEGORY_ORDER)].copy()
render_order = {"MEV (with bid timing)": 0, "Local": 1}
df_sorted = df_plot.sort_values("builder_category", key=lambda col: col.map(render_order))

fig = px.scatter(
    df_sorted,
    x="compressed_kib",
    y="residual_ms",
    color="builder_category",
    category_orders={"builder_category": CATEGORY_ORDER},
    color_discrete_map=CATEGORY_COLORS,
    opacity=0.5,
    hover_data={"slot": True, "proposer_entity": True, "corrected_first_seen_ms": ":.0f", "residual_ms": ":.0f"},
)
fig.add_hline(y=0, line_dash="dash", line_color="gray", annotation_text="Expected")
fig.update_layout(
    height=500,
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    legend_title="Builder category",
    margin=dict(l=60, r=30, t=30, b=60),
    xaxis=dict(title="Block size on wire (KiB)"),
    yaxis=dict(title="Corrected first seen residual (ms)"),
)
fig.show(config={"responsive": True})

# Count outliers (considering only plotted categories)
df_filtered = df[df["builder_category"].isin(CATEGORY_ORDER)]
outlier_threshold = df_filtered["residual_ms"].quantile(0.95)
outliers = df_filtered[df_filtered["residual_ms"] > outlier_threshold]
print(f"Outlier threshold (P95): {outlier_threshold:.0f}ms")
print(f"Blocks above P95: {len(outliers):,} ({len(outliers)/len(df_filtered)*100:.1f}%)")
Outlier threshold (P95): 1013ms
Blocks above P95: 129 (5.0%)

Slow blocks (z-score > 2)

Blocks with corrected first seen timing more than 2 standard deviations above the mean for their size bucket. These are unusually slow relative to similar-sized blocks.

Show code
from IPython.display import HTML, display

# Calculate z-scores within each size bucket (using corrected timing).
# Each block is compared only against blocks in the same size bucket, so the
# score measures "slow for its size" rather than "slow overall".
df["zscore"] = df.groupby("size_bucket", observed=True)["corrected_first_seen_ms"].transform(
    lambda x: (x - x.mean()) / x.std()
)

# Get blocks with z-score > 2. Only the 20 most extreme are shown in the
# table; the total count of all z > 2 blocks is printed below the table.
slow_blocks = df[df["zscore"] > 2].sort_values("zscore", ascending=False).head(20)

if len(slow_blocks) > 0:
    rows = []
    for _, row in slow_blocks.iterrows():
        slot = int(row["slot"])
        # Each slot links to the ethPandaOps lab page for per-slot drill-down.
        lab_url = f"https://lab.ethpandaops.io/ethereum/slots/{slot}"
        # \u03c3 below renders as the sigma character in the z-score column.
        rows.append(f"""
            <tr>
                <td><a href="{lab_url}" target="_blank">{slot:,}</a></td>
                <td>{row['builder_category']}</td>
                <td>{row['proposer_entity']}</td>
                <td>{row['compressed_kib']:.1f}</td>
                <td>{row['corrected_first_seen_ms']:.0f}</td>
                <td>{row['zscore']:.1f}\u03c3</td>
            </tr>
        """)

    # Self-contained HTML table (inline <style>) rendered in the notebook;
    # doubled braces escape literal CSS braces inside the f-string.
    html = f'''
    <style>
    .anomaly-table {{ border-collapse: collapse; font-family: monospace; font-size: 13px; width: 100%; }}
    .anomaly-table th {{ background: #2c3e50; color: white; padding: 8px; text-align: left; }}
    .anomaly-table td {{ padding: 6px 8px; border-bottom: 1px solid #eee; }}
    .anomaly-table tr:hover {{ background: #f5f5f5; }}
    .anomaly-table a {{ color: #3498db; text-decoration: none; }}
    .anomaly-table a:hover {{ text-decoration: underline; }}
    </style>
    <table class="anomaly-table">
    <thead>
        <tr><th>Slot</th><th>Builder</th><th>Proposer</th><th>Size (KiB)</th><th>Corrected first seen (ms)</th><th>Z-score</th></tr>
    </thead>
    <tbody>
        {"".join(rows)}
    </tbody>
    </table>
    '''
    display(HTML(html))
    print(f"\nTotal blocks with z-score > 2: {len(df[df['zscore'] > 2]):,}")
else:
    print("No blocks with z-score > 2 found.")
SlotBuilderProposerSize (KiB)Corrected first seen (ms)Z-score
14,010,465 Local blockdaemon 36.4 5575 5.9σ
14,008,097 MEV (with bid timing) 72.8 4138 3.6σ
14,008,320 MEV (no bid timing) figment 30.6 3741 3.3σ
14,011,790 MEV (with bid timing) kraken 84.8 3812 3.2σ
14,009,319 MEV (no bid timing) blockdaemon_lido 52.4 3753 3.1σ
14,009,184 MEV (no bid timing) kraken 15.3 3562 3.1σ
14,008,466 MEV (with bid timing) upbit 41.8 3451 2.9σ
14,009,169 MEV (no bid timing) blockdaemon_lido 99.7 3610 2.9σ
14,012,611 MEV (no bid timing) nethermind_lido 70.7 3607 2.9σ
14,007,838 MEV (no bid timing) p2porg 186.8 3391 2.9σ
14,008,467 Local kraken 74.1 3604 2.9σ
14,010,104 MEV (no bid timing) nethermind_lido 38.0 3428 2.9σ
14,011,371 MEV (no bid timing) blockdaemon 117.7 3513 2.9σ
14,009,458 MEV (no bid timing) whale_0x8ebd 77.8 3549 2.8σ
14,008,829 MEV (no bid timing) 159.1 3334 2.8σ
14,007,644 MEV (no bid timing) nethermind_lido 25.3 3381 2.8σ
14,011,956 MEV (no bid timing) p2porg 91.5 3536 2.8σ
14,009,039 MEV (no bid timing) blockdaemon 117.9 3483 2.8σ
14,008,338 MEV (no bid timing) blockdaemon 154.3 3321 2.8σ
14,009,397 MEV (no bid timing) nethermind_lido 77.8 3529 2.8σ
Total blocks with z-score > 2: 259

Propagation spread outliers

Blocks that were both slow to arrive (high corrected first seen) AND slow to spread across sentries (high propagation spread). The top-right quadrant shows the worst-performing blocks.

Show code
# Calculate percentile thresholds for both metrics.
first_seen_p90 = df["corrected_first_seen_ms"].quantile(0.90)
spread_p90 = df["spread_ms"].quantile(0.90)

# Mark "double outliers": blocks above P90 on both corrected first seen
# AND propagation spread (the top-right quadrant of the chart).
df["is_double_outlier"] = (df["corrected_first_seen_ms"] > first_seen_p90) & (df["spread_ms"] > spread_p90)

fig = px.scatter(
    df,
    x="corrected_first_seen_ms",
    y="spread_ms",
    color="size_bucket",
    category_orders={"size_bucket": SIZE_ORDER},
    opacity=0.5,
    hover_data={"slot": True, "proposer_entity": True, "builder_category": True, "compressed_kib": ":.1f"},
)

# Add quadrant lines at the P90 thresholds
fig.add_vline(x=first_seen_p90, line_dash="dot", line_color="red", 
              annotation_text=f"P90: {first_seen_p90:.0f}ms", annotation_position="top")
fig.add_hline(y=spread_p90, line_dash="dot", line_color="red",
              annotation_text=f"P90: {spread_p90:.0f}ms")

fig.update_layout(
    margin=dict(l=60, r=30, t=30, b=60),
    xaxis=dict(title="Corrected first seen (ms)"),
    yaxis=dict(title="Propagation spread (ms)"),
    legend_title="Size bucket",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    height=500,
)
fig.show(config={"responsive": True})

# Count double outliers and break them down by builder category.
double_outliers = df[df["is_double_outlier"]]
print(f"Blocks in top-right quadrant (both > P90): {len(double_outliers):,} ({len(double_outliers)/len(df)*100:.1f}%)")
# BUG FIX: the previous loop iterated only CATEGORY_ORDER, so categories
# outside it (e.g. "MEV (no bid timing)") were silently omitted and the
# per-category counts did not sum to the printed total. List CATEGORY_ORDER
# first (including zero counts), then any remaining observed categories.
cat_counts = double_outliers["builder_category"].value_counts()
extra_cats = [c for c in cat_counts.index if c not in CATEGORY_ORDER]
for cat in list(CATEGORY_ORDER) + extra_cats:
    print(f"  {cat}: {cat_counts.get(cat, 0):,}")
Blocks in top-right quadrant (both > P90): 67 (1.2%)
  Local: 3
  MEV (with bid timing): 6

Entity anomaly rate

Percentage of each proposer entity's blocks that have corrected first seen > P95. Entities with high anomaly rates may have connectivity or configuration issues.

Show code
# Flag blocks slower than the global P95 of corrected first-seen timing.
p95_threshold = df["corrected_first_seen_ms"].quantile(0.95)
df["is_slow"] = df["corrected_first_seen_ms"] > p95_threshold

# Per-entity totals: block count, slow-block count, and median timing.
entity_anomaly = df.groupby("proposer_entity").agg(
    block_count=("slot", "count"),
    slow_count=("is_slow", "sum"),
    median_corrected=("corrected_first_seen_ms", "median"),
).reset_index()
entity_anomaly["anomaly_rate"] = 100 * entity_anomaly["slow_count"] / entity_anomaly["block_count"]

# Keep entities with a meaningful sample (20+ blocks); chart the worst 15.
entity_anomaly = entity_anomaly[entity_anomaly["block_count"] >= 20]
top_anomaly = entity_anomaly.nlargest(15, "anomaly_rate")

fig = go.Figure()

fig.add_trace(go.Bar(
    y=top_anomaly["proposer_entity"],
    x=top_anomaly["anomaly_rate"],
    orientation="h",
    marker_color="#e74c3c",
    text=top_anomaly.apply(lambda rec: f"{rec['slow_count']:.0f}/{rec['block_count']:.0f}", axis=1),
    textposition="outside",
    hovertemplate="<b>%{y}</b><br>Anomaly rate: %{x:.1f}%<br>Slow blocks: %{text}<extra></extra>",
))

# Reference line: 5% is the expected rate by construction of a P95 cutoff.
fig.add_vline(x=5, line_dash="dash", line_color="gray", annotation_text="Expected (5%)")

fig.update_layout(
    height=500,
    margin=dict(l=150, r=80, t=30, b=60),
    xaxis=dict(title="% of blocks with corrected first seen > P95", range=[0, max(top_anomaly["anomaly_rate"]) * 1.2]),
    yaxis=dict(title="", categoryorder="total ascending"),
)
fig.show(config={"responsive": True})

print(f"P95 threshold: {p95_threshold:.0f}ms")
print(f"Entities shown: {len(top_anomaly)} (with 20+ blocks, sorted by anomaly rate)")
P95 threshold: 2835ms
Entities shown: 15 (with 20+ blocks, sorted by anomaly rate)