Fri, May 8, 2026 Latest

Block propagation

Analysis of block propagation timing relative to block size on the wire, with corrected timing that isolates network propagation from block building overhead.

Terminology:

  • First seen (raw): Time from slot start until the first sentry observes the block. Includes block building time + network latency.
  • Winning bid: Time when the MEV relay received the winning bid for the block. Marks when the block was "ready" to broadcast.
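  • First seen (corrected): For MEV blocks with bid timing, first seen (raw) minus the winning bid time (first_seen - winning_bid), which isolates network propagation time. Local blocks and MEV blocks without bid timing keep their raw first-seen value.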
  • Propagation spread: Time between when the first sentry saw the block and when the last sentry saw it.
  • Wire size: Block size after Snappy compression, as transmitted over libp2p gossipsub.
Show code
# This notebook joins two data sources:
# 1. block_propagation_by_size: block sizes and propagation timing
# 2. block_production_timeline: MEV winning bid timing
#
# Show the query for the first source for the date under analysis.
# NOTE(review): display_sql is defined outside this cell; presumably it
# renders the SQL text with target_date substituted in -- confirm.
display_sql("block_propagation_by_size", target_date)
View query
-- Per-block propagation timing joined with block size, proposer entity and an
-- MEV/Local label, for mainnet slots in the half-open one-day window
-- [2026-05-08, 2026-05-08 + 1 day).
WITH
-- Get MEV slot list (slots with relay payload delivery)
-- Only used further down for the membership test that labels a block 'MEV'.
mev_slots AS (
    SELECT DISTINCT slot
    FROM mev_relay_proposer_payload_delivered FINAL
    WHERE meta_network_name = 'mainnet'
      AND slot_start_date_time >= '2026-05-08' AND slot_start_date_time < '2026-05-08'::date + INTERVAL 1 DAY
),

-- Block metadata (size, proposer)
-- block_root is exposed as `block` so it can be matched against
-- propagation.block in the final join.
block_meta AS (
    SELECT DISTINCT
        slot,
        block_root AS block,
        proposer_index,
        block_total_bytes,
        block_total_bytes_compressed
    FROM canonical_beacon_block FINAL
    WHERE meta_network_name = 'mainnet'
      AND slot_start_date_time >= '2026-05-08' AND slot_start_date_time < '2026-05-08'::date + INTERVAL 1 DAY
),

-- Proposer entity mapping
-- No date filter here: the mapping is joined by validator index only.
proposer_entity AS (
    SELECT index, entity
    FROM ethseer_validator_entity FINAL
    WHERE meta_network_name = 'mainnet'
),

-- Propagation timing aggregated across all sentries
-- One output row per (slot, block): earliest, latest and median
-- propagation_slot_start_diff, plus the number of rows aggregated.
-- NOTE(review): sentry_count is count() of observation rows; it equals the
-- number of distinct sentries only if each sentry reports a block once -- confirm.
propagation AS (
    SELECT
        slot,
        block,
        min(propagation_slot_start_diff) AS first_seen_ms,
        max(propagation_slot_start_diff) AS last_seen_ms,
        quantile(0.5)(propagation_slot_start_diff) AS median_ms,
        count() AS sentry_count
    FROM libp2p_gossipsub_beacon_block
    WHERE meta_network_name = 'mainnet'
      AND slot_start_date_time >= '2026-05-08' AND slot_start_date_time < '2026-05-08'::date + INTERVAL 1 DAY
      -- Discard observations with a diff of 12000 or more (presumably
      -- milliseconds, i.e. seen later than one slot after slot start -- confirm units).
      AND propagation_slot_start_diff < 12000
    GROUP BY slot, block
)

-- Final projection: one row per propagated block that has size metadata.
SELECT
    p.slot AS slot,
    bm.block_total_bytes AS uncompressed_bytes,
    bm.block_total_bytes_compressed AS compressed_bytes,
    bm.proposer_index,
    -- Proposers with no entity mapping are reported as 'Unknown'.
    coalesce(pe.entity, 'Unknown') AS proposer_entity,
    -- Use IN for reliable MEV detection on distributed tables
    if(p.slot GLOBAL IN mev_slots, 'MEV', 'Local') AS builder_type,
    p.first_seen_ms AS first_seen_ms,
    p.last_seen_ms AS last_seen_ms,
    p.median_ms AS median_ms,
    p.sentry_count AS sentry_count
FROM propagation p
-- Match on both slot and block root.
GLOBAL LEFT JOIN block_meta bm ON p.slot = bm.slot AND p.block = bm.block
GLOBAL LEFT JOIN proposer_entity pe ON bm.proposer_index = pe.index
-- Keeps only rows whose block_total_bytes is non-NULL.
-- NOTE(review): dropping unmatched propagation rows this way relies on the
-- LEFT JOIN producing NULL (not a default value) for them -- confirm the
-- column is Nullable / join_use_nulls setting.
WHERE bm.block_total_bytes IS NOT NULL
ORDER BY p.slot
Show code
# Load both datasets and join on slot
df_size = load_parquet("block_propagation_by_size", target_date)
df_timeline = load_parquet("block_production_timeline", target_date)

# Left join: every block from the size dataset is kept; blocks with no
# matching timeline row end up with NaN in winning_bid_ms.
df = df_size.merge(
    df_timeline[["slot", "winning_bid_ms"]],
    on="slot",
    how="left",
)

# Derived columns: propagation spread, compression ratio and sizes in KiB.
df["spread_ms"] = df["last_seen_ms"] - df["first_seen_ms"]
df["compression_ratio"] = df["uncompressed_bytes"] / df["compressed_bytes"]
df["compressed_kib"] = df["compressed_bytes"] / 1024
df["uncompressed_kib"] = df["uncompressed_bytes"] / 1024

# Corrected first seen: subtract winning bid time for MEV blocks with bid
# timing; every other block keeps its raw first-seen value.
df["corrected_first_seen_ms"] = np.where(
    (df["builder_type"] == "MEV") & df["winning_bid_ms"].notna(),
    df["first_seen_ms"] - df["winning_bid_ms"],
    df["first_seen_ms"],
)

# Size buckets for binning (in KiB).
# right=False makes each bin left-closed -- [0, 50), [50, 100), [100, 150),
# [150, inf) -- which is what the labels "< 50 KiB" and ">= 150 KiB" promise.
# With the default right-closed bins a block of exactly 50, 100 or 150 KiB
# would be placed in the bucket below the one its label describes.
SIZE_ORDER = ["< 50 KiB", "50-100 KiB", "100-150 KiB", ">= 150 KiB"]
df["size_bucket"] = pd.cut(
    df["compressed_kib"],
    bins=[0, 50, 100, 150, float("inf")],
    labels=SIZE_ORDER,
    right=False,
)

# Builder category with 3 levels
def categorize_builder(row):
    """Return the three-level builder category for one block row.

    ``row`` must support lookup of ``"builder_type"`` and ``"winning_bid_ms"``.
    Rows whose builder type is ``"Local"`` are reported as ``"Local"``; all
    other rows are split by whether ``winning_bid_ms`` is present (non-null).
    """
    # Local blocks never consult the bid timing column.
    if row["builder_type"] == "Local":
        return "Local"

    has_bid_timing = pd.notna(row["winning_bid_ms"])
    return "MEV (with bid timing)" if has_bid_timing else "MEV (no bid timing)"

# Tag every block with its three-level builder category.
df["builder_category"] = df.apply(categorize_builder, axis=1)

# Categories shown in plots, in display order, with their colors.
# "MEV (no bid timing)" is deliberately absent: it is excluded from plots.
CATEGORY_COLORS = {
    "Local": "#3498db",
    "MEV (with bid timing)": "#9b59b6",
}
CATEGORY_ORDER = ["Local", "MEV (with bid timing)"]

# Summary: block count and share of all blocks for each plotted category.
total_blocks = len(df)
print(f"Total blocks: {total_blocks:,}")
for cat in CATEGORY_ORDER:
    in_category = df["builder_category"] == cat
    count = in_category.sum()
    pct = count / total_blocks * 100
    print(f"  {cat}: {count:,} ({pct:.1f}%)")

# Info: report how many MEV blocks have no bid timing, since they are left
# out of the category-based comparisons.
mev_no_bid = (df["builder_category"] == "MEV (no bid timing)").sum()
if mev_no_bid > 0:
    mev_no_bid_pct = mev_no_bid / total_blocks * 100
    print(f"\nNote: {mev_no_bid:,} MEV blocks ({mev_no_bid_pct:.1f}%) lack bid timing data and are excluded from builder category comparisons.")
Total blocks: 7,190
  Local: 404 (5.6%)
  MEV (with bid timing): 3,397 (47.2%)

Note: 3,389 MEV blocks (47.1%) lack bid timing data and are excluded from builder category comparisons.

Size distribution by builder type

Histogram comparing the block size distribution between MEV and local blocks. MEV blocks tend to be larger due to MEV extraction strategies.

Show code
# Overlaid, semi-transparent histograms of on-wire block size, one per
# builder type.
builder_colors = {"MEV": "#9b59b6", "Local": "#3498db"}
fig = px.histogram(
    df,
    x="compressed_kib",
    color="builder_type",
    color_discrete_map=builder_colors,
    nbins=50,
    barmode="overlay",
    opacity=0.7,
)

# Horizontal legend placed just above the plot area, right-aligned.
legend_style = {
    "orientation": "h",
    "yanchor": "bottom",
    "y": 1.02,
    "xanchor": "right",
    "x": 1,
}
fig.update_layout(
    margin={"l": 60, "r": 30, "t": 30, "b": 60},
    xaxis={"title": "Block size on wire (KiB)"},
    yaxis={"title": "Block count"},
    legend_title="Builder type",
    legend=legend_style,
    height=400,
)
fig.show(config={"responsive": True})

# Summary: median and mean wire size for each builder type.
for bt in ["Local", "MEV"]:
    subset = df[df["builder_type"] == bt]
    sizes_kib = subset["compressed_kib"]
    print(f"{bt}: median size {sizes_kib.median():.1f} KiB, "
          f"mean {sizes_kib.mean():.1f} KiB")
Local: median size 32.2 KiB, mean 41.0 KiB
MEV: median size 64.8 KiB, mean 69.3 KiB

Compression ratio

Scatter plot showing the relationship between uncompressed SSZ block size and Snappy-compressed wire size. The dashed line shows the linear regression fitted to all blocks; the dotted line shows 1:1 (no compression).

Show code
fig = go.Figure()

# One marker trace per builder type so each gets its own color and legend
# entry; the slot number is carried as hover text.
builder_colors = {"Local": "#3498db", "MEV": "#9b59b6"}
for bt, color in builder_colors.items():
    subset = df[df["builder_type"] == bt]
    points = go.Scatter(
        name=bt,
        mode="markers",
        x=subset["uncompressed_kib"],
        y=subset["compressed_kib"],
        text=subset["slot"],
        marker=dict(color=color, opacity=0.4, size=5),
        hovertemplate="<b>Slot %{text}</b><br>Uncompressed: %{x:.1f} KiB<br>Compressed: %{y:.1f} KiB<extra></extra>",
    )
    fig.add_trace(points)

# Least-squares fit of compressed size against uncompressed size, using all
# blocks regardless of builder type.
uncompressed_kib = df["uncompressed_kib"]
compressed_kib = df["compressed_kib"]
slope, intercept, r_value, p_value, std_err = stats.linregress(
    uncompressed_kib, compressed_kib
)
r_squared = r_value**2

# Both reference lines are drawn between the smallest and largest
# uncompressed size in the data.
x_range = np.array([uncompressed_kib.min(), uncompressed_kib.max()])
y_pred = slope * x_range + intercept

regression_line = go.Scatter(
    x=x_range,
    y=y_pred,
    mode="lines",
    name=f"Regression (R\u00b2={r_squared:.3f})",
    line=dict(color="#2ecc71", width=2, dash="dash"),
)
fig.add_trace(regression_line)

# 1:1 reference line (no compression): y equals x.
identity_line = go.Scatter(
    x=x_range,
    y=x_range,
    mode="lines",
    name="1:1 (no compression)",
    line=dict(color="gray", width=1, dash="dot"),
)
fig.add_trace(identity_line)

legend_style = {
    "orientation": "h",
    "yanchor": "bottom",
    "y": 1.02,
    "xanchor": "right",
    "x": 1,
}
fig.update_layout(
    margin={"l": 60, "r": 30, "t": 30, "b": 60},
    xaxis={"title": "Uncompressed block size (KiB)"},
    yaxis={"title": "Compressed block size (KiB, on wire)"},
    legend=legend_style,
    height=500,
)
fig.show(config={"responsive": True})

# Print compression stats: ratio summary, fitted line and goodness of fit.
ratio = df["compression_ratio"]
print(f"Compression ratio: mean {ratio.mean():.2f}x, "
      f"median {ratio.median():.2f}x")
print(f"Regression: compressed = {slope:.3f} \u00d7 uncompressed + {intercept:.1f}")
print(f"R\u00b2 = {r_squared:.4f}")