Fri, May 8, 2026 Latest

MEV pipeline

Analysis of MEV pipeline timing and its effect on block propagation on Ethereum mainnet.

View query
WITH
-- Base slots using proposer duty as the source of truth
slots AS (
    SELECT DISTINCT
        slot,
        slot_start_date_time,
        proposer_validator_index
    FROM canonical_beacon_proposer_duty
    WHERE meta_network_name = 'mainnet'
      AND slot_start_date_time >= '2026-05-08' AND slot_start_date_time < '2026-05-08'::date + INTERVAL 1 DAY
),

-- Proposer entity mapping
proposer_entity AS (
    SELECT
        index,
        entity
    FROM ethseer_validator_entity
    WHERE meta_network_name = 'mainnet'
),

-- Blob count per slot
blob_count AS (
    SELECT
        slot,
        uniq(blob_index) AS blob_count
    FROM canonical_beacon_blob_sidecar
    WHERE meta_network_name = 'mainnet'
      AND slot_start_date_time >= '2026-05-08' AND slot_start_date_time < '2026-05-08'::date + INTERVAL 1 DAY
    GROUP BY slot
),

-- Canonical block hash (to verify MEV payload was actually used)
canonical_block AS (
    SELECT DISTINCT
        slot,
        execution_payload_block_hash
    FROM canonical_beacon_block
    WHERE meta_network_name = 'mainnet'
      AND slot_start_date_time >= '2026-05-08' AND slot_start_date_time < '2026-05-08'::date + INTERVAL 1 DAY
),

-- MEV bid timing using timestamp_ms
mev_bids AS (
    SELECT
        slot,
        slot_start_date_time,
        min(timestamp_ms) AS first_bid_timestamp_ms,
        max(timestamp_ms) AS last_bid_timestamp_ms
    FROM mev_relay_bid_trace
    WHERE meta_network_name = 'mainnet'
      AND slot_start_date_time >= '2026-05-08' AND slot_start_date_time < '2026-05-08'::date + INTERVAL 1 DAY
    GROUP BY slot, slot_start_date_time
),

-- MEV payload delivery - join canonical block with delivered payloads
-- Note: Use is_mev flag because ClickHouse LEFT JOIN returns 0 (not NULL) for non-matching rows
-- Get value from proposer_payload_delivered (not bid_trace, which may not have the winning block)
mev_payload AS (
    SELECT
        cb.slot,
        cb.execution_payload_block_hash AS winning_block_hash,
        1 AS is_mev,
        max(pd.value) AS winning_bid_value,
        groupArray(DISTINCT pd.relay_name) AS relay_names,
        any(pd.builder_pubkey) AS winning_builder
    FROM canonical_block cb
    GLOBAL INNER JOIN mev_relay_proposer_payload_delivered pd
        ON cb.slot = pd.slot AND cb.execution_payload_block_hash = pd.block_hash
    WHERE pd.meta_network_name = 'mainnet'
      AND slot_start_date_time >= '2026-05-08' AND slot_start_date_time < '2026-05-08'::date + INTERVAL 1 DAY
    GROUP BY cb.slot, cb.execution_payload_block_hash
),

-- Winning bid timing from bid_trace (may not exist for all MEV blocks)
winning_bid AS (
    SELECT
        bt.slot,
        bt.slot_start_date_time,
        argMin(bt.timestamp_ms, bt.event_date_time) AS winning_bid_timestamp_ms
    FROM mev_relay_bid_trace bt
    GLOBAL INNER JOIN mev_payload mp ON bt.slot = mp.slot AND bt.block_hash = mp.winning_block_hash
    WHERE bt.meta_network_name = 'mainnet'
      AND slot_start_date_time >= '2026-05-08' AND slot_start_date_time < '2026-05-08'::date + INTERVAL 1 DAY
    GROUP BY bt.slot, bt.slot_start_date_time
),

-- Block gossip timing with spread
block_gossip AS (
    SELECT
        slot,
        min(event_date_time) AS block_first_seen,
        max(event_date_time) AS block_last_seen
    FROM libp2p_gossipsub_beacon_block
    WHERE meta_network_name = 'mainnet'
      AND slot_start_date_time >= '2026-05-08' AND slot_start_date_time < '2026-05-08'::date + INTERVAL 1 DAY
    GROUP BY slot
),

-- Column arrival timing: first arrival per column, then min/max of those
column_gossip AS (
    SELECT
        slot,
        min(first_seen) AS first_column_first_seen,
        max(first_seen) AS last_column_first_seen
    FROM (
        SELECT
            slot,
            column_index,
            min(event_date_time) AS first_seen
        FROM libp2p_gossipsub_data_column_sidecar
        WHERE meta_network_name = 'mainnet'
          AND slot_start_date_time >= '2026-05-08' AND slot_start_date_time < '2026-05-08'::date + INTERVAL 1 DAY
          AND event_date_time > '1970-01-01 00:00:01'
        GROUP BY slot, column_index
    )
    GROUP BY slot
)

SELECT
    s.slot AS slot,
    s.slot_start_date_time AS slot_start_date_time,
    pe.entity AS proposer_entity,

    -- Blob count
    coalesce(bc.blob_count, 0) AS blob_count,

    -- MEV bid timing (absolute and relative to slot start)
    fromUnixTimestamp64Milli(mb.first_bid_timestamp_ms) AS first_bid_at,
    mb.first_bid_timestamp_ms - toInt64(toUnixTimestamp(mb.slot_start_date_time)) * 1000 AS first_bid_ms,
    fromUnixTimestamp64Milli(mb.last_bid_timestamp_ms) AS last_bid_at,
    mb.last_bid_timestamp_ms - toInt64(toUnixTimestamp(mb.slot_start_date_time)) * 1000 AS last_bid_ms,

    -- Winning bid timing (from bid_trace, may be NULL if block hash not in bid_trace)
    if(wb.slot != 0, fromUnixTimestamp64Milli(wb.winning_bid_timestamp_ms), NULL) AS winning_bid_at,
    if(wb.slot != 0, wb.winning_bid_timestamp_ms - toInt64(toUnixTimestamp(s.slot_start_date_time)) * 1000, NULL) AS winning_bid_ms,

    -- MEV payload info (from proposer_payload_delivered, always present for MEV blocks)
    if(mp.is_mev = 1, mp.winning_bid_value, NULL) AS winning_bid_value,
    if(mp.is_mev = 1, mp.relay_names, []) AS winning_relays,
    if(mp.is_mev = 1, mp.winning_builder, NULL) AS winning_builder,

    -- Block gossip timing with spread
    bg.block_first_seen,
    dateDiff('millisecond', s.slot_start_date_time, bg.block_first_seen) AS block_first_seen_ms,
    bg.block_last_seen,
    dateDiff('millisecond', s.slot_start_date_time, bg.block_last_seen) AS block_last_seen_ms,
    dateDiff('millisecond', bg.block_first_seen, bg.block_last_seen) AS block_spread_ms,

    -- Column arrival timing (NULL when no blobs)
    if(coalesce(bc.blob_count, 0) = 0, NULL, cg.first_column_first_seen) AS first_column_first_seen,
    if(coalesce(bc.blob_count, 0) = 0, NULL, dateDiff('millisecond', s.slot_start_date_time, cg.first_column_first_seen)) AS first_column_first_seen_ms,
    if(coalesce(bc.blob_count, 0) = 0, NULL, cg.last_column_first_seen) AS last_column_first_seen,
    if(coalesce(bc.blob_count, 0) = 0, NULL, dateDiff('millisecond', s.slot_start_date_time, cg.last_column_first_seen)) AS last_column_first_seen_ms,
    if(coalesce(bc.blob_count, 0) = 0, NULL, dateDiff('millisecond', cg.first_column_first_seen, cg.last_column_first_seen)) AS column_spread_ms

FROM slots s
GLOBAL LEFT JOIN proposer_entity pe ON s.proposer_validator_index = pe.index
GLOBAL LEFT JOIN blob_count bc ON s.slot = bc.slot
GLOBAL LEFT JOIN mev_bids mb ON s.slot = mb.slot
GLOBAL LEFT JOIN mev_payload mp ON s.slot = mp.slot
GLOBAL LEFT JOIN winning_bid wb ON s.slot = wb.slot
GLOBAL LEFT JOIN block_gossip bg ON s.slot = bg.slot
GLOBAL LEFT JOIN column_gossip cg ON s.slot = cg.slot

ORDER BY s.slot DESC
Show code
df = load_parquet("block_production_timeline", target_date)

# Flag MEV vs local blocks
df["has_mev"] = df["winning_bid_value"].notna()
df["block_type"] = df["has_mev"].map({True: "MEV", False: "Local"})

print(f"Total slots: {len(df):,}")
print(f"MEV blocks: {df['has_mev'].sum():,} ({df['has_mev'].mean()*100:.1f}%)")
print(f"Local blocks: {(~df['has_mev']).sum():,} ({(~df['has_mev']).mean()*100:.1f}%)")
Total slots: 7,203
MEV blocks: 6,786 (94.2%)
Local blocks: 417 (5.8%)

Bid trace coverage

MEV block data comes from two sources with different coverage:

  • Payload delivered (mev_relay_proposer_payload_delivered): Records when a relay delivers a block to a proposer. Has value, builder, and relay info for all MEV blocks.
  • Bid trace (mev_relay_bid_trace): Records individual bids during the auction. Has bid timing but may not include the winning block if it was a late bid or data gap.

The chart below shows what proportion of each relay's blocks have bid timing data available.

Show code
# Bid trace coverage analysis
df_trace = df[df["has_mev"]].copy()
df_trace["relay"] = df_trace["winning_relays"].apply(lambda x: x[0] if len(x) > 0 else "Unknown")
df_trace["has_bid_timing"] = df_trace["winning_bid_ms"].notna()

# Aggregate by relay
relay_coverage = df_trace.groupby("relay").agg(
    total=("slot", "count"),
    with_timing=("has_bid_timing", "sum"),
).reset_index()
relay_coverage["without_timing"] = relay_coverage["total"] - relay_coverage["with_timing"]
relay_coverage["pct_with_timing"] = (relay_coverage["with_timing"] / relay_coverage["total"] * 100).round(1)
relay_coverage = relay_coverage.sort_values("total", ascending=True)

# Summary stats
total_mev = relay_coverage["total"].sum()
total_with_timing = relay_coverage["with_timing"].sum()
print(f"MEV blocks: {total_mev:,}")
print(f"With bid timing: {total_with_timing:,} ({total_with_timing/total_mev*100:.1f}%)")
print(f"Without bid timing: {total_mev - total_with_timing:,} ({(total_mev - total_with_timing)/total_mev*100:.1f}%)")
MEV blocks: 6,786
With bid timing: 3,397 (50.1%)
Without bid timing: 3,389 (49.9%)
Show code
# Stacked horizontal bar chart
fig = go.Figure()

fig.add_trace(go.Bar(
    y=relay_coverage["relay"],
    x=relay_coverage["with_timing"],
    name="With bid timing",
    orientation="h",
    marker_color="#2ecc71",
    text=relay_coverage.apply(lambda r: f"{r['pct_with_timing']:.0f}%" if r['with_timing'] > 0 else "", axis=1),
    textposition="inside",
    hovertemplate="<b>%{y}</b><br>With timing: %{x:,}<extra></extra>",
))

fig.add_trace(go.Bar(
    y=relay_coverage["relay"],
    x=relay_coverage["without_timing"],
    name="Without bid timing",
    orientation="h",
    marker_color="#e74c3c",
    hovertemplate="<b>%{y}</b><br>Without timing: %{x:,}<extra></extra>",
))

fig.update_layout(
    barmode="stack",
    margin=dict(l=150, r=30, t=30, b=60),
    xaxis=dict(title="Number of blocks"),
    yaxis=dict(title=""),
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    height=350,
)
fig.show(config={"responsive": True})

MEV pipeline and blob count effects

How do blob count and MEV pipeline characteristics interact to affect block propagation? All scatter plots use blob count as the color dimension.

Show code
# Prepare data for MEV analysis
df_mev = df.copy()

# Filter out missed slots (block never produced - shows as invalid timestamps)
# A valid block_first_seen_ms should be positive and reasonable (< 60 seconds)
df_mev = df_mev[df_mev["block_first_seen_ms"].notna()]
df_mev = df_mev[(df_mev["block_first_seen_ms"] >= 0) & (df_mev["block_first_seen_ms"] < 60000)]

# Flag MEV vs local blocks
df_mev["has_mev"] = df_mev["winning_bid_value"].notna()
df_mev["block_type"] = df_mev["has_mev"].map({True: "MEV", False: "Local"})

# Calculate bidding window duration
df_mev["bidding_duration_ms"] = df_mev["last_bid_ms"] - df_mev["first_bid_ms"]

# Calculate block to column delay (for slots with blobs)
df_mev["block_to_column_ms"] = df_mev["first_column_first_seen_ms"] - df_mev["block_first_seen_ms"]

# Calculate bid to block delay (time from winning bid to block appearing)
df_mev["bid_to_block_ms"] = df_mev["block_first_seen_ms"] - df_mev["winning_bid_ms"]

# Dynamic blob count bins based on actual data
max_blobs = df_mev["blob_count"].max()
bin_size = 3
# Create bins: [-1, 0, 3, 6, 9, 12, 15, ...] to match 0, 1-3, 4-6, etc.
bins = [-1, 0] + list(range(bin_size, max_blobs + bin_size, bin_size))
if bins[-1] < max_blobs:
    bins.append(((max_blobs // bin_size) + 1) * bin_size)
labels = ["0"] + [f"{bins[i]+1}-{bins[i+1]}" for i in range(1, len(bins)-1)]
df_mev["blob_bin"] = pd.cut(df_mev["blob_count"], bins=bins, labels=labels)
BLOB_BIN_ORDER = labels  # Store for use in charts

# Generate Plasma-based discrete colors, truncated to avoid light yellow (poor contrast)
# Sample from 0.0 to 0.70 of the Plasma scale (more aggressive truncation)
PLASMA_MAX = 0.70
sample_points = [i / (len(labels) - 1) * PLASMA_MAX for i in range(len(labels))]
BLOB_COLORS = dict(zip(labels, px.colors.sample_colorscale("Plasma", sample_points)))

# Create truncated Plasma colorscale for continuous use (avoids light yellow)
PLASMA_TRUNCATED = px.colors.sample_colorscale("Plasma", [i/10 * PLASMA_MAX for i in range(11)])

# MEV-only subset for MEV timing plots
df_mev_only = df_mev[df_mev["has_mev"]].copy()

print(f"Total slots in data: {len(df):,}")
print(f"Slots with valid blocks: {len(df_mev):,} ({len(df_mev)/len(df)*100:.1f}%)")
print(f"MEV blocks: {df_mev['has_mev'].sum():,} ({df_mev['has_mev'].mean()*100:.1f}%)")
print(f"Local blocks: {(~df_mev['has_mev']).sum():,} ({(~df_mev['has_mev']).mean()*100:.1f}%)")
print(f"Max blob count: {max_blobs}, bins: {labels}")
Total slots in data: 7,203
Slots with valid blocks: 7,196 (99.9%)
MEV blocks: 6,786 (94.3%)
Local blocks: 410 (5.7%)
Max blob count: 21, bins: ['0', '1-3', '4-6', '7-9', '10-12', '13-15', '16-18', '19-21']

Winning bid timing vs block arrival

Does late bidding combined with high blob count delay block propagation?

Show code
if len(df_mev_only) > 0:
    # Extract first relay from array for display
    df_plot = df_mev_only.dropna(subset=["winning_bid_ms", "block_first_seen_ms"]).copy()
    df_plot["relay"] = df_plot["winning_relays"].apply(lambda x: x[0] if len(x) > 0 else None)
    df_plot["blob_count_f"] = df_plot["blob_count"].astype(float)  # Force continuous color

    fig = px.scatter(
        df_plot,
        x="winning_bid_ms",
        y="block_first_seen_ms",
        color="blob_count_f",
        color_continuous_scale=PLASMA_TRUNCATED,
        range_color=[0, max_blobs],
        opacity=0.6,
        hover_data={"slot": True, "relay": True, "blob_count": True, "blob_count_f": False},
    )
    fig.update_layout(
        margin=dict(l=60, r=30, t=30, b=60),
        xaxis=dict(title="Winning bid timing (ms from slot start)"),
        yaxis=dict(title="Block first seen (ms from slot start)"),
        coloraxis_colorbar=dict(title="Blobs"),
        height=450,
    )
    fig.show(config={"responsive": True})
else:
    print("No MEV data available.")