MEV pipeline
Analysis of MEV pipeline timing and its effect on block propagation on Ethereum mainnet.
View query
Show code
# Load the "block_production_timeline" dataset for the target date.
df = load_parquet("block_production_timeline", target_date)

# Flag MEV vs local blocks: a row counts as MEV when it has a winning bid
# value; every other row is labelled "Local".
is_mev = df["winning_bid_value"].notna()
df["has_mev"] = is_mev
df["block_type"] = is_mev.map({True: "MEV", False: "Local"})

# Headline counts and percentage shares for each block type.
is_local = ~df["has_mev"]
print(f"Total slots: {len(df):,}")
print(f"MEV blocks: {is_mev.sum():,} ({is_mev.mean()*100:.1f}%)")
print(f"Local blocks: {is_local.sum():,} ({is_local.mean()*100:.1f}%)")
Bid trace coverage
MEV block data comes from two sources with different coverage:
- Payload delivered (mev_relay_proposer_payload_delivered): Records when a relay delivers a block to a proposer. Has value, builder, and relay info for all MEV blocks.
- Bid trace (mev_relay_bid_trace): Records individual bids during the auction. Has bid timing, but may not include the winning block if its bid arrived late or there is a gap in the data.
The chart below shows what proportion of each relay's blocks have bid timing data available.
Show code
# Bid trace coverage analysis: for each relay, count MEV blocks and how many
# of them carry a winning-bid timestamp.
df_trace = df.loc[df["has_mev"]].copy()

# Attribute each block to the first entry of its winning_relays array,
# falling back to "Unknown" when the array is empty.
df_trace["relay"] = df_trace["winning_relays"].apply(
    lambda relays: relays[0] if len(relays) > 0 else "Unknown"
)
df_trace["has_bid_timing"] = df_trace["winning_bid_ms"].notna()

# Aggregate by relay, then derive the complement and the percentage share.
relay_coverage = (
    df_trace.groupby("relay")
    .agg(total=("slot", "count"), with_timing=("has_bid_timing", "sum"))
    .reset_index()
)
relay_coverage["without_timing"] = relay_coverage["total"].sub(relay_coverage["with_timing"])
relay_coverage["pct_with_timing"] = (
    relay_coverage["with_timing"].div(relay_coverage["total"]).mul(100).round(1)
)
# Ascending order by block count.
relay_coverage = relay_coverage.sort_values("total")

# Summary stats across all relays.
total_mev = relay_coverage["total"].sum()
total_with_timing = relay_coverage["with_timing"].sum()
total_without_timing = total_mev - total_with_timing
print(f"MEV blocks: {total_mev:,}")
print(f"With bid timing: {total_with_timing:,} ({total_with_timing/total_mev*100:.1f}%)")
print(f"Without bid timing: {total_without_timing:,} ({total_without_timing/total_mev*100:.1f}%)")
Show code
# Stacked horizontal bar chart: one bar per relay, split into blocks with and
# without bid timing.
relay_names = relay_coverage["relay"]

# In-bar label: the rounded percentage with timing, blank when the relay has
# no blocks with timing at all.
timing_labels = relay_coverage.apply(
    lambda row: f"{row['pct_with_timing']:.0f}%" if row["with_timing"] > 0 else "",
    axis=1,
)

with_timing_bar = go.Bar(
    y=relay_names,
    x=relay_coverage["with_timing"],
    name="With bid timing",
    orientation="h",
    marker_color="#2ecc71",
    text=timing_labels,
    textposition="inside",
    hovertemplate="<b>%{y}</b><br>With timing: %{x:,}<extra></extra>",
)
without_timing_bar = go.Bar(
    y=relay_names,
    x=relay_coverage["without_timing"],
    name="Without bid timing",
    orientation="h",
    marker_color="#e74c3c",
    hovertemplate="<b>%{y}</b><br>Without timing: %{x:,}<extra></extra>",
)

# Trace order matters for stacking: "with timing" is drawn first.
fig = go.Figure(data=[with_timing_bar, without_timing_bar])
fig.update_layout(
    barmode="stack",
    margin={"l": 150, "r": 30, "t": 30, "b": 60},
    xaxis={"title": "Number of blocks"},
    yaxis={"title": ""},
    legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1},
    height=350,
)
fig.show(config={"responsive": True})
MEV pipeline and blob count effects
How do blob count and MEV pipeline characteristics interact to affect block propagation? All scatter plots use blob count as the color dimension.
Show code
# Prepare data for MEV analysis.
#
# Filter out missed slots: keep only rows whose block_first_seen_ms is present,
# non-negative and below 60 seconds (60000 ms).
first_seen = df["block_first_seen_ms"]
valid_block = first_seen.notna() & (first_seen >= 0) & (first_seen < 60000)
df_mev = df[valid_block].copy()

# Flag MEV vs local blocks (recomputed here so this cell does not depend on
# the flag columns having been added to df earlier).
df_mev["has_mev"] = df_mev["winning_bid_value"].notna()
df_mev["block_type"] = df_mev["has_mev"].map({True: "MEV", False: "Local"})

# Calculate bidding window duration
df_mev["bidding_duration_ms"] = df_mev["last_bid_ms"] - df_mev["first_bid_ms"]
# Calculate block to column delay (for slots with blobs)
df_mev["block_to_column_ms"] = df_mev["first_column_first_seen_ms"] - df_mev["block_first_seen_ms"]
# Calculate bid to block delay (time from winning bid to block appearing)
df_mev["bid_to_block_ms"] = df_mev["block_first_seen_ms"] - df_mev["winning_bid_ms"]

# Dynamic blob count bins based on actual data.
# .max() yields NaN/NA when no valid blocks remain, and a float when the
# column contains missing values; range() below needs a plain int, so
# normalise and fall back to 0.
raw_max_blobs = df_mev["blob_count"].max()
max_blobs = int(raw_max_blobs) if pd.notna(raw_max_blobs) else 0
bin_size = 3
# Create bins: [-1, 0, 3, 6, 9, 12, 15, ...] to match 0, 1-3, 4-6, etc.
# The range always ends on the first multiple of bin_size >= max_blobs, so no
# extra top bin is ever required. With max_blobs == 0 the bins are just [-1, 0].
bins = [-1, 0] + list(range(bin_size, max_blobs + bin_size, bin_size))
labels = ["0"] + [f"{lo + 1}-{hi}" for lo, hi in zip(bins[1:-1], bins[2:])]
df_mev["blob_bin"] = pd.cut(df_mev["blob_count"], bins=bins, labels=labels)
BLOB_BIN_ORDER = labels  # Store for use in charts

# Generate Plasma-based discrete colors, truncated to avoid light yellow (poor contrast)
# Sample from 0.0 to 0.70 of the Plasma scale (more aggressive truncation)
PLASMA_MAX = 0.70
n_bins = len(labels)
if n_bins > 1:
    sample_points = [i / (n_bins - 1) * PLASMA_MAX for i in range(n_bins)]
else:
    # A single bin (no blobs in the data) would divide by zero above; use the
    # start of the scale instead.
    sample_points = [0.0]
BLOB_COLORS = dict(zip(labels, px.colors.sample_colorscale("Plasma", sample_points)))

# Create truncated Plasma colorscale for continuous use (avoids light yellow)
PLASMA_TRUNCATED = px.colors.sample_colorscale("Plasma", [i / 10 * PLASMA_MAX for i in range(11)])

# MEV-only subset for MEV timing plots
df_mev_only = df_mev[df_mev["has_mev"]].copy()

# Guard the share calculation against an empty input frame.
valid_pct = len(df_mev) / len(df) * 100 if len(df) else 0.0
print(f"Total slots in data: {len(df):,}")
print(f"Slots with valid blocks: {len(df_mev):,} ({valid_pct:.1f}%)")
print(f"MEV blocks: {df_mev['has_mev'].sum():,} ({df_mev['has_mev'].mean()*100:.1f}%)")
print(f"Local blocks: {(~df_mev['has_mev']).sum():,} ({(~df_mev['has_mev']).mean()*100:.1f}%)")
print(f"Max blob count: {max_blobs}, bins: {labels}")
Winning bid timing vs block arrival
Does late bidding combined with high blob count delay block propagation?
Show code
# Scatter of winning bid time against block arrival time, coloured by blob count.
if len(df_mev_only) == 0:
    print("No MEV data available.")
else:
    # Only rows with both a winning-bid time and a block arrival time can be plotted.
    df_plot = df_mev_only.dropna(subset=["winning_bid_ms", "block_first_seen_ms"]).copy()
    # Extract first relay from array for display (None when the array is empty).
    df_plot["relay"] = df_plot["winning_relays"].apply(
        lambda relays: relays[0] if len(relays) > 0 else None
    )
    # Force continuous color by casting the blob count to float.
    df_plot["blob_count_f"] = df_plot["blob_count"].astype(float)

    # Show the integer blob count on hover and hide its float twin.
    hover_columns = {"slot": True, "relay": True, "blob_count": True, "blob_count_f": False}
    fig = px.scatter(
        df_plot,
        x="winning_bid_ms",
        y="block_first_seen_ms",
        color="blob_count_f",
        color_continuous_scale=PLASMA_TRUNCATED,
        range_color=[0, max_blobs],
        opacity=0.6,
        hover_data=hover_columns,
    )
    fig.update_layout(
        margin={"l": 60, "r": 30, "t": 30, "b": 60},
        xaxis={"title": "Winning bid timing (ms from slot start)"},
        yaxis={"title": "Block first seen (ms from slot start)"},
        coloraxis_colorbar={"title": "Blobs"},
        height=450,
    )
    fig.show(config={"responsive": True})