Block propagation
Analysis of block propagation timing relative to block size on the wire, with corrected timing that isolates network propagation from block building overhead.
Terminology:
- First seen (raw): Time from slot start until the first sentry observes the block. Includes block building time + network latency.
- Winning bid: Time when the MEV relay received the winning bid for the block. Marks when the block was "ready" to broadcast.
- First seen (corrected): For MEV blocks with bid timing, `first_seen - winning_bid`. Isolates network propagation time.
- Propagation spread: Time between when the first sentry saw the block and when the last sentry saw it.
- Wire size: Block size after Snappy compression, as transmitted over libp2p gossipsub.
Show code
# This notebook joins two data sources:
# 1. block_propagation_by_size: block sizes and propagation timing
# 2. block_production_timeline: MEV winning bid timing
# Only the block_propagation_by_size query is passed to display_sql here.
# NOTE(review): display_sql and target_date are defined outside this cell.
display_sql("block_propagation_by_size", target_date)
View query
Show code
# Read the size/propagation table and the block production timeline for the
# target date.
df_size = load_parquet("block_propagation_by_size", target_date)
df_timeline = load_parquet("block_production_timeline", target_date)

# Left join on slot: every row of the size table is kept, and rows whose slot
# has no match in the timeline get a missing winning_bid_ms.
bid_timing = df_timeline[["slot", "winning_bid_ms"]]
df = pd.merge(df_size, bid_timing, on="slot", how="left")
# Derived columns: propagation spread, compression ratio, and sizes in KiB.
df["spread_ms"] = df["last_seen_ms"] - df["first_seen_ms"]
df["compression_ratio"] = df["uncompressed_bytes"] / df["compressed_bytes"]
for _kind in ("compressed", "uncompressed"):
    df[f"{_kind}_kib"] = df[f"{_kind}_bytes"] / 1024

# Corrected first seen: for rows whose builder_type is "MEV" and that have a
# winning bid time, subtract the winning bid time from first seen; every other
# row keeps its raw first_seen_ms.
has_bid_timing = (df["builder_type"] == "MEV") & df["winning_bid_ms"].notna()
since_winning_bid = df["first_seen_ms"] - df["winning_bid_ms"]
df["corrected_first_seen_ms"] = np.where(
    has_bid_timing, since_winning_bid, df["first_seen_ms"]
)
# Size buckets for binning (in KiB). SIZE_ORDER doubles as the label list so
# the bucket labels and their display order cannot drift apart.
SIZE_ORDER = ["< 50 KiB", "50-100 KiB", "100-150 KiB", ">= 150 KiB"]
# right=False makes each bin closed on the left and open on the right:
# [0, 50), [50, 100), [100, 150), [150, inf). That is what the labels state
# ("< 50", ">= 150"). pd.cut's default right-closed bins would put a block of
# exactly 150 KiB under "100-150 KiB", one of exactly 50 KiB under "< 50 KiB",
# and leave a size of exactly 0 unbucketed (NaN).
df["size_bucket"] = pd.cut(
    df["compressed_kib"],
    bins=[0, 50, 100, 150, float("inf")],
    labels=SIZE_ORDER,
    right=False,
)
# Builder category with 3 levels
def categorize_builder(row):
    """Return the three-level builder category label for one block row.

    Rows whose ``builder_type`` is "Local" are labelled "Local". Every other
    row is treated as MEV and split by whether ``winning_bid_ms`` is present,
    as judged by ``pd.notna``.
    """
    if row["builder_type"] == "Local":
        return "Local"
    has_bid_timing = pd.notna(row["winning_bid_ms"])
    return "MEV (with bid timing)" if has_bid_timing else "MEV (no bid timing)"
# Label every row with its builder category (row-wise apply).
df["builder_category"] = df.apply(categorize_builder, axis=1)

# Plot ordering and colors. "MEV (no bid timing)" is excluded from plots, so
# it has no entry in either constant.
CATEGORY_ORDER = ["Local", "MEV (with bid timing)"]
CATEGORY_COLORS = dict(zip(CATEGORY_ORDER, ("#3498db", "#9b59b6")))
# Summary: overall block count, then count and share of the two plotted
# categories.
total_blocks = len(df)
print(f"Total blocks: {total_blocks:,}")
for cat in ("Local", "MEV (with bid timing)"):
    count = df["builder_category"].eq(cat).sum()
    pct = count / total_blocks * 100
    print(f" {cat}: {count:,} ({pct:.1f}%)")

# Info: MEV blocks without bid timing are excluded from category-based plots;
# report how many there are, if any.
mev_no_bid = df["builder_category"].eq("MEV (no bid timing)").sum()
if mev_no_bid > 0:
    share = mev_no_bid / total_blocks * 100
    print(
        f"\nNote: {mev_no_bid:,} MEV blocks ({share:.1f}%) lack bid timing data "
        "and are excluded from builder category comparisons."
    )
Size distribution by builder type
Histogram comparing the block size distribution between MEV and local blocks. MEV blocks tend to be larger due to MEV extraction strategies.
Show code
# Overlaid, semi-transparent histograms of on-the-wire block size, colored by
# builder type.
fig = px.histogram(
    df,
    x="compressed_kib",
    color="builder_type",
    color_discrete_map={"MEV": "#9b59b6", "Local": "#3498db"},
    nbins=50,
    barmode="overlay",
    opacity=0.7,
)

# Horizontal legend anchored just above the top-right corner of the plot area.
fig.update_layout(
    margin={"l": 60, "r": 30, "t": 30, "b": 60},
    xaxis={"title": "Block size on wire (KiB)"},
    yaxis={"title": "Block count"},
    legend_title="Builder type",
    legend={
        "orientation": "h",
        "yanchor": "bottom",
        "y": 1.02,
        "xanchor": "right",
        "x": 1,
    },
    height=400,
)
fig.show(config={"responsive": True})
# Summary: median and mean wire size for each builder type.
for bt in ("Local", "MEV"):
    sizes_kib = df.loc[df["builder_type"] == bt, "compressed_kib"]
    print(
        f"{bt}: median size {sizes_kib.median():.1f} KiB, "
        f"mean {sizes_kib.mean():.1f} KiB"
    )
Compression ratio
Scatter plot showing the relationship between uncompressed SSZ block size and compressed (Snappy) wire size. The dashed line shows the linear regression; the dotted line shows 1:1 (no compression).
Show code
fig = go.Figure()

# One marker trace per builder type. The slot number is passed as `text` so
# the hover template can show it.
hover_label = (
    "<b>Slot %{text}</b><br>"
    "Uncompressed: %{x:.1f} KiB<br>"
    "Compressed: %{y:.1f} KiB<extra></extra>"
)
for bt, color in {"Local": "#3498db", "MEV": "#9b59b6"}.items():
    subset = df[df["builder_type"] == bt]
    points = go.Scatter(
        x=subset["uncompressed_kib"],
        y=subset["compressed_kib"],
        mode="markers",
        name=bt,
        marker={"color": color, "opacity": 0.4, "size": 5},
        hovertemplate=hover_label,
        text=subset["slot"],
    )
    fig.add_trace(points)
# Linear regression of compressed size on uncompressed size, over all rows.
uncompressed = df["uncompressed_kib"]
slope, intercept, r_value, p_value, std_err = stats.linregress(
    uncompressed, df["compressed_kib"]
)

# Evaluate the fitted line at the smallest and largest uncompressed size; two
# points are enough to draw a straight line.
x_range = np.array([uncompressed.min(), uncompressed.max()])
y_pred = slope * x_range + intercept
fit_line = go.Scatter(
    x=x_range,
    y=y_pred,
    mode="lines",
    name=f"Regression (R\u00b2={r_value**2:.3f})",
    line={"color": "#2ecc71", "width": 2, "dash": "dash"},
)
fig.add_trace(fit_line)
# 1:1 reference line (no compression): y equals x over the same x range as the
# regression line.
identity_line = go.Scatter(
    x=x_range,
    y=x_range,
    mode="lines",
    name="1:1 (no compression)",
    line={"color": "gray", "width": 1, "dash": "dot"},
)
fig.add_trace(identity_line)

# Horizontal legend anchored just above the top-right corner of the plot area.
fig.update_layout(
    margin={"l": 60, "r": 30, "t": 30, "b": 60},
    xaxis={"title": "Uncompressed block size (KiB)"},
    yaxis={"title": "Compressed block size (KiB, on wire)"},
    legend={
        "orientation": "h",
        "yanchor": "bottom",
        "y": 1.02,
        "xanchor": "right",
        "x": 1,
    },
    height=500,
)
fig.show(config={"responsive": True})
# Print compression stats: mean/median ratio, the fitted line, and R squared.
print(f"Compression ratio: mean {df['compression_ratio'].mean():.2f}x, "
      f"median {df['compression_ratio'].median():.2f}x")
# Print the intercept with an explicit sign so a negative value reads
# "... - 1.2" rather than "... + -1.2". A non-negative (or NaN) intercept
# prints exactly as before.
intercept_sign = "-" if intercept < 0 else "+"
print(
    f"Regression: compressed = {slope:.3f} \u00d7 uncompressed "
    f"{intercept_sign} {abs(intercept):.1f}"
)
print(f"R\u00b2 = {r_value**2:.4f}")