SDK Reference¶
PeerCache is usable both as a drop-in SGLang backend and as a library. This page documents the public Python API and the C++ data-plane binding.
peercache.store.PeerCacheStore¶
The SGLang HiCacheStorage backend. Normally instantiated by SGLang's dynamic
backend factory, but can be driven directly.
from types import SimpleNamespace
from peercache.store import PeerCacheStore
storage_config = SimpleNamespace(
tp_rank=0, tp_size=1, pp_rank=0, pp_size=1, is_mla_model=False,
extra_config={"discovery_addr": "10.0.0.1:31998", "protocol": "rdma",
"device_name": "mlx5_0", "global_segment_size": "8gb"},
)
store = PeerCacheStore(storage_config)
store.register_mem_pool_host(mem_pool_host) # SGLang calls this
Key methods¶
| method | description |
|---|---|
| `register_mem_pool_host(mem_pool_host)` | Register the receive MR (host KV buffer) and allocate + register the published pool. |
| `register_mem_host_pool_v2(host_pool, name)` | Register an extra (hybrid) pool for v2 transfers. |
| `batch_set_v1(keys, host_indices)` | Publish pages locally + record locations in the directory. Returns `list[bool]` per page. |
| `batch_get_v1(keys, host_indices)` | Look up locations + RDMA READ into host buffer. Returns `list[bool]` per page. |
| `batch_exists(keys)` | Number of consecutive existing pages from the start. |
| `batch_set_v2` / `batch_get_v2` / `batch_exists_v2` | Hybrid-model (KV + sidecar pools) paths. |
| `set` / `get` / `batch_set` / `batch_get` / `exists` | Single-key/batch zero-copy (ptr+size) APIs. |
| `clear()` | Drop all locally published entries (pool + directory). |
| `close()` | Tear down discovery, RPC server, and transport. |
peercache.config.PeerCacheConfig¶
from peercache.config import PeerCacheConfig
cfg = PeerCacheConfig(
discovery_addr="10.0.0.1:31998",
protocol="rdma", # or "tcp"
device_name="mlx5_0",
ib_port=1, gid_index=3,
global_segment_size="8gb", # accepts int or "4gb"/"512mb"
vnodes=160,
directory_replicas=2,
max_masters=3, # head + up to 2 more hosts act as discovery masters
disk_enabled=True,
disk_path="/data/peercache/",
disk_size="100gb",
)
# or from SGLang extra_config:
cfg = PeerCacheConfig.from_extra_config({"discovery_addr": "10.0.0.1:31998"})
peercache.server.NodeRuntime¶
Wires the control + data planes for one node: transport, control RPC server + directory shard, discovery client, hash ring, and directory client.
from peercache.server import NodeRuntime
rt = NodeRuntime(cfg)
rt.start()
print(rt.node_id, rt.local_rdma_endpoint)
rt.directory.put({...}) # peercache.directory.DirectoryClient
rt.transport.batch_read([...]) # peercache.transport.Transport
rt.stop()
peercache.transport¶
Common data-plane interface with two implementations.
from peercache.transport import create_transport, Mr, ReadOp
t = create_transport(cfg) # RdmaTransport, or TcpTransport fallback
mr: Mr = t.register_mr(addr, length) # -> Mr(addr, rkey, lkey)
ok: list[bool] = t.batch_read([
ReadOp(remote_endpoint="host:port", local_addr=dst,
remote_addr=src, rkey=mr.rkey, length=n),
])
endpoint = t.local_endpoint() # "host:port" to advertise
`RdmaTransport` wraps the C++ `_peercache.TransferEngine` (one-sided RDMA READ). `TcpTransport` mirrors the same API over TCP for testing without RDMA.
peercache.directory¶
from peercache.directory import DirectoryServer, DirectoryClient
server = DirectoryServer(); server.attach(rpc_server) # hosts a shard
client = DirectoryClient(ring, resolve_control, replicas=1)
client.put({key: DataLocation(...)})
locs = client.get([key]) # list[DataLocation | None]
present = client.exists([key]) # list[bool]
client.delete([key])
peercache.discovery¶
from peercache.discovery import DiscoveryServer, DiscoveryClient
meta = DiscoveryServer("0.0.0.0", 31998); meta.start()
client = DiscoveryClient(addr, node_info, on_members=cb, heartbeat_interval=2.0)
client.start() # register + heartbeat + refresh membership
client.members() # dict[node_id, NodeInfo]
peercache.hashring.ConsistentHashRing¶
from peercache.hashring import ConsistentHashRing
ring = ConsistentHashRing(vnodes=160)
ring.set_nodes(["n1", "n2", "n3"])
ring.get_node("key") # owner node_id
ring.get_nodes("key", 2) # replica set, clockwise, distinct
peercache.pool.PublishedPool¶
from peercache.pool import PublishedPool
pool = PublishedPool(base_addr, capacity, rkey, on_evict=cb)
remote_addr = pool.publish(key, src_ptr, length) # local memcpy; None if too big
pool.address_of(key) # (remote_addr, length) | None
pool.remove(key)
peercache.diskstore.DiskStore¶
The L4 disk tier: async write-through, LRU-bounded capacity, restart-safe index.
from peercache.diskstore import DiskStore
d = DiskStore("/data/peercache", max_bytes=100 << 30,
on_evict=lambda keys: None, node_id="node-0")
d.put("key", b"...page bytes...") # async write-through (idempotent)
d.get("key") # bytes | None (moves to MRU)
d.exists("key") # bool
used_bytes, num_keys = d.stats()
d.remove("key"); d.close()
peercache.metrics¶
from peercache.metrics import Metrics, MetricsServer
m = Metrics(node_id="node-0")
m.record_read(hit=True, nbytes=4096, seconds=0.0003, source="remote") # local/remote/disk
m.record_write(nbytes=4096, seconds=0.0002)
m.set_gauge_provider("pool_bytes_used", lambda: pool.bytes_used)
text = m.render_prometheus() # Prometheus exposition
srv = MetricsServer(m, "0.0.0.0", 31997, dashboard=True)
srv.start() # GET /metrics, GET / (dashboard), GET /healthz
srv.stop()
PeerCacheStore wires these automatically: it spills to a DiskStore, registers
pool/disk/membership gauges, and runs a MetricsServer (see extra_config keys
disk_* and metrics_*).
peercache.types¶
- `DataLocation(node_id, rdma_endpoint, remote_addr, rkey, length)` — a directory value; `to_dict()`/`from_dict()` for the wire.
- `NodeInfo(node_id, control_host, control_port, rdma_host, rdma_port)` — what a node advertises to discovery.
C++ binding: _peercache.TransferEngine¶
Built from cpp/ via pybind11 (only on hosts with libibverbs/librdmacm).
import _peercache
assert _peercache.HAS_RDMA
eng = _peercache.TransferEngine(device_name="mlx5_0", ib_port=1, gid_index=3,
bind_host="0.0.0.0", bind_port=0)
mr = eng.register_mr(addr, length) # -> MrHandle(addr, length, lkey, rkey)
reqs = [_peercache.ReadRequest(remote_node="host:port", local_addr=dst,
remote_addr=src, rkey=mr.rkey, length=n)]
ok = eng.batch_read(reqs) # list[bool]
endpoint = eng.local_endpoint()
When built without RDMA (PEERCACHE_NO_RDMA=ON), the module imports with
HAS_RDMA == False and create_transport automatically selects the TCP fallback.