BibTeX
@article{Bringmann2015OCw,
  author      = {Bringmann, Karl and Doerr, Benjamin and Neumann, Adrian and Sliacan, Jakub},
  title       = {Online Checkpointing with Improved Worst-Case Guarantees},
  journal     = {{INFORMS} Journal on Computing},
  volume      = {27},
  number      = {3},
  pages       = {478--490},
  year        = {2015},
  doi         = {10.1287/ijoc.2014.0639},
  url         = {https://doi.org/10.1287/ijoc.2014.0639},
  abstract    = {In the online checkpointing problem, the task is to continuously maintain a set of
                 $k$ checkpoints that allow rewinding an ongoing computation faster than by a full
                 restart. The only operation allowed is to replace an old checkpoint by the current
                 state. Our aim is checkpoint placement strategies that minimize rewinding cost,
                 i.e., such that at all times $T$ when requested to rewind to some time $t \le T$
                 the number of computation steps that need to be redone to get to $t$ from a
                 checkpoint before $t$ is as few as possible. In particular, we want the closest
                 checkpoint earlier than $t$ to be no farther away from $t$ than $q_k$ times the
                 ideal distance $T/(k+1)$, where $q_k$ is a small constant. Improving earlier work
                 showing $1 + 1/k \le q_k \le 2$, we show that $q_k$ can be chosen asymptotically
                 less than 2. We present algorithms with asymptotic discrepancy
                 $q_k \le 1.59 + o(1)$ valid for all $k$ and
                 $q_k \le \ln(4) + o(1) \le 1.39 + o(1)$ valid for $k$ being a power of two.
                 Experiments indicate the uniform bound $p_k \le 1.7$ for all $k$. For small $k$,
                 we show how to use a linear programming approach to compute good checkpointing
                 algorithms. This gives discrepancies of less than 1.55 for all $k < 60$. We prove
                 the first lower bound that is asymptotically more than 1, namely
                 $q_k \ge 1.30 - o(1)$. We also show that optimal algorithms (yielding the infimum
                 discrepancy) exist for all $k$.},
  ad_theotech = {Checkpointing},
}
|