echo $exponent;
// Chooses a randomized pause length for throttled MWS calls and echoes it.
//The ListOrderItems and ListOrderItemsByNextToken operations together share a maximum request quota of 30 and a restore rate of one request every two seconds.
// Parameters:
//   $restoreRatePerSecond - requests restored per second (0.5 = one every two seconds)
//   $base, $exponent      - pow($base, $exponent) is the upper bound considered for the pause
//   $maximumRequestQuota  - size of the request quota
// NOTE(review): the braces around the function body and the closing brace of the
// `if` are absent from this listing, and no sleep() call is visible - confirm
// against the original script before reuse.
function ExponentialBackoffSleep($restoreRatePerSecond = 0.5, $base = 2, $exponent = 3, $maximumRequestQuota = 30)
// Current Unix time in whole seconds.
$unixTimestamp = time();
// Seconds needed to restore the whole quota (30 / 0.5 = 60 with the defaults).
$intervalSeconds = $maximumRequestQuota / $restoreRatePerSecond;
// Seconds needed to restore a single request, rounded up (2 with the defaults).
$restoreOneNeedSeconds = ceil(1 / $restoreRatePerSecond);
// Only act when the current second is an exact multiple of the interval.
if ($unixTimestamp % $intervalSeconds == 0) {
// Upper bound: the larger of base^exponent and the single-request restore time.
$max = pow($base, $exponent);
$max = $max > $restoreOneNeedSeconds ? $max : $restoreOneNeedSeconds;
// Random pause length between the single-request restore time and the upper bound.
$ExponentialBackoff = rand($restoreOneNeedSeconds, $max);
// NOTE(review): "\\r\\n" inside double quotes prints a literal backslash-r backslash-n, not a line break.
echo "ExponentialBackoffSleep " . $ExponentialBackoff . "\\r\\n";
// NOTE(review): only fragments of this function are visible - the braces and the
// loop body are missing. Presumably it relates $input to a power of $base; confirm
// against the original script.
function findCloestNumToPow($input, $base = 2, $exponent = 1)
// Inputs that round up to 1 or less yield 1.
if (ceil($input) <= 1) {
return 1;
// Iterates $w from $exponent up to pow($base, $exponent) inclusive; the body is not shown.
for ($w = $exponent, $len = pow($base, $exponent); $w <= $len; $w++) {
// NOTE(review): the constant 10 looks like a placeholder - confirm.
return 10;
// Chooses a randomized pause length for throttled MWS calls and echoes it.
//The ListOrderItems and ListOrderItemsByNextToken operations together share a maximum request quota of 30 and a restore rate of one request every two seconds.
// Parameters:
//   $restoreRatePerSecond - requests restored per second (0.5 = one every two seconds)
//   $base, $exponent      - pow($base, $exponent) is the upper bound considered for the pause
//   $maximumRequestQuota  - size of the request quota
// NOTE(review): the braces around the function body and the closing brace of the
// `if` are absent from this listing, and no sleep() call is visible - confirm
// against the original script before reuse.
function ExponentialBackoffSleep($restoreRatePerSecond = 0.5, $base = 2, $exponent = 3, $maximumRequestQuota = 30)
// Current Unix time in whole seconds.
$unixTimestamp = time();
// Seconds needed to restore the whole quota (30 / 0.5 = 60 with the defaults).
$intervalSeconds = $maximumRequestQuota / $restoreRatePerSecond;
// Seconds needed to restore a single request, rounded up (2 with the defaults).
$restoreOneNeedSeconds = ceil(1 / $restoreRatePerSecond);
// Only act when the current second is an exact multiple of the interval.
if ($unixTimestamp % $intervalSeconds == 0) {
// Upper bound: the larger of base^exponent and the single-request restore time.
$max = pow($base, $exponent);
$max = $max > $restoreOneNeedSeconds ? $max : $restoreOneNeedSeconds;
// Random pause length between the single-request restore time and the upper bound.
$ExponentialBackoff = rand($restoreOneNeedSeconds, $max);
// NOTE(review): "\\r\\n" inside double quotes prints a literal backslash-r backslash-n, not a line break.
echo "ExponentialBackoffSleep " . $ExponentialBackoff . "\\r\\n";
// NOTE(review): only fragments of this function are visible - the braces and the
// loop body are missing. Presumably it relates $input to a power of $base; confirm
// against the original script.
function findCloestNumToPow($input, $base = 2, $exponent = 1)
// Inputs that round up to 1 or less yield 1.
if (ceil($input) <= 1) {
return 1;
// Iterates $w from $exponent up to pow($base, $exponent) inclusive; the body is not shown.
for ($w = $exponent, $len = pow($base, $exponent); $w <= $len; $w++) {
// NOTE(review): the constant 10 looks like a placeholder - confirm.
return 10;
// Interval-based variant: picks a random pause of 1..$intervalSeconds seconds when
// the current Unix time is an exact multiple of $intervalSeconds.
// NOTE(review): the listing stops after the random value is chosen - the braces and
// any sleep() call are not visible; confirm against the original script.
function ExponentialBackoffSleep($intervalSeconds = 8)
// Current Unix time in whole seconds.
$unixTimestamp = time();
// True only when the current second is an exact multiple of $intervalSeconds.
if ($unixTimestamp % $intervalSeconds == 0) {
// Random whole number of seconds between 1 and $intervalSeconds.
$ExponentialBackoff = rand(1, $intervalSeconds);
// Script body: prints a start marker, loads the MWS Orders client library, then
// calls invokeListOrderItems() for order ids read from the listorders table.
echo "\r\n" . date('Y-m-d H:i:s') . " TODO StartScript \r\n";
* win-cli-require_once
* */
//return; NULL
//return '';
// 2017-04-25 19:59:27
// Absolute Windows path of the MarketplaceWebServiceOrders library; every require below is built from it.
$win_cli_dir = 'D:\cmd\amzapi\amzapitest_com\MarketplaceWebServiceOrders\\';
require_once($win_cli_dir . 'Samples\.config.inc.php');
require_once($win_cli_dir . 'Samples\.config.db.php');
require_once($win_cli_dir . 'Client.php');
require_once($win_cli_dir . 'Model\ListOrderItemsRequest.php');
require_once($win_cli_dir . 'Model\ListOrderItemsResponse.php');
require_once($win_cli_dir . 'Model\ListOrderItemsByNextTokenRequest.php');
require_once($win_cli_dir . 'Model\ListOrderItemsByNextTokenResponse.php');
// Orders API endpoint, version 2013-09-01.
$serviceUrl = "https://mws.amazonservices.com/Orders/2013-09-01";
// Client options: no proxy (null host, port -1) and MaxErrorRetry of 3.
$config = array(
'ServiceURL' => $serviceUrl,
'ProxyHost' => null,
'ProxyPort' => -1,
'ProxyUsername' => null,
'ProxyPassword' => null,
'MaxErrorRetry' => 3,
// NOTE(review): the closing of $config and the constructor arguments are not shown in this listing.
$service = new MarketplaceWebServiceOrders_Client(
//TODO StartScript
//bug limit page
// One row per distinct order id stored in listorders.
$sql = 'SELECT DISTINCT AmazonOrderId FROM listorders';
$w = 0;
// NOTE(review): $dbh is not defined in the visible code; presumably created by .config.db.php - confirm.
foreach ($dbh->query($sql) as $row) {
echo "\r\n" . date('Y-m-d H:i:s') . "\r\n";
$InputAmazonOrderId = $row['AmazonOrderId'];
// Looks for an existing ListOrderItems row for this order id.
// NOTE(review): the order id is concatenated into the SQL text; a prepared statement would be safer.
// NOTE(review): the backslash in "$sql\_inserted" (and in the class name further down) looks like an escaping artifact.
$sql\_inserted = 'SELECT id FROM ListOrderItems WHERE AmazonOrderId="' . $InputAmazonOrderId . '" LIMIT 1';
foreach ($dbh->query($sql\_inserted) as $inserted) {
if (!empty($inserted)) {
// NOTE(review): "\\r\\n" inside double quotes prints a literal backslash-r backslash-n, not a line break.
echo "\\r\\n" . $InputAmazonOrderId . " inserted\\r\\n";
// $w++;//TODO modify
// if ($w % 30 == 0) sleep(8);
// NOTE(review): no seller id or order id is set on the request in the visible code - confirm.
$request = new MarketplaceWebServiceOrders\_Model\_ListOrderItemsRequest();
invokeListOrderItems($service, $request);
/**
 * Occasionally pauses the script for a random number of seconds.
 *
 * A pause happens only when the current Unix time is an exact multiple of
 * $intervalSeconds; its length is a random whole number of seconds between
 * 1 and $intervalSeconds.
 *
 * @param int $intervalSeconds Interval and maximum pause, in seconds. Values
 *                             below 1 make the call a no-op.
 * @return void
 */
function ExponentialBackoffSleep($intervalSeconds = 8)
{
    $intervalSeconds = (int) $intervalSeconds;

    // `%` by zero throws DivisionByZeroError, and a non-positive upper bound
    // could hand sleep() a negative value, so such intervals are ignored.
    if ($intervalSeconds < 1) {
        return;
    }

    if (time() % $intervalSeconds === 0) {
        sleep(rand(1, $intervalSeconds));
    }
}
// Exception to DB
//to move
/**
 * Echoes the diagnostic fields of an API exception, one labelled line each.
 *
 * @param object $ex Exception-like object providing getMessage(), getStatusCode(),
 *                   getErrorCode(), getErrorType(), getRequestId(), getXML() and
 *                   getResponseHeaderMetadata(); each result is string-concatenated.
 * @return void
 */
function apiCaughtException($ex)
{
    echo "Caught Exception: " . $ex->getMessage() . "\n";
    echo "Response Status Code: " . $ex->getStatusCode() . "\n";
    echo "Error Code: " . $ex->getErrorCode() . "\n";
    echo "Error Type: " . $ex->getErrorType() . "\n";
    echo "Request ID: " . $ex->getRequestId() . "\n";
    echo "XML: " . $ex->getXML() . "\n";
    echo "ResponseHeaderMetadata: " . $ex->getResponseHeaderMetadata() . "\n";
}
// Calls ListOrderItems for one request, walks the order items in the parsed XML
// and passes each one to TMPdbInsertListOrders().
// $service - client implementing MarketplaceWebServiceOrders_Interface
// $request - the ListOrderItems request object
// NOTE(review): the function braces, the end of the foreach and the catch body are
// not present in this listing - confirm against the original script.
function invokeListOrderItems(MarketplaceWebServiceOrders_Interface $service, $request)
// Both values are read from global scope; $link is not defined in the visible code - confirm where it is created.
global $link;
global $InputAmazonOrderId;
try {
$response = $service->ListOrderItems($request);
//TODO class XML(){}
$dom = new DOMDocument();
$dom->preserveWhiteSpace = false;
$dom->formatOutput = true;
// NOTE(review): nothing is loaded into $dom in the visible code, so saveXML() would
// serialize an empty document - a load of the response XML was presumably lost; confirm.
$savexml = $dom->saveXML();
$readxml = simplexml_load_string($savexml);
// Container element holding the individual order items.
$obj = $readxml->ListOrderItemsResult->OrderItems;
foreach ($obj->children() AS $one) {
TMPdbInsertListOrders($one, $link, $InputAmazonOrderId);
echo("ResponseHeaderMetadata: " . $response->getResponseHeaderMetadata() . "\n");
} catch (MarketplaceWebServiceOrders_Exception $ex) {
//TODO EndScript
// Prints a timestamped end-of-run marker.
echo "\r\n" . date('Y-m-d H:i:s') . " TODO EndScript \r\n";
/**
 * Occasionally pauses the script for a random number of seconds.
 *
 * A pause happens only when the current Unix time is an exact multiple of
 * $intervalSeconds; its length is a random whole number of seconds between
 * 1 and $intervalSeconds.
 *
 * @param int $intervalSeconds Interval and maximum pause, in seconds. Values
 *                             below 1 make the call a no-op.
 * @return void
 */
function ExponentialBackoffSleep($intervalSeconds = 8)
{
    $intervalSeconds = (int) $intervalSeconds;

    // `%` by zero throws DivisionByZeroError, and a non-positive upper bound
    // could hand sleep() a negative value, so such intervals are ignored.
    if ($intervalSeconds < 1) {
        return;
    }

    if (time() % $intervalSeconds === 0) {
        sleep(rand(1, $intervalSeconds));
    }
}
//TODO modify
// Fixed throttle: pauses 8 seconds whenever the counter $w is a multiple of 30 (0 included).
if ($w % 30 == 0) sleep(8);
Exponential Backoff And Jitter
04 Mar 2015 in Performance, Scalability | Permalink
Optimistic concurrency control (OCC) is a time-honored way for multiple writers to safely modify a single object without losing writes. OCC has three nice properties: it will always make progress as long as the underlying store is available, it’s easy to understand, and it’s easy to implement. DynamoDB’s conditional writes make OCC a natural fit for DynamoDB users, and it’s natively supported by the DynamoDBMapper client.
While OCC is guaranteed to make progress, it can still perform quite poorly under high contention. The simplest of these contention cases is when a whole lot of clients start at the same time, and try to update the same database row. With one client guaranteed to succeed every round, the time to complete all the updates grows linearly with contention.
For the graphs in this post, I used a small simulator to model the behavior of OCC on a network with delay (and variance in delay), against a remote database. In this simulation, the network introduces delay with a mean of 10ms and variance of 4ms. The first simulation shows how completion time grows linearly with contention. This linear growth is because one client succeeds every round, so it takes N rounds for all N clients to succeed.
Unfortunately, that’s not the whole picture. With N clients contending, the total amount of work done by the system increases with N².
The problem here is that N clients compete in the first round, N-1 in the second round, and so on. Having every client compete in every round is wasteful. Slowing clients down may help, and the classic way to slow clients down is capped exponential backoff. Capped exponential backoff means that clients multiply their backoff by a constant after each attempt, up to some maximum value. In our case, after each unsuccessful attempt, clients sleep for:
sleep = min(cap, base * 2 ** attempt)
Running the simulation again shows that backoff helps a small amount, but doesn’t solve the problem. Client work has only been reduced slightly.
The best way to see the problem is to look at the times these exponentially backed-off calls happen.
It’s obvious that the exponential backoff is working, in that the calls are happening less and less frequently. The problem also stands out: there are still clusters of calls. Instead of reducing the number of clients competing in every round, we’ve just introduced times when no client is competing. Contention hasn’t been reduced much, although the natural variance in network delay has introduced some spreading.
The solution isn’t to remove backoff. It’s to add jitter. Initially, jitter may appear to be a counter-intuitive idea: trying to improve the performance of a system by adding randomness. The time series above makes a great case for jitter – we want to spread out the spikes to an approximately constant rate. Adding jitter is a small change to the sleep function:
sleep = random_between(0, min(cap, base * 2 ** attempt))
That time series looks a whole lot better. The gaps are gone, and beyond the initial spike, there’s an approximately constant rate of calls. It’s also had a great effect on the total number of calls.
In the case with 100 contending clients, we’ve reduced our call count by more than half. We’ve also significantly improved the time to completion, when compared to un-jittered exponential backoff.
There are a few ways to implement these timed backoff loops. Let’s call the algorithm above “Full Jitter”, and consider two alternatives. The first alternative is “Equal Jitter”, where we always keep some of the backoff and jitter by a smaller amount:
temp = min(cap, base * 2 ** attempt)
sleep = temp / 2 + random_between(0, temp / 2)
The intuition behind this one is that it prevents very short sleeps, always keeping some of the slow down from the backoff. A second alternative is “Decorrelated Jitter”, which is similar to “Full Jitter”, but we also increase the maximum jitter based on the last random value.
sleep = min(cap, random_between(base, sleep * 3))
Which approach do you think is best?
Looking at the amount of client work, the number of calls is approximately the same for “Full” and “Equal” jitter, and higher for “Decorrelated”. Both cut down work substantially relative to both the no-jitter approaches.
The no-jitter exponential backoff approach is the clear loser. It not only takes more work, but also takes more time than the jittered approaches. In fact, it takes so much more time we have to leave it off the graph to get a good comparison of the other methods.
Of the jittered approaches, “Equal Jitter” is the loser. It does slightly more work than “Full Jitter”, and takes much longer. The decision between “Decorrelated Jitter” and “Full Jitter” is less clear. The “Full Jitter” approach uses less work, but slightly more time. Both approaches, though, present a substantial decrease in client work and server load.
It’s worth noting that none of these approaches fundamentally change the N² nature of the work to be done, but do substantially reduce work at reasonable levels of contention. The return on implementation complexity of using jittered backoff is huge, and it should be considered a standard approach for remote clients.
All of the graphs and numbers from this post were generated using a simple simulation of OCC behavior. You can get our simulator code on GitHub, in the aws-arch-backoff-simulator project.
- Marc Brooker
# Simulator for the effects of backoff and jitter on a remote OCC system.
import heapq
import random
class Net:
    """Simulated network link whose per-message delay is drawn at random."""

    def __init__(self, mean, sd):
        # mean and sd are passed straight to random.normalvariate() in delay().
        self.mean = mean
        self.sd = sd

    def delay(self):
        """Return one non-negative delay sample."""
        # We use a normal distribution model. Networks are more likely to be a Weibull model
        # in reality, but this is close enough for the model comparison.
        # abs() keeps the sample non-negative.
        return abs(random.normalvariate(self.mean, self.sd))
class Backoff:
    """Base class for backoff strategies; holds the base delay and the cap."""

    def __init__(self, base, cap):
        self.base = base
        self.cap = cap

    def expo(self, n):
        """Return the capped exponential delay ``min(cap, base * 2**n)`` for attempt ``n``."""
        return min(self.cap, pow(2, n) * self.base)
class NoBackoff(Backoff):
    """Strategy that never waits between attempts."""

    def backoff(self, n):
        """Return 0 regardless of the attempt number ``n``."""
        return 0
class ExpoBackoff(Backoff):
    """Capped exponential backoff without any jitter."""

    def backoff(self, n):
        """Return the capped exponential delay for attempt ``n``."""
        return self.expo(n)
class ExpoBackoffEqualJitter(Backoff):
    """Keeps half of the capped exponential delay and randomizes the other half."""

    def backoff(self, n):
        """Return ``v/2 + uniform(0, v/2)`` where ``v`` is the capped exponential delay."""
        # Divide by 2.0 so integer base/cap values are not truncated under Python 2.
        half = self.expo(n) / 2.0
        return half + random.uniform(0, half)
class ExpoBackoffFullJitter(Backoff):
    """Draws the delay uniformly between 0 and the capped exponential delay."""

    def backoff(self, n):
        """Return ``uniform(0, v)`` where ``v`` is the capped exponential delay for ``n``."""
        return random.uniform(0, self.expo(n))
class ExpoBackoffDecorr(Backoff):
    """Jittered backoff whose upper bound grows from the previously returned delay."""

    def __init__(self, base, cap):
        Backoff.__init__(self, base, cap)
        # Last delay handed out; starts at the base delay.
        self.sleep = self.base

    def backoff(self, n):
        """Return the next delay; the attempt number ``n`` is not used."""
        # Draw between base and three times the previous delay, then apply the cap.
        self.sleep = min(self.cap, random.uniform(self.base, self.sleep * 3))
        return self.sleep
class Stats:
    """Counters accumulated over a simulation: total calls and failed calls."""

    def __init__(self):
        self.failures = 0
        self.calls = 0
def msg(tm, send_to, reply_to, payload):
    """Build an event tuple ``(tm, send_to, reply_to, payload)``.

    ``tm`` must be non-negative and ``send_to`` must not be None; both are
    checked with assertions.
    """
    assert tm >= 0
    assert send_to is not None
    return (tm, send_to, reply_to, payload)
class OccServer:
    """Single versioned row that accepts a write only for the current version."""

    def __init__(self, net, stats, ts_f):
        self.version = 0
        self.net = net
        self.stats = stats
        # NOTE(review): ts_f is stored but not used by the methods visible here;
        # confirm whether a per-write timestamp log line was lost.
        self.ts_f = ts_f

    # Try to write the row. If you provide the right version number (obtained from a read),
    # the write will succeed.
    def write(self, tm, request):
        """Handle a write event; ``request[3]`` is the version the client read.

        Returns a reply event addressed to ``request[2]`` whose payload is
        True when the write was applied and False otherwise.
        """
        self.stats.calls += 1
        success = request[3] == self.version
        if success:
            self.version += 1
        else:
            # Only rejected writes count as failures.
            self.stats.failures += 1
        return msg(tm + self.net.delay(), request[2], None, success)

    # Read the current version number of the row.
    def read(self, tm, request):
        """Handle a read event; replies to ``request[2]`` with the current version."""
        return msg(tm + self.net.delay(), request[2], None, self.version)
class OccClient:
    """Client that repeats read-then-write until its write is accepted."""

    def __init__(self, server, net, backoff):
        self.server = server
        self.net = net
        # Number of rejected writes so far; passed to the backoff strategy.
        self.attempt = 0
        self.backoff = backoff

    def start(self, tm):
        """Return the first event: a read sent to the server, answered via read_rsp."""
        return msg(tm + self.net.delay(), self.server.read, self.read_rsp, None)

    def read_rsp(self, tm, request):
        """Handle a read reply by sending a write that carries the version just read."""
        return msg(tm + self.net.delay(), self.server.write, self.write_rsp, request[3])

    def write_rsp(self, tm, request):
        """Handle a write reply; ``request[3]`` is the success flag.

        Returns None when the write succeeded. Otherwise bumps the attempt
        counter and schedules a new read after the network delay plus the
        strategy's backoff.
        """
        if request[3]:
            return None
        self.attempt += 1
        retry_at = tm + self.net.delay() + self.backoff.backoff(self.attempt)
        return msg(retry_at, self.server.read, self.read_rsp, None)
def run_sim(queue):
    """Process events from the heap ``queue`` in time order until it is empty.

    Each event is a tuple whose first item is its time and whose second item
    is a callable invoked as ``handler(time, event)``. Returns the time of the
    last event processed (0 for an empty queue).
    """
    tm = 0
    while queue:
        # Pull an event off the priority queue
        event = heapq.heappop(queue)
        assert event[0] >= tm  # Time must move forward
        tm = event[0]
        next_event = event[1](tm, event)
        if next_event is not None:
            # If it caused another event to be generated, enqueue it
            heapq.heappush(queue, next_event)
    return tm
def setup_sim(clients, backoff_cls, ts_f, stats):
    """Build one simulation: a server plus ``clients`` clients, each with its first event queued.

    ``backoff_cls`` is instantiated once per client as ``backoff_cls(5, 2000)``.
    Returns ``(queue, stats)``, where ``queue`` is a heap of initial events.
    """
    net = Net(10, 2)
    queue = []
    server = OccServer(net, stats, ts_f)
    # range() rather than xrange() so the loop runs on both Python 2 and 3.
    for _ in range(clients):
        client = OccClient(server, net, backoff_cls(5, 2000))
        # NOTE(review): heap entries are tuples whose second item is a bound method;
        # on Python 3 two events with exactly equal times would fail to compare.
        heapq.heappush(queue, client.start(0))
    return (queue, stats)
# (strategy class, label) pairs. run() builds clients from the class and uses
# the label for the "ts_<label>" file name and the last CSV column.
# NOTE(review): ExpoBackoffDecorr is defined in this file but not listed here.
backoff_types = (
    (ExpoBackoff, "Exponential"),
    (ExpoBackoffEqualJitter, "EqualJitter"),
    (ExpoBackoffFullJitter, "FullJitter"),
    (NoBackoff, "None"),
)
def run():
    """Run the sweep and write one averaged CSV row per (client count, strategy).

    Client counts go from 10 to 190 in steps of 10. For each strategy in
    ``backoff_types`` the simulation is repeated and the row written to
    backoff_results.csv holds: clients, mean completion time, mean call count,
    strategy label.
    """
    runs = 100
    with open("backoff_results.csv", "w") as f:
        # range() rather than xrange() so the loops run on both Python 2 and 3.
        for i in range(1, 20):
            clients = i * 10
            for backoff in backoff_types:
                with open("ts_" + backoff[1], "w") as ts_f:
                    # One Stats object is shared by all repetitions so its counters accumulate.
                    stats = Stats()
                    tm = 0
                    for _ in range(runs):
                        queue, stats = setup_sim(clients, backoff[0], ts_f, stats)
                        tm += run_sim(queue)
                    # Written once per configuration, after all repetitions, because
                    # the values are averages over `runs`.
                    f.write("%d,%d,%d,%s\n" % (clients, tm / runs, stats.calls / runs, backoff[1]))