concurrency techdrop
TRANSCRIPT
Clearpoint Techdrop: Concurrency
@mikeb2701 http://bad-concurrency.blogspot.com
Friday, 8 March 13
/**
 * Controller whose order-placement path bumps a shared {@link Counter} once per call.
 * Which {@code Counter} implementation backs it is left open here.
 */
public class OrderController {

    // What implementation???
    private Counter orderCounter;

    /**
     * Increments the order counter, then places the order.
     *
     * @param productId  product identifier (not read by the code shown here)
     * @param customerId customer identifier (not read by the code shown here)
     */
    public void placeOrder(long productId, long customerId) {
        this.orderCounter.increment();
        // Place the order
    }
}
Friday, 8 March 13
/**
 * {@link Counter} backed by a plain {@code long} field with no synchronization of any kind.
 * The increment is an unguarded read-modify-write, so concurrent callers can lose updates.
 */
public class BrokenCounter implements Counter {

    private long value = 0;

    /** Adds one to the counter (not atomic). */
    @Override
    public void increment() {
        value += 1;
    }

    /** @return the current contents of the counter field */
    @Override
    public long getValue() {
        return this.value;
    }
}
Friday, 8 March 13
public class SynchronizedCounter implements Counter {
private final Object mutex = new Object();
private long value = 0;
public void increment() { synchronized (mutex) { value++; } }
public long getValue() { return value; }}
Friday, 8 March 13
public class LockedCounter implements Counter { private final Lock l = new ReentrantLock(); private long value = 0;
public void increment() { l.lock(); try { value++; } finally { l.unlock(); } }
public long getValue() { return value; }}
Friday, 8 March 13
0
10
20
30
40
1 2 4 8 16 32
Mops/sec
Threads
Sync Locked
Friday, 8 March 13
/**
 * {@link Counter} that delegates to a single shared {@link AtomicLong}.
 */
public class AtomicCounter implements Counter {

    private final AtomicLong value = new AtomicLong();

    /** Atomically adds one; the updated value is discarded. */
    @Override
    public void increment() {
        this.value.incrementAndGet();
    }

    /** @return the value currently held by the underlying {@code AtomicLong} */
    @Override
    public long getValue() {
        return this.value.get();
    }
}
Friday, 8 March 13
/**
 * Atomically increments the current value by one using a compare-and-set retry loop.
 *
 * @return the value after the increment
 */
public final long incrementAndGet() {
    long observed;
    long updated;
    do {
        // Re-read on every attempt; a failed CAS means the value changed since it was read.
        observed = get();
        updated = observed + 1;
    } while (!compareAndSet(observed, updated));
    return updated;
}
Friday, 8 March 13
0
22.5
45
67.5
90
1 2 4 8 16 32
Sync Locked Atomic
Friday, 8 March 13
/**
 * {@link Counter} that gives every incrementing thread its own {@link AtomicLong} cell and
 * computes the total by summing all cells.
 */
public class ThreadLocalCounter implements Counter {

    /** Every per-thread cell ever created, so {@link #getValue()} can walk them. */
    ConcurrentLinkedQueue<AtomicLong> values = new ConcurrentLinkedQueue<AtomicLong>();

    /** Lazily creates the calling thread's cell and registers it in {@link #values}. */
    private final ThreadLocal<AtomicLong> counterLocal = new ThreadLocal<AtomicLong>() {
        @Override
        protected AtomicLong initialValue() {
            AtomicLong cell = new AtomicLong();
            values.add(cell);
            return cell;
        }
    };

    /**
     * Adds one to the calling thread's own cell. The new value is written with
     * {@code lazySet} rather than {@code set}.
     */
    public void increment() {
        final AtomicLong cell = counterLocal.get();
        final long bumped = cell.get() + 1;
        cell.lazySet(bumped);
    }

    /**
     * Sums the cells of all threads that have incremented so far.
     *
     * @return the sum of every registered cell's current value
     */
    public long getValue() {
        long total = 0;
        for (AtomicLong cell : values) {
            total += cell.get();
        }
        return total;
    }
}
Friday, 8 March 13
0
375
750
1125
1500
1 2 4 8 16 32
Sync Locked Atomic Thread Local
Friday, 8 March 13
3. Write cache friendly code
C2 C3C1 C4
L1 L1 L1 L1
L2 L2 L2 L2
L3
C2 C3C1 C4
L1 L1 L1 L1
L2 L2 L2 L2
MC
DRAM
DRAM
DRAM
DRAM
DRAM
DRAM
Registers: <1ns
L1: ~4 cycles, ~1ns
L2: ~10 cycles, ~3ns
MC
L3: ~42 cycles, ~15ns
QPI: ~20ns
DRAM: ~65ns
Friday, 8 March 13
Friday, 8 March 13
Memory Models
Friday, 8 March 13
/**
 * Fixed-capacity (1024 slot) array-backed FIFO queue indexed by two free-running
 * {@code volatile int} counters.
 *
 * <p>The capacity is a power of two, so {@code index & (length - 1)} maps a counter onto a
 * slot. Fullness and emptiness are tested with subtraction, so they stay correct when the
 * counters wrap around.
 *
 * <p>Thread-safety: {@code head} is written only by {@link #offer(Object)} and {@code tail}
 * only by {@link #poll()}, and neither update is atomic. The class is therefore only safe
 * with at most one offering thread and at most one polling thread (presumably the intended
 * usage; confirm against callers).
 */
public class SimpleQueue {

    private final Object[] data = new Object[1024];
    private volatile int head, tail;

    /**
     * Appends an element if there is room.
     *
     * @param e the element to enqueue
     * @return {@code true} if the element was stored, {@code false} if the queue is full
     */
    public boolean offer(Object e) {
        // Only this method writes head, so one volatile read is enough.
        final int h = head;
        if (h - (tail + data.length) < 0) {
            data[h & (data.length - 1)] = e;
            // The volatile store publishes the slot write above to the polling thread.
            head = h + 1;
            return true;
        }
        return false;
    }

    /**
     * Removes and returns the oldest element.
     *
     * @return the dequeued element, or {@code null} if the queue is empty
     */
    public Object poll() {
        // Only this method writes tail, so one volatile read is enough.
        final int t = tail;
        if (head - t > 0) {
            final int slot = t & (data.length - 1);
            Object e = data[slot];
            // Clear the slot so the queue does not keep the dequeued object reachable.
            // This happens before the volatile store to tail, so the offering thread cannot
            // reuse the slot until the clear is visible to it.
            data[slot] = null;
            tail = t + 1;
            return e;
        }
        return null;
    }
}
Friday, 8 March 13
Happens-Before
Friday, 8 March 13
Causality
Causality
"Fear will keep the local systems [instructions] in line."
- Grand Moff Wilhuff Tarkin
Friday, 8 March 13
Can reorder?                   | 2nd Operation
1st Operation                  | Normal Load / Normal Store | Volatile Load / Monitor Enter | Volatile Store / Monitor Exit
Normal Load / Normal Store     |                            |                               | NO
Volatile Load / Monitor Enter  | NO                         | NO                            | NO
Volatile Store / Monitor Exit  |                            | NO                            | NO
Friday, 8 March 13
• Loads are not reordered with other loads.
• Stores are not reordered with other stores.
• Stores are not reordered with older loads.
• In a multiprocessor system, memory ordering obeys causality (memory ordering respects transitive visibility).
• In a multiprocessor system, stores to the same location have a total order.
• In a multiprocessor system, locked instructions to the same location have a total order.
• Loads and Stores are not reordered with locked instructions.
Friday, 8 March 13
Non-Blocking Primitives
Friday, 8 March 13
Unsafe
Friday, 8 March 13
public class AtomicLong extends Number implements Serializable {
// ... private volatile long value;
// ... /** * Sets to the given value. * * @param newValue the new value */ public final void set(long newValue) { value = newValue; }
// ...}
Friday, 8 March 13
# {method} 'set' '(J)V' in 'java/util/concurrent/atomic/AtomicLong'# this: rsi:rsi = 'java/util/concurrent/atomic/AtomicLong'# parm0: rdx:rdx = long# [sp+0x20] (sp of caller) mov 0x8(%rsi),%r10d shl $0x3,%r10 cmp %r10,%rax jne 0x00007f1f410378a0 ; {runtime_call} xchg %ax,%ax nopl 0x0(%rax,%rax,1) xchg %ax,%ax push %rbp sub $0x10,%rsp nop mov %rdx,0x10(%rsi) lock addl $0x0,(%rsp) ;*putfield value ; - j.u.c.a.AtomicLong::set@2 (line 112) add $0x10,%rsp pop %rbp test %eax,0xa40fd06(%rip) # 0x00007f1f4b471000 ; {poll_return}
Friday, 8 March 13
public class AtomicLong extends Number implements Serializable {
// setup to use Unsafe.compareAndSwapLong for updates private static final Unsafe unsafe = Unsafe.getUnsafe(); private static final long valueOffset;
// ... /** * Eventually sets to the given value. * * @param newValue the new value * @since 1.6 */ public final void lazySet(long newValue) { unsafe.putOrderedLong(this, valueOffset, newValue); }
// ...}
Friday, 8 March 13
# {method} 'lazySet' '(J)V' in 'java/util/concurrent/atomic/AtomicLong'# this: rsi:rsi = 'java/util/concurrent/atomic/AtomicLong'# parm0: rdx:rdx = long# [sp+0x20] (sp of caller) mov 0x8(%rsi),%r10d shl $0x3,%r10 cmp %r10,%rax jne 0x00007f1f410378a0 ; {runtime_call} xchg %ax,%ax nopl 0x0(%rax,%rax,1) xchg %ax,%ax push %rbp sub $0x10,%rsp nop mov %rdx,0x10(%rsi) ;*invokevirtual putOrderedLong ; - AtomicLong::lazySet@8 (line 122) add $0x10,%rsp pop %rbp test %eax,0xa41204b(%rip) # 0x00007f1f4b471000 ; {poll_return}
Friday, 8 March 13
public class AtomicInteger extends Number implements Serializable {
// setup to use Unsafe.compareAndSwapInt for updates private static final Unsafe unsafe = Unsafe.getUnsafe(); private static final long valueOffset;
private volatile int value; //...
public final boolean compareAndSet(int expect, int update) { return unsafe.compareAndSwapInt(this, valueOffset, expect, update); }}
Friday, 8 March 13
# {method} 'compareAndSet' '(JJ)Z' in 'java/util/concurrent/atomic/AtomicLong' # this: rsi:rsi = 'java/util/concurrent/atomic/AtomicLong' # parm0: rdx:rdx = long # parm1: rcx:rcx = long # [sp+0x20] (sp of caller) mov 0x8(%rsi),%r10d shl $0x3,%r10 cmp %r10,%rax jne 0x00007f6699037a60 ; {runtime_call} xchg %ax,%ax nopl 0x0(%rax,%rax,1) xchg %ax,%ax sub $0x18,%rsp mov %rbp,0x10(%rsp) mov %rdx,%rax lock cmpxchg %rcx,0x10(%rsi) sete %r11b movzbl %r11b,%r11d ;*invokevirtual compareAndSwapLong ; - j.u.c.a.AtomicLong::compareAndSet@9 (line 149) mov %r11d,%eax add $0x10,%rsp pop %rbp test %eax,0x91df935(%rip) # 0x00007f66a223e000 ; {poll_return}
Friday, 8 March 13
0
2.25
4.5
6.75
9
nanoseconds/op
set() compareAndSet lazySet()
Friday, 8 March 13
Example - Disruptor Multi-producer
/**
 * Three-step publication: obtain a sequence from the disruptor, write the value at that
 * sequence, then publish the sequence.
 *
 * @param disruptor the disruptor to publish into
 * @param value     the value to store
 */
private void publish(Disruptor disruptor, long value) {
    final long claimed = disruptor.next();
    disruptor.setValue(claimed, value);
    disruptor.publish(claimed);
}
Friday, 8 March 13
Example - Disruptor Multi-producer
/**
 * Claims the sequence that follows {@code nextSequence}.
 *
 * <p>Each attempt reads {@code nextSequence}, parks for one nanosecond at a time while the
 * candidate is greater than {@code readSequence.get() + size}, then tries to install the
 * candidate with a compare-and-set. A failed compare-and-set restarts the attempt.
 *
 * @return the claimed sequence
 */
public long next() {
    for (;;) {
        final long claimedFrom = nextSequence.get();
        final long candidate = claimedFrom + 1;

        while (candidate > (readSequence.get() + size)) {
            LockSupport.parkNanos(1L);
        }

        if (nextSequence.compareAndSet(claimedFrom, candidate)) {
            return candidate;
        }
    }
}
Friday, 8 March 13
Algorithm: Spin - 1
public void publish(long sequence) { long sequenceMinusOne = sequence - 1; while (cursor.get() != sequenceMinusOne) { // Spin }
cursor.lazySet(sequence); }
Friday, 8 March 13
0
6.25
12.5
18.75
25
1 2 3 4 5 6 7 8
million ops/sec
Producer Threads
Spin - 1
Friday, 8 March 13
Algorithm: Co-Op
/**
 * Publishes {@code sequence} by recording it in {@code pendingPublication} and then trying
 * to advance {@code cursor} over it and over any later sequences already recorded there.
 *
 * @param sequence the sequence to publish
 */
public void publish(long sequence) {
    // Wait while this sequence is more than pendingPublication.length() ahead of the cursor,
    // yielding the thread once every RETRIES iterations.
    int counter = RETRIES;
    while (sequence - cursor.get() > pendingPublication.length()) {
        if (--counter == 0) { Thread.yield(); counter = RETRIES; }
    }

    long expectedSequence = sequence - 1;
    // The cast binds tighter than '&': sequence is narrowed to int first, then masked.
    pendingPublication.set((int) sequence & pendingMask, sequence);

    // The cursor is already at or past this sequence, so there is nothing left to do here.
    if (cursor.get() >= sequence) { return; }

    // Try to move the cursor from sequence - 1 to sequence. After each successful step keep
    // going only while the slot for the following sequence already holds that sequence.
    // A failed compareAndSet ends the loop.
    long nextSequence = sequence;
    while (cursor.compareAndSet(expectedSequence, nextSequence)) {
        expectedSequence = nextSequence;
        nextSequence++;
        if (pendingPublication.get((int) nextSequence & pendingMask) != nextSequence) { break; }
    }
}
Friday, 8 March 13
0
7.5
15
22.5
30
1 2 3 4 5 6 7 8
million ops/sec
Producer Threads
Spin - 1 Co-Op
Friday, 8 March 13
Algorithm: Buffer
/**
 * Claims the sequence that follows {@code cursor}.
 *
 * <p>Each attempt reads {@code cursor}, parks for one nanosecond at a time while the
 * candidate is greater than {@code readSequence.get() + size}, then tries to install the
 * candidate with a compare-and-set. A failed compare-and-set restarts the attempt.
 *
 * @return the claimed sequence
 */
public long next() {
    for (;;) {
        final long claimedFrom = cursor.get();
        final long candidate = claimedFrom + 1;

        while (candidate > (readSequence.get() + size)) {
            LockSupport.parkNanos(1L);
        }

        if (cursor.compareAndSet(claimedFrom, candidate)) {
            return candidate;
        }
    }
}
Friday, 8 March 13
Algorithm: Buffer
/**
 * Marks {@code sequence} as published by storing {@code sequence >>> indexShift}, narrowed
 * to {@code int}, into the {@code published} slot chosen by {@code indexOf(sequence)}.
 *
 * @param sequence the sequence to publish
 */
public void publish(long sequence) {
    final int marker = (int) (sequence >>> indexShift);
    final int slot = indexOf(sequence);
    published.set(slot, marker);
}
// Get Value int availableValue = (int) (current >>> indexShift); int index = indexOf(current); while (published.get(index) != availableValue) { // Spin }
Friday, 8 March 13
0
17.5
35
52.5
70
1 2 3 4 5 6 7 8
million ops/sec
Threads
Spin - 1 Co-Op Buffer
Friday, 8 March 13
• https://github.com/mikeb01/jax2012
• http://yow.eventer.com/yow-2012-1012/lock-free-algorithms-for-ultimate-performance-by-martin-thompson-1250
Q&A
Friday, 8 March 13