Data in Motion: Streaming Static Data Efficiently


TRANSCRIPT

MANCHESTER LONDON NEW YORK

Martin Zapletal @zapletal_martin #ScalaDays

Data in Motion: Streaming Static Data Efficiently in Akka Persistence (and elsewhere)

@cakesolutions

Data at scale

● Reactive
● Real time, asynchronous and message driven
● Elastic and scalable
● Resilient and fault tolerant

Streams

(Diagram: a stream of events for persistence_id1: event 1, event 2, event 3, event 4.)

Akka Persistence


class AccountActor(protected[this] val passivationTimeout: Duration) extends PersistentActor {

  override val persistenceId: String = extractId(self.path.name)

  override def receiveCommand: Receive = active(initialState)

  private def active(balance: State): Receive = {
    case command: AccountCommand => command match {
      case cmd: UpdateBalanceCommand =>
        cmd.validate().fold({ balanceUpdated =>
          persist(balanceUpdated) { persisted =>
            val updatedState = balance.update(persisted)
            sender() ! updatedState
            context.become(active(updatedState))
          }
        }, processValidationErrors)

      ...
    }
  }
}


case cmd: UpdateGroupBalanceCommand =>
  cmd.validate().fold({ groupBalanceUpdated =>
    persist(Tagged(groupBalanceUpdated, Set("tag1"))) { persisted =>
      sender() ! groupBalanceUpdated
    }
  }, processValidationErrors)


override def receiveRecover: Receive = {
  var state: State = initialState

  {
    case balanceUpdated: BalanceUpdatedEvent =>
      state = state.update(balanceUpdated)

    case RecoveryCompleted =>
      context.become(active(state))
  }
}


(Diagram: values 0, 5, 10, 1 and 5 are appended to the log at positions 0-4, in insertion order.)

Log data structure

persistence_id  partition_nr  events
0               0             event 0, event 1, event 2, ...
0               1             event 100, event 101, event 102, ...
1               0             event 0, event 1, event 2
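The mapping from sequence number to partition follows from this layout. A minimal sketch, assuming the fixed target partition size of 100 events suggested by the table above (the helper name is illustrative, not the plugin's exact code):

val targetPartitionSize = 100L

// Events 0..99 land in partition 0, events 100..199 in partition 1, and so on.
def partitionNr(sequenceNr: Long): Long = sequenceNr / targetPartitionSize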

Akka Persistence Cassandra
● Purely pull
● Event (log) data

Akka Persistence Query
● eventsByPersistenceId, allPersistenceIds, eventsByTag

(Diagram: the same event stream for persistence_id1, events 1-4, now read back out through the query side.)

implicit val system: ActorSystem = ...
implicit val materializer: Materializer = ...

lazy val queries: CassandraReadJournal = PersistenceQuery(system)
  .readJournalFor[CassandraReadJournal]("cassandra-query-journal")

queries
  .eventsByPersistenceId(persistenceId, 0, Long.MaxValue)
  .runForeach(println)


EventEnvelope(1,persistenceId,1,GroupBalanceUpdatedEvent(9248.0))
EventEnvelope(2,persistenceId,2,BalanceUpdatedEvent(4355.0))
EventEnvelope(3,persistenceId,3,BalanceUpdatedEvent(5245.0))
EventEnvelope(4,persistenceId,4,BalanceUpdatedEvent(4631.0))
EventEnvelope(5,persistenceId,5,BalanceUpdatedEvent(973.0))
...

implicit val system: ActorSystem = ...
implicit val materializer: Materializer = ...

lazy val queries: CassandraReadJournal = PersistenceQuery(system)
  .readJournalFor[CassandraReadJournal]("cassandra-query-journal")

queries
  .allPersistenceIds()
  .runForeach(println)


persistenceId5
persistenceId2
persistenceId4
persistenceId1
persistenceId4
...

implicit val system: ActorSystem = ...
implicit val materializer: Materializer = ...

lazy val queries: CassandraReadJournal = PersistenceQuery(system)
  .readJournalFor[CassandraReadJournal]("cassandra-query-journal")

queries
  .eventsByTag("tag1", 0)
  .runForeach(println)


implicit val system: ActorSystem = ...
implicit val materializer: Materializer = ...

lazy val queries: CassandraReadJournal = PersistenceQuery(system)
  .readJournalFor[CassandraReadJournal]("cassandra-query-journal")

val transform = Flow[EventEnvelope]
  .collect { case EventEnvelope(_, _, _, BalanceUpdatedEvent(value)) => value }
  .scan(new CircularFifoQueue[Double](5)) { (s, d) => s.add(d); s }

val g = RunnableGraph.fromGraph {
  GraphDSL.create() { implicit builder: GraphDSL.Builder[NotUsed] =>
    import akka.stream.scaladsl.GraphDSL.Implicits._

    queries.eventsByPersistenceId(persistenceId, 0, Long.MaxValue) ~> transform ~> kafkaSink

    ClosedShape
  }
}

g.run()

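The scan over a mutable CircularFifoQueue keeps the five most recent balances. A plain Akka Streams alternative (a sketch, not from the talk) achieves the same sliding window without mutable state:

val lastFive = Flow[EventEnvelope]
  .collect { case EventEnvelope(_, _, _, BalanceUpdatedEvent(value)) => value }
  .sliding(5) // emits the most recent five values as an immutable Seq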

public class AccountEntity extends PersistentEntity<AccountCommand, AccountEvent, State> {

  @Override
  public Behavior initialBehavior(Optional<State> snapshotState) {
    BehaviorBuilder b = newBehaviorBuilder(snapshotState.orElse(initialState));

    b.setCommandHandler(UpdateBalanceCommand.class, (cmd, ctx) -> {
      if (!validate(cmd)) {
        ctx.invalidCommand("...");
        return ctx.done();
      } else {
        return ctx.thenPersist(
          new BalanceUpdatedEvent(cmd.value),
          () -> ctx.reply(Done.getInstance()));
      }
    });

    b.setEventHandler(BalanceUpdatedEvent.class, evt -> state.update(evt));

    return b.build();
  }
}


public class AccountEventProcessor extends CassandraReadSideProcessor<AccountEvent> {

  AccountEventProcessor state = ...

  @Override
  public AggregateEventTag<AccountEvent> aggregateTag() {
    return Tag1.INSTANCE;
  }

  @Override
  public CompletionStage<Optional<UUID>> prepare(CassandraSession session) {
    return prepareCreateTables(session).thenCompose(a -> … // Prepare tables, statements, etc.
  }

  @Override
  public EventHandlers defineEventHandlers(EventHandlersBuilder builder) {
    builder.setEventHandler(AccountEvent.class, this::processAccountEvent);
    return builder.build();
  }

  private CompletionStage<List<BoundStatement>> processAccountEvent(AccountEvent event, UUID offset) {
    BoundStatement bindWriteAnalytics = writeAnalytics.bind();
    bindWriteAnalytics.setString("entity_id", event.id);
    ...
    return completedStatements(Arrays.asList(bindWriteAnalytics));
  }
}


Streaming static data

● Turning database into a stream, as sketched below
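A naive sketch of the idea, assuming a hypothetical selectFrom(seqNr) that returns the rows appended since the given sequence number: poll the table on a timer and emit whatever is new. The publisher described below refines this with demand-driven requests, buffering and asynchronous fetching.

import akka.stream.scaladsl.Source
import scala.concurrent.duration._

def naiveStream(from: Long) =
  Source.tick(0.seconds, 1.second, ())
    .statefulMapConcat { () =>
      var next = from
      _ => {
        val rows = selectFrom(next) // hypothetical: rows with sequence_nr >= next
        next += rows.size
        rows
      }
    }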

Pulling data from a log

(Diagram: the consumer pulls pages from the log as values 0, 5, 10 and 15 are appended, buffering each page and delivering it downstream before polling again.)

Actor publisher

private[query] abstract class QueryActorPublisher[MessageType, State: ClassTag](
    refreshInterval: Option[FiniteDuration]) extends ActorPublisher[MessageType] {

  protected def initialState: Future[State]
  protected def initialQuery(initialState: State): Future[Action]
  protected def requestNext(state: State, resultSet: ResultSet): Future[Action]
  protected def requestNextFinished(state: State, resultSet: ResultSet): Future[Action]
  protected def updateState(state: State, row: Row): (Option[MessageType], State)
  protected def completionCondition(state: State): Boolean

  private[this] def nextBehavior(...): Receive = {
    if (shouldFetchMore(...)) {
      listenableFutureToFuture(resultSet.fetchMoreResults()).map(FetchedResultSet).pipeTo(self)
      awaiting(resultSet, state, finished)
    } else if (shouldIdle(...)) {
      idle(resultSet, state, finished)
    } else if (shouldComplete(...)) {
      onCompleteThenStop()
      Actor.emptyBehavior
    } else if (shouldRequestMore(...)) {
      if (finished) requestNextFinished(state, resultSet).pipeTo(self)
      else requestNext(state, resultSet).pipeTo(self)
      awaiting(resultSet, state, finished)
    } else {
      idle(resultSet, state, finished)
    }
  }
}


(Diagram: the publisher's state machine. From initialQuery it moves through initialNewResultSet, request/newResultSet, fetchedResultSet and finished states, chosen by the shouldFetchMore, shouldIdle, shouldTerminate and shouldRequestMore decisions; Cancel or SubscriptionTimeout terminates it from any state. Red transitions deliver the buffer and update internal state (progress); blue transitions issue an asynchronous database query.)

SELECT * FROM ${tableName} WHERE
  persistence_id = ? AND
  partition_nr = ? AND
  sequence_nr >= ? AND
  sequence_nr <= ?
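For reference, a sketch of preparing and binding this query with the DataStax Java driver, assuming a connected session as in the surrounding snippets; the concrete table name and bound values are illustrative:

val prepared = session.prepare(
  "SELECT * FROM messages WHERE persistence_id = ? AND partition_nr = ? AND sequence_nr >= ? AND sequence_nr <= ?")
val rows = session.execute(
  prepared.bind("persistence_id1", Long.box(0L), Long.box(0L), Long.box(99L)))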


Events by persistence id

(Diagram: eventsByPersistenceId walks a persistence id's partitions in sequence-number order: partition 0 with event 0, event 1, event 2, then partition 1 with event 100, event 101, event 102.)

private[query] class EventsByPersistenceIdPublisher(...)
  extends QueryActorPublisher[PersistentRepr, EventsByPersistenceIdState](...) {

  override protected def initialState: Future[EventsByPersistenceIdState] = {
    ...
    EventsByPersistenceIdState(initialFromSequenceNr, 0, currentPnr)
  }

  override protected def updateState(
      state: EventsByPersistenceIdState,
      row: Row): (Option[PersistentRepr], EventsByPersistenceIdState) = {
    val event = extractEvent(row)
    val partitionNr = row.getLong("partition_nr") + 1

    (Some(event),
      EventsByPersistenceIdState(event.sequenceNr + 1, state.count + 1, partitionNr))
  }
}



All persistence ids

SELECT DISTINCT persistence_id, partition_nr FROM $tableName


private[query] class AllPersistenceIdsPublisher(...)
  extends QueryActorPublisher[String, AllPersistenceIdsState](...) {

  override protected def initialState: Future[AllPersistenceIdsState] =
    Future.successful(AllPersistenceIdsState(Set.empty))

  override protected def updateState(
      state: AllPersistenceIdsState,
      row: Row): (Option[String], AllPersistenceIdsState) = {

    val event = row.getString("persistence_id")

    if (state.knownPersistenceIds.contains(event)) {
      (None, state)
    } else {
      (Some(event), state.copy(knownPersistenceIds = state.knownPersistenceIds + event))
    }
  }
}


Events by tag

(Diagram: eventsByTag collects the events carrying tag 1 from across persistence ids and partitions, e.g. events 1, 2 and 100 of persistence id 0 and event 2 of persistence id 1, merging them into a single tagged stream while untagged events are skipped.)

Events by tag

(Diagram: a separate events-by-tag view stores the tagged events bucketed by time, e.g. buckets "tag 1 1/1/2016" and "tag 1 1/2/2016" holding Id 0 event 1, Id 0 event 2, Id 1 event 2 and Id 0 event 100 in timestamp order.)

SELECT * FROM $eventsByTagViewName$tagId WHERE
  tag$tagId = ? AND
  timebucket = ? AND
  timestamp > ? AND
  timestamp <= ?
ORDER BY timestamp ASC LIMIT ?
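A sketch of deriving the time bucket for a timestamp, assuming day-sized buckets rendered as in the diagrams (e.g. "1/1/2016"); the plugin's actual bucket format may differ:

import java.time.format.DateTimeFormatter
import java.time.{Instant, ZoneOffset}

def timeBucket(timestampMillis: Long): String =
  DateTimeFormatter.ofPattern("M/d/yyyy")
    .withZone(ZoneOffset.UTC)
    .format(Instant.ofEpochMilli(timestampMillis))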


(Diagram: the hierarchy of consistency models, strongest to weakest: Strong Serializable; Linearizable and Serializable; Sequential, RR and SI; Causal; WFR, MR, MW and RYW under PRAM; and EC, CS, MAW, RC and P-CI at the bottom.)


(Diagram: eventsByTag tracks the highest delivered sequence number per persistence id (persistence_id, seq). When a tagged event arrives out of order, e.g. event 100 of persistence id 0 becomes visible while event 2 is still missing, the expected sequence number does not match and the query must wait and retry before delivering further events for that persistence id.)

def replay(): Unit = {
  val backtracking = isBacktracking
  val limit =
    if (backtracking) maxBufferSize
    else maxBufferSize - buf.size
  val toOffs =
    if (backtracking && abortDeadline.isEmpty) highestOffset
    else UUIDs.endOf(System.currentTimeMillis() - eventualConsistencyDelayMillis)
  context.actorOf(EventsByTagFetcher.props(tag, currTimeBucket, currOffset, toOffs, limit,
    backtracking, self, session, preparedSelect, seqNumbers, settings))
  context.become(replaying(limit))
}

def replaying(limit: Int): Receive = {
  case env @ UUIDPersistentRepr(offs, _) => // Deliver buffer
  case ReplayDone(count, seqN, highest)  => // Request more
  case ReplayAborted(seqN, pid, expectedSeqNr, gotSeqNr) =>
    // Causality violation, wait and retry. Only applicable if all events
    // for the persistence_id are tagged.
  case ReplayFailed(cause) => // Failure
  case _: Request          => // Deliver buffer
  case Continue            => // Do nothing
  case Cancel              => // Stop
}


Akka Persistence Cassandra Replay

def asyncReplayMessages(persistenceId: String, fromSequenceNr: Long, toSequenceNr: Long, max: Long)
    (replayCallback: (PersistentRepr) => Unit): Future[Unit] = Future {
  new MessageIterator(persistenceId, fromSequenceNr, toSequenceNr, max).foreach { msg =>
    replayCallback(msg)
  }
}

class MessageIterator(persistenceId: String, fromSequenceNr: Long, toSequenceNr: Long, max: Long)
  extends Iterator[PersistentRepr] {

  private val initialFromSequenceNr =
    math.max(highestDeletedSequenceNumber(persistenceId) + 1, fromSequenceNr)
  private val iter = new RowIterator(persistenceId, initialFromSequenceNr, toSequenceNr)
  private var mcnt = 0L
  private var c: PersistentRepr = null
  private var n: PersistentRepr = PersistentRepr(Undefined)

  fetch()

  def hasNext: Boolean = ...
  def next(): PersistentRepr = ...
  ...
}


class RowIterator(persistenceId: String, fromSequenceNr: Long, toSequenceNr: Long)
  extends Iterator[Row] {

  var currentPnr = partitionNr(fromSequenceNr)
  var currentSnr = fromSequenceNr
  var fromSnr = fromSequenceNr
  var toSnr = toSequenceNr
  var iter = newIter()

  def newIter() =
    session.execute(preparedSelectMessages.bind(persistenceId, currentPnr, fromSnr, toSnr)).iterator

  final def hasNext: Boolean =
    if (iter.hasNext) true
    else if (!inUse) false
    else {
      currentPnr += 1
      fromSnr = currentSnr
      iter = newIter()
      hasNext
    }

  def next(): Row = {
    val row = iter.next()
    currentSnr = row.getLong("sequence_nr")
    row
  }
}


Non blocking asynchronous replay

private[this] val queries: CassandraReadJournal = new CassandraReadJournal(
  extendedActorSystem,
  context.system.settings.config.getConfig("cassandra-query-journal"))

override def asyncReplayMessages(
    persistenceId: String,
    fromSequenceNr: Long,
    toSequenceNr: Long,
    max: Long)(replayCallback: (PersistentRepr) => Unit): Future[Unit] =
  queries
    .eventsByPersistenceId(
      persistenceId, fromSequenceNr, toSequenceNr, max,
      replayMaxResultSize, None, "asyncReplayMessages")
    .runForeach(replayCallback)
    .map(_ => ())


Benchmarks

(Charts: replay strong scaling and weak scaling. Time in ms, roughly 5 000 to 50 000, plotted against the number of threads and actors, comparing the blocking and asynchronous implementations.)

my-dispatcher {
  type = "Dispatcher"
  executor = "thread-pool-executor"

  thread-pool-executor {
    fixed-pool-size = $fixedPoolSize
  }

  throughput = $throughput
}

my-dispatcher {
  type = "Dispatcher"
  executor = "fork-join-executor"

  fork-join-executor {
    parallelism-min = $parallelismMin
    parallelism-max = $parallelismMax
    parallelism-factor = $parallelismFactor
  }

  throughput = $throughput
}
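Wiring such a dispatcher into code uses the standard Akka lookup; the name must match the configuration above:

implicit val ec: scala.concurrent.ExecutionContext =
  system.dispatchers.lookup("my-dispatcher")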

cassandra-journal {
  plugin-dispatcher = $pluginDispatcher
  replay-dispatcher = $replayDispatcher
  max-result-size = $resultSize
  max-result-size-replay = $resultSizeReplay
  target-partition-size = $partitionSize
}

cassandra-query-journal {
  plugin-dispatcher = $queryPluginDispatcher
  max-buffer-size = $bufferSize
  max-result-size-query = $resultSizeReplay
}


Alternative architecture

(Diagram: an alternative architecture. Each node appends to a per-node log (node_id 0 and 1): persistence_id 0 events 0-3, persistence_id 1 event 0, persistence_id 2 event 0. The events-by-persistence-id, events-by-tag and all-persistence-ids views are then maintained from these logs.)

val boundStatements = statementGroup(eventsByPersistenceId, eventsByTag, allPersistenceIds)

Future.sequence(boundStatements).flatMap { stmts =>
  val batch = new BatchStatement().setConsistencyLevel(...).setRetryPolicy(...)
  stmts.foreach(batch.add)
  session.underlying().flatMap(_.executeAsync(batch))
}


val eventsByPersistenceIdStatement = statementGroup(eventsByPersistenceIdStatement)
val boundStatements = statementGroup(eventsByTagStatement, allPersistenceIdsStatement)
...
session.underlying().flatMap { s =>
  val ebpResult = s.executeAsync(eventsByPersistenceIdStatement)
  val batchResult = s.executeAsync(batch)
  ...
}


Event time processing
● Ingestion time, processing time, event time

Ordering

(Diagram: three records shown first in arrival order 1, 2, 0, then reordered by event time.)

KEY  TIME      VALUE
0    12:34:56  0
1    12:34:57  1
2    12:34:58  2
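A minimal sketch of the distinction, using illustrative record and field names: the arrival (processing) order differs from the order in which the events actually happened, and sorting by event time restores the original sequence.

case class Record(key: Int, eventTime: String, value: Int)

// Arrival order: keys 1, 2, 0.
val arrived = List(
  Record(1, "12:34:57", 1),
  Record(2, "12:34:58", 2),
  Record(0, "12:34:56", 0))

// Reordering by event time restores keys 0, 1, 2.
val byEventTime = arrived.sortBy(_.eventTime)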

Distributed causal stream merging

(Diagram: events for persistence ids 0, 1 and 2 are spread across the logs of nodes 0 and 1. The merge tracks the next expected sequence number per persistence id (persistence_id, seq) and emits each persistence id's events in causal order: Id 0 events 0-3, Id 1 event 0, Id 2 event 0.)
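A minimal sketch of that merge logic, with illustrative names rather than the talk's implementation: events carry (persistenceId, seqNr); out-of-order events are buffered until the expected sequence number arrives, then released in order.

import scala.collection.mutable

final case class Evt(persistenceId: String, seqNr: Long)

final class CausalMerge {
  // Next expected sequence number per persistence id; events start at 0.
  private val expected = mutable.Map.empty[String, Long].withDefaultValue(0L)
  // Out-of-order events parked until their predecessors arrive.
  private val pending = mutable.Map.empty[String, mutable.Map[Long, Evt]]

  // Offer one event; returns every event that is now deliverable, in order.
  def offer(e: Evt): List[Evt] = {
    val buf = pending.getOrElseUpdate(e.persistenceId, mutable.Map.empty)
    buf(e.seqNr) = e
    val out = mutable.ListBuffer.empty[Evt]
    var next = expected(e.persistenceId)
    while (buf.contains(next)) {
      out += buf.remove(next).get
      next += 1
    }
    expected(e.persistenceId) = next
    out.toList
  }
}

Offering Id 0's events in the order 2, 0, 1 delivers nothing, then event 0, then events 1 and 2 together.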

Replay

(Diagram: replay resumes from durable progress. The stored positions, per persistence id (persistence_id, seq, e.g. 0 -> 2) and per input stream (stream_id, seq, e.g. 0 -> 1, 1 -> 2), determine where each node's log is re-read so the merged stream continues without loss or duplication.)

Exactly once delivery

(Diagram: each event delivered downstream is acknowledged (ACK). After a failure, events that were sent but not acknowledged, e.g. Id 0, event 2, are redelivered and acknowledged on the retry.)

Exactly once delivery
● Durable offset

(Diagram: a durable offset into the stream advances from 0 through 4 as events are processed; after a restart, consumption resumes from the last committed offset rather than from the beginning.)
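A sketch of resuming from such an offset, assuming hypothetical readOffset/writeOffset helpers backed by the store, a process function returning a Future, and the usual implicit system, materializer and execution context in scope. The offset is committed only after the event is handled, so a crash between the two steps causes redelivery and the handler must be idempotent:

import akka.Done
import akka.stream.scaladsl.Sink
import scala.concurrent.Future

def resume(): Future[Done] =
  queries
    .eventsByPersistenceId(persistenceId, readOffset() + 1, Long.MaxValue)
    .mapAsync(parallelism = 1) { env =>
      process(env).map { _ =>
        writeOffset(env.sequenceNr) // commit progress only after processing
        env
      }
    }
    .runWith(Sink.ignore)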

val conf = new SparkConf()
  .setAppName("...")
  .setMaster("...")
  .set("spark.cassandra.connection.host", "...")

val sc = new SparkContext(conf)

implicit val ordering = new Ordering[(String, Double)] {
  override def compare(x: (String, Double), y: (String, Double)): Int =
    implicitly[Ordering[Double]].compare(x._2, y._2)
}

sc.eventTable()
  .cache()
  .flatMap {
    case (JournalKey(persistenceId, _, _), BalanceUpdatedEvent(change)) =>
      (persistenceId -> change) :: Nil
    case _ => Nil
  }
  .reduceByKey(_ + _)
  .top(100)
  .foreach(println)

sc.stop()

Akka Analytics

val conf = new SparkConf()
  .setAppName("...")
  .setMaster("...")
  .set("spark.cassandra.connection.host", "...")

val sc = new StreamingContext(conf, Seconds(5))

implicit val ordering = new Ordering[(String, Double)] {
  override def compare(x: (String, Double), y: (String, Double)): Int =
    implicitly[Ordering[Double]].compare(x._2, y._2)
}

sc.eventTable()
  .cache()
  .flatMap {
    case (JournalKey(persistenceId, _, _), BalanceUpdatedEvent(change)) =>
      (persistenceId -> change) :: Nil
    case _ => Nil
  }
  .reduceByKey(_ + _)
  .top(100)
  .foreach(println)

sc.stop()

(Diagram: Kafka as the hub of the architecture: data from the internet, services, devices, social feeds and apps flows into Kafka and on to stream processing, stream consumers, search, apps, services, databases and batch jobs.)

(Diagram: the same concerns recur across distributed systems: serialisation, user, mobile and system clients talking to many microservices backed by CQRS/ES, relational and NoSQL stores.)

(Diagram: a parameter-server style setup: clients 1-3 send updates; model devices apply input data and exchange parameter deltas (ΔP) with the parameter devices holding P.)

Challenges

● All the solved problems
  ○ Exactly once delivery
  ○ Consistency
  ○ Availability
  ○ Fault tolerance
  ○ Cross service invariants and consistency
  ○ Transactions
  ○ Automated deployment and configuration management
  ○ Serialization, versioning, compatibility
  ○ Automated elasticity
  ○ No downtime version upgrades
  ○ Graceful shutdown of nodes
  ○ Distributed system verification, logging, tracing, monitoring, debugging
  ○ Split brains
  ○ ...

Conclusion

● From request, response, synchronous, mutable state
● To streams, asynchronous messaging
● Production ready distributed systems

Questions

MANCHESTER LONDON NEW YORK

@zapletal_martin @cakesolutions

347 708 1518

enquiries@cakesolutions.net

We are hiring: http://www.cakesolutions.net/careers
