Time	Test
	297,973 ms	mesosphere.marathon.integration.ResidentTaskIntegrationTest::ResidentTaskIntegrationTest should Scale Down org.scalatest.exceptions.TestFailedException: 10 was not equal to 5
	306,671 ms	mesosphere.marathon.integration.DeleteAppAndBackupIntegrationTest::(not a test) org.scalatest.concurrent.Futures$FutureConcept$$anon$1: A timeout occurred waiting for a future to complete. Queried 19692 times, sleeping 15 milliseconds between each query.
	305,144 ms	mesosphere.marathon.integration.GroupDeployIntegrationTest::(not a test) org.scalatest.concurrent.Futures$FutureConcept$$anon$1: A timeout occurred waiting for a future to complete. Queried 19340 times, sleeping 15 milliseconds between each query.
	685,665 ms	mesosphere.marathon.integration.ResidentTaskIntegrationTest::(not a test) org.scalatest.exceptions.TestFailedDueToTimeoutException: The code passed to eventually never returned normally. Attempted 37 times over 30.20466503 seconds. Last failure message: clean slate in Mesos not satisfied.
	460 ms	mesosphere.marathon.integration.AppDeployIntegrationTest::AppDeploy should Docker info is not automagically created
		View Full Test Results (1 Failed · 3 Broken · 90 Passed)

Diff	ID	Base	Description	Created	Lint	Unit
Base			Base
Diff 1	3781	9ab118a		Jul 14 2017, 3:39 PM	★	★
Diff 2	3784	9ab118a	- Verify that all tasks are killed.	Jul 14 2017, 3:57 PM	★	★
Diff 3	3792	0dd45a6	- Correct refactoring bug.	Jul 17 2017, 2:25 PM	★	★
Diff 4	3797	f6c709c	- Rebase on master.	Jul 17 2017, 4:40 PM	★	★
Diff 5	3888	4324db0	- Rebase	Aug 1 2017, 1:06 PM	★	★

Commit	Tree	Parents	Author	Summary	Date
c1c27d62f9ba	412e978e6097	4324db0f8906	Karsten Jeschkies	Do not kill terminated tasks of reserved instances. (Show More…)	Jul 11 2017, 6:10 PM

Diff 3888

src/main/scala/mesosphere/marathon/MarathonSchedulerActor.scala

Show First 20 Lines • Show All 375 Lines • ▼ Show 20 Line(s)
376	376
377	377	def startRunSpec(runSpec: RunSpec): Future[Done] = {
378	378	logger.info(s"Starting runSpec ${runSpec.id}")
379	379	scale(runSpec)
380	380	}
381	381
382	382	@SuppressWarnings(Array("all")) // async/await
383	383	def stopRunSpec(runSpec: RunSpec): Future[Done] = {
	384	logger.info(s"Stopping runSpec ${runSpec.id}")
	385
384	386	healthCheckManager.removeAllFor(runSpec.id)
385	387
386		logger.info(s"Stopping runSpec ${runSpec.id}")
387	388	async {
388	389	val tasks = await(instanceTracker.specInstances(runSpec.id))
389	390
390	391	tasks.foreach { instance =>
391	392	if (instance.isLaunched) {
392	393	logger.info("Killing {}", instance.instanceId)
393	394	killService.killInstance(instance, KillReason.DeletingApp)
394	395	}
▲ Show 20 Lines • Show All 161 Lines • Show Last 20 Lines

View Options

src/main/scala/mesosphere/marathon/api/TaskKiller.scala

1	1	package mesosphere.marathon
2	2	package api
3	3
4	4	import javax.inject.Inject
5	5
6	6	import akka.Done
	7	import com.typesafe.scalalogging.StrictLogging
7	8	import mesosphere.marathon.core.async.ExecutionContexts.global
8	9	import mesosphere.marathon.core.deployment.DeploymentPlan
9	10	import mesosphere.marathon.core.group.GroupManager
10	11	import mesosphere.marathon.core.instance.Instance
11	12	import mesosphere.marathon.core.instance.update.InstanceUpdateOperation
12	13	import mesosphere.marathon.core.task.termination.{ KillReason, KillService }
13	14	import mesosphere.marathon.core.task.tracker.{ InstanceTracker, TaskStateOpProcessor }
14	15	import mesosphere.marathon.plugin.auth.{ Authenticator, Authorizer, Identity, UpdateRunSpec }
15	16	import mesosphere.marathon.state._
16		import org.slf4j.LoggerFactory
17	17
18	18	import scala.async.Async.{ async, await }
19	19	import scala.concurrent.Future
20	20	import scala.util.control.NonFatal
21	21
22	22	class TaskKiller @Inject() (
23	23	instanceTracker: InstanceTracker,
24	24	stateOpProcessor: TaskStateOpProcessor,
25	25	groupManager: GroupManager,
26	26	service: MarathonSchedulerService,
27	27	val config: MarathonConf,
28	28	val authenticator: Authenticator,
29	29	val authorizer: Authorizer,
30		killService: KillService) extends AuthResource {
	30	killService: KillService) extends AuthResource with StrictLogging {
31	31
32		private[this] val log = LoggerFactory.getLogger(getClass)
33
34	32	@SuppressWarnings(Array("all")) // async/await
35	33	def kill(
36	34	runSpecId: PathId,
37	35	findToKill: (Seq[Instance] => Seq[Instance]),
38	36	wipe: Boolean = false)(implicit identity: Identity): Future[Seq[Instance]] = {
39	37
40	38	groupManager.runSpec(runSpecId) match {
41	39	case Some(runSpec) =>
Show All 19 Lines
61	59	}
62	60	}
63	61
64	62	private[this] def expunge(instances: Seq[Instance]): Future[Done] = {
65	63	// Note: We process all instances sequentially.
66	64
67	65	instances.foldLeft(Future.successful(Done)) { (resultSoFar, nextInstance) =>
68	66	resultSoFar.flatMap { _ =>
69		log.info("Expunging {}", nextInstance.instanceId)
	67	logger.info(s"Expunging ${nextInstance.instanceId}")
70	68	stateOpProcessor.process(InstanceUpdateOperation.ForceExpunge(nextInstance.instanceId)).map(_ => Done).recover {
71	69	case NonFatal(cause) =>
72		log.info("Failed to expunge {}, got: {}", Array[Object](nextInstance.instanceId, cause): _*)
	70	logger.info(s"Failed to expunge ${nextInstance.instanceId}, got:", cause)
73	71	Done
74	72	}
75	73	}
76	74	}
77	75	}
78	76
79	77	@SuppressWarnings(Array("all")) // async/await
80	78	def killAndScale(
▲ Show 20 Lines • Show All 41 Lines • Show Last 20 Lines

View Options

src/main/scala/mesosphere/marathon/core/deployment/impl/DeploymentActor.scala

Show First 20 Lines • Show All 145 Lines • ▼ Show 20 Line(s)
146	146	}
147	147	promise.future
148	148	}
149	149
150	150	@SuppressWarnings(Array("all")) /* async/await */
151	151	def scaleRunnable(runnableSpec: RunSpec, scaleTo: Int,
152	152	toKill: Option[Seq[Instance]],
153	153	status: DeploymentStatus): Future[Done] = {
154		logger.debug("Scale runnable {}", runnableSpec)
	154	logger.debug(s"Scale runnable $runnableSpec")
155	155
156	156	def killToMeetConstraints(notSentencedAndRunning: Seq[Instance], toKillCount: Int) = {
157	157	Constraints.selectInstancesToKill(runnableSpec, notSentencedAndRunning, toKillCount)
158	158	}
159	159
160	160	async {
161	161	val instances = await(instanceTracker.specInstances(runnableSpec.id))
162	162	val runningInstances = instances.filter(_.state.condition.isActive)
Show All 19 Lines
182	182	}
183	183	}
184	184	await(startTasksIfNeeded)
185	185	}
186	186	}
187	187
188	188	@SuppressWarnings(Array("all")) /* async/await */
189	189	def stopRunnable(runnableSpec: RunSpec): Future[Done] = async {
	190	logger.debug(s"Stop runnable $runnableSpec")
190	191	val instances = await(instanceTracker.specInstances(runnableSpec.id))
191	192	val launchedInstances = instances.filter(_.isLaunched)
192	193	// TODO: the launch queue is purged in stopRunnable, but it would make sense to do that before calling kill(tasks)
193	194	await(killService.killInstances(launchedInstances, KillReason.DeletingApp))
	195
	196	logger.debug(s"Killed all remaining tasks: ${launchedInstances.map(_.instanceId)}")
194	197
195	198	// Note: This is an asynchronous call. We do NOT wait for the run spec to stop. If we do, the DeploymentActorTest
196	199	// fails.
197	200	scheduler.stopRunSpec(runnableSpec)
198	201
199	202	Done
200	203	}
201	204
▲ Show 20 Lines • Show All 46 Lines • Show Last 20 Lines

View Options

src/main/scala/mesosphere/marathon/core/instance/update/InstanceUpdater.scala

Show First 20 Lines • Show All 111 Lines • ▼ Show 20 Line(s)
112	112
113	113	private[marathon] def reservationTimeout(instance: Instance, now: Timestamp): InstanceUpdateEffect = {
114	114	if (instance.isReserved) {
115	115	// TODO(cleanup): Using Killed for now; we have no specific state yet bit this must be considered Terminal
116	116	val updatedInstance = instance.copy(
117	117	state = instance.state.copy(condition = Condition.Killed)
118	118	)
119	119	val events = eventsGenerator.events(updatedInstance, task = None, now, previousCondition = Some(instance.state.condition))
	120
	121	logger.debug(s"Expunge reserved ${instance.instanceId}")
	122
120	123	InstanceUpdateEffect.Expunge(instance, events)
121	124	} else {
122	125	InstanceUpdateEffect.Failure("ReservationTimeout can only be applied to a reserved instance")
123	126	}
124	127	}
125	128
126	129	private[marathon] def forceExpunge(instance: Instance, now: Timestamp): InstanceUpdateEffect = {
127	130	val updatedInstance = instance.copy(
128	131	// TODO(cleanup): Using Killed for now; we have no specific state yet bit this must be considered Terminal
129	132	state = instance.state.copy(condition = Condition.Killed)
130	133	)
131	134	val events = InstanceChangedEventsGenerator.events(
132	135	updatedInstance, task = None, now, previousCondition = Some(instance.state.condition))
	136
	137	logger.debug(s"Force expunge ${instance.instanceId}")
	138
133	139	InstanceUpdateEffect.Expunge(updatedInstance, events)
134	140	}
135	141
136	142	private[marathon] def revert(instance: Instance): InstanceUpdateEffect = {
137	143	InstanceUpdateEffect.Update(instance, oldState = None, events = Nil)
138	144	}
139	145	}

View Options

src/main/scala/mesosphere/marathon/core/matcher/reconcile/impl/OfferMatcherReconciler.scala

1	1	package mesosphere.marathon
2	2	package core.matcher.reconcile.impl
3	3
	4	import com.typesafe.scalalogging.StrictLogging
4	5	import mesosphere.marathon.core.instance.Instance
5	6	import mesosphere.marathon.core.instance.update.InstanceUpdateOperation
6	7	import mesosphere.marathon.core.launcher.InstanceOp
7	8	import mesosphere.marathon.core.launcher.impl.TaskLabels
8	9	import mesosphere.marathon.core.matcher.base.OfferMatcher
9	10	import mesosphere.marathon.core.matcher.base.OfferMatcher.{ InstanceOpSource, InstanceOpWithSource, MatchedInstanceOps }
10	11	import mesosphere.marathon.core.task.Task
11	12	import mesosphere.marathon.core.task.tracker.InstanceTracker
12	13	import mesosphere.marathon.core.task.tracker.InstanceTracker.InstancesBySpec
13	14	import mesosphere.marathon.state.RootGroup
14	15	import mesosphere.marathon.storage.repository.GroupRepository
15	16	import mesosphere.marathon.stream.Implicits._
16	17	import mesosphere.util.state.FrameworkId
17	18	import org.apache.mesos.Protos.{ Offer, OfferID, Resource }
18		import org.slf4j.LoggerFactory
19	19
20	20	import scala.concurrent.Future
21	21
22	22	/**
23	23	* Matches task labels found in offer against known tasks/apps and
24	24	*
25	25	* * destroys unknown volumes
26	26	* * unreserves unknown reservations
27	27	*
28	28	* In the future, we probably want to switch to a less agressive approach
29	29	*
30	30	* * by creating tasks in state "unknown" of unknown tasks which are then transitioned to state "garbage" after
31	31	* a delay
32	32	* * and creating unreserved/destroy operations for tasks in state "garbage" only
33	33	*/
34	34	private[reconcile] class OfferMatcherReconciler(instanceTracker: InstanceTracker, groupRepository: GroupRepository)
35		extends OfferMatcher {
	35	extends OfferMatcher with StrictLogging {
36	36
37		private val log = LoggerFactory.getLogger(getClass)
38
39	37	import mesosphere.marathon.core.async.ExecutionContexts.global
40	38
41	39	override def matchOffer(offer: Offer): Future[MatchedInstanceOps] = {
42	40
43	41	val frameworkId = FrameworkId("").mergeFromProto(offer.getFrameworkId)
44	42
45	43	val resourcesByTaskId: Map[Task.Id, Seq[Resource]] = {
46	44	// TODO(PODS): don't use resident resources yet. Once they're needed it's not clear whether the labels
Show All 13 Lines
60	58	private[this] def processResourcesByTaskId(
61	59	offer: Offer, resourcesByTaskId: Map[Task.Id, Seq[Resource]]): Future[MatchedInstanceOps] =
62	60	{
63	61	// do not query instanceTracker in the common case
64	62	if (resourcesByTaskId.isEmpty) Future.successful(MatchedInstanceOps.noMatch(offer.getId))
65	63	else {
66	64	def createInstanceOps(instancesBySpec: InstancesBySpec, rootGroup: RootGroup): MatchedInstanceOps = {
67	65
68		// TODO(jdef) pods don't suport resident resources yet so we don't need to worry about including them here
	66	// TODO(jdef) pods don't support resident resources yet so we don't need to worry about including them here
69	67	/* Was this task launched from a previous app definition, or a prior launch that did not clean up properly */
70	68	def spurious(instanceId: Instance.Id): Boolean =
71	69	instancesBySpec.instance(instanceId).isEmpty \|\| rootGroup.app(instanceId.runSpecId).isEmpty
72	70
73	71	val instanceOps: Seq[InstanceOpWithSource] = resourcesByTaskId.collect {
74	72	case (taskId, spuriousResources) if spurious(taskId.instanceId) =>
75	73	val unreserveAndDestroy =
76	74	InstanceOp.UnreserveAndDestroyVolumes(
77	75	stateOp = InstanceUpdateOperation.ForceExpunge(taskId.instanceId),
78	76	oldInstance = instancesBySpec.instance(taskId.instanceId),
79	77	resources = spuriousResources
80	78	)
81		log.warn(
82		"removing spurious resources and volumes of {} because the instance does no longer exist",
83		taskId.instanceId)
	79	logger.warn(s"removing spurious resources and volumes of ${taskId.instanceId} because the instance does no longer exist")
84	80	InstanceOpWithSource(source(offer.getId), unreserveAndDestroy)
85	81	}(collection.breakOut)
86	82
87	83	MatchedInstanceOps(offer.getId, instanceOps, resendThisOffer = true)
88	84	}
89	85
90	86	// query in parallel
91	87	val instancesBySpedFuture = instanceTracker.instancesBySpec()
92	88	val rootGroupFuture = groupRepository.root()
93	89
94	90	for { instancesBySpec <- instancesBySpedFuture; rootGroup <- rootGroupFuture }
95	91	yield createInstanceOps(instancesBySpec, rootGroup)
96	92	}
97	93	}
98	94
99	95	private[this] def source(offerId: OfferID) = new InstanceOpSource {
100	96	override def instanceOpAccepted(instanceOp: InstanceOp): Unit =
101		log.info(s"accepted unreserveAndDestroy for ${instanceOp.instanceId} in offer [${offerId.getValue}]")
	97	logger.info(s"accepted unreserveAndDestroy for ${instanceOp.instanceId} in offer [${offerId.getValue}]")
102	98	override def instanceOpRejected(instanceOp: InstanceOp, reason: String): Unit =
103		log.info("rejected unreserveAndDestroy for {} in offer [{}]: {}", instanceOp.instanceId, offerId.getValue, reason)
	99	logger.info(s"rejected unreserveAndDestroy for ${instanceOp.instanceId} in offer [${offerId.getValue}]: $reason")
104	100	}
105	101	}

View Options

src/main/scala/mesosphere/marathon/core/task/jobs/impl/OverdueTasksActor.scala

1	1	package mesosphere.marathon
2	2	package core.task.jobs.impl
3	3
4	4	import akka.actor._
	5	import com.typesafe.scalalogging.StrictLogging
5	6	import mesosphere.marathon.core.base.Clock
6	7	import mesosphere.marathon.core.condition.Condition
7	8	import mesosphere.marathon.core.instance.Instance
8	9	import mesosphere.marathon.core.instance.update.InstanceUpdateOperation
9	10	import mesosphere.marathon.core.task.Task
10	11	import mesosphere.marathon.core.task.termination.{ KillReason, KillService }
11	12	import mesosphere.marathon.core.task.tracker.{ InstanceTracker, TaskStateOpProcessor }
12	13	import mesosphere.marathon.state.Timestamp
Show All 16 Lines
29	30	/**
30	31	* Contains the core logic for the KillOverdueTasksActor.
31	32	*/
32	33	private class Support(
33	34	config: MarathonConf,
34	35	taskTracker: InstanceTracker,
35	36	taskStateOpProcessor: TaskStateOpProcessor,
36	37	killService: KillService,
37		clock: Clock) {
	38	clock: Clock) extends StrictLogging {
38	39	import mesosphere.marathon.core.async.ExecutionContexts.global
39	40
40		private[this] val log = LoggerFactory.getLogger(getClass)
41
42	41	def check(): Future[Unit] = {
43	42	val now = clock.now()
44		log.debug("checking for overdue tasks")
	43	logger.debug("checking for overdue tasks")
45	44	taskTracker.instancesBySpec().flatMap { tasksByApp =>
46	45	val instances = tasksByApp.allInstances
47	46
48	47	killOverdueInstances(now, instances)
49	48
50	49	timeoutOverdueReservations(now, instances)
51	50	}
52	51	}
53	52
54	53	private[this] def killOverdueInstances(now: Timestamp, instances: Seq[Instance]): Unit = {
55	54	overdueTasks(now, instances).foreach { overdueTask =>
56		log.info("Killing overdue {}", overdueTask.instanceId)
	55	logger.info(s"Killing overdue ${overdueTask.instanceId}")
57	56	killService.killInstance(overdueTask, KillReason.Overdue)
58	57	}
59	58	}
60	59
61	60	private[this] def overdueTasks(now: Timestamp, instances: Seq[Instance]): Seq[Instance] = {
62	61	// stagedAt is set when the task is created by the scheduler
63	62	val stagedExpire = now - config.taskLaunchTimeout().millis
64	63	val unconfirmedExpire = now - config.taskLaunchConfirmTimeout().millis
65	64
66	65	def launchedAndExpired(task: Task): Boolean = {
67	66	task.status.condition match {
68	67	case Condition.Created \| Condition.Starting if task.status.stagedAt < unconfirmedExpire =>
69		log.warn(s"Should kill: ${task.taskId} was launched " +
	68	logger.warn(s"Should kill: ${task.taskId} was launched " +
70	69	s"${task.status.stagedAt.until(now).toSeconds}s ago and was not confirmed yet")
71	70	true
72	71
73	72	case Condition.Staging if task.status.stagedAt < stagedExpire =>
74		log.warn(s"Should kill: ${task.taskId} was staged ${task.status.stagedAt.until(now).toSeconds}s" +
	73	logger.warn(s"Should kill: ${task.taskId} was staged ${task.status.stagedAt.until(now).toSeconds}s" +
75	74	" ago and has not yet started")
76	75	true
77	76
78	77	case _ =>
79	78	// running
80	79	false
81	80	}
82	81	}
83	82
84	83	// TODO(PODS): adjust this to consider instance.status and `since`
85	84	instances.filter(instance => instance.tasksMap.valuesIterator.exists(launchedAndExpired))
86	85	}
87	86
88	87	private[this] def timeoutOverdueReservations(now: Timestamp, instances: Seq[Instance]): Future[Unit] = {
89	88	val taskTimeoutResults = overdueReservations(now, instances).map { instance =>
90		log.warn("Scheduling ReservationTimeout for {}", instance.instanceId)
	89	logger.warn("Scheduling ReservationTimeout for {}", instance.instanceId)
91	90	taskStateOpProcessor.process(InstanceUpdateOperation.ReservationTimeout(instance.instanceId))
92	91	}
93	92	Future.sequence(taskTimeoutResults).map(_ => ())
94	93	}
95	94
96	95	private[this] def overdueReservations(now: Timestamp, instances: Seq[Instance]): Seq[Instance] = {
97	96	// TODO PODs is an Instance overdue if a single task is overdue? / move reservation to instance level
98	97	instances.filter { instance =>
▲ Show 20 Lines • Show All 41 Lines • Show Last 20 Lines

View Options

src/main/scala/mesosphere/marathon/core/task/termination/impl/KillAction.scala

Show First 20 Lines • Show All 63 Lines • ▼ Show 20 Line(s)
64	64		val hasReservations = knownInstance.fold(false)(_.hasReservation)
65	65
66	66		// TODO(PODS): align this with other Terminal/Unreachable/whatever extractors
67	67		val maybeCondition = knownInstance.map(_.state.condition)
68	68		val isUnkillable = maybeCondition.fold(false)(wontRespondToKill)
69	69
70	70		// Ephemeral instances are expunged once all tasks are terminal, it's unlikely for this to be true for them.
71	71		// Resident tasks, however, could be in this state if scaled down, or, if kill is attempted between recovery.
72			val allTerminal: Boolean = taskIds.isEmpty
	72		val allTerminal: Boolean = knownInstance.fold(false) { instance =>
	73		instance.tasksMap.values.forall { task =>
	74		task.status.condition.isTerminal \|\| task.status.mesosStatus.exists(taskStatus => Task.Terminated.isTerminated(taskStatus.getState))
	75		}
	76		}
		meichstedtUnsubmitted Done IIRC, `knownInstance` potentially carries more information, but the `KillAction` is generally based on the passed taskIds. If Marathon receives a status update for an unknown non-terminal task, it will kill that task. So we might end up with a `ToKill` that contains a taskId but no knownInstance. Since you default to `false`, `allTerminal` will in that case never evaluate to `true`. Furthermore, the KillServiceActor listens to `UnknownInstanceTerminated` events. The idea here is that even if the task associated with a taskId is already terminal, issuing a kill request will end up as such an event in the actor. The `ToKill` structure is initialized with only non-terminal taskIds, so the following check doesn't add any more functionality AFAICT. Should the `\|\| task.status.mesosStatus.exists(taskStatus => Task.Terminated.isTerminated(taskStatus.getState))` check be moved to `KillServiceActor.killInstances`?
		jeschkiesAuthorUnsubmitted Done It actually seems to be the other case. The instance is known to the instance tracker but should actually not be known. Somehow resident instances do not get removed properly. This change just ensures that Marathon does not retry killing lost or failed tasks. However, I still encounter cases where the instance is not removed from the instance tracker. See the logs in the ticket for details.
73	77
74	78		if (isUnkillable \|\| allTerminal) {
75	79		val msg = if (isUnkillable)
76	80		s"it is ${maybeCondition.fold("unknown")(_.toString)}"
77	81		else
78	82		"none of its tasks are running"
79	83		if (hasReservations) {
80	84		logger.info(
81	85		s"Ignoring kill request for ${instanceId}; killing it while ${msg} is unsupported")
82	86		KillAction.Noop
83	87		} else {
84	88		logger.warn(s"Expunging ${instanceId} from state because ${msg}")
85	89		// we will eventually be notified of a taskStatusUpdate after the instance has been expunged
86	90		KillAction.ExpungeFromState
87	91		}
88	92		} else {
89	93		val knownOrNot = if (knownInstance.isDefined) "known" else "unknown"
90			logger.warn("Killing {} {} of instance {}", knownOrNot, taskIds.mkString(","), instanceId)
	94		logger.warn(s"Killing $knownOrNot ${taskIds.mkString(",")} of $instanceId with ${maybeCondition.fold("unknown")(_.toString)} condition")
	95		knownInstance.foreach { instance =>
	96		logger.debug(s"Task statuses: ${instance.tasksMap.values.map(_.status)}")
	97		logger.debug(s"Task conditions: ${instance.tasksMap.values.map(_.status.condition)}")
	98		}
	99
91	100		KillAction.IssueKillRequest
92	101		}
93	102		}
94	103
95	104		}

View Options

src/main/scala/mesosphere/marathon/core/task/termination/impl/KillServiceActor.scala

Show All 9 Lines
10	10	import mesosphere.marathon.core.instance.Instance
11	11	import mesosphere.marathon.core.instance.update.InstanceUpdateOperation
12	12	import mesosphere.marathon.core.task.Task
13	13	import mesosphere.marathon.core.task.Task.Id
14	14	import mesosphere.marathon.core.task.termination.InstanceChangedPredicates.considerTerminal
15	15	import mesosphere.marathon.core.task.termination.KillConfig
16	16	import mesosphere.marathon.core.task.tracker.TaskStateOpProcessor
17	17	import mesosphere.marathon.state.Timestamp
18		import mesosphere.marathon.stream.Sink
19	18
20	19	import scala.collection.mutable
21	20	import scala.concurrent.{ Future, Promise }
22	21
23	22	/**
24	23	* An actor that handles killing instances in chunks and depending on the instance state.
25	24	* Lost instances will simply be expunged from state, while active instances will be killed
26	25	* via the scheduler driver. There is be a maximum number of kills in flight, and
27	26	* the service will only issue more kills when instances are reported terminal.
28	27	*
29	28	* If a kill is not acknowledged with a terminal status update within a configurable
30	29	* time window, the kill is retried a configurable number of times. If the maximum
31	30	* number of retries is exceeded, the instance will be expunged from state similar to a
32	31	* lost instance.
33	32	*
34		* For each kill request, a [[KillStreamWatcher]] will be created, which
35		* is supposed to watch the progress and complete a given promise when all watched
36		* instances are reportedly terminal.
37		*
38	33	* For pods started via the default executor, it is sufficient to kill 1 task of the group,
39	34	* which will cause all tasks to be killed
40	35	*
41	36	* See [[KillConfig]] for configuration options.
42	37	*/
43	38	private[impl] class KillServiceActor(
44	39	driverHolder: MarathonSchedulerDriverHolder,
45	40	stateOpProcessor: TaskStateOpProcessor,
▲ Show 20 Lines • Show All 42 Lines • ▼ Show 20 Line(s)
88	83	handleTerminal(id)
89	84
90	85	case Retry =>
91	86	retry()
92	87	}
93	88
94	89	def killUnknownTaskById(taskId: Task.Id): Unit = {
95	90	logger.debug(s"Received KillUnknownTaskById($taskId)")
96		instancesToKill.update(taskId.instanceId, ToKill(taskId.instanceId, Seq(taskId), maybeInstance = None, attempts = 0))
	91	val promise = Promise[Done]
	92	instancesToKill.update(taskId.instanceId, ToKill(taskId.instanceId, Seq(taskId), maybeInstance = None, attempts = 0, promise = promise))
97	93	processKills()
98	94	}
99	95
100	96	def killInstances(instances: Seq[Instance], promise: Promise[Done]): Unit = {
	97	if (instances.isEmpty) promise.trySuccess(Done)
	98
101	99	val instanceIds = instances.map(_.instanceId)
102		logger.debug(s"Adding instances $instanceIds to queue; setting up child actor to track progress")
103		promise.completeWith(watchForKilledInstances(instanceIds))
	100	logger.debug(s"Adding instances $instanceIds to queue")
	101
	102	val instanceKilledFutures = Seq.newBuilder[Future[Done]]
	103
104	104	instances.foreach { instance =>
	105
	106	// This promise is completed once this instance has been killed.
	107	val killPromise = Promise[Done]()
	108	instanceKilledFutures += killPromise.future
	109
105	110	// TODO(PODS): do we make sure somewhere that an instance has _at_least_ one task?
106	111	val taskIds: IndexedSeq[Id] = instance.tasksMap.values.withFilter(!_.isTerminal).map(_.taskId)(collection.breakOut)
107	112	instancesToKill.update(
108	113	instance.instanceId,
109		ToKill(instance.instanceId, taskIds, maybeInstance = Some(instance), attempts = 0)
	114	ToKill(instance.instanceId, taskIds, maybeInstance = Some(instance), attempts = 0, killPromise)
110	115	)
111	116	}
	117
	118	// Complete promise once all instances have been killed.
	119	val allInstancesKilled: Future[Done] = Future.sequence(instanceKilledFutures.result()).map(_ => Done)
	120	promise.completeWith(allInstancesKilled)
	121
112	122	processKills()
113	123	}
114	124
115		/**
116		* Begins watching immediately for terminated instances. Future is completed when all instances are seen.
117		*/
118		def watchForKilledInstances(instanceIds: Seq[Instance.Id]): Future[Done] = {
119		// Note - we toss the materialized cancellable. We are okay to do this here because KillServiceActor will continue to retry
120		// killing the instanceIds in question, forever, until this Future completes.
121		KillStreamWatcher.
122		watchForKilledInstances(context.system.eventStream, instanceIds).
123		runWith(Sink.head)
124		}
125
126	125	def processKills(): Unit = {
127	126	val killCount = config.killChunkSize - inFlight.size
128	127	val toKillNow = instancesToKill.take(killCount)
129	128
130	129	logger.info(s"processing ${toKillNow.size} kills for ${toKillNow.keys}")
131	130	toKillNow.foreach {
132	131	case (instanceId, data) => processKill(data)
133	132	}
134	133
135	134	if (inFlight.isEmpty) {
136	135	retryTimer.cancel()
137	136	} else {
138	137	retryTimer.setup()
139	138	}
140	139	}
141	140
142	141	def processKill(toKill: ToKill): Unit = {
143		val instanceId = toKill.instanceId
144		val taskIds = toKill.taskIdsToKill
145	142
146	143	KillAction(toKill.instanceId, toKill.taskIdsToKill, toKill.maybeInstance) match {
147	144	case KillAction.Noop =>
148	145	()
149	146
150	147	case KillAction.IssueKillRequest =>
151	148	driverHolder.driver.foreach { driver =>
152		taskIds.map(_.mesosTaskId).foreach(driver.killTask)
	149	toKill.taskIdsToKill.map(_.mesosTaskId).foreach(driver.killTask)
153	150	}
154		val attempts = inFlight.get(toKill.instanceId).fold(1)(_.attempts + 1)
	151	val attempts = toKill.attempts + 1
155	152	inFlight.update(
156		toKill.instanceId, ToKill(instanceId, taskIds, toKill.maybeInstance, attempts, issued = clock.now()))
	153	toKill.instanceId, toKill.copy(attempts = attempts, issued = clock.now())
	154	)
157	155
158	156	case KillAction.ExpungeFromState =>
159	157	stateOpProcessor.process(InstanceUpdateOperation.ForceExpunge(toKill.instanceId))
	158	// TODO: When should the promise be fulfilled?
	159	toKill.promise.trySuccess(Done)
160	160	}
161	161
162		instancesToKill.remove(instanceId)
	162	instancesToKill.remove(toKill.instanceId)
163	163	}
164	164
165	165	def handleTerminal(instanceId: Instance.Id): Unit = {
166	166	instancesToKill.remove(instanceId)
167		inFlight.remove(instanceId)
	167	inFlight.remove(instanceId).map(_.promise.trySuccess(Done))
168	168	logger.debug(s"$instanceId is terminal. (${instancesToKill.size} kills queued, ${inFlight.size} in flight)")
169	169	processKills()
170	170	}
171	171
172	172	def retry(): Unit = {
173	173	val now = clock.now()
174	174
175	175	inFlight.foreach {
Show All 24 Lines
200	200
201	201	/**
202	202	* Metadata used to track which instances to kill and how many attempts have been made
203	203	*
204	204	* @param instanceId id of the instance to kill
205	205	* @param taskIdsToKill ids of the tasks to kill
206	206	* @param maybeInstance the instance, if available
207	207	* @param attempts the number of kill attempts
	208	* @param promise Promise that is fulfilled once task has been killed
208	209	* @param issued the time of the last issued kill request
209	210	*/
210	211	case class ToKill(
211	212	instanceId: Instance.Id,
212	213	taskIdsToKill: Seq[Task.Id],
213	214	maybeInstance: Option[Instance],
214	215	attempts: Int,
	216	promise: Promise[Done],
215	217	issued: Timestamp = Timestamp.zero)
216	218	}
217	219
218	220	/**
219	221	* Wraps a timer into an interface that hides internal mutable state behind simple setup and cancel methods
220	222	*/
221	223	private[this] trait RetryTimer {
222	224	private[this] var retryTimer: Option[Cancellable] = None
Show All 22 Lines

View Options

src/main/scala/mesosphere/marathon/core/task/termination/impl/KillServiceDelegate.scala

1	1		package mesosphere.marathon
2	2		package core.task.termination.impl
3	3
4	4		import akka.Done
5	5		import akka.actor.ActorRef
	6		import com.typesafe.scalalogging.StrictLogging
6	7		import mesosphere.marathon.core.instance.Instance
7	8		import mesosphere.marathon.core.task.Task
8	9		import mesosphere.marathon.core.task.termination.{ KillReason, KillService }
9			import org.slf4j.LoggerFactory
10	10
11	11		import scala.concurrent.{ Future, Promise }
12	12		import scala.collection.immutable.Seq
13	13
14			private[termination] class KillServiceDelegate(actorRef: ActorRef) extends KillService {
15			import KillServiceDelegate.log
	14		private[termination] class KillServiceDelegate(actorRef: ActorRef) extends KillService with StrictLogging {
16	15		import KillServiceActor._
17	16
18	17		override def killInstances(instances: Seq[Instance], reason: KillReason): Future[Done] = {
19			log.info(
	18		logger.info(
20	19		s"Killing ${instances.size} tasks for reason: $reason (ids: {} ...)",
21	20		instances.take(3).map(_.instanceId).mkString(","))
		meichstedtUnsubmitted Done Have you changed the text of the log message on purpose?
		jeschkiesAuthorUnsubmitted Done No. That's a refactoring error.
22	21
23	22		val promise = Promise[Done]
24	23		actorRef ! KillInstances(instances, promise)
25	24
26	25		promise.future
27	26		}
28	27
29	28		override def killInstance(instance: Instance, reason: KillReason): Future[Done] = {
30	29		killInstances(Seq(instance), reason)
31	30		}
32	31
33	32		override def killUnknownTask(taskId: Task.Id, reason: KillReason): Unit = {
34			log.info(s"Killing unknown task for reason: $reason (id: {})", taskId)
	33		logger.info(s"Killing unknown task for reason: $reason (id: {})", taskId)
35	34		actorRef ! KillUnknownTaskById(taskId)
36	35		}
37			}
38
39			object KillServiceDelegate {
40			private[impl] val log = LoggerFactory.getLogger(getClass)
41	36		}

View Options

src/main/scala/mesosphere/marathon/core/task/termination/impl/KillStreamWatcher.scala

This file was deleted.

This file was completely deleted. Show File Contents

View Options

src/main/scala/mesosphere/marathon/core/task/tracker/InstanceTracker.scala

1	1	package mesosphere.marathon
2	2	package core.task.tracker
3	3
	4	import com.typesafe.scalalogging.StrictLogging
4	5	import mesosphere.marathon.core.instance.Instance
5	6	import mesosphere.marathon.core.task.Task
6	7	import mesosphere.marathon.state.PathId
7		import org.slf4j.LoggerFactory
8	8
9	9	import scala.concurrent.{ ExecutionContext, Future }
10	10
11	11	/**
12	12	* The TaskTracker exposes the latest known state for every task.
13	13	*
14	14	* It is a read-only interface. For modification, see
15	15	* * [[TaskStateOpProcessor]] for create, update, delete operations
Show All 18 Lines
34	34	def hasSpecInstancesSync(appId: PathId): Boolean
35	35	def hasSpecInstances(appId: PathId)(implicit ec: ExecutionContext): Future[Boolean]
36	36	}
37	37
38	38	object InstanceTracker {
39	39	/**
40	40	* Contains all tasks grouped by app ID.
41	41	*/
42		case class InstancesBySpec private (instancesMap: Map[PathId, InstanceTracker.SpecInstances]) {
43		import InstancesBySpec._
	42	case class InstancesBySpec private (instancesMap: Map[PathId, InstanceTracker.SpecInstances]) extends StrictLogging {
44	43
45	44	def allSpecIdsWithInstances: Set[PathId] = instancesMap.keySet
46	45
47	46	def hasSpecInstances(appId: PathId): Boolean = instancesMap.contains(appId)
48	47
49	48	def specInstances(pathId: PathId): Seq[Instance] = {
50	49	instancesMap.get(pathId).map(_.instances).getOrElse(Seq.empty)
51	50	}
Show All 10 Lines
62	61	}
63	62
64	63	def allInstances: Seq[Instance] = instancesMap.values.flatMap(_.instances)(collection.breakOut)
65	64
66	65	private[tracker] def updateApp(appId: PathId)(
67	66	update: InstanceTracker.SpecInstances => InstanceTracker.SpecInstances): InstancesBySpec = {
68	67	val updated = update(instancesMap(appId))
69	68	if (updated.isEmpty) {
70		log.info(s"Removed app [$appId] from tracker")
	69	logger.info(s"Removed app [$appId] from tracker")
71	70	copy(instancesMap = instancesMap - appId)
72	71	} else {
73		log.debug(s"Updated app [$appId], currently ${updated.instanceMap.size} tasks in total.")
	72	logger.debug(s"Updated app [$appId], currently ${updated.instanceMap.size} tasks in total.")
74	73	copy(instancesMap = instancesMap + (appId -> updated))
75	74	}
76	75	}
77	76	}
78	77
79	78	object InstancesBySpec {
80		private val log = LoggerFactory.getLogger(getClass)
81	79
82	80	def of(specInstances: collection.immutable.Map[PathId, InstanceTracker.SpecInstances]): InstancesBySpec = {
83	81	new InstancesBySpec(specInstances.withDefault(appId => InstanceTracker.SpecInstances(appId)))
84	82	}
85	83
86	84	def of(apps: InstanceTracker.SpecInstances*): InstancesBySpec = of(Map(apps.map(app => app.specId -> app): _*))
87	85
88	86	def forInstances(tasks: Instance*): InstancesBySpec = of(
Show All 29 Lines

View Options

src/main/scala/mesosphere/marathon/core/task/tracker/impl/InstanceOpProcessorImpl.scala

Show All 19 Lines
20	20	private[tracker] class InstanceOpProcessorImpl(
21	21	instanceTrackerRef: ActorRef,
22	22	repository: InstanceRepository,
23	23	stateOpResolver: InstanceUpdateOpResolver,
24	24	config: InstanceTrackerConfig) extends InstanceOpProcessor with StrictLogging {
25	25	import InstanceOpProcessor._
26	26
27	27	override def process(op: Operation)(implicit ec: ExecutionContext): Future[Unit] = {
	28	logger.debug(s"Process $op")
28	29	val stateChange = stateOpResolver.resolve(op.op)
29	30
30	31	stateChange.flatMap {
31	32	case change: InstanceUpdateEffect.Expunge =>
32	33	// Used for task termination or as a result from an UpdateStatus action.
33	34	// The expunge is propagated to the instanceTracker which informs the sender about the success (see Ack).
34	35	repository.delete(change.instance.instanceId).map { _ =>
35	36	logger.debug(s"Expunged $change")
▲ Show 20 Lines • Show All 89 Lines • Show Last 20 Lines

View Options

src/main/scala/mesosphere/marathon/core/task/tracker/impl/InstanceTrackerActor.scala

1	1	package mesosphere.marathon
2	2	package core.task.tracker.impl
3	3
4	4	import akka.Done
5	5	import akka.actor.SupervisorStrategy.Escalate
6	6	import akka.actor._
7	7	import akka.event.LoggingReceive
8	8	import com.typesafe.scalalogging.StrictLogging
9	9	import mesosphere.marathon.core.appinfo.TaskCounts
10	10	import mesosphere.marathon.core.instance.Instance
11	11	import mesosphere.marathon.core.instance.update.{ InstanceChange, InstanceDeleted, InstanceUpdateEffect, InstanceUpdateOperation, InstanceUpdated }
12	12	import mesosphere.marathon.core.task.tracker.impl.InstanceTrackerActor.ForwardTaskOp
13	13	import mesosphere.marathon.core.task.tracker.{ InstanceTracker, InstanceTrackerUpdateStepProcessor }
14	14	import mesosphere.marathon.metrics.AtomicGauge
15	15	import mesosphere.marathon.state.{ PathId, Timestamp }
16		import org.slf4j.LoggerFactory
17	16
18	17	import scala.concurrent.Future
19	18	import scala.util.control.NonFatal
20	19
21	20	object InstanceTrackerActor {
22	21	def props(
23	22	metrics: ActorMetrics,
24	23	taskLoader: InstancesLoader,
▲ Show 20 Lines • Show All 42 Lines • ▼ Show 20 Line(s)
67	66	* after they have been persisted.
68	67	*
69	68	* It also spawns the [[InstanceUpdateActor]] as a child and forwards update operations to it.
70	69	*/
71	70	private[impl] class InstanceTrackerActor(
72	71	metrics: InstanceTrackerActor.ActorMetrics,
73	72	taskLoader: InstancesLoader,
74	73	updateStepProcessor: InstanceTrackerUpdateStepProcessor,
75		taskUpdaterProps: ActorRef => Props) extends Actor with Stash {
	74	taskUpdaterProps: ActorRef => Props) extends Actor with Stash with StrictLogging {
76	75
77		private[this] val log = LoggerFactory.getLogger(getClass)
78	76	private[this] val updaterRef = context.actorOf(taskUpdaterProps(self), "updater")
79	77
80	78	override val supervisorStrategy = OneForOneStrategy() { case _: Exception => Escalate }
81	79
82	80	override def preStart(): Unit = {
83	81	super.preStart()
84	82
85		log.info(s"${getClass.getSimpleName} is starting. Task loading initiated.")
	83	logger.info(s"${getClass.getSimpleName} is starting. Task loading initiated.")
86	84	metrics.resetMetrics()
87	85
88	86	import akka.pattern.pipe
89	87	import context.dispatcher
90	88	taskLoader.load().pipeTo(self)
91	89	}
92	90
93	91	override def postStop(): Unit = {
94	92	metrics.resetMetrics()
95	93
96	94	super.postStop()
97	95	}
98	96
99	97	override def receive: Receive = initializing
100	98
101	99	private[this] def initializing: Receive = LoggingReceive.withLabel("initializing") {
102	100	case appTasks: InstanceTracker.InstancesBySpec =>
103		log.info("Task loading complete.")
	101	logger.info("Task loading complete.")
104	102
105	103	unstashAll()
106	104	context.become(withTasks(
107	105	appTasks,
108	106	TaskCounts(appTasks.allInstances, healthStatuses = Map.empty)))
109	107
110	108	case Status.Failure(cause) =>
111	109	// escalate this failure
Show All 39 Lines
151	149
152	150	case msg @ InstanceTrackerActor.StateChanged(ack) =>
153	151	val maybeChange: Option[InstanceChange] = ack.effect match {
154	152	case InstanceUpdateEffect.Update(instance, oldState, events) =>
155	153	becomeWithUpdatedApp(instance.runSpecId)(instance.instanceId, newInstance = Some(instance))
156	154	Some(InstanceUpdated(instance, lastState = oldState.map(_.state), events))
157	155
158	156	case InstanceUpdateEffect.Expunge(instance, events) =>
	157	logger.debug(s"Received expunge for ${instance.instanceId}")
159	158	becomeWithUpdatedApp(instance.runSpecId)(instance.instanceId, newInstance = None)
160	159	Some(InstanceDeleted(instance, lastState = None, events))
161	160
162	161	case InstanceUpdateEffect.Noop(_) |
163	162	InstanceUpdateEffect.Failure(_) =>
164	163	None
165	164	}
166	165
167	166	val originalSender = sender()
168	167
169	168	import context.dispatcher
170	169	maybeChange.map { change =>
171	170	updateStepProcessor.process(change).recover {
172	171	case NonFatal(cause) =>
173	172	// since we currently only use ContinueOnErrorSteps, we can simply ignore failures here
174		log.warn("updateStepProcessor.process failed", cause)
	173	logger.warn("updateStepProcessor.process failed", cause)
175	174	Done
176	175	}
177	176	}.getOrElse(Future.successful(Done)).foreach { _ =>
178	177	ack.sendAck()
179	178	originalSender ! (())
180	179	}
181	180	}
182	181	}
183	182	}

View Options

src/test/resources/logback-test.xml

1	1		<?xml version="1.0" encoding="UTF-8"?>
2	2
3	3		<configuration>
4	4		<appender name="stdout" class="ch.qos.logback.core.ConsoleAppender">
5	5		<encoder>
6	6		<pattern>%highlight(%-5level)[%date{HH:mm:ss} %logger{0}] %message%n</pattern>
7	7		</encoder>
8	8		</appender>
9	9
10	10		<!-- Change to DEBUG for debugging test failures -->
11			<logger name="mesosphere.marathon" level="INFO"/>
	11		<logger name="mesosphere.marathon" level="DEBUG"/>
		meichstedt (Unsubmitted, Done): hey, it actually does what the comment says :)
12	12		<logger name="mesosphere.marathon.integration.process" level="DEBUG"/>
13	13		<logger name="akka" level="INFO"/>
14	14		<logger name="native-zk-connector" level="WARN" />
15	15		<logger name="org.eclipse" level="INFO"/>
16	16		<logger name="org.apache.zookeeper" level="WARN" />
17	17
18	18		<logger name="spray" level="ERROR"/>
19	19		<root level="INFO">
20	20		<appender-ref ref="stdout"/>
21	21		</root>
22	22		</configuration>

View Options

src/test/scala/mesosphere/marathon/core/task/termination/impl/KillServiceActorTest.scala

Show First 20 Lines • Show All 79 Lines • ▼ Show 20 Line(s)
80	80		verify(f.stateOpProcessor, timeout(f.killConfig.killRetryTimeout.toMillis.toInt * 2)).process(InstanceUpdateOperation.ForceExpunge(instance.instanceId))
81	81
82	82		f.publishInstanceChanged(TaskStatusUpdateTestHelper.killed(instance).wrapped)
83	83
84	84		promise.future.futureValue should be(Done)
85	85		}
86	86		}
87	87
	88		"asked to kill single known instance that results in TASK_LOST" should {
		meichstedt (Unsubmitted, Done): Does this actually test against a TASK_LOST?
		jeschkies (Author, Unsubmitted, Done): Yes and no. We get a TASK_LOST as a reply to killing a failed task. This test just simulates the reply by Mesos.
	89		"complete the kill" in withActor(defaultConfig) { (f, actor) =>
	90
	91		val instance = f.mockInstance(f.runSpecId, f.now(), mesos.Protos.TaskState.TASK_RUNNING)
	92		val promise = Promise[Done]()
	93		actor ! KillServiceActor.KillInstances(Seq(instance), promise)
	94
	95		val (taskId, _) = instance.tasksMap.head
	96		verify(f.driver, timeout(f.killConfig.killRetryTimeout.toMillis.toInt * 2)).killTask(taskId.mesosTaskId)
	97
	98		f.publishInstanceChanged(TaskStatusUpdateTestHelper.lost(mesos.Protos.TaskStatus.Reason.REASON_TASK_UNKNOWN, instance).wrapped)
	99
	100		promise.future.futureValue should be(Done)
	101		}
	102		}
	103
88	104		"asked to kill multiple instances at once" should {
89	105		"issue three kill requests to the driver" in withActor(defaultConfig) { (f, actor) =>
90	106		val runningInstance = f.mockInstance(f.runSpecId, f.clock.now(), mesos.Protos.TaskState.TASK_RUNNING)
91	107		val unreachableInstance = f.mockInstance(f.runSpecId, f.clock.now(), mesos.Protos.TaskState.TASK_UNREACHABLE)
92	108		val stagingInstance = f.mockInstance(f.runSpecId, f.clock.now(), mesos.Protos.TaskState.TASK_STAGING)
93	109
94	110		val promise = Promise[Done]()
95	111		actor ! KillServiceActor.KillInstances(Seq(runningInstance, unreachableInstance, stagingInstance), promise)
96	112
97	113		val (runningTaskId, _) = runningInstance.tasksMap.head
98	114		verify(f.driver, timeout(f.killConfig.killRetryTimeout.toMillis.toInt * 2)).killTask(runningTaskId.mesosTaskId)
99	115		verify(f.stateOpProcessor, timeout(f.killConfig.killRetryTimeout.toMillis.toInt * 2)).process(InstanceUpdateOperation.ForceExpunge(unreachableInstance.instanceId))
100	116
101	117		val (stagingTaskId, _) = stagingInstance.tasksMap.head
102	118		verify(f.driver, timeout(f.killConfig.killRetryTimeout.toMillis.toInt * 2)).killTask(stagingTaskId.mesosTaskId)
103	119		noMoreInteractions(f.driver)
104	120
105	121		f.publishInstanceChanged(TaskStatusUpdateTestHelper.killed(runningInstance).wrapped)
106	122		f.publishInstanceChanged(TaskStatusUpdateTestHelper.gone(unreachableInstance).wrapped)
	123
	124		promise.future.isReadyWithin(1 second) should be (false) withClue "Kill service completed promise before all tasks have been killed."
	125
107	126		f.publishInstanceChanged(TaskStatusUpdateTestHelper.unreachable(stagingInstance).wrapped)
108	127
109	128		promise.future.futureValue should be (Done)
110	129		}
111	130		}
112	131
113	132		"asked to kill multiple tasks at once with an empty list" should {
114	133		"issue no kill" in withActor(defaultConfig) { (f, actor) =>
▲ Show 20 Lines • Show All 247 Lines • Show Last 20 Lines

View Options

src/test/scala/mesosphere/marathon/core/task/termination/impl/KillStreamWatcherTest.scala

This file was deleted.

This file was completely deleted. Show File Contents

View Options

src/test/scala/mesosphere/marathon/integration/setup/MarathonTest.scala

Show All 23 Lines
24	24	import mesosphere.marathon.integration.facades._
25	25	import mesosphere.marathon.raml.{ App, AppDockerVolume, Network, NetworkMode, AppHealthCheck, PodState, PodStatus, ReadMode }
26	26	import mesosphere.marathon.state.PathId
27	27	import mesosphere.marathon.util.{ Lock, Retry }
28	28	import mesosphere.util.PortAllocator
29	29	import org.apache.commons.io.FileUtils
30	30	import org.scalatest.concurrent.{ Eventually, ScalaFutures }
31	31	import org.scalatest.exceptions.TestFailedDueToTimeoutException
32		import org.scalatest.time.{ Milliseconds, Span }
	32	import org.scalatest.time.{ Milliseconds, Second, Seconds, Span }
33	33	import org.scalatest.{ BeforeAndAfterAll, Suite }
34	34	import play.api.libs.json.{ JsObject, Json }
35	35
36	36	import scala.annotation.tailrec
37	37	import scala.async.Async.{ async, await }
38	38	import scala.collection.mutable
39	39	import scala.concurrent.duration._
40	40	import scala.concurrent.{ ExecutionContext, Future }
▲ Show 20 Lines • Show All 407 Lines • ▼ Show 20 Line(s)
448	448	logger.info(">>> Starting to CLEAN UP...")
449	449	events.clear()
450	450
451	451	// Wait for a clean slate in Marathon, if there is a running deployment or a runSpec exists
452	452	logger.info("Clean Marathon State")
453	453	//do not fail here, since the require statements will ensure a correct setup and fail otherwise
454	454	Try(waitForDeployment(eventually(marathon.deleteGroup(testBasePath, force = true))))
455	455
456		WaitTestSupport.waitUntil("clean slate in Mesos", patienceConfig.timeout.toMillis.millis) {
	456	WaitTestSupport.waitUntil("clean slate in Mesos") {
457	457	val occupiedAgents = mesos.state.value.agents.filter { agent => agent.usedResources.nonEmpty || agent.reservedResourcesByRole.nonEmpty }
458	458	occupiedAgents.foreach { agent =>
459	459	import mesosphere.marathon.integration.facades.MesosFormats._
460	460	val usedResources: String = Json.prettyPrint(Json.toJson(agent.usedResources))
461	461	val reservedResources: String = Json.prettyPrint(Json.toJson(agent.reservedResourcesByRole))
462	462	logger.info(s"""Waiting for blank slate Mesos...\n "used_resources": "$usedResources"\n"reserved_resources": "$reservedResources"""")
463	463	}
464	464	occupiedAgents.isEmpty
465		}
	465	}(WaitTestSupport.PatienceConfig(timeout = Span(30, Seconds), interval = Span(1, Second)))
466	466
467	467	val apps = marathon.listAppsInBaseGroup
468	468	require(apps.value.isEmpty, s"apps weren't empty: ${apps.entityPrettyJsonString}")
469	469	val pods = marathon.listPodsInBaseGroup
470	470	require(pods.value.isEmpty, s"pods weren't empty: ${pods.entityPrettyJsonString}")
471	471	val groups = marathon.listGroupsInBaseGroup
472	472	require(groups.value.isEmpty, s"groups weren't empty: ${groups.entityPrettyJsonString}")
473	473	events.clear()
▲ Show 20 Lines • Show All 381 Lines • Show Last 20 Lines

timcharper
zen-dog
meichstedt
jenkins

Buildable 3196
Build 6095: Marathon (revised)	Jenkins
Build 6094: arc lint + arc unit

		Path
M		src/main/scala/mesosphere/marathon/MarathonSchedulerActor.scala (3 lines)
M		src/main/scala/mesosphere/marathon/api/TaskKiller.scala (10 lines)
M		src/main/scala/mesosphere/marathon/core/deployment/impl/DeploymentActor.scala (5 lines)
M		src/main/scala/mesosphere/marathon/core/instance/update/InstanceUpdater.scala (6 lines)
M		src/main/scala/mesosphere/marathon/core/matcher/reconcile/impl/OfferMatcherReconciler.scala (16 lines)
M		src/main/scala/mesosphere/marathon/core/task/jobs/impl/OverdueTasksActor.scala (15 lines)
M		src/main/scala/mesosphere/marathon/core/task/termination/impl/KillAction.scala (13 lines)
M		src/main/scala/mesosphere/marathon/core/task/termination/impl/KillServiceActor.scala (56 lines)
M		src/main/scala/mesosphere/marathon/core/task/termination/impl/KillServiceDelegate.scala (13 lines)
D	M	src/main/scala/mesosphere/marathon/core/task/termination/impl/KillStreamWatcher.scala (96 lines)
M		src/main/scala/mesosphere/marathon/core/task/tracker/InstanceTracker.scala (10 lines)
M		src/main/scala/mesosphere/marathon/core/task/tracker/impl/InstanceOpProcessorImpl.scala (1 line)
M		src/main/scala/mesosphere/marathon/core/task/tracker/impl/InstanceTrackerActor.scala (11 lines)
M		src/test/resources/logback-test.xml (2 lines)
M		src/test/scala/mesosphere/marathon/core/task/termination/impl/KillServiceActorTest.scala (19 lines)
D	M	src/test/scala/mesosphere/marathon/core/task/termination/impl/KillStreamWatcherTest.scala (40 lines)
M		src/test/scala/mesosphere/marathon/integration/setup/MarathonTest.scala (6 lines)

Do not kill terminated tasks of reserved instances.
AbandonedAll Users
Actions

Details

Diff Detail

Unit TestsFailed
View All

(๑′°︿°๑)

Revision Contents

Diff 3888

src/main/scala/mesosphere/marathon/MarathonSchedulerActor.scala

src/main/scala/mesosphere/marathon/api/TaskKiller.scala

src/main/scala/mesosphere/marathon/core/deployment/impl/DeploymentActor.scala

src/main/scala/mesosphere/marathon/core/instance/update/InstanceUpdater.scala

src/main/scala/mesosphere/marathon/core/matcher/reconcile/impl/OfferMatcherReconciler.scala

src/main/scala/mesosphere/marathon/core/task/jobs/impl/OverdueTasksActor.scala

src/main/scala/mesosphere/marathon/core/task/termination/impl/KillAction.scala

src/main/scala/mesosphere/marathon/core/task/termination/impl/KillServiceActor.scala

src/main/scala/mesosphere/marathon/core/task/termination/impl/KillServiceDelegate.scala

src/main/scala/mesosphere/marathon/core/task/termination/impl/KillStreamWatcher.scala

src/main/scala/mesosphere/marathon/core/task/tracker/InstanceTracker.scala

src/main/scala/mesosphere/marathon/core/task/tracker/impl/InstanceOpProcessorImpl.scala

src/main/scala/mesosphere/marathon/core/task/tracker/impl/InstanceTrackerActor.scala

src/test/resources/logback-test.xml

src/test/scala/mesosphere/marathon/core/task/termination/impl/KillServiceActorTest.scala

src/test/scala/mesosphere/marathon/core/task/termination/impl/KillStreamWatcherTest.scala

src/test/scala/mesosphere/marathon/integration/setup/MarathonTest.scala

Do not kill terminated tasks of reserved instances.AbandonedAll UsersActions

Details

Diff Detail

Unit TestsFailedView All

(๑′°︿°๑)

Revision Contents

Diff 3888

src/main/scala/mesosphere/marathon/MarathonSchedulerActor.scala

src/main/scala/mesosphere/marathon/api/TaskKiller.scala

src/main/scala/mesosphere/marathon/core/deployment/impl/DeploymentActor.scala

src/main/scala/mesosphere/marathon/core/instance/update/InstanceUpdater.scala

src/main/scala/mesosphere/marathon/core/matcher/reconcile/impl/OfferMatcherReconciler.scala

src/main/scala/mesosphere/marathon/core/task/jobs/impl/OverdueTasksActor.scala

src/main/scala/mesosphere/marathon/core/task/termination/impl/KillAction.scala

src/main/scala/mesosphere/marathon/core/task/termination/impl/KillServiceActor.scala

src/main/scala/mesosphere/marathon/core/task/termination/impl/KillServiceDelegate.scala

src/main/scala/mesosphere/marathon/core/task/termination/impl/KillStreamWatcher.scala

src/main/scala/mesosphere/marathon/core/task/tracker/InstanceTracker.scala

src/main/scala/mesosphere/marathon/core/task/tracker/impl/InstanceOpProcessorImpl.scala

src/main/scala/mesosphere/marathon/core/task/tracker/impl/InstanceTrackerActor.scala

src/test/resources/logback-test.xml

src/test/scala/mesosphere/marathon/core/task/termination/impl/KillServiceActorTest.scala

src/test/scala/mesosphere/marathon/core/task/termination/impl/KillStreamWatcherTest.scala

src/test/scala/mesosphere/marathon/integration/setup/MarathonTest.scala

Do not kill terminated tasks of reserved instances.
AbandonedAll Users
Actions

Unit TestsFailed
View All