use poll timeout in es ctx #3986

Draft · wants to merge 4 commits into base: main · Changes from 1 commit
5 changes: 4 additions & 1 deletion internal/pkg/api/handleCheckin.go
@@ -337,6 +337,9 @@ func (ct *CheckinT) ProcessRequest(zlog zerolog.Logger, w http.ResponseWriter, r
actions, ackToken = convertActions(zlog, agent.Id, pendingActions)

span, ctx := apm.StartSpan(r.Context(), "longPoll", "process")
ctx, cancel := context.WithTimeout(ctx, pollDuration)

Member:

Interesting: evaluating the timeouts and lifetimes of all the requests in here is actually challenging, and I'm not sure they are right.

This context is not obviously tied to the actual network requests. Also, at this point we are past auth, which should probably also respect a timeout, and it's not clear that it is tied to the poll duration either.

What this does is cause us to hit the ctx.Done() block below, which triggers the ct.writeResponse call. That call is a network operation that should also have a timeout, but it can't be tied to this context because the context has already expired, so we'd need a different one.
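
A minimal sketch of the kind of separate write deadline that would be needed there, assuming a hypothetical writeTimeout value; this is not something the PR adds:

// Sketch only: the poll context has already hit its deadline, so derive a
// fresh short-lived context for the final write instead of reusing ctx.
writeCtx, writeCancel := context.WithTimeout(context.Background(), writeTimeout) // writeTimeout is hypothetical
defer writeCancel()
// writeCtx would then have to be threaded into ct.writeResponse, which today
// only sees the request (and its expired context) via r.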

Member:

Looking closer, this context deadline actually changes the checkin logic without setting a deadline on any of the underlying network operations or interactions with ES.

There is already a ticker for the long poll duration:

// Chill out for a bit. Long poll.
longPoll := time.NewTicker(pollDuration)
defer longPoll.Stop()

It causes us to hit the CheckIn method here:

case <-tick.C:
	err := ct.bc.CheckIn(agent.Id, string(req.Status), req.Message, nil, rawComponents, nil, ver, unhealthyReason, false)
	if err != nil {
		zlog.Error().Err(err).Str(logger.AgentID, agent.Id).Msg("checkin failed")
	}
}

So all setting this context deadline does is get us to this block, but only if we aren't already in the CheckIn method:

case <-ctx.Done():
	defer span.End()
	// If the request context is canceled, the API server is shutting down.
	// We want to immediately stop the long-poll and return a 200 with the ackToken and no actions.
	if errors.Is(ctx.Err(), context.Canceled) {
		resp := CheckinResponse{
			AckToken: &ackToken,
			Action:   "checkin",
		}
		return ct.writeResponse(zlog, w, r, agent, resp)
	}
	return ctx.Err()

If both <-ctx.Done() and <-tick.C are pending at the same time, the Go runtime will randomly choose which case is taken, so the behavior here isn't even deterministic.
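
As a standalone illustration of that last point (not code from this repo): when more than one select case is ready, Go picks one pseudo-randomly.

package main

import "fmt"

func main() {
	a := make(chan struct{}, 1)
	b := make(chan struct{}, 1)
	a <- struct{}{}
	b <- struct{}{}

	// Both cases are ready here, so the runtime chooses one at random;
	// running this repeatedly flips between the two outputs.
	select {
	case <-a:
		fmt.Println("took <-a (think <-ctx.Done())")
	case <-b:
		fmt.Println("took <-b (think <-tick.C)")
	}
}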

Contributor Author:

Thanks for looking into it. Do you think there is a way to enforce the pollDuration on the underlying network requests? Maybe someone from the Control Plane team can spend some time on this; it seems to be the reason some drones are getting stuck in a failed checkin state.

Member:

Looking closer, if we are in this loop, the only place where we interact directly with ES is in processPolicy, and putting the deadline on the context solves that case:

actionResp, err := processPolicy(ctx, zlog, ct.bulker, agent.Id, policy)

If I follow this all the way down the call stack to where the actual search call happens, the context should prevent us from waiting forever for a response:

func (b *Bulker) dispatch(ctx context.Context, blk *bulkT) respT {
	start := time.Now()
	// Dispatch to bulk Run loop
	select {
	case b.ch <- blk:
	case <-ctx.Done():

The context that is actually on the underlying ES network request is the one in the bulker Run function:

if err := b.flushQueue(ctx, w, *q); err != nil {

The context for this appears to just be tied to context.Background in multiple places:

// Bulker is started in its own context and managed in the scope of this function. This is done so
// when the `ctx` is cancelled, the bulker will remain executing until this function exits.
// This allows the child subsystems to continue to write to the data store while tearing down.
bulkCtx, bulkCancel := context.WithCancel(context.Background())
defer bulkCancel()

bulkCtx, bulkCancel := context.WithCancel(context.Background())
es, err := b.createRemoteEsClient(bulkCtx, outputName, outputMap)
if err != nil {
	defer bulkCancel()
	return nil, hasConfigChanged, err
}
// starting a new bulker to create/update API keys for remote ES output
newBulker := NewBulker(es, b.tracer)
newBulker.cancelFn = bulkCancel
b.updateBulkerMap(outputName, newBulker)
errCh := make(chan error)
go func() {
	runFunc := func() (err error) {
		zlog.Debug().Str(logger.PolicyOutputName, outputName).Msg("Bulker started")
		return newBulker.Run(bulkCtx)

It does look like there may be a default 90s timeout on the underlying ES client, but I don't see this actually being called anywhere (possible I missed it):

func (c *Elasticsearch) InitDefaults() {
	c.Protocol = schemeHTTP
	c.Hosts = []string{"localhost:9200"}
	c.Timeout = 90 * time.Second

There are lots of places that could be the problem; TBH I'd just add more logging or spans until we definitively know exactly where we get stuck when these 28+ minute checkins happen.
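
For what it's worth, a sketch of the kind of extra logging meant here, wrapped around the processPolicy call in the long-poll loop; the field names are made up and this is not part of the PR:

// Hypothetical instrumentation: time processPolicy so slow or stuck checkins
// show up in the logs with the agent ID attached.
pStart := time.Now()
actionResp, err := processPolicy(ctx, zlog, ct.bulker, agent.Id, policy)
zlog.Debug().
	Str(logger.AgentID, agent.Id).
	Dur("processPolicy.took", time.Since(pStart)).
	Err(err).
	Msg("processPolicy returned")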

Member:

> Maybe someone from the Control Plane team can spend some time on this; it seems to be the reason some drones are getting stuck in a failed checkin state.

For now that person is me; let's avoid context switching someone else in while we narrow down what is actually wrong and can evaluate the effort to fix it.

Contributor Author (@juliaElastic, Oct 9, 2024):

We could log the actual timeout being used, to see whether the default is applied. It could be logged out here:

Int("cluster.maxConnsPersHost", mcph).

Dur("cluster.timeout", cfg.Output.Elasticsearch.Timeout).

I logged it out and it seems to be 90s as defined.
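
Put together it would look roughly like this; the surrounding Info() chain and message are assumptions based on the existing maxConnsPersHost field, not the actual code:

// Hypothetical: log the effective timeout next to the existing
// connection-pool field so the value is visible at startup.
zlog.Info().
	Int("cluster.maxConnsPersHost", mcph).
	Dur("cluster.timeout", cfg.Output.Elasticsearch.Timeout).
	Msg("elasticsearch output configuration")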

defer cancel()

if len(actions) == 0 {
LOOP:
for {
@@ -368,7 +371,7 @@ func (ct *CheckinT) ProcessRequest(zlog zerolog.Logger, w http.ResponseWriter, r
actions = append(actions, *actionResp)
break LOOP
case <-longPoll.C:
zlog.Trace().Msg("fire long poll")
zlog.Debug().Str(logger.AgentID, agent.Id).Msg("fire long poll")
break LOOP
case <-tick.C:
err := ct.bc.CheckIn(agent.Id, string(req.Status), req.Message, nil, rawComponents, nil, ver, unhealthyReason, false)

Contributor:

It doesn't seem likely that it gets stuck here, as ct.bc.CheckIn just grabs a lock and adds to a map. But again, it's possible that there is a deadlock here and that the lock is held and never freed, so it shouldn't be ruled out.
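
For context, a simplified sketch of the "grabs a lock and adds to a map" pattern being described (not the actual Bulk checkin code); a hang here would require some other holder of the mutex to never release it:

package checkin // illustrative only

import "sync"

type pendingData struct{ status, message string }

type checkinBuffer struct {
	mu      sync.Mutex
	pending map[string]pendingData
}

// CheckIn only takes the mutex and records the pending state, so it should
// return almost immediately unless mu is held elsewhere and never unlocked.
func (b *checkinBuffer) CheckIn(id, status, message string) error {
	b.mu.Lock()
	defer b.mu.Unlock()
	b.pending[id] = pendingData{status: status, message: message}
	return nil
}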
