NOISSUE - Fix handling of runreq chunks #234

Merged: 3 commits, Sep 6, 2024
2 changes: 1 addition & 1 deletion .github/workflows/checkproto.yaml
@@ -33,7 +33,7 @@ jobs:

       - name: Set up protoc
         run: |
-          PROTOC_VERSION=27.2
+          PROTOC_VERSION=27.3
           PROTOC_GEN_VERSION=v1.34.2
           PROTOC_GRPC_VERSION=v1.4.0

2 changes: 1 addition & 1 deletion agent/agent.pb.go

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion agent/agent_grpc.pb.go

Some generated files are not rendered by default.

96 changes: 76 additions & 20 deletions manager/api/grpc/client.go
@@ -3,9 +3,9 @@
 package grpc

 import (
-	"bytes"
 	"context"
 	"log/slog"
+	"sync"
 	"time"

 	"github.com/absmach/magistrala/pkg/errors"
@@ -22,19 +22,21 @@ var (
 )

 type ManagerClient struct {
-	stream       pkgmanager.ManagerService_ProcessClient
-	svc          manager.Service
-	messageQueue chan *pkgmanager.ClientStreamMessage
-	logger       *slog.Logger
+	stream        pkgmanager.ManagerService_ProcessClient
+	svc           manager.Service
+	messageQueue  chan *pkgmanager.ClientStreamMessage
+	logger        *slog.Logger
+	runReqManager *runRequestManager
 }

 // NewClient returns new gRPC client instance.
 func NewClient(stream pkgmanager.ManagerService_ProcessClient, svc manager.Service, messageQueue chan *pkgmanager.ClientStreamMessage, logger *slog.Logger) ManagerClient {
 	return ManagerClient{
-		stream:       stream,
-		svc:          svc,
-		messageQueue: messageQueue,
-		logger:       logger,
+		stream:        stream,
+		svc:           svc,
+		messageQueue:  messageQueue,
+		logger:        logger,
+		runReqManager: newRunRequestManager(),
 	}
 }

@@ -53,7 +55,6 @@ func (client ManagerClient) Process(ctx context.Context, cancel context.CancelFunc) error {
 }

 func (client ManagerClient) handleIncomingMessages(ctx context.Context) error {
-	var runReqBuffer bytes.Buffer
 	for {
 		select {
 		case <-ctx.Done():
@@ -63,39 +64,42 @@ func (client ManagerClient) handleIncomingMessages(ctx context.Context) error {
 			if err != nil {
 				return err
 			}
-			if err := client.processIncomingMessage(ctx, req, &runReqBuffer); err != nil {
+			if err := client.processIncomingMessage(ctx, req); err != nil {
 				return err
 			}
 		}
 	}
 }

-func (client ManagerClient) processIncomingMessage(ctx context.Context, req *pkgmanager.ServerStreamMessage, runReqBuffer *bytes.Buffer) error {
+func (client ManagerClient) processIncomingMessage(ctx context.Context, req *pkgmanager.ServerStreamMessage) error {
 	switch mes := req.Message.(type) {
 	case *pkgmanager.ServerStreamMessage_RunReqChunks:
-		return client.handleRunReqChunks(ctx, mes, runReqBuffer)
+		return client.handleRunReqChunks(ctx, mes)
 	case *pkgmanager.ServerStreamMessage_TerminateReq:
 		return client.handleTerminateReq(mes)
 	case *pkgmanager.ServerStreamMessage_StopComputation:
 		go client.handleStopComputation(ctx, mes)
 	case *pkgmanager.ServerStreamMessage_BackendInfoReq:
-		go client.handleBackendInfoReq(ctx, mes)
+		go client.handleBackendInfoReq(mes)
 	default:
 		return errors.New("unknown message type")
 	}
 	return nil
 }

-func (client ManagerClient) handleRunReqChunks(ctx context.Context, mes *pkgmanager.ServerStreamMessage_RunReqChunks, runReqBuffer *bytes.Buffer) error {
-	if len(mes.RunReqChunks.Data) == 0 {
+func (client *ManagerClient) handleRunReqChunks(ctx context.Context, mes *pkgmanager.ServerStreamMessage_RunReqChunks) error {
+	buffer, complete := client.runReqManager.addChunk(mes.RunReqChunks.Id, mes.RunReqChunks.Data, mes.RunReqChunks.IsLast)
+
+	if complete {
 		var runReq pkgmanager.ComputationRunReq
-		if err := proto.Unmarshal(runReqBuffer.Bytes(), &runReq); err != nil {
+		if err := proto.Unmarshal(buffer, &runReq); err != nil {
 			return errors.Wrap(err, errCorruptedManifest)
 		}

 		go client.executeRun(ctx, &runReq)
 	}
-	_, err := runReqBuffer.Write(mes.RunReqChunks.Data)
-	return err
+
+	return nil
 }

 func (client ManagerClient) executeRun(ctx context.Context, runReq *pkgmanager.ComputationRunReq) {
@@ -129,7 +133,7 @@ func (client ManagerClient) handleStopComputation(ctx context.Context, mes *pkgmanager.ServerStreamMessage_StopComputation) {
 	client.sendMessage(&pkgmanager.ClientStreamMessage{Message: msg})
 }

-func (client ManagerClient) handleBackendInfoReq(ctx context.Context, mes *pkgmanager.ServerStreamMessage_BackendInfoReq) {
+func (client ManagerClient) handleBackendInfoReq(mes *pkgmanager.ServerStreamMessage_BackendInfoReq) {
 	res, err := client.svc.FetchBackendInfo()
 	if err != nil {
 		client.logger.Warn(err.Error())
@@ -167,3 +171,55 @@ func (client ManagerClient) sendMessage(mes *pkgmanager.ClientStreamMessage) {
 		client.logger.Warn("Failed to send message: timeout exceeded")
 	}
 }
+
+type runRequestManager struct {
+	requests map[string]*runRequest
+	mu       sync.Mutex
+}
+
+type runRequest struct {
+	buffer    []byte
+	lastChunk time.Time
+	timer     *time.Timer
+}
+
+func newRunRequestManager() *runRequestManager {
+	return &runRequestManager{
+		requests: make(map[string]*runRequest),
+	}
+}
+
+func (m *runRequestManager) addChunk(id string, chunk []byte, isLast bool) ([]byte, bool) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	req, exists := m.requests[id]
+	if !exists {
+		req = &runRequest{
+			buffer:    make([]byte, 0),
+			lastChunk: time.Now(),
+			timer:     time.AfterFunc(runReqTimeout, func() { m.timeoutRequest(id) }),
+		}
+		m.requests[id] = req
+	}
+
+	req.buffer = append(req.buffer, chunk...)
+	req.lastChunk = time.Now()
+	req.timer.Reset(runReqTimeout)
+
+	if isLast {
+		delete(m.requests, id)
+		req.timer.Stop()
+		return req.buffer, true
+	}
+
+	return nil, false
+}
+
+func (m *runRequestManager) timeoutRequest(id string) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	delete(m.requests, id)
+	// Log timeout or handle it as needed
+}
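
Reviewer note: the new runRequestManager keys every in-flight run request by its chunk ID, so chunked manifests for different computations can arrive interleaved on one stream without corrupting each other, and a stalled transfer is dropped after runReqTimeout. A minimal usage sketch from inside the same package (illustrative only; the IDs and payloads are made up, and the fmt import is assumed):

	m := newRunRequestManager()

	m.addChunk("req-a", []byte("hello "), false) // first chunk of request A
	m.addChunk("req-b", []byte("other"), false)  // request B interleaves on the same stream
	m.addChunk("req-a", []byte("world"), false)

	// The closing chunk for A returns its fully assembled buffer;
	// B stays buffered until its own IsLast chunk arrives (or its timer fires).
	buf, complete := m.addChunk("req-a", nil, true)
	fmt.Println(complete, string(buf)) // prints: true hello world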
18 changes: 14 additions & 4 deletions manager/api/grpc/server.go
@@ -7,6 +7,7 @@ import (
 	"context"
 	"errors"
 	"io"
+	"time"

 	"github.com/ultravioletrs/cocos/pkg/manager"
 	"golang.org/x/sync/errgroup"
@@ -20,7 +21,10 @@ var (
 	ErrUnexpectedMsg = errors.New("unknown message type")
 )

-const bufferSize = 1024 * 1024 // 1 MB
+const (
+	bufferSize    = 1024 * 1024 // 1 MB
+	runReqTimeout = 30 * time.Second
+)

 type SendFunc func(*manager.ServerStreamMessage) error

@@ -89,14 +93,20 @@ func (s *grpcServer) sendRunReqInChunks(stream manager.ManagerService_ProcessServer, runReq *manager.ComputationRunReq) error {

 	for {
 		n, err := dataBuffer.Read(buf)
-		if err != nil && err != io.EOF {
+		isLast := false
+
+		if err == io.EOF {
+			isLast = true
+		} else if err != nil {
 			return err
 		}

 		chunk := &manager.ServerStreamMessage{
 			Message: &manager.ServerStreamMessage_RunReqChunks{
 				RunReqChunks: &manager.RunReqChunks{
-					Data: buf[:n],
+					Id:     runReq.Id,
+					Data:   buf[:n],
+					IsLast: isLast,
 				},
 			},
 		}
@@ -105,7 +115,7 @@ func (s *grpcServer) sendRunReqInChunks(stream manager.ManagerService_ProcessServer, runReq *manager.ComputationRunReq) error {
 			return err
 		}

-		if err == io.EOF {
+		if isLast {
 			break
 		}
 	}
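
The IsLast flag rides on the read contract of bytes.Buffer: Read returns io.EOF only once the buffer is drained, and then with n == 0, so the terminating message is an empty chunk whose only payload is IsLast=true. The old code instead relied on the receiver spotting that empty chunk (the len(Data) == 0 check removed from client.go above), which breaks down once chunks from several requests share a stream. A standalone illustration of the Read contract (example data, not PR code):

	package main

	import (
		"bytes"
		"fmt"
		"io"
	)

	func main() {
		dataBuffer := bytes.NewBuffer([]byte("abcdef"))
		buf := make([]byte, 4) // stand-in for the 1 MB bufferSize

		for {
			n, err := dataBuffer.Read(buf)
			fmt.Printf("n=%d err=%v\n", n, err) // n=4 <nil>, then n=2 <nil>, then n=0 EOF
			if err == io.EOF {
				break
			}
		}
	}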
2 changes: 2 additions & 0 deletions manager/manager.proto
@@ -74,6 +74,8 @@ message ServerStreamMessage {

 message RunReqChunks {
   bytes data = 1;
+  string id = 2;
+  bool is_last = 3;
 }

 message ComputationRunReq {
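
Because the two new fields use fresh tag numbers, the message stays wire-compatible: a peer built against the old schema parses them as unknown fields. For reference, protoc-gen-go renders the updated message as plain struct fields, roughly as below (abridged sketch; exact tags depend on the generator version):

	type RunReqChunks struct {
		Data   []byte `protobuf:"bytes,1,opt,name=data,proto3" json:"data,omitempty"`
		Id     string `protobuf:"bytes,2,opt,name=id,proto3" json:"id,omitempty"`
		IsLast bool   `protobuf:"varint,3,opt,name=is_last,json=isLast,proto3" json:"is_last,omitempty"`
	}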
13 changes: 13 additions & 0 deletions manager/qemu/vm.go
@@ -8,6 +8,7 @@ import (
 	"os/exec"

 	"github.com/gofrs/uuid"
+	"github.com/ultravioletrs/cocos/internal"
 	"github.com/ultravioletrs/cocos/manager/vm"
 	"github.com/ultravioletrs/cocos/pkg/manager"
 )
@@ -16,6 +17,7 @@ const (
 	firmwareVars = "OVMF_VARS"
 	KernelFile   = "bzImage"
 	rootfsFile   = "rootfs.cpio"
+	tmpDir       = "/tmp"
 )

 type qemuVM struct {
@@ -43,6 +45,17 @@ func (v *qemuVM) Start() error {
 	v.config.NetDevConfig.ID = fmt.Sprintf("%s-%s", v.config.NetDevConfig.ID, id)
 	v.config.SevConfig.ID = fmt.Sprintf("%s-%s", v.config.SevConfig.ID, id)

+	if !v.config.KernelHash {
+		// Copy firmware vars file.
+		srcFile := v.config.OVMFVarsConfig.File
+		dstFile := fmt.Sprintf("%s/%s-%s.fd", tmpDir, firmwareVars, id)
+		err = internal.CopyFile(srcFile, dstFile)
+		if err != nil {
+			return err
+		}
+		v.config.OVMFVarsConfig.File = dstFile
+	}
+
 	exe, args, err := v.executableAndArgs()
 	if err != nil {
 		return err
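
This copy gives each VM instance its own OVMF vars file under /tmp, named with the VM's unique id, rather than booting every VM against one shared file. internal.CopyFile itself is not shown in this diff; a Go helper of that name typically looks like the following sketch (an assumption about its shape, not the repository's actual code; requires the io and os imports):

	// CopyFile copies src to dst, creating or truncating dst.
	// Hypothetical sketch of internal.CopyFile.
	func CopyFile(src, dst string) error {
		in, err := os.Open(src)
		if err != nil {
			return err
		}
		defer in.Close()

		out, err := os.Create(dst)
		if err != nil {
			return err
		}
		defer out.Close()

		_, err = io.Copy(out, in)
		return err
	}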