From 9585870156cbf001b9bfbe601ddbadcd92f2f245 Mon Sep 17 00:00:00 2001 From: Baptiste Date: Mon, 6 Jan 2025 11:21:03 +0000 Subject: [PATCH] debug dind --- .github/workflows/debug-dind.yml | 54 +++++++++++++++++--------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/.github/workflows/debug-dind.yml b/.github/workflows/debug-dind.yml index dc339ec1..b282d24f 100644 --- a/.github/workflows/debug-dind.yml +++ b/.github/workflows/debug-dind.yml @@ -21,15 +21,40 @@ jobs: apt-get update -y apt-get install -y docker.io + # @pauline, everything below are alternative tests that show the same behavior (bug) + # this is the most concise test that shows the behavior and i think it is the most straightforward to understand + # More or less when doing DIND, i cannot get the container to be reachable from the runner + # test-pytorch-xla-tpu-tgi-integration.yml is my full workflow where i first noticed this behavior + # test-pytorch-xla-tpu-tgi-integration.yml starts a TGI webserver and then sends requests to it to check that it's working, this is my end goal + # this is a minimal test to see if i can get the container to be reachable from the runner and it does not work + # i'm trying to find out why the container is not reachable from the runner, do you have any idea why? + + - name: Test HTTP request + run: | + # Run the whoami container with environment variables + # @pauline adding --ipc host, --privileged and/or --network host did not help me. I also tried to use different ports 80, 8080, 5001 + docker run ${{ vars.V5_LITEPOD_8_ENV}} -d --name network-test -p 5001:80 traefik/whoami + + # Wait for container to be ready + sleep 5 + + # Show container status and logs + docker ps + docker logs network-test + + # Test the basic endpoint to verify connectivity + echo "Testing basic endpoint..." 
+ curl --max-time 30 -v localhost:5001 + # OPTION 1: alternative test 1 that shows the same behavior # the run test step is similar to "Create test server Dockerfile", "Create minimal test server", "Build and run test container" - # but it's not in a single step inside a python script so it can be run locally - # this scripts work on the local machine, not in the github actions environment - # i left commented out for now, as "Create test server Dockerfile", "Create minimal test server", "Build and run test container" maybe be easier to debug in a ci environment + # but it's in a single step inside a python script so it can be run locally + # this script works on the local machine but not in the github actions environment and this is not expected, the behavior should be the same # - name: Run test # run: | # python debug-dind-locally/test.py + # OPTION 2: alternative test 2 that shows the same behavior (until end of the file) # - name: Create test server Dockerfile # run: | # cat << EOF > Dockerfile @@ -83,26 +108,3 @@ jobs: # # Clean up # docker stop test-server - # @pauline, everything above is alternative tests that show the same behavior (bug) - # More or less when doing DIND, i cannot get the container to be reachable from the runner - # test-pytorch-xla-tpu-tgi-integration.yml is my full workflow when i first notice this behavior - # test-pytorch-xla-tpu-tgi-integration.yml starts a TGI webserver and then do requests to it to check that it's working, this is my end goal - # this is a minimal test to see if i can get the container to be reachable from the runner and it does not work - # i'm trying to find out why the container is not reachable from the runner, do you have any idea why? - - - name: Test HTTP request - run: | - # Run the whoami container with environment variables - # @pauline adding --ipc host, --privileged and/or --network host did not help me. 
I also tried to use different ports 80, 8080, 5001 - docker run ${{ vars.V5_LITEPOD_8_ENV}} -d --name network-test -p 5001:80 traefik/whoami - - # Wait for container to be ready - sleep 5 - - # Show container status and logs - docker ps - docker logs network-test - - # Test with multiple endpoints to verify connectivity - echo "Testing basic endpoint..." - curl --max-time 30 -v localhost:5001 \ No newline at end of file