diff --git a/deployment/backstage/values.yaml b/deployment/backstage/values.yaml index a24a826..527f786 100644 --- a/deployment/backstage/values.yaml +++ b/deployment/backstage/values.yaml @@ -1,7 +1,6 @@ ingress: enabled: true host: "backstage.ingress.k8s.cn" - backstage: image: repository: liushaohui/backstage @@ -25,7 +24,6 @@ backstage: links: - url: https://discord.gg/backstage-687207715902193673 title: '#backstage' - backend: baseUrl: https://backstage.ingress.k8s.cn listen: @@ -46,87 +44,71 @@ backstage: - host: example.com - host: '*.mozilla.org' # workingDirectory: /tmp # Use this to configure a working directory for the scaffolder, defaults to the OS temp-dir - # See README.md in the proxy-backend plugin for information on the configuration format proxy: '/circleci/api': target: https://circleci.com/api/v1.1 headers: Circle-Token: ${CIRCLECI_AUTH_TOKEN} - '/jenkins/api': target: http://localhost:8080 headers: Authorization: ${JENKINS_BASIC_AUTH_HEADER} - '/travisci/api': target: https://api.travis-ci.com changeOrigin: true headers: Authorization: ${TRAVISCI_AUTH_TOKEN} travis-api-version: '3' - '/newrelic/apm/api': target: https://api.newrelic.com/v2 headers: X-Api-Key: ${NEW_RELIC_REST_API_KEY} - '/newrelic/api': target: https://api.newrelic.com headers: X-Api-Key: ${NEW_RELIC_USER_KEY} - '/pagerduty': target: https://api.pagerduty.com headers: Authorization: Token token=${PAGERDUTY_TOKEN} - '/buildkite/api': target: https://api.buildkite.com/v2/ headers: Authorization: ${BUILDKITE_TOKEN} - '/sentry/api': target: https://sentry.io/api/ allowedMethods: ['GET'] headers: Authorization: ${SENTRY_TOKEN} - '/ilert': target: https://api.ilert.com allowedMethods: ['GET', 'POST', 'PUT'] allowedHeaders: ['Authorization'] headers: Authorization: ${ILERT_AUTH_HEADER} - '/airflow': target: https://your.airflow.instance.com/api/v1 headers: Authorization: ${AIRFLOW_BASIC_AUTH_HEADER} - '/gocd': target: https://your.gocd.instance.com/go/api allowedMethods: ['GET'] allowedHeaders: ['Authorization'] headers: Authorization: Basic ${GOCD_AUTH_CREDENTIALS} - '/dynatrace': target: https://your.dynatrace.instance.com/api/v2 headers: Authorization: 'Api-Token ${DYNATRACE_ACCESS_TOKEN}' - '/stackstorm': target: https://your.stackstorm.instance.com/api headers: St2-Api-Key: ${ST2_API_KEY} - '/puppetdb': target: https://your.puppetdb.instance.com - organization: name: "QCraft" - # Reference documentation http://backstage.io/docs/features/techdocs/configuration # Note: After experimenting with basic setup, use CI/CD to generate docs # and an external cloud storage when deploying TechDocs for production use-case. @@ -139,36 +121,28 @@ backstage: # pullImage: true # or false to disable automatic pulling of image (e.g. if custom docker login is required) publisher: type: 'local' # Alternatives - 'googleGcs' or 'awsS3' or 'azureBlobStorage' or 'openStackSwift'. Read documentation for using alternatives. - dynatrace: baseUrl: https://your.dynatrace.instance.com - nomad: addr: 0.0.0.0 - # Score-cards sample configuration. scorecards: jsonDataUrl: https://raw.githubusercontent.com/Oriflame/backstage-plugins/main/plugins/score-card/sample-data/ wikiLinkTemplate: https://link-to-wiki/{id} - sentry: organization: my-company - rollbar: organization: my-company # NOTE: The rollbar-backend & accountToken key may be deprecated in the future (replaced by a proxy config) accountToken: my-rollbar-account-token - lighthouse: baseUrl: http://localhost:3003 - kubernetes: serviceLocatorMethod: type: 'multiTenant' clusterLocatorMethods: - type: 'config' clusters: [] - kafka: clientId: backstage clusters: @@ -176,10 +150,8 @@ backstage: dashboardUrl: https://akhq.io/ brokers: - localhost:9092 - allure: baseUrl: http://localhost:5050/allure-docker-service - integrations: github: - host: github.com @@ -214,7 +186,6 @@ backstage: - host: amazonaws.com accessKeyId: ${AWS_ACCESS_KEY_ID} secretAccessKey: ${AWS_SECRET_ACCESS_KEY} - catalog: import: entityFilename: catalog-info.yaml @@ -227,26 +198,24 @@ backstage: - System - Domain - Location - processors: ldapOrg: - ### Example for how to add your enterprise LDAP server - # providers: - # - target: ldaps://ds.example.net - # bind: - # dn: uid=ldap-reader-user,ou=people,ou=example,dc=example,dc=net - # secret: ${LDAP_SECRET} - # users: - # dn: ou=people,ou=example,dc=example,dc=net - # options: - # filter: (uid=*) - # map: - # description: l - # groups: - # dn: ou=access,ou=groups,ou=example,dc=example,dc=net - # options: - # filter: (&(objectClass=some-group-class)(!(groupType=email))) - + ### Example for how to add your enterprise LDAP server + # providers: + # - target: ldaps://ds.example.net + # bind: + # dn: uid=ldap-reader-user,ou=people,ou=example,dc=example,dc=net + # secret: ${LDAP_SECRET} + # users: + # dn: ou=people,ou=example,dc=example,dc=net + # options: + # filter: (uid=*) + # map: + # description: l + # groups: + # dn: ou=access,ou=groups,ou=example,dc=example,dc=net + # options: + # filter: (&(objectClass=some-group-class)(!(groupType=email))) locations: # Add a location here to ingest it, for example from a URL: # @@ -260,7 +229,7 @@ backstage: # # File locations are relative to the current working directory of the # backend, for example packages/backend/. - + # Backstage example entities - type: file target: ../catalog-model/examples/all.yaml @@ -287,13 +256,12 @@ backstage: - type: file target: ../../cypress/e2e-fixture.catalog.info.yaml scaffolder: - # Use to customize default commit author info used when new components are created - # defaultAuthor: - # name: Scaffolder - # email: scaffolder@backstage.io - # Use to customize the default commit message when new components are created - # defaultCommitMessage: 'Initial commit' - + # Use to customize default commit author info used when new components are created + # defaultAuthor: + # name: Scaffolder + # email: scaffolder@backstage.io + # Use to customize the default commit message when new components are created + # defaultCommitMessage: 'Initial commit' auth: environment: development costInsights: @@ -354,20 +322,15 @@ backstage: baseUrl: https://jenkins.example.com username: backstage-bot apiKey: 123456789abcdef0123456789abcedf012 - azureDevOps: host: dev.azure.com token: my-token organization: my-company - apacheAirflow: baseUrl: https://your.airflow.instance.com - gocd: baseUrl: https://your.gocd.instance.com - stackstorm: webUrl: https://your.stackstorm.webui.instance.com - permission: enabled: false diff --git a/deployment/foxglove/values.yaml b/deployment/foxglove/values.yaml index b24846e..5749caf 100644 --- a/deployment/foxglove/values.yaml +++ b/deployment/foxglove/values.yaml @@ -3,7 +3,6 @@ # Declare variables to be passed into your templates. replicaCount: 1 - ingress: enabled: true hosts: diff --git a/deployment/jupyterhub/values.yaml b/deployment/jupyterhub/values.yaml index cf8de4f..7fb18af 100644 --- a/deployment/jupyterhub/values.yaml +++ b/deployment/jupyterhub/values.yaml @@ -6,24 +6,19 @@ hub: tag: "2.0.0" pullPolicy: pullSecrets: [] - proxy: service: type: ClusterIP - ingress: enabled: true hosts: - - jupyterhub.ingress.k8s.cn - + - jupyterhub.ingress.k8s.cn cull: enabled: true adminUsers: false timeout: 3600 - debug: enabled: false - singleuser: podNameTemplate: extraTolerations: [] diff --git a/deployment/kuberay/values.yaml b/deployment/kuberay/values.yaml index 00b5588..5ada32a 100644 --- a/deployment/kuberay/values.yaml +++ b/deployment/kuberay/values.yaml @@ -2,13 +2,10 @@ image: repository: liushaohui/ray tag: 2.3.0 pullPolicy: Always - nameOverride: "bigdata" - ingress: enabled: true host: ray-cluster-bigdata.ingress.k8s.cn - head: resources: limits: @@ -17,7 +14,6 @@ head: requests: cpu: 4 memory: "8G" - worker: replicas: 1 minReplicas: 1 @@ -39,7 +35,6 @@ worker: requests: cpu: 4 memory: "16G" - additionalWorkerGroups: spotGroup: disabled: true @@ -50,28 +45,29 @@ additionalWorkerGroups: serviceAccountName: "" rayStartParams: block: 'true' - initContainerImage: 'busybox:1.28' # Enable users to specify the image for init container. Users can pull the busybox image from their private repositories. + initContainerImage: 'busybox:1.28' # Enable users to specify the image for init container. Users can pull the busybox image from their private repositories. # Security context for the init container. initContainerSecurityContext: {} - # containerEnv specifies environment variables for the Ray container, - # Follows standard K8s container env schema. + # containerEnv specifies environment variables for the Ray container, + # Follows standard K8s container env schema. containerEnv: [] - # - name: EXAMPLE_ENV - # value: "1" + # - name: EXAMPLE_ENV + # value: "1" envFrom: [] - # - secretRef: - # name: my-env-secret + # - secretRef: + # name: my-env-secret # ports optionally allows specifying ports for the Ray container. # ports: [] - # resource requests and limits for the Ray head container. - # Modify as needed for your application. - # Note that the resources in this example are much too small for production; - # we don't recommend allocating less than 8G memory for a Ray pod in production. - # Ray pods should be sized to take up entire K8s nodes when possible. - # Always set CPU and memory limits for Ray pods. - # It is usually best to set requests equal to limits. - # See https://docs.ray.io/en/latest/cluster/kubernetes/user-guides/config.html#resources - # for further guidance. + + # resource requests and limits for the Ray head container. + # Modify as needed for your application. + # Note that the resources in this example are much too small for production; + # we don't recommend allocating less than 8G memory for a Ray pod in production. + # Ray pods should be sized to take up entire K8s nodes when possible. + # Always set CPU and memory limits for Ray pods. + # It is usually best to set requests equal to limits. + # See https://docs.ray.io/en/latest/cluster/kubernetes/user-guides/config.html#resources + # for further guidance. resources: limits: cpu: 1 @@ -88,7 +84,7 @@ additionalWorkerGroups: volumes: - name: log-volume emptyDir: {} - # Ray writes logs to /tmp/ray/session_latests/logs + # Ray writes logs to /tmp/ray/session_latests/logs volumeMounts: - mountPath: /tmp/ray name: log-volume diff --git a/deployment/mlflow/values.yaml b/deployment/mlflow/values.yaml index 17d21ea..be84722 100644 --- a/deployment/mlflow/values.yaml +++ b/deployment/mlflow/values.yaml @@ -1,7 +1,6 @@ backendStore: databaseMigration: true databaseConnectionCheck: true - artifactRoot: proxiedArtifactStorage: false s3: @@ -15,7 +14,6 @@ artifactRoot: awsAccessKeyId: "" # (awsAccessKeyId and awsSecretAccessKey) or roleArn serviceaccount annotation required # -- AWS IAM user AWS_SECRET_ACCESS_KEY which has attached policy for access to the S3 bucket awsSecretAccessKey: "" # (awsAccessKeyId and awsSecretAccessKey) or roleArn serviceaccount annotation required - ingress: enabled: true hosts: @@ -23,4 +21,3 @@ ingress: paths: - path: / pathType: ImplementationSpecific - diff --git a/deployment/stable-diffusion/values.yaml b/deployment/stable-diffusion/values.yaml index da2686b..9175494 100644 --- a/deployment/stable-diffusion/values.yaml +++ b/deployment/stable-diffusion/values.yaml @@ -1,13 +1,10 @@ replicaCount: 1 - image: repository: liushaohui/stable-diffusion pullPolicy: Always tag: "latest" - models: sd14: https://huggingface.co/CompVis/stable-diffusion-v-1-4-original/resolve/main/sd-v1-4.ckpt - ingress: enabled: true hosts: @@ -15,10 +12,9 @@ ingress: paths: - path: / pathType: ImplementationSpecific - nodeSelector: app: "run-analysis-gpu" tolerations: -- key: "spots-node" - operator: "Exists" - effect: "NoExecute" + - key: "spots-node" + operator: "Exists" + effect: "NoExecute" diff --git a/deployment/wandb/values.yaml b/deployment/wandb/values.yaml index f7ab87c..b18d1be 100644 --- a/deployment/wandb/values.yaml +++ b/deployment/wandb/values.yaml @@ -1,10 +1,8 @@ image: repository: wandb/local pullPolicy: IfNotPresent - host: wandb.ingress.k8s.cn enableAdminApi: true - ingress: enabled: true hosts: diff --git a/experimental/pybind11_example/example_test.py b/experimental/pybind11_example/example_test.py index f691d0f..be9dcd9 100644 --- a/experimental/pybind11_example/example_test.py +++ b/experimental/pybind11_example/example_test.py @@ -4,6 +4,7 @@ class TestBasic(unittest.TestCase): + def test_add(self): self.assertEqual(example.add(1, 2), 3) self.assertEqual(example.add(2, 2), 4) diff --git a/experimental/python_example/python_context_manager.py b/experimental/python_example/python_context_manager.py index a2f3c6d..8bcdfb5 100644 --- a/experimental/python_example/python_context_manager.py +++ b/experimental/python_example/python_context_manager.py @@ -6,6 +6,7 @@ class ContextExample(ContextDecorator): + def __enter__(self): self.start = time.time() print("Starting") @@ -17,6 +18,7 @@ def __exit__(self, *exc): return False def __call__(self, func): + def wrapper(*args, **kwargs): source = inspect.getsource(func) print(f"Source code of the function: {source}") diff --git a/experimental/python_example/python_decorator.py b/experimental/python_example/python_decorator.py index 0080821..5a4be4d 100644 --- a/experimental/python_example/python_decorator.py +++ b/experimental/python_example/python_decorator.py @@ -5,18 +5,22 @@ def timer(func): + def wrapper(*args, **kwargs): start = time.time() ret = func(*args, **kwargs) end = time.time() exec_time = end - start - print(f"Function {func.__name__} took {exec_time:.2f} seconds to execute") + print( + f"Function {func.__name__} took {exec_time:.2f} seconds to execute" + ) return ret return wrapper def tracing(func): + def wrapper(*args, **kwargs): print("Something is happening before the function is called.") print(f"Function name: {func.__name__}") diff --git a/experimental/ros2_example/ros_chatter.py b/experimental/ros2_example/ros_chatter.py index 8214dd9..6f88ddd 100644 --- a/experimental/ros2_example/ros_chatter.py +++ b/experimental/ros2_example/ros_chatter.py @@ -6,16 +6,14 @@ def generate_launch_description(): """Launch a talker and a listener.""" - return launch.LaunchDescription( - [ - launch_ros.actions.Node( - # Provide the rootpath for the node. - executable="chatter/talker", - output="screen", - name="talker", - ), - launch_ros.actions.Node( - executable="chatter/listener", output="screen", name="listener" - ), - ] - ) + return launch.LaunchDescription([ + launch_ros.actions.Node( + # Provide the rootpath for the node. + executable="chatter/talker", + output="screen", + name="talker", + ), + launch_ros.actions.Node(executable="chatter/listener", + output="screen", + name="listener"), + ]) diff --git a/experimental/streamlit/clip/app.py b/experimental/streamlit/clip/app.py index 345d382..3597708 100644 --- a/experimental/streamlit/clip/app.py +++ b/experimental/streamlit/clip/app.py @@ -5,40 +5,46 @@ st.title('Clip demo from: https://github.com/openai/CLIP') + def load_image(image_file): img = Image.open(image_file) return img + def on_clip(tokens, input_image): device = "cuda" if torch.cuda.is_available() else "cpu" model, preprocess = clip.load("ViT-B/32", device=device) - + image = preprocess(input_image).unsqueeze(0).to(device) text = clip.tokenize(tokens).to(device) with torch.no_grad(): image_features = model.encode_image(image) text_features = model.encode_text(text) - + logits_per_image, logits_per_text = model(image, text) probs = logits_per_image.softmax(dim=-1).cpu().numpy() # print("Label probs:", probs) # prints: [[0.9927937 0.00421068 0.00299572]] # st.table(probs) st.write("Predict result:") for token, prob in zip(tokens, probs[0]): - st.write("%s : %.3f" %(token, prob)) + st.write("%s : %.3f" % (token, prob)) -image_file = st.file_uploader("Upload a image", type=["png","jpg","jpeg"]) + +image_file = st.file_uploader("Upload a image", type=["png", "jpg", "jpeg"]) if image_file is not None: - file_details = {"filename":image_file.name, "filetype":image_file.type, - "filesize":image_file.size} + file_details = { + "filename": image_file.name, + "filetype": image_file.type, + "filesize": image_file.size + } # st.write(file_details) - image = load_image(image_file); + image = load_image(image_file) st.image(image) - + text = st.text_area("Enter multi tokens, one line each") - + if text is not None: tokens = text.splitlines() # st.write(tokens) diff --git a/experimental/triton_example/triton_example.py b/experimental/triton_example/triton_example.py index 875e760..35083ae 100644 --- a/experimental/triton_example/triton_example.py +++ b/experimental/triton_example/triton_example.py @@ -32,8 +32,7 @@ def softmax(Y, stride_ym, stride_yn, X, stride_xm, stride_xn, M, N): X = torch.normal(0, 1, size=(583, 931), device="cuda") Y = torch.empty_like(X) # SPMD launch grid -grid = (X.shape[0],) +grid = (X.shape[0], ) # enqueue GPU kernel -softmax[grid]( - Y, Y.stride(0), Y.stride(1), X, X.stride(0), X.stride(1), X.shape[0], X.shape[1] -) +softmax[grid](Y, Y.stride(0), Y.stride(1), X, X.stride(0), X.stride(1), + X.shape[0], X.shape[1]) diff --git a/training/mmengine/mmengine_demo.py b/training/mmengine/mmengine_demo.py index c40faa3..feaa625 100755 --- a/training/mmengine/mmengine_demo.py +++ b/training/mmengine/mmengine_demo.py @@ -10,6 +10,7 @@ class MMResNet50(BaseModel): + def __init__(self): super().__init__() self.resnet = torchvision.models.resnet50() @@ -21,7 +22,9 @@ def forward(self, imgs, labels, mode): elif mode == 'predict': return x, labels + class Accuracy(BaseMetric): + def process(self, data_batch, data_samples): score, gt = data_samples self.results.append({ @@ -60,7 +63,6 @@ def compute_metrics(self, results): transforms.Normalize(**norm_cfg) ]))) - default_hooks = dict( runtime_info=dict(type='RuntimeInfoHook'), timer=dict(type='IterTimerHook'), @@ -83,8 +85,8 @@ def compute_metrics(self, results): val_dataloader=val_dataloader, val_cfg=dict(), val_evaluator=dict(type=Accuracy), - visualizer=dict(type='Visualizer', vis_backends=[dict(type='TensorboardVisBackend')]), + visualizer=dict(type='Visualizer', + vis_backends=[dict(type='TensorboardVisBackend')]), ) runner.train() - diff --git a/training/mmengine/mmengine_dist_demo.py b/training/mmengine/mmengine_dist_demo.py index 15004f5..ebf3f26 100755 --- a/training/mmengine/mmengine_dist_demo.py +++ b/training/mmengine/mmengine_dist_demo.py @@ -4,36 +4,44 @@ import mmengine.dist as dist import torch + def show_reduce(rank): - data = torch.arange(2, dtype=torch.int64) + 1 + 2 * rank - data = dist.all_reduce(data, op="sum") - print(data) + data = torch.arange(2, dtype=torch.int64) + 1 + 2 * rank + data = dist.all_reduce(data, op="sum") + print(data) + def show_dist_info(): - print(dist.get_world_size()) - print(dist.get_rank()) - print(dist.get_backend()) - print(dist.get_dist_info()) + print(dist.get_world_size()) + print(dist.get_rank()) + print(dist.get_backend()) + print(dist.get_dist_info()) + def show_env(): - keys = ["WORLD_SIZE", "RANK", "LOCAL_WORLD_SIZE", "LOCAL_RANK", "GROUP_WORLD_SIZE", "GROUP_RANK", "ROLE_WORLD_SIZE", "ROLE_RANK", "ROLE_NAME"] - for key in keys: - value = os.environ.get(key) - if value is not None: - print(f'{key}: {value}') + keys = [ + "WORLD_SIZE", "RANK", "LOCAL_WORLD_SIZE", "LOCAL_RANK", + "GROUP_WORLD_SIZE", "GROUP_RANK", "ROLE_WORLD_SIZE", "ROLE_RANK", + "ROLE_NAME" + ] + for key in keys: + value = os.environ.get(key) + if value is not None: + print(f'{key}: {value}') + def parse_args(): parser = argparse.ArgumentParser(description='Distributed Training') - parser.add_argument( - '--launcher', - choices=['none', 'pytorch', 'slurm', 'mpi'], - default='none', - help='job launcher') + parser.add_argument('--launcher', + choices=['none', 'pytorch', 'slurm', 'mpi'], + default='none', + help='job launcher') parser.add_argument('--local_rank', type=int, default=0) args = parser.parse_args() return args + def main(): args = parse_args() show_env() @@ -43,5 +51,6 @@ def main(): show_reduce(dist.get_rank()) + if __name__ == '__main__': main() diff --git a/training/mmengine/mmengine_model_analysis.py b/training/mmengine/mmengine_model_analysis.py index b1b2600..fa12be8 100755 --- a/training/mmengine/mmengine_model_analysis.py +++ b/training/mmengine/mmengine_model_analysis.py @@ -2,7 +2,9 @@ import torchvision from mmengine.model import BaseModel + class MMResNet50(BaseModel): + def __init__(self): super().__init__() self.resnet = torchvision.models.resnet50() diff --git a/training/mmengine/pytorch_version.py b/training/mmengine/pytorch_version.py index b643512..2c99ced 100755 --- a/training/mmengine/pytorch_version.py +++ b/training/mmengine/pytorch_version.py @@ -1,4 +1,4 @@ -import torch; +import torch print('torch version: ', torch.__version__) print('torch.distributed.is_available: ', torch.distributed.is_available())