diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a7f71a0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,227 @@ +# Created by .ignore support plugin (hsz.mobi) +### JetBrains template +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. 
+# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +### Python template +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
+# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +### macOS template +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +rsync_to_server.sh + diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..99b4f78 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..510a05c --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/nus_check_server_gpu.iml b/.idea/nus_check_server_gpu.iml new file mode 100644 index 0000000..85c7612 --- /dev/null +++ b/.idea/nus_check_server_gpu.iml @@ -0,0 +1,13 @@ + + + + + + + + + + + + \ No newline at end of file diff --git 
a/src/mi_gpu_slave_script.py b/src/mi_gpu_slave_script.py
new file mode 100644
index 0000000..5246033
--- /dev/null
+++ b/src/mi_gpu_slave_script.py
@@ -0,0 +1,32 @@
+"""Print the status of every local GPU as a single JSON line.
+
+Meant to be run on a GPU server; the output is parsed by
+mi_nus_soc_gpu_status_reader.py on the other end of an SSH session.
+"""
+import json
+
+import GPUtil
+
+
+def get_gpu_status_obj(gpu: GPUtil.GPU):
+    """Return a JSON-serialisable dict built from the attributes of *gpu*."""
+    return {
+        'id': gpu.id,
+        'name': gpu.name,
+        'load': gpu.load,
+        'mem_free': gpu.memoryFree,
+        'mem_total': gpu.memoryTotal,
+        'mem_used': gpu.memoryUsed,
+        'mem_util': gpu.memoryUtil,
+    }
+
+
+def main():
+    """Query all GPUs and print their status objects as one JSON array."""
+    gpu_status_obj_list = [get_gpu_status_obj(gpu) for gpu in GPUtil.getGPUs()]
+    # One line of output: the reader calls json.loads() on each stdout line.
+    print(json.dumps(gpu_status_obj_list))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/src/mi_nus_soc_gpu_status_reader.py b/src/mi_nus_soc_gpu_status_reader.py
new file mode 100644
index 0000000..274e573
--- /dev/null
+++ b/src/mi_nus_soc_gpu_status_reader.py
@@ -0,0 +1,123 @@
+"""Report GPU load and memory for a list of NUS SoC GPU servers.
+
+Connects to each host over SSH, runs mi_gpu_slave_script.py there, prints
+the per-GPU status, and finally highlights the GPU with the most free
+memory and the one with the lowest memory utilization.
+"""
+import json
+import os
+
+import paramiko
+
+NUS_SERVER_HOSTS = [
+    # add more servers here
+    'xgpf0',
+    'xgpf1',
+    'xgpf2',
+    'xgpf3',
+    'xgpf4',
+    'xgpf5',
+    'xgpf6',
+    'xgpf7',
+    'xgpf8',
+    'xgpf9',
+    'xgpf10',
+    'xgpf11',
+]
+
+# Connection settings. Override the defaults through these environment
+# variables to use another account or key without editing this file.
+SSH_USERNAME = os.environ.get('NUS_GPU_SSH_USER', 'mingda')
+SSH_KEY_PATH = os.environ.get(
+    'NUS_GPU_SSH_KEY', '/Users/mimimi/Documents/sshKeys/nus_server_key')
+REMOTE_COMMAND = 'python3 mi_gpu_slave_script.py'
+# Seconds to wait for the TCP connect, so one dead host cannot stall the scan.
+CONNECT_TIMEOUT = 10
+
+
+def print_status_list(host, status_list_obj):
+    """Print a banner for *host* followed by the status of each of its GPUs."""
+    print("=" * 10, host, "=" * 10)
+    for status_obj in status_list_obj:
+        print_status(status_obj)
+    print("\n")
+
+
+def print_status(status_obj):
+    """Print the load and memory figures of a single GPU status dict."""
+    print("-" * 7, status_obj['id'], status_obj['name'], "-" * 7)
+    print(f"Load: {status_obj['load']}")
+    print(f"Memory Free: {status_obj['mem_free']}MB")
+    util = f"{float(status_obj['mem_util']) * 100:.2f}"
+    print(f"Memory Used: {status_obj['mem_used']}MB / {status_obj['mem_total']}MB {util}%")
+
+
+def print_most_mem_free(status_obj):
+    """Print *status_obj* under a "Most Free Mem" banner naming its host."""
+    print("*" * 10, f"Most Free Mem: {status_obj['host']}", "*" * 10)
+    print_status(status_obj)
+    print('\n')
+
+
+def print_least_util(status_obj):
+    """Print *status_obj* under a "Least Utilization" banner naming its host."""
+    print("*" * 10, f"Least Utilization: {status_obj['host']}", "*" * 10)
+    print_status(status_obj)
+    print('\n')
+
+
+def fetch_gpu_status(host, key):
+    """Run the status script on *host* and return its GPU status dicts.
+
+    Every returned dict gets an extra 'host' entry. Errors from paramiko or
+    from JSON parsing propagate to the caller; a non-zero exit status of the
+    remote command raises RuntimeError carrying the remote stderr.
+    """
+    # SSHClient is a context manager, so the connection is closed even when
+    # the remote command or the JSON parsing fails.
+    with paramiko.SSHClient() as client:
+        # NOTE(review): AutoAddPolicy accepts unknown host keys unverified;
+        # consider load_system_host_keys() with the default RejectPolicy.
+        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+        client.connect(f'{host}.comp.nus.edu.sg', username=SSH_USERNAME,
+                       pkey=key, timeout=CONNECT_TIMEOUT)
+        _stdin, stdout, stderr = client.exec_command(REMOTE_COMMAND)
+        status_list_obj = []
+        for line in stdout:
+            if line.strip():
+                status_list_obj.extend(json.loads(line))
+        exit_status = stdout.channel.recv_exit_status()
+        if exit_status != 0:
+            remote_error = stderr.read().decode(errors='replace').strip()
+            raise RuntimeError(
+                f'remote command exited with {exit_status}: {remote_error}')
+    for status_obj in status_list_obj:
+        status_obj['host'] = host
+    return status_list_obj
+
+
+def main():
+    """Scan every configured host and print the best GPUs found."""
+    key = paramiko.RSAKey.from_private_key_file(SSH_KEY_PATH)
+    status_list = []
+    for host in NUS_SERVER_HOSTS:
+        try:
+            status_list_obj = fetch_gpu_status(host, key)
+        except paramiko.ssh_exception.AuthenticationException:
+            print(f'Auth fail for {host}, the host may be reserved.')
+        except Exception as ex:
+            # Best effort: one failing host must not abort the whole scan.
+            print(f'Unexpected error for {host} error is {ex}')
+        else:
+            print_status_list(host, status_list_obj)
+            status_list.extend(status_list_obj)
+
+    if status_list:
+        print_most_mem_free(max(status_list, key=lambda s: s['mem_free']))
+        # Ties on utilization go to the GPU with more free memory.
+        print_least_util(
+            min(status_list, key=lambda s: (s['mem_util'], -s['mem_free'])))
+
+
+if __name__ == '__main__':
+    main()