# Google Colab script to run mprime on cpu and gpuowl on known gpu models, mfaktc otherwise;
# if no gpu allocated, prompts for whether user wants to run cpu-only mprime (y) or exit (n).
# gpu-model-specific-branching code (does not do any setup, result reporting, or work getting).
# mprime and any gpu app will be run as background tasks, and top run in the foreground.
# This allows either the cpu task or the gpu task to continue if there is an issue with the other,
# and makes use of whatever resources are available. At termination the last top output gives a
# pretty good idea of how long the Colab session lasted.
# Note it is premised on separate subfolders for each gpu model for gpuowl, since their capabilities vary.
# If implementing mfaktc for them instead, the optimal tunings probably differ, so use folders then too.
import sys
import os.path
from google.colab import drive

!nvidia-smi                # one for the user to look at
gpu_info = !nvidia-smi     # and one for the script to look at
gpu_info = '\n'.join(gpu_info)

if gpu_info.find('failed') >= 0:
  print('Select the Runtime → "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell. Or try again later when a gpu may be available.')
  yn = input("Would you like to run mprime on cpu? (y for mprime only; n for exit)")
  if yn == 'n':
    print('User selected exit')
    sys.exit()
  elif yn == 'y':
    print('User selected continue')
  else:
    print('User selected neither y nor n, continuing')
  # google drive code and mprime-only code here
  if not os.path.exists('/content/drive/My Drive'):
    drive.mount('/content/drive')
  %cd '/content/drive/My Drive//'
  !chmod +w '/content/drive/My Drive'
  # section to resume a run of mprime on a Colab session
  %cd '/content/drive/My Drive/mprime//'
  !chmod +x ./mprime
  !./mprime -d >> mprimelog.txt 2>&1 &
  print('mprime launched in background')
else:
  # google drive code, mprime, and gpu-model-specific code here
  # first, the basic common stuff for all Google Drive use and system info
  !cat /proc/meminfo
  !lscpu
  if not os.path.exists('/content/drive/My Drive'):
    drive.mount('/content/drive')
  !cd '/content/drive/My Drive/'
  !chmod +w '/content/drive/My Drive'
  # section to resume a run of mprime on a Colab session
  %cd '/content/drive/My Drive/mprime//'
  !chmod +x ./mprime
  !./mprime -d >> mprimelog.txt 2>&1 &
  print('mprime launched in background')
  # gpu-model-generic portion follows
  !chmod 777 '/content/drive/My Drive/gpuowl'
  %cd '/content/drive/My Drive/gpuowl//'
  # was gpuowl.exe below
  !chmod +x '/content/drive/My Drive/gpuowl/p100/gpuowl'
  !./p100/gpuowl -h
  print(gpu_info)
  # gpu-model-specific portions follow; note, using ini files and config.txt files so less gpu-specific here;
  # -maxAlloc -user -cpu colab/gpumodel etc. are in individual config.txt files
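  # For illustration only (hypothetical values, not taken from this script): each per-model
  # folder's config.txt might hold a line such as
  #   -user yourPrimenetID -cpu colab/t4 -maxAlloc 12G
  # so the launch command below can stay identical across gpu models; check the help output
  # of the gpuowl version in use for the exact flags and value formats it accepts.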
  if gpu_info.find('Tesla T4') >= 0:
    print('code here for Tesla T4 case.')
    %cd '/content/drive/My Drive/gpuowl/t4//'
    statinfo = os.stat('./worktodo.txt')
    if statinfo.st_size < 50:
      print('WARNING, small file size indicates little or no Tesla T4 gpuowl work to do')
    !LD_LIBRARY_PATH="lib:${LD_LIBRARY_PATH}" && chmod 777 gpuowl && chmod 777 worktodo.txt
    !./gpuowl >> gpuowllog.txt 2>&1 &
    print('gpuowl for Tesla T4 launched in background')
  elif gpu_info.find('Tesla P4') >= 0:
    print('code here for Tesla P4 case.')
    %cd '/content/drive/My Drive/gpuowl/p4//'
    statinfo = os.stat('./worktodo.txt')
    if statinfo.st_size < 50:
      print('WARNING, small file size indicates little or no Tesla P4 gpuowl work to do')
    !LD_LIBRARY_PATH="lib:${LD_LIBRARY_PATH}" && chmod 777 gpuowl && chmod 777 worktodo.txt
    !./gpuowl >> gpuowllog.txt 2>&1 &
    print('gpuowl for Tesla P4 launched in background')
  elif gpu_info.find('Tesla P100') >= 0:
    print('code here for Tesla P100 case')
    %cd '/content/drive/My Drive/gpuowl/p100//'
    !echo ls -l ./
    !ls -l ./
    statinfo = os.stat('./worktodo.txt')
    if statinfo.st_size < 50:
      print('WARNING, small file size indicates little or no Tesla P100 gpuowl work to do')
    !LD_LIBRARY_PATH="lib:${LD_LIBRARY_PATH}" && chmod 777 gpuowl && chmod 777 worktodo.txt
    !./gpuowl >> gpuowllog.txt 2>&1 &
    print('gpuowl for Tesla P100 launched in background')
  elif gpu_info.find('Tesla K80') >= 0:
    print('code here for Tesla K80 case')
    %cd '/content/drive/My Drive/gpuowl/k80//'
    statinfo = os.stat('./worktodo.txt')
    if statinfo.st_size < 50:
      print('WARNING, small file size indicates little or no Tesla K80 gpuowl work to do')
    !LD_LIBRARY_PATH="lib:${LD_LIBRARY_PATH}" && chmod 777 gpuowl && chmod 777 worktodo.txt
    !./gpuowl >> gpuowllog.txt 2>&1 &
    print('gpuowl for Tesla K80 launched in background')
  else:
    print('unexpected gpu model: run some mfaktc')
    %cd '/content/drive/My Drive/mfaktc//'
    statinfo = os.stat('./worktodo.txt')
    if statinfo.st_size < 150:
      print('WARNING, small file size indicates little or no generic TF work to do')
    !ls -l
    # as of about 2019 November 17, we need to install cuda libs also
    !apt-get install -y cuda-cudart-10-0
    !chmod 755 '/content/drive/My Drive/mfaktc/mfaktc.exe'
    !./mfaktc.exe >> mfaktc-run.txt 2>&1 &
    print('mfaktc for unexpected gpu model launched in background')

# monitoring code here; top helps keep the Colab session alive so the background tasks continue
!sleep 8
if gpu_info.find('failed') < 0:
  !nvidia-smi  # the current conditions, perhaps with gpu loaded; print(gpu_info) would give the idle state
!top -d 120
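# A minimal sketch (not part of the script above) of how the repeated per-model branches could
# be made table-driven instead: map a substring of the nvidia-smi output to a gpuowl subfolder,
# then reuse one common launch sequence. Folder names follow the layout assumed above; the
# helper name pick_gpuowl_dir is hypothetical.
gpuowl_dirs = {
    'Tesla T4':   't4',
    'Tesla P4':   'p4',
    'Tesla P100': 'p100',
    'Tesla K80':  'k80',
}

def pick_gpuowl_dir(info):
  # Return the per-model subfolder, or None for an unrecognized gpu (mfaktc fallback case).
  for model, subdir in gpuowl_dirs.items():
    if model in info:
      return subdir
  return None

# Usage idea: subdir = pick_gpuowl_dir(gpu_info); if subdir is None fall back to the mfaktc
# branch, otherwise change into '/content/drive/My Drive/gpuowl/' + subdir, repeat the
# worktodo.txt size warning, and launch gpuowl in the background as in the branches above.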