-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun.py
More file actions
144 lines (110 loc) · 4.72 KB
/
run.py
File metadata and controls
144 lines (110 loc) · 4.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import os
import logging
import subprocess

# Environment tuning must happen BEFORE TensorFlow is imported (it is
# imported lazily inside checkTF, so setting these here is safe).
# Suppress TensorFlow INFO, WARNING, and ERROR logs
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
# Suppress absl logging (if used)
os.environ["ABSL_LOG_LEVEL"] = "3"
# Turn off oneDNN operations
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
# Restrict TensorFlow to only see the first GPU (GPU 0)
os.environ['CUDA_VISIBLE_DEVICES'] = "0"

# Resolve the home directory portably: os.path.expanduser('~') honours
# $HOME on POSIX but does not raise KeyError when it is unset (os.environ['HOME'] does).
HOME: str = os.path.expanduser('~')
# The report file all checks append to via the logging FileHandler below.
LOG: str = f'{HOME}/report.log'

# Log simultaneously to the console and to the report file.
# NOTE: %(message)s is the fully formatted record text; the original used
# %(msg)s, which is the raw unformatted template when lazy %-args are used.
logging.basicConfig(
    format=">>> %(asctime)s | %(message)s -> %(name)s @ %(filename)s",
    datefmt="%H:%M:%S",
    level=logging.INFO,
    handlers=[logging.StreamHandler(), logging.FileHandler(LOG, encoding='utf-8')]
)
def checkCUDA() -> None:
    """
    Check whether the CUDA toolkit is installed properly.

    Runs ``nvcc --version`` and inspects the output. Results are written
    to the log/report. Never raises -- every failure mode is logged.
    """
    logging.info("Checking CUDA installation.")
    try:
        # Capture stderr as well: the original piped only stdout, so
        # nvccRes.stderr was always None and the warning branch logged nothing useful.
        nvccRes = subprocess.run(
            ['nvcc', '--version'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
        )
        if nvccRes.returncode == 0:
            logging.info(f"CUDA is installed:\n{nvccRes.stdout}")
        else:
            logging.warning(f'NVCC & NVIDIA Error: {nvccRes.stderr}')
    except FileNotFoundError as FnFE:
        # nvcc binary absent from PATH -> CUDA toolkit not installed.
        logging.error(f'nvcc command NOT found! CUDA is not installed properly... {str(FnFE)}')
    except Exception as E:
        # Top-level diagnostic boundary: log anything unexpected, keep going.
        logging.error(f'An unexpected error occurred while checking CUDA: {str(E)}')
def checkSMI() -> None:
    """
    Check whether the NVIDIA System Management Interface (nvidia-smi) is installed.

    Runs ``nvidia-smi`` plus a driver-version query and inspects the output.
    Results are written to the log/report. Never raises -- failures are logged.
    """
    logging.info("Checking NVIDIA SMI installation.")
    try:
        # Capture stderr too: the original piped only stdout, so
        # smiRes.stderr was always None in the warning branch below.
        smiRes = subprocess.run(
            ['nvidia-smi'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
        )
        driverRes = subprocess.run(
            ['nvidia-smi', '--query-gpu=driver_version', '--format=csv,noheader'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
        )
        logging.info(f"Driver:{driverRes.stdout}")
        if smiRes.returncode == 0:
            logging.info(f"{smiRes.stdout}")
        else:
            logging.warning(f'NVIDIA SMI Error: {smiRes.stderr}')
    except FileNotFoundError as FnFE:
        # nvidia-smi binary absent from PATH -> drivers not installed.
        logging.error(f'nvidia-smi command NOT found! NVIDIA drivers are not installed properly... {str(FnFE)}')
    except Exception as E:
        # Top-level diagnostic boundary: log anything unexpected, keep going.
        logging.error(f'An unexpected error occurred while checking NVIDIA SMI: {str(E)}')
def checkTF() -> None:
    """
    Check the TensorFlow installation and its GPU/CUDA configuration.

    Imports TensorFlow lazily (so the rest of the report still runs when it
    is absent) and lists the GPU devices it can see. Results are written to
    the log/report. Never raises -- failures are logged.
    """
    logging.info("Checking TensorFlow installation.")
    try:
        import tensorflow as tf  # type: ignore  # lazy: TF may not be installed
        TF_VERSION: str = tf.__version__
        logging.info(f'TensorFlow {TF_VERSION} installed.')
        DEVICES = tf.config.list_physical_devices('GPU')
        if DEVICES:
            logging.info(f'GPUs detected: {len(DEVICES)} GPU(s)')
            for DEVICE in DEVICES:
                details = tf.config.experimental.get_device_details(DEVICE)
                logging.info(f':{details.get("device_name", "Unknown Device")}')
        # No GPU is detected
        else:
            logging.warning("TensorFlow DID NOT detect any GPUs!")
    except ImportError as ImE:
        logging.error(f'TensorFlow is NOT installed. {str(ImE)}')
    except Exception as E:
        # Typo fix in the logged message: "occured" -> "occurred".
        logging.error(f'An unexpected error occurred! {str(E)}')
def checkTorch() -> None:
    """
    Check the PyTorch installation and its GPU/CUDA configuration.

    Imports PyTorch lazily (so the rest of the report still runs when it is
    absent), logs the bundled CUDA version, and enumerates visible CUDA
    devices. Results are written to the log/report. Never raises.
    """
    logging.info("Checking PyTorch installation.")
    try:
        import torch  # type: ignore  # lazy: torch may not be installed
        PT_VERSION: str = torch.__version__
        # torch.version.cuda is the CUDA toolkit version torch was built with
        # (None for CPU-only builds).
        logging.info(f'PyTorch {PT_VERSION}::{torch.version.cuda} installed.')
        if torch.cuda.is_available():
            logging.info(f'GPUs detected: {torch.cuda.device_count()} GPU(s)')
            for i in range(torch.cuda.device_count()):
                logging.info(f':{torch.cuda.get_device_name(i)}')
        else:
            logging.warning("PyTorch DID NOT detect any GPUs!")
    except ImportError as ImE:
        logging.error(f'PyTorch is NOT installed. {str(ImE)}')
    except Exception as E:
        # Typo fix in the logged message: "occured" -> "occurred".
        logging.error(f'An unexpected error occurred! {str(E)}')
def main() -> None:
    """Run every environment check in sequence, then log where the report lives."""
    logging.info('Starting System Report.')
    # Same order as before: toolkit, driver tooling, then the two frameworks.
    for probe in (checkCUDA, checkSMI, checkTF, checkTorch):
        probe()
    logging.info(f"Completed. Report has been created at '{LOG}'")
if __name__ == '__main__':
    # Run the full report only when executed as a script, not on import.
    main()