Source code for recommenders.utils.gpu_utils

# Copyright (c) Recommenders contributors.
# Licensed under the MIT License.

import sys
import os
import glob
import logging
from numba import cuda
from numba.cuda.cudadrv.error import CudaSupportError


logger = logging.getLogger(__name__)


DEFAULT_CUDA_PATH_LINUX = "/usr/local/cuda/version.txt"


[docs]def get_number_gpus():
    """Get the number of GPUs in the system.
    Returns:
        int: Number of GPUs.
    """
    try:
        import torch

        return torch.cuda.device_count()
    except (ImportError, ModuleNotFoundError):
        pass
    try:
        import numba

        return len(numba.cuda.gpus)
    except Exception:  # numba.cuda.cudadrv.error.CudaSupportError:
        return 0


[docs]def get_gpu_info():
    """Get information of GPUs.

    Returns:
        list: List of gpu information dictionary as with `device_name`, `total_memory` (in Mb) and `free_memory` (in Mb).
        Returns an empty list if there is no cuda device available.
    """
    gpus = []
    try:
        for gpu in cuda.gpus:
            with gpu:
                meminfo = cuda.current_context().get_memory_info()
                g = {
                    "device_name": gpu.name.decode("ASCII"),
                    "total_memory": meminfo[1] / 1048576,  # Mb
                    "free_memory": meminfo[0] / 1048576,  # Mb
                }
                gpus.append(g)
    except CudaSupportError:
        pass

    return gpus


[docs]def clear_memory_all_gpus():
    """Clear memory of all GPUs."""
    try:
        for gpu in cuda.gpus:
            with gpu:
                cuda.current_context().deallocations.clear()
    except CudaSupportError:
        logger.info("No CUDA available")


[docs]def get_cuda_version():
    """Get CUDA version

    Returns:
        str: Version of the library.
    """
    try:
        import torch

        return torch.version.cuda
    except (ImportError, ModuleNotFoundError):
        path = ""
        if sys.platform == "win32":
            candidate = (
                "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*\\version.txt"
            )
            path_list = glob.glob(candidate)
            if path_list:
                path = path_list[0]
        elif sys.platform == "linux" or sys.platform == "darwin":
            path = "/usr/local/cuda/version.txt"
        else:
            raise ValueError("Not in Windows, Linux or Mac")

        if os.path.isfile(path):
            with open(path, "r") as f:
                data = f.read().replace("\n", "")
            return data
        else:
            return None


[docs]def get_cudnn_version():
    """Get the CuDNN version

    Returns:
        str: Version of the library.
    """

    def find_cudnn_in_headers(candiates):
        for c in candidates:
            file = glob.glob(c)
            if file:
                break
        if file:
            with open(file[0], "r") as f:
                version = ""
                for line in f:
                    if "#define CUDNN_MAJOR" in line:
                        version = line.split()[-1]
                    if "#define CUDNN_MINOR" in line:
                        version += "." + line.split()[-1]
                    if "#define CUDNN_PATCHLEVEL" in line:
                        version += "." + line.split()[-1]
            if version:
                return version
            else:
                return None
        else:
            return None

    try:
        import torch

        return str(torch.backends.cudnn.version())
    except (ImportError, ModuleNotFoundError):
        if sys.platform == "win32":
            candidates = [r"C:\NVIDIA\cuda\include\cudnn.h"]
        elif sys.platform == "linux":
            candidates = [
                "/usr/include/cudnn_version.h",
                "/usr/include/x86_64-linux-gnu/cudnn_v[0-99].h",
                "/usr/local/cuda/include/cudnn.h",
                "/usr/include/cudnn.h",
            ]
        elif sys.platform == "darwin":
            candidates = ["/usr/local/cuda/include/cudnn.h", "/usr/include/cudnn.h"]
        else:
            raise ValueError("Not in Windows, Linux or Mac")
        return find_cudnn_in_headers(candidates)