From dbd15239582fadac68d6ba9cd9f090b1140373f3 Mon Sep 17 00:00:00 2001 From: one-lithe-rune Date: Sun, 19 Nov 2023 13:45:37 +0000 Subject: [PATCH] Use `vulkaninfo` for AMD gpu heuristics on Windows * Change exception paths for gpu heuristics function to return a two part tuple like the non-exception paths, rather than a three part one with `False` on the end. This prevents the caller from blowing up with an extra exception. * Added a path to get the AMD devices via vulkaninfo if we are on windows as we will not have rocminfo there even if the rocm SDK is installed (windows rocm SDK :shrug:), and we don't require the windows SDK anyway, so we can't use hipinfo either. * The new code path uses the existing AMD device/memory list to get the memory info, and only returns devices that are on that list. --- koboldcpp.py | 98 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 71 insertions(+), 27 deletions(-) diff --git a/koboldcpp.py b/koboldcpp.py index 3a37e84774f4a..11ff261f7a114 100755 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -1148,6 +1148,66 @@ def getfilename(var, text): # todo: autopick the right number of layers when a model is selected. # run in new thread so it doesnt block. does not return anything, instead overwrites specific values and redraws GUI + amd_windows_hip_devices = { + 'W7900': 49152, # 48 GiB + 'W7800': 32768, # 32 GiB + 'W6800': 32768, # 32 GiB + '7900 XTX': 24560, # 24 GiB + '7900 XT': 20464, # 20 GiB + '7900 GRE': 16368, # 16 GiB + '7800 XT': 16368, # 16 GiB + '7600': 8176, # 8 GiB + '6950 XT': 16368, # 16 GiB + '6900 XT': 16368, # 16 GiB + '6800 XT': 16368, # 16 GiB + '6800': 16368 # 16 GiB + } + + def get_amd_hip_device_memory(device_name): + for key in amd_windows_hip_devices: + if key in device_name: + return amd_windows_hip_devices[key] + + return None + + def get_amd_gpu_info_windows(): + # Windows rocm doesn't have rocminfo, and we may not even have the rocm sdk installed so we can't rely + # on hipinfo either. 
So grab the devices through vulkaninfo, which should exist if any AMD GPU drivers are + # installed on the machine, and then check them against amd_windows_hip_devices above. + import re + from subprocess import run + FetchedAMDdevices = [] + FetchedAMDdeviceMem = [] + try: + output = run(['vulkaninfo', '--summary'], capture_output=True, text=True, check=True, encoding='utf-8').stdout + output = output.split("Devices:\n========\n")[1] + output = re.split(r"GPU\d+:", output) + + device_re = re.compile(r"^\s+deviceName\s+=\s+(.*)$", re.MULTILINE) + amd_re = re.compile(r"^\s+vendorID\s+=\s+0x1002$", re.MULTILINE) # 0x1002 is the AMD vendor id for vulkan + + for gpu in output: + if amd_re.search(gpu): + device_match = device_re.search(gpu) + if device_match: + device_name = device_match.group(1) + memSize = get_amd_hip_device_memory(device_name) + + # For now only list devices we know the memory amount for, that can use HIPBlas + # TODO: is this correct? Or do we want all AMD devices? + if memSize: + FetchedAMDdevices.append(device_name) + FetchedAMDdeviceMem.append(memSize) + + FetchedAMDdevices = [item.replace("AMD Radeon", "AMD") for item in FetchedAMDdevices] # Shorten Device Names + print(FetchedAMDdevices, FetchedAMDdeviceMem) + return FetchedAMDdevices, FetchedAMDdeviceMem + + except FileNotFoundError: + print("The command 'vulkaninfo' is not available on this system. 
Are GPU drivers installed?") + return [],[] + + def get_amd_gpu_info(): from subprocess import run, CalledProcessError FetchedCUdevices = [] @@ -1157,7 +1217,7 @@ def get_amd_gpu_info(): device_name = None for line in output.splitlines(): # read through the output line by line line = line.strip() - if line.startswith("Marketing Name:"): + if line.startswith("Marketing Name:"): device_name = line.split(":", 1)[1].strip() # if we find a named device, temporarily save the name elif line.startswith("Device Type:") and "GPU" in line and device_name is not None: # if the following Device Type is a GPU (not a CPU) then add it to devices list FetchedCUdevices.append(device_name) @@ -1165,43 +1225,24 @@ def get_amd_gpu_info(): if FetchedCUdevices: try: getamdvram = run(['rocm-smi', '--showmeminfo', 'vram', '--csv'], capture_output=True, text=True, check=True, encoding='utf-8').stdout # fetch VRAM of devices - if getamdvram: + if getamdvram: FetchedCUdeviceMem = [str(int(line.split(",")[1].strip()) // 1048576) for line in getamdvram.splitlines()[1:] if line.strip()] #return Mb from Bytes except Exception as e: pass try: if not FetchedCUdeviceMem and device_name: - for device_name in FetchedCUdevices: - amd_vram_dict = {# probably on windows, so use hardcoded values: - 'W7900': "49152", # 48 GiB - 'W7800': "32768", # 32 GiB - 'W6800': "32768", # 32 GiB - '7900 XTX': "24560", # 24 GiB - '7900 XT': "20464", # 20 GiB - '7900 GRE': "16368", # 16 GiB - '7800 XT': "16368", # 16 GiB - '7600': "8176", # 8 GiB - '6950 XT': "16368", # 16 GiB - '6900 XT': "16368", # 16 GiB - '6800 XT': "16368", # 16 GiB - '6800': "16368" # 16 GiB - } - for key in amd_vram_dict: - if key in device_name: - amd_device_vram = amd_vram_dict[key] - FetchedCUdeviceMem.append(amd_device_vram) - break + FetchedCUdeviceMem(get_amd_hip_device_memory(device_name)) except Exception as e: pass FetchedCUdevices = [item.replace("AMD Radeon", "AMD") for item in FetchedCUdevices] # Shorten Device Names - return 
FetchedCUdevices, FetchedCUdeviceMem - + return FetchedCUdevices, FetchedCUdeviceMem + except FileNotFoundError: print("The command 'rocminfo' is not available on this system.") - return [], [], False + return [], [] except Exception as e: print(f"An unexpected error occurred: {e}") - return [], [], False + return [], [] def auto_gpu_heuristics(): from subprocess import run, CalledProcessError @@ -1237,7 +1278,10 @@ def auto_gpu_heuristics(): pass if len(FetchedCUdevices)==0: # Get AMD GPU names - FetchedCUdevices, FetchedCUdeviceMem = get_amd_gpu_info() + if os.name == "nt": + FetchedCUdevices, FetchedCUdeviceMem = get_amd_gpu_info_windows() + else: + FetchedCUdevices, FetchedCUdeviceMem = get_amd_gpu_info() for idx in range(0,4): if(len(FetchedCUdevices)>idx):