Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions src/allocator/allocator.c
Original file line number Diff line number Diff line change
Expand Up @@ -192,3 +192,63 @@ int free_raw(CUdeviceptr dptr){
return tmp;
}

int free_raw_async(CUdeviceptr dptr, CUstream hStream){
pthread_mutex_lock(&mutex);
unsigned int tmp = remove_chunk_async(device_overallocated,dptr,hStream);
pthread_mutex_unlock(&mutex);
return tmp;
}

/*
 * Find the tracked allocation whose address equals dptr, free it on hStream and
 * drop it from a_list and from the per-process usage accounting.
 *
 * a_list:  list of tracked allocations to search.
 * dptr:    device pointer to release.
 * hStream: stream on which the free is issued.
 *
 * Returns 0 on success, -1 when the list is empty or dptr is not tracked, or
 * the CUresult reported by the driver when cuMemFreeAsync fails (in which case
 * the entry stays tracked).
 */
int remove_chunk_async(allocated_list *a_list, CUdeviceptr dptr, CUstream hStream){
    allocated_list_entry *val;

    if (a_list->length==0) {
        return -1;
    }
    for (val=a_list->head;val!=NULL;val=val->next){
        size_t t_size;
        CUresult res;
        CUdevice dev;

        if (val->entry->address!=dptr){
            continue;
        }

        t_size=val->entry->length;
        /* Call the real driver entry through the override table. Calling
         * cuMemFreeAsync() by name would re-enter this library's own hook,
         * which calls free_raw_async() and tries to take the mutex the caller
         * already holds. */
        res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemFreeAsync,dptr,hStream);
        if (res!=CUDA_SUCCESS){
            LOG_ERROR("cuMemFreeAsync failed res=%d",res);
            return res;
        }
        LIST_REMOVE(a_list,val);

        /* The memory is already released at this point, so a failure to
         * resolve the device only skips the accounting update rather than
         * passing an uninitialized device id along. */
        res = cuCtxGetDevice(&dev);
        if (res!=CUDA_SUCCESS){
            LOG_ERROR("cuCtxGetDevice failed res=%d, usage accounting not updated",res);
            return 0;
        }
        rm_gpu_device_memory_usage(getpid(),dev,t_size,2);
        return 0;
    }
    return -1;
}

/* add_chunk_async is defined further down in this file; declare it before use
 * so the call below is not an implicit function declaration. */
int add_chunk_async(CUdeviceptr *address,size_t size, CUstream hStream);

/*
 * Allocate size bytes of device memory on a stream.
 *
 * Holds the allocator mutex around add_chunk_async(), which performs the
 * allocation and records it.
 *
 * dptr:    receives the device pointer on success.
 * size:    number of bytes requested.
 * hStream: stream passed through to add_chunk_async().
 *
 * Returns the value produced by add_chunk_async() unchanged: 0 on success,
 * non-zero on failure.
 */
int allocate_async_raw(CUdeviceptr *dptr, size_t size, CUstream hStream){
    int tmp;

    pthread_mutex_lock(&mutex);
    tmp = add_chunk_async(dptr,size,hStream);
    pthread_mutex_unlock(&mutex);
    return tmp;
}

/*
 * Allocate size bytes with cuMemAllocAsync on hStream, record the allocation in
 * device_overallocated and add it to the per-process usage accounting.
 *
 * address: receives the device pointer on success; left untouched on failure.
 * size:    number of bytes requested.
 * hStream: stream on which the allocation is issued.
 *
 * Returns 0 on success, CUDA_ERROR_OUT_OF_MEMORY when oom_check() rejects the
 * request, or the CUresult of the failing driver call.
 */
int add_chunk_async(CUdeviceptr *address,size_t size, CUstream hStream){
    size_t addr=0;
    CUresult res = CUDA_SUCCESS;
    CUdevice dev;
    allocated_list_entry *e;

    res = cuCtxGetDevice(&dev);
    if (res!=CUDA_SUCCESS){
        /* Without a valid device id neither the limit check nor the
         * accounting below can be trusted. */
        LOG_ERROR("cuCtxGetDevice failed res=%d",res);
        return res;
    }
    /* Report a real driver error code: this value is handed back to the
     * application as a CUresult by the cuMemAllocAsync hook. */
    if (oom_check(dev,size))
        return CUDA_ERROR_OUT_OF_MEMORY;

    INIT_ALLOCATED_LIST_ENTRY(e,addr,size);
    res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemAllocAsync,&e->entry->address,size,hStream);
    if (res!=CUDA_SUCCESS){
        LOG_ERROR("cuMemAllocAsync failed res=%d",res);
        /* NOTE(review): e (and e->entry) set up by INIT_ALLOCATED_LIST_ENTRY
         * appear to be leaked on this path; release them here once the
         * matching deallocation for that macro is confirmed. */
        return res;
    }
    LIST_ADD(device_overallocated,e);
    *address = e->entry->address;
    add_gpu_device_memory_usage(getpid(),dev,size,2);
    return 0;
}
2 changes: 2 additions & 0 deletions src/allocator/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,8 @@ int oom_check(const int dev,size_t addon);
int allocate_raw(CUdeviceptr *dptr, size_t size);
int free_raw(CUdeviceptr dptr);
int add_chunk_only(CUdeviceptr address,size_t size);
/* Allocates `size` bytes on `hStream` under the allocator mutex and stores the
 * resulting device pointer in `*dptr`. Returns 0 on success, non-zero on failure. */
int allocate_async_raw(CUdeviceptr *dptr, size_t size, CUstream hStream);
/* Releases the tracked allocation at `dptr` on `hStream` under the allocator
 * mutex. Returns 0 on success, non-zero when `dptr` is not tracked or the
 * release fails. */
int free_raw_async(CUdeviceptr dptr, CUstream hStream);

// Checks memory type
int check_memory_type(CUdeviceptr address);
Expand Down
1 change: 1 addition & 0 deletions src/cuda/hook.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ cuda_entry_t cuda_library_entry[] = {
{.name = "cuMemCreate"},
{.name = "cuMemMap"},
{.name = "cuMemAllocAsync"},
{.name = "cuMemFreeAsync"},
/* cuda11.7 new api memory part */
{.name = "cuMemHostGetDevicePointer_v2"},
{.name = "cuMemHostGetFlags"},
Expand Down
12 changes: 11 additions & 1 deletion src/cuda/memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -583,7 +583,17 @@ CUresult cuMemMap( CUdeviceptr ptr, size_t size, size_t offset, CUmemGenericAllo

/*
 * Hook for the driver's cuMemAllocAsync.
 *
 * Routes the request through allocate_async_raw() so the allocation is checked
 * against the memory limit and recorded. The earlier direct pass-through
 * return made that call unreachable and has been dropped.
 *
 * dptr:     receives the device pointer on success.
 * bytesize: number of bytes requested.
 * hStream:  stream on which the allocation is issued.
 *
 * Returns CUDA_SUCCESS on success, CUDA_ERROR_OUT_OF_MEMORY when the allocator
 * rejects the request with a negative internal code, or the CUresult that
 * allocate_async_raw() passes back from the driver.
 */
CUresult cuMemAllocAsync(CUdeviceptr *dptr, size_t bytesize, CUstream hStream) {
    LOG_DEBUG("cuMemAllocAsync:%zu",bytesize);
    int rc = allocate_async_raw(dptr,bytesize,hStream);
    if (rc < 0) {
        /* Negative values are internal allocator codes, not CUresult values;
         * never hand them to the application. */
        return CUDA_ERROR_OUT_OF_MEMORY;
    }
    return (CUresult)rc;
}

/*
 * Hook for the driver's cuMemFreeAsync.
 *
 * dptr:    device pointer to release; 0 is accepted and treated as a no-op.
 * hStream: stream on which the free is issued.
 *
 * Tracked pointers are released through free_raw_async(). When that reports a
 * negative code (the pointer is not in the tracking list), the request is
 * forwarded to the real driver entry so the memory is still released and the
 * caller receives a genuine CUresult.
 */
CUresult cuMemFreeAsync(CUdeviceptr dptr, CUstream hStream) {
    LOG_DEBUG("cuMemFreeAsync dptr=%llx",dptr);
    if (dptr == 0) {  // NULL
        return CUDA_SUCCESS;
    }
    int rc = free_raw_async(dptr,hStream);
    LOG_DEBUG("after free_raw_async dptr=%p res=%d",(void *)dptr,rc);
    if (rc < 0) {
        /* Negative values are internal codes, not CUresult values. */
        return CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemFreeAsync,dptr,hStream);
    }
    return (CUresult)rc;
}

CUresult cuMemHostGetDevicePointer_v2(CUdeviceptr *pdptr, void *p, unsigned int Flags){
Expand Down
1 change: 1 addition & 0 deletions src/include/libcuda_hook.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ typedef enum {
CUDA_OVERRIDE_ENUM(cuMemCreate),
CUDA_OVERRIDE_ENUM(cuMemMap),
CUDA_OVERRIDE_ENUM(cuMemAllocAsync),
CUDA_OVERRIDE_ENUM(cuMemFreeAsync),
/* cuda11.7 new api memory part */
CUDA_OVERRIDE_ENUM(cuMemHostGetDevicePointer_v2),
CUDA_OVERRIDE_ENUM(cuMemHostGetFlags),
Expand Down