Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
cbdb757
add localized allocation and deallocation
dsding2 Jun 2, 2025
2fee158
delete commented out code
dsding2 Jun 2, 2025
8ace895
deal with base storage
dsding2 Jun 4, 2025
c4e635c
ruff check fixes
dsding2 Jun 5, 2025
24b1a47
rework to push allocations outside of loops
dsding2 Jun 8, 2025
be78797
add types, fix ruff
dsding2 Jun 9, 2025
461558d
Merge remote-tracking branch 'upstream/main' into opencl_allocation
dsding2 Jun 13, 2025
0bcf4df
Merge branch 'main' into opencl_allocation
dsding2 Jun 17, 2025
0b6abdd
refactor to make more target-generic
dsding2 Jun 17, 2025
4f95a6b
resolve lingering merge issues
dsding2 Jun 17, 2025
bd98636
fix to only allocate global temporaries
dsding2 Jun 17, 2025
47dda68
move temp declarations to ASTBuilder
dsding2 Jun 19, 2025
e494a3b
Merge branch 'main' into opencl_allocation
dsding2 Jun 19, 2025
88c436f
fix typing
dsding2 Jun 19, 2025
dae91e2
fix typing hopefully
dsding2 Jun 23, 2025
1cfe83a
add basic test
dsding2 Jun 23, 2025
3c3bb78
Merge branch 'main' into opencl_allocation
dsding2 Jun 30, 2025
3ef324c
more typing/ruff fixes
dsding2 Jun 30, 2025
f708b66
fix tutorial.rst and add to baseline
dsding2 Jun 30, 2025
a0a8365
Merge branch 'main' into opencl_allocation
inducer Jul 5, 2025
f12ce9f
Merge branch 'main' into opencl_allocation
inducer Jul 10, 2025
452be6b
Merge branch 'main' into opencl_allocation
inducer Jul 10, 2025
3985576
Update loopy/schedule/tools.py
dsding2 Jul 11, 2025
95e119e
Apply suggested test changes
dsding2 Jul 11, 2025
5cbfbf1
implement rename and documentation suggestions
dsding2 Jul 11, 2025
612b238
ruff fixes, revert broken change
dsding2 Jul 12, 2025
4b0d754
Merge branch 'main' into opencl_allocation
inducer Jul 28, 2025
e591ae6
Merge branch 'main' into opencl_allocation
inducer Jul 31, 2025
1290c64
Merge branch 'main' into opencl_allocation
inducer Aug 28, 2025
b24fe99
Improvements
inducer Aug 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
ruff fixes, revert broken change
  • Loading branch information
dsding2 committed Jul 12, 2025
commit 612b2381c648b05baf433457298b4721f497a37e
11 changes: 7 additions & 4 deletions loopy/schedule/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,12 @@ def get_sched_index_to_first_and_last_used(
kernel: LoopKernel
) -> tuple[dict[int, frozenset[str]], dict[int, frozenset[str]]]:
"""
Returns the tuple (first_used, last_used), where first_used is a dict such that
first_used[sched_index] is the set of all temporary variable names first used at sched_index.
Likewise, last_used[sched_index] is the set of all temporary variable names last used at sched_index.
Returns the tuple (first_used, last_used), where first_used is
a dict such that first_used[sched_index] is the set of all temporary
variable names first used at sched_index.

Likewise, last_used[sched_index] is the set of all temporary variable names
last used at sched_index.
"""
from loopy.kernel.data import AddressSpace
from loopy.schedule import CallKernel, EnterLoop
Expand Down Expand Up @@ -221,7 +224,7 @@ def get_temporaries_in_bounds(
sched_index = 0
while sched_index < len(kernel.linearization):
sched_item = kernel.linearization[sched_index]
if isinstance(sched_item, EnterLoop) or isinstance(sched_item, CallKernel):
if isinstance(sched_item, (EnterLoop, CallKernel)):
if isinstance(sched_item, CallKernel):
block_end = block_boundaries[sched_index]
accessed_temporaries = (
Expand Down
6 changes: 5 additions & 1 deletion loopy/target/pyopencl.py
Original file line number Diff line number Diff line change
Expand Up @@ -849,7 +849,9 @@ def get_temporary_decl_at_index(
self, codegen_state: CodeGenerationState, sched_index: int
) -> tuple[genpy.Generable | None, genpy.Generable | None]:
from loopy.schedule.tools import get_sched_index_to_first_and_last_used
first_accesses, last_accesses = get_sched_index_to_first_and_last_used(codegen_state.kernel)
first_accesses, last_accesses = get_sched_index_to_first_and_last_used(
codegen_state.kernel
)
prefixes, suffixes = None, None
if sched_index in first_accesses:
prefix_lines: list[genpy.Generable] = []
Expand Down Expand Up @@ -910,8 +912,10 @@ def get_kernel_call(
gsize: tuple[Expression, ...], lsize: tuple[Expression, ...]
) -> genpy.Suite:
from genpy import Assert, Assign, Comment, Line, Suite
from pymbolic.mapper.stringifier import PREC_NONE

kernel = codegen_state.kernel
ecm = self.get_expression_to_code_mapper(codegen_state)

from loopy.schedule.tools import get_subkernel_arg_info
skai = get_subkernel_arg_info(kernel, subkernel_name)
Expand Down
2 changes: 1 addition & 1 deletion test/test_loopy.py
Original file line number Diff line number Diff line change
Expand Up @@ -3742,7 +3742,7 @@ def test_temporary_memory_allocation(ctx_factory: cl.CtxFactory):

a = np.arange(n, dtype=np.float32)
knl(cq, a=a, allocator=mem_pool_alloc)

# FIXME This relies on the memory pool not freeing any memory it allocates
assert mem_pool_alloc.managed_bytes < len(temp_vars) * a.nbytes

Expand Down
Loading