import sys
import numpy as np
import ctypes as ct
# Stub code for OpenCL setup.

import pyopencl as cl
import numpy as np
import sys

# Fail at import time when the installed PyOpenCL reports a version
# tuple lower than (2015, 2).
if cl.version.VERSION < (2015,2):
    raise Exception('Futhark requires at least PyOpenCL version 2015.2.  Installed version is %s.' %
                    cl.version.VERSION_TEXT)

def parse_preferred_device(s):
    """Split a device preference string into a name and an ordinal.

    A string longer than one character that starts with ``#`` is read as
    ``#<digits><whitespace><name>``: the digits give the index of the
    wanted device among those whose name matches.  Any other string is
    treated as a plain device name with index 0.

    Returns a ``(name, index)`` tuple.
    """
    if len(s) <= 1 or s[0] != '#':
        return (s, 0)

    index = 0
    pos = 1
    # Accumulate the decimal number that follows the '#'.
    while pos < len(s) and s[pos].isdigit():
        index = 10 * index + int(s[pos])
        pos += 1

    # Everything after the number, minus leading whitespace, is the name.
    return (s[pos:].lstrip(), index)

def get_prefered_context(interactive=False, platform_pref=None, device_pref=None):
    """Create an OpenCL context for the preferred platform and device.

    ``device_pref`` may carry a leading ``#N`` ordinal (see
    parse_preferred_device); the N-th device (counting from zero) that
    satisfies the constraints is selected.  A platform or device matches
    when the preference is empty/None or is a substring of its name.

    When ``interactive`` is true the choice is delegated to
    ``cl.create_some_context(interactive=True)``.

    Raises Exception when no platform/device pair satisfies the
    constraints.
    """
    device_num = 0
    if device_pref is not None:
        device_pref, device_num = parse_preferred_device(device_pref)

    if interactive:
        return cl.create_some_context(interactive=True)

    # The Apple / Intel CPU device is skipped only when the caller
    # expressed no preference at all.
    no_preferences = platform_pref is None and device_pref is None

    matches_seen = 0
    for platform in cl.get_platforms():
        if platform_pref and platform.name.find(platform_pref) < 0:
            continue
        for device in platform.get_devices():
            if (no_preferences and platform.name == "Apple"
                    and device.name.find("Intel(R) Core(TM)") >= 0):
                continue
            if device_pref and device.name.find(device_pref) < 0:
                continue
            if matches_seen == device_num:
                return cl.Context(devices=[device])
            matches_seen += 1

    raise Exception('No OpenCL platform and device matching constraints found.')

def size_assignment(s):
    """Parse a ``NAME=VALUE`` string into a ``(name, int(value))`` pair.

    Raises ValueError when ``s`` does not contain exactly one ``=`` or
    when the value part is not an integer literal.
    """
    fields = s.split('=')
    key, raw_value = fields
    return (key, int(raw_value))

def check_types(self, required_types):
    """Verify that ``self.device`` supports the types the program needs.

    Only ``'f64'`` is checked: if it is required and the device reports a
    preferred double vector width of 0, an Exception is raised.
    """
    if 'f64' not in required_types:
        return

    double_width = self.device.get_info(cl.device_info.PREFERRED_VECTOR_WIDTH_DOUBLE)
    if double_width == 0:
        raise Exception('Program uses double-precision floats, but this is not supported on chosen device: %s' % self.device.name)

def apply_size_heuristics(self, size_heuristics, sizes):
    """Fill in unset entries of ``sizes`` from the matching heuristics.

    Each heuristic is a ``(platform_name, device_type, size, valuef)``
    tuple.  It applies when ``sizes[size]`` is still None, ``platform_name``
    is a substring of ``self.platform.name`` and every bit of
    ``device_type`` is set in ``self.device.type``; the entry is then set
    to ``valuef(self.device)``.  Heuristics are tried in order, so the
    first applicable one for a given size wins.

    ``sizes`` is modified in place and also returned.
    """
    for (wanted_platform, wanted_type, key, compute) in size_heuristics:
        if sizes[key] is not None:
            continue
        if self.platform.name.find(wanted_platform) < 0:
            continue
        if (self.device.type & wanted_type) != wanted_type:
            continue
        sizes[key] = compute(self.device)
    return sizes

def initialise_opencl_object(self,
                             program_src='',
                             command_queue=None,
                             interactive=False,
                             platform_pref=None,
                             device_pref=None,
                             default_group_size=None,
                             default_num_groups=None,
                             default_tile_size=None,
                             default_reg_tile_size=None,
                             default_threshold=None,
                             size_heuristics=[],
                             required_types=[],
                             all_sizes={},
                             user_sizes={}):
    """Attach OpenCL state to ``self`` and build the OpenCL program.

    Sets ``self.ctx``, ``self.queue``, ``self.device``, ``self.platform``,
    ``self.pool``, the ``self.max_*`` limits, ``self.free_list``, the
    failure-reporting buffers and ``self.sizes``.  ``self`` must already
    provide ``global_failure_args_max``.

    Parameters:
      program_src: OpenCL source text to build.
      command_queue: existing command queue to reuse.  When None, a
        context is picked with get_prefered_context and a new queue is
        created on it.
      interactive, platform_pref, device_pref: forwarded to
        get_prefered_context when no queue is supplied.
      default_group_size, default_num_groups, default_tile_size,
      default_reg_tile_size, default_threshold: fallback value per size
        class.  An entry of the same name in the module-level ``sizes``
        dict takes precedence and is removed from that dict.
      size_heuristics: ``(platform_name, device_type, size, valuef)``
        tuples handed to apply_size_heuristics to fill defaults that are
        still None.
      required_types: names of the types the program uses; validated by
        check_types.
      all_sizes: maps size name to a dict with ``'class'`` and ``'value'``
        keys.  The ``'value'`` entries are overwritten in place from
        ``user_sizes``.
      user_sizes: maps size name to an explicit value.

    Returns the built ``cl.Program``.

    Raises Exception when ``user_sizes`` names a size that is not in
    ``all_sizes``; errors from get_prefered_context and check_types
    propagate.
    """
    # NOTE: the mutable default arguments are kept so the signature stays
    # unchanged; they are never extended here (only inner dicts of keys
    # already present in all_sizes are written to).
    if command_queue is None:
        self.ctx = get_prefered_context(interactive, platform_pref, device_pref)
        self.queue = cl.CommandQueue(self.ctx)
    else:
        self.ctx = command_queue.context
        self.queue = command_queue
    self.device = self.queue.device
    self.platform = self.device.platform
    self.pool = cl.tools.MemoryPool(cl.tools.ImmediateAllocator(self.queue))

    check_types(self, required_types)

    max_group_size = int(self.device.max_work_group_size)
    max_tile_size = int(np.sqrt(self.device.max_work_group_size))

    self.max_group_size = max_group_size
    self.max_tile_size = max_tile_size
    self.max_threshold = 0
    self.max_num_groups = 0

    self.max_local_memory = int(self.device.local_mem_size)

    # Futhark reserves 4 bytes of local memory for its own purposes.
    self.max_local_memory -= 4

    # See comment in rts/c/opencl.h.
    if self.platform.name.find('NVIDIA CUDA') >= 0:
        self.max_local_memory -= 12
    elif self.platform.name.find('AMD') >= 0:
        self.max_local_memory -= 16

    self.free_list = {}

    # One int32 failure word, initialised to -1, plus room for the
    # int64 failure arguments.
    self.global_failure = self.pool.allocate(np.int32().itemsize)
    cl.enqueue_fill_buffer(self.queue, self.global_failure, np.int32(-1), 0, np.int32().itemsize)
    self.global_failure_args = self.pool.allocate(np.int64().itemsize *
                                                  (self.global_failure_args_max+1))
    self.failure_is_an_option = np.int32(0)

    # Overrides in the module-level `sizes` dict win over the keyword
    # arguments and are consumed (removed) once read.
    if 'default_group_size' in sizes:
        default_group_size = sizes.pop('default_group_size')

    if 'default_num_groups' in sizes:
        default_num_groups = sizes.pop('default_num_groups')

    if 'default_tile_size' in sizes:
        default_tile_size = sizes.pop('default_tile_size')

    if 'default_reg_tile_size' in sizes:
        default_reg_tile_size = sizes.pop('default_reg_tile_size')

    if 'default_threshold' in sizes:
        default_threshold = sizes.pop('default_threshold')

    # Remember whether the values were given explicitly: clamping a
    # heuristic-derived default is not worth a note to the user.
    default_group_size_set = default_group_size is not None
    default_tile_size_set = default_tile_size is not None
    default_sizes = apply_size_heuristics(self, size_heuristics,
                                          {'group_size': default_group_size,
                                           'tile_size': default_tile_size,
                                           'reg_tile_size': default_reg_tile_size,
                                           'num_groups': default_num_groups,
                                           'lockstep_width': None,
                                           'threshold': default_threshold})
    default_group_size = default_sizes['group_size']
    default_num_groups = default_sizes['num_groups']
    default_threshold = default_sizes['threshold']
    default_tile_size = default_sizes['tile_size']
    default_reg_tile_size = default_sizes['reg_tile_size']
    lockstep_width = default_sizes['lockstep_width']

    if default_group_size > max_group_size:
        if default_group_size_set:
            # Report the group-size limit (not the tile-size limit).
            sys.stderr.write('Note: Device limits group size to {} (down from {})\n'.
                             format(max_group_size, default_group_size))
        default_group_size = max_group_size

    if default_tile_size > max_tile_size:
        if default_tile_size_set:
            sys.stderr.write('Note: Device limits tile size to {} (down from {})\n'.
                             format(max_tile_size, default_tile_size))
        default_tile_size = max_tile_size

    for (k,v) in user_sizes.items():
        if k in all_sizes:
            all_sizes[k]['value'] = v
        else:
            raise Exception('Unknown size: {}\nKnown sizes: {}'.format(k, ' '.join(all_sizes.keys())))

    self.sizes = {}
    for (k,v) in all_sizes.items():
        size_class = v['class']
        if size_class == 'group_size':
            max_value = max_group_size
            default_value = default_group_size
        elif size_class == 'num_groups':
            max_value = max_group_size # Intentional!
            default_value = default_num_groups
        elif size_class == 'tile_size':
            max_value = max_tile_size
            default_value = default_tile_size
        elif size_class == 'reg_tile_size':
            max_value = None
            default_value = default_reg_tile_size
        elif size_class.startswith('threshold'):
            max_value = None
            default_value = default_threshold
        else:
            # Bespoke sizes have no limit or default.  Set the default
            # explicitly so a value left over from a previous iteration
            # (or an unbound name) is never used.
            max_value = None
            default_value = None
        if v['value'] is None:
            self.sizes[k] = default_value
        elif max_value is not None and v['value'] > max_value:
            sys.stderr.write('Note: Device limits {} to {} (down from {})\n'.
                             format(k, max_value, v['value']))
            self.sizes[k] = max_value
        else:
            self.sizes[k] = v['value']

    # XXX: we perform only a subset of z-encoding here.  Really, the
    # compiler should provide us with the variables to which
    # parameters are mapped.
    # NOTE(review): this condition is always true, so the program is
    # built even for an empty source; kept as-is because callers may
    # rely on always receiving a program - confirm whether `> 0` was
    # intended.
    if (len(program_src) >= 0):
        return cl.Program(self.ctx, program_src).build(
            ["-DLOCKSTEP_WIDTH={}".format(lockstep_width)]
            + ["-D{}={}".format(s.replace('z', 'zz').replace('.', 'zi').replace('#', 'zh'),v) for (s,v) in self.sizes.items()])

def opencl_alloc(self, min_size, tag):
    """Allocate at least ``min_size`` bytes from ``self.pool``.

    A request for zero bytes is rounded up to one byte.  ``tag`` is
    accepted but not used.  Returns whatever ``self.pool.allocate``
    returns.
    """
    if min_size == 0:
        min_size = 1
    assert min_size > 0
    pool = self.pool
    return pool.allocate(min_size)

def opencl_free_all(self):
    """Ask ``self.pool`` to release the memory it is holding on to."""
    pool = self.pool
    pool.free_held()

def sync(self):
    """Read back the device failure word and raise if a failure is recorded.

    Performs a blocking copy of ``self.global_failure`` to the host and
    clears ``self.failure_is_an_option``.  A non-negative value is used
    as an index into ``self.failure_msgs``: the failure word is reset to
    -1, the failure arguments are read back, and an Exception carrying
    the formatted message is raised.
    """
    status = np.empty(1, dtype=np.int32)
    cl.enqueue_copy(self.queue, status, self.global_failure, is_blocking=True)
    self.failure_is_an_option = np.int32(0)

    if failure_code_is_clear(status):
        return

    # Reset failure information.
    word_size = np.int32().itemsize
    cl.enqueue_fill_buffer(self.queue, self.global_failure, np.int32(-1), 0, word_size)

    # Read failure args.
    arg_count = self.global_failure_args_max+1
    failure_args = np.empty(arg_count, dtype=np.int64)
    cl.enqueue_copy(self.queue, failure_args, self.global_failure_args, is_blocking=True)

    message_template = self.failure_msgs[status[0]]
    raise Exception(message_template.format(*failure_args))
import pyopencl.array
import time
import argparse
# Module-level configuration.
# `sizes` is consulted by initialise_opencl_object: any 'default_*' entry
# found in it overrides the corresponding keyword argument and is then
# deleted from the dict.
sizes = {}
# NOTE(review): the settings below are not read by any code visible in
# this part of the file; presumably they are filled in from command-line
# options and passed on to initialise_opencl_object - confirm.
synchronous = False
preferred_platform = None
preferred_device = None
default_threshold = None
default_group_size = None
default_num_groups = None
default_tile_size = None
default_reg_tile_size = None
fut_opencl_src = """#ifdef cl_clang_storage_class_specifiers
#pragma OPENCL EXTENSION cl_clang_storage_class_specifiers : enable
#endif
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
__kernel void dummy_kernel(__global unsigned char *dummy, int n)
{
    const int thread_gid = get_global_id(0);
    
    if (thread_gid >= n)
        return;
}
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable
typedef char int8_t;
typedef short int16_t;
typedef int int32_t;
typedef long int64_t;
typedef uchar uint8_t;
typedef ushort uint16_t;
typedef uint uint32_t;
typedef ulong uint64_t;
#ifdef cl_nv_pragma_unroll
static inline void mem_fence_global()
{
    asm("membar.gl;");
}
#else
static inline void mem_fence_global()
{
    mem_fence(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
}
#endif
static inline void mem_fence_local()
{
    mem_fence(CLK_LOCAL_MEM_FENCE);
}
static inline uint8_t add8(uint8_t x, uint8_t y)
{
    return x + y;
}
static inline uint16_t add16(uint16_t x, uint16_t y)
{
    return x + y;
}
static inline uint32_t add32(uint32_t x, uint32_t y)
{
    return x + y;
}
static inline uint64_t add64(uint64_t x, uint64_t y)
{
    return x + y;
}
static inline uint8_t sub8(uint8_t x, uint8_t y)
{
    return x - y;
}
static inline uint16_t sub16(uint16_t x, uint16_t y)
{
    return x - y;
}
static inline uint32_t sub32(uint32_t x, uint32_t y)
{
    return x - y;
}
static inline uint64_t sub64(uint64_t x, uint64_t y)
{
    return x - y;
}
static inline uint8_t mul8(uint8_t x, uint8_t y)
{
    return x * y;
}
static inline uint16_t mul16(uint16_t x, uint16_t y)
{
    return x * y;
}
static inline uint32_t mul32(uint32_t x, uint32_t y)
{
    return x * y;
}
static inline uint64_t mul64(uint64_t x, uint64_t y)
{
    return x * y;
}
static inline uint8_t udiv8(uint8_t x, uint8_t y)
{
    return x / y;
}
static inline uint16_t udiv16(uint16_t x, uint16_t y)
{
    return x / y;
}
static inline uint32_t udiv32(uint32_t x, uint32_t y)
{
    return x / y;
}
static inline uint64_t udiv64(uint64_t x, uint64_t y)
{
    return x / y;
}
static inline uint8_t udiv_up8(uint8_t x, uint8_t y)
{
    return (x + y - 1) / y;
}
static inline uint16_t udiv_up16(uint16_t x, uint16_t y)
{
    return (x + y - 1) / y;
}
static inline uint32_t udiv_up32(uint32_t x, uint32_t y)
{
    return (x + y - 1) / y;
}
static inline uint64_t udiv_up64(uint64_t x, uint64_t y)
{
    return (x + y - 1) / y;
}
static inline uint8_t umod8(uint8_t x, uint8_t y)
{
    return x % y;
}
static inline uint16_t umod16(uint16_t x, uint16_t y)
{
    return x % y;
}
static inline uint32_t umod32(uint32_t x, uint32_t y)
{
    return x % y;
}
static inline uint64_t umod64(uint64_t x, uint64_t y)
{
    return x % y;
}
static inline uint8_t udiv_safe8(uint8_t x, uint8_t y)
{
    return y == 0 ? 0 : x / y;
}
static inline uint16_t udiv_safe16(uint16_t x, uint16_t y)
{
    return y == 0 ? 0 : x / y;
}
static inline uint32_t udiv_safe32(uint32_t x, uint32_t y)
{
    return y == 0 ? 0 : x / y;
}
static inline uint64_t udiv_safe64(uint64_t x, uint64_t y)
{
    return y == 0 ? 0 : x / y;
}
static inline uint8_t udiv_up_safe8(uint8_t x, uint8_t y)
{
    return y == 0 ? 0 : (x + y - 1) / y;
}
static inline uint16_t udiv_up_safe16(uint16_t x, uint16_t y)
{
    return y == 0 ? 0 : (x + y - 1) / y;
}
static inline uint32_t udiv_up_safe32(uint32_t x, uint32_t y)
{
    return y == 0 ? 0 : (x + y - 1) / y;
}
static inline uint64_t udiv_up_safe64(uint64_t x, uint64_t y)
{
    return y == 0 ? 0 : (x + y - 1) / y;
}
static inline uint8_t umod_safe8(uint8_t x, uint8_t y)
{
    return y == 0 ? 0 : x % y;
}
static inline uint16_t umod_safe16(uint16_t x, uint16_t y)
{
    return y == 0 ? 0 : x % y;
}
static inline uint32_t umod_safe32(uint32_t x, uint32_t y)
{
    return y == 0 ? 0 : x % y;
}
static inline uint64_t umod_safe64(uint64_t x, uint64_t y)
{
    return y == 0 ? 0 : x % y;
}
static inline int8_t sdiv8(int8_t x, int8_t y)
{
    int8_t q = x / y;
    int8_t r = x % y;
    
    return q - ((r != 0 && r < 0 != y < 0) ? 1 : 0);
}
static inline int16_t sdiv16(int16_t x, int16_t y)
{
    int16_t q = x / y;
    int16_t r = x % y;
    
    return q - ((r != 0 && r < 0 != y < 0) ? 1 : 0);
}
static inline int32_t sdiv32(int32_t x, int32_t y)
{
    int32_t q = x / y;
    int32_t r = x % y;
    
    return q - ((r != 0 && r < 0 != y < 0) ? 1 : 0);
}
static inline int64_t sdiv64(int64_t x, int64_t y)
{
    int64_t q = x / y;
    int64_t r = x % y;
    
    return q - ((r != 0 && r < 0 != y < 0) ? 1 : 0);
}
static inline int8_t sdiv_up8(int8_t x, int8_t y)
{
    return sdiv8(x + y - 1, y);
}
static inline int16_t sdiv_up16(int16_t x, int16_t y)
{
    return sdiv16(x + y - 1, y);
}
static inline int32_t sdiv_up32(int32_t x, int32_t y)
{
    return sdiv32(x + y - 1, y);
}
static inline int64_t sdiv_up64(int64_t x, int64_t y)
{
    return sdiv64(x + y - 1, y);
}
static inline int8_t smod8(int8_t x, int8_t y)
{
    int8_t r = x % y;
    
    return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y);
}
static inline int16_t smod16(int16_t x, int16_t y)
{
    int16_t r = x % y;
    
    return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y);
}
static inline int32_t smod32(int32_t x, int32_t y)
{
    int32_t r = x % y;
    
    return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y);
}
static inline int64_t smod64(int64_t x, int64_t y)
{
    int64_t r = x % y;
    
    return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y);
}
static inline int8_t sdiv_safe8(int8_t x, int8_t y)
{
    return y == 0 ? 0 : sdiv8(x, y);
}
static inline int16_t sdiv_safe16(int16_t x, int16_t y)
{
    return y == 0 ? 0 : sdiv16(x, y);
}
static inline int32_t sdiv_safe32(int32_t x, int32_t y)
{
    return y == 0 ? 0 : sdiv32(x, y);
}
static inline int64_t sdiv_safe64(int64_t x, int64_t y)
{
    return y == 0 ? 0 : sdiv64(x, y);
}
static inline int8_t sdiv_up_safe8(int8_t x, int8_t y)
{
    return sdiv_safe8(x + y - 1, y);
}
static inline int16_t sdiv_up_safe16(int16_t x, int16_t y)
{
    return sdiv_safe16(x + y - 1, y);
}
static inline int32_t sdiv_up_safe32(int32_t x, int32_t y)
{
    return sdiv_safe32(x + y - 1, y);
}
static inline int64_t sdiv_up_safe64(int64_t x, int64_t y)
{
    return sdiv_safe64(x + y - 1, y);
}
static inline int8_t smod_safe8(int8_t x, int8_t y)
{
    return y == 0 ? 0 : smod8(x, y);
}
static inline int16_t smod_safe16(int16_t x, int16_t y)
{
    return y == 0 ? 0 : smod16(x, y);
}
static inline int32_t smod_safe32(int32_t x, int32_t y)
{
    return y == 0 ? 0 : smod32(x, y);
}
static inline int64_t smod_safe64(int64_t x, int64_t y)
{
    return y == 0 ? 0 : smod64(x, y);
}
static inline int8_t squot8(int8_t x, int8_t y)
{
    return x / y;
}
static inline int16_t squot16(int16_t x, int16_t y)
{
    return x / y;
}
static inline int32_t squot32(int32_t x, int32_t y)
{
    return x / y;
}
static inline int64_t squot64(int64_t x, int64_t y)
{
    return x / y;
}
static inline int8_t srem8(int8_t x, int8_t y)
{
    return x % y;
}
static inline int16_t srem16(int16_t x, int16_t y)
{
    return x % y;
}
static inline int32_t srem32(int32_t x, int32_t y)
{
    return x % y;
}
static inline int64_t srem64(int64_t x, int64_t y)
{
    return x % y;
}
static inline int8_t squot_safe8(int8_t x, int8_t y)
{
    return y == 0 ? 0 : x / y;
}
static inline int16_t squot_safe16(int16_t x, int16_t y)
{
    return y == 0 ? 0 : x / y;
}
static inline int32_t squot_safe32(int32_t x, int32_t y)
{
    return y == 0 ? 0 : x / y;
}
static inline int64_t squot_safe64(int64_t x, int64_t y)
{
    return y == 0 ? 0 : x / y;
}
static inline int8_t srem_safe8(int8_t x, int8_t y)
{
    return y == 0 ? 0 : x % y;
}
static inline int16_t srem_safe16(int16_t x, int16_t y)
{
    return y == 0 ? 0 : x % y;
}
static inline int32_t srem_safe32(int32_t x, int32_t y)
{
    return y == 0 ? 0 : x % y;
}
static inline int64_t srem_safe64(int64_t x, int64_t y)
{
    return y == 0 ? 0 : x % y;
}
static inline int8_t smin8(int8_t x, int8_t y)
{
    return x < y ? x : y;
}
static inline int16_t smin16(int16_t x, int16_t y)
{
    return x < y ? x : y;
}
static inline int32_t smin32(int32_t x, int32_t y)
{
    return x < y ? x : y;
}
static inline int64_t smin64(int64_t x, int64_t y)
{
    return x < y ? x : y;
}
static inline uint8_t umin8(uint8_t x, uint8_t y)
{
    return x < y ? x : y;
}
static inline uint16_t umin16(uint16_t x, uint16_t y)
{
    return x < y ? x : y;
}
static inline uint32_t umin32(uint32_t x, uint32_t y)
{
    return x < y ? x : y;
}
static inline uint64_t umin64(uint64_t x, uint64_t y)
{
    return x < y ? x : y;
}
static inline int8_t smax8(int8_t x, int8_t y)
{
    return x < y ? y : x;
}
static inline int16_t smax16(int16_t x, int16_t y)
{
    return x < y ? y : x;
}
static inline int32_t smax32(int32_t x, int32_t y)
{
    return x < y ? y : x;
}
static inline int64_t smax64(int64_t x, int64_t y)
{
    return x < y ? y : x;
}
static inline uint8_t umax8(uint8_t x, uint8_t y)
{
    return x < y ? y : x;
}
static inline uint16_t umax16(uint16_t x, uint16_t y)
{
    return x < y ? y : x;
}
static inline uint32_t umax32(uint32_t x, uint32_t y)
{
    return x < y ? y : x;
}
static inline uint64_t umax64(uint64_t x, uint64_t y)
{
    return x < y ? y : x;
}
static inline uint8_t shl8(uint8_t x, uint8_t y)
{
    return x << y;
}
static inline uint16_t shl16(uint16_t x, uint16_t y)
{
    return x << y;
}
static inline uint32_t shl32(uint32_t x, uint32_t y)
{
    return x << y;
}
static inline uint64_t shl64(uint64_t x, uint64_t y)
{
    return x << y;
}
static inline uint8_t lshr8(uint8_t x, uint8_t y)
{
    return x >> y;
}
static inline uint16_t lshr16(uint16_t x, uint16_t y)
{
    return x >> y;
}
static inline uint32_t lshr32(uint32_t x, uint32_t y)
{
    return x >> y;
}
static inline uint64_t lshr64(uint64_t x, uint64_t y)
{
    return x >> y;
}
static inline int8_t ashr8(int8_t x, int8_t y)
{
    return x >> y;
}
static inline int16_t ashr16(int16_t x, int16_t y)
{
    return x >> y;
}
static inline int32_t ashr32(int32_t x, int32_t y)
{
    return x >> y;
}
static inline int64_t ashr64(int64_t x, int64_t y)
{
    return x >> y;
}
static inline uint8_t and8(uint8_t x, uint8_t y)
{
    return x & y;
}
static inline uint16_t and16(uint16_t x, uint16_t y)
{
    return x & y;
}
static inline uint32_t and32(uint32_t x, uint32_t y)
{
    return x & y;
}
static inline uint64_t and64(uint64_t x, uint64_t y)
{
    return x & y;
}
static inline uint8_t or8(uint8_t x, uint8_t y)
{
    return x | y;
}
static inline uint16_t or16(uint16_t x, uint16_t y)
{
    return x | y;
}
static inline uint32_t or32(uint32_t x, uint32_t y)
{
    return x | y;
}
static inline uint64_t or64(uint64_t x, uint64_t y)
{
    return x | y;
}
static inline uint8_t xor8(uint8_t x, uint8_t y)
{
    return x ^ y;
}
static inline uint16_t xor16(uint16_t x, uint16_t y)
{
    return x ^ y;
}
static inline uint32_t xor32(uint32_t x, uint32_t y)
{
    return x ^ y;
}
static inline uint64_t xor64(uint64_t x, uint64_t y)
{
    return x ^ y;
}
static inline bool ult8(uint8_t x, uint8_t y)
{
    return x < y;
}
static inline bool ult16(uint16_t x, uint16_t y)
{
    return x < y;
}
static inline bool ult32(uint32_t x, uint32_t y)
{
    return x < y;
}
static inline bool ult64(uint64_t x, uint64_t y)
{
    return x < y;
}
static inline bool ule8(uint8_t x, uint8_t y)
{
    return x <= y;
}
static inline bool ule16(uint16_t x, uint16_t y)
{
    return x <= y;
}
static inline bool ule32(uint32_t x, uint32_t y)
{
    return x <= y;
}
static inline bool ule64(uint64_t x, uint64_t y)
{
    return x <= y;
}
static inline bool slt8(int8_t x, int8_t y)
{
    return x < y;
}
static inline bool slt16(int16_t x, int16_t y)
{
    return x < y;
}
static inline bool slt32(int32_t x, int32_t y)
{
    return x < y;
}
static inline bool slt64(int64_t x, int64_t y)
{
    return x < y;
}
static inline bool sle8(int8_t x, int8_t y)
{
    return x <= y;
}
static inline bool sle16(int16_t x, int16_t y)
{
    return x <= y;
}
static inline bool sle32(int32_t x, int32_t y)
{
    return x <= y;
}
static inline bool sle64(int64_t x, int64_t y)
{
    return x <= y;
}
static inline int8_t pow8(int8_t x, int8_t y)
{
    int8_t res = 1, rem = y;
    
    while (rem != 0) {
        if (rem & 1)
            res *= x;
        rem >>= 1;
        x *= x;
    }
    return res;
}
static inline int16_t pow16(int16_t x, int16_t y)
{
    int16_t res = 1, rem = y;
    
    while (rem != 0) {
        if (rem & 1)
            res *= x;
        rem >>= 1;
        x *= x;
    }
    return res;
}
static inline int32_t pow32(int32_t x, int32_t y)
{
    int32_t res = 1, rem = y;
    
    while (rem != 0) {
        if (rem & 1)
            res *= x;
        rem >>= 1;
        x *= x;
    }
    return res;
}
static inline int64_t pow64(int64_t x, int64_t y)
{
    int64_t res = 1, rem = y;
    
    while (rem != 0) {
        if (rem & 1)
            res *= x;
        rem >>= 1;
        x *= x;
    }
    return res;
}
static inline bool itob_i8_bool(int8_t x)
{
    return x;
}
static inline bool itob_i16_bool(int16_t x)
{
    return x;
}
static inline bool itob_i32_bool(int32_t x)
{
    return x;
}
static inline bool itob_i64_bool(int64_t x)
{
    return x;
}
static inline int8_t btoi_bool_i8(bool x)
{
    return x;
}
static inline int16_t btoi_bool_i16(bool x)
{
    return x;
}
static inline int32_t btoi_bool_i32(bool x)
{
    return x;
}
static inline int64_t btoi_bool_i64(bool x)
{
    return x;
}
#define sext_i8_i8(x) ((int8_t) (int8_t) x)
#define sext_i8_i16(x) ((int16_t) (int8_t) x)
#define sext_i8_i32(x) ((int32_t) (int8_t) x)
#define sext_i8_i64(x) ((int64_t) (int8_t) x)
#define sext_i16_i8(x) ((int8_t) (int16_t) x)
#define sext_i16_i16(x) ((int16_t) (int16_t) x)
#define sext_i16_i32(x) ((int32_t) (int16_t) x)
#define sext_i16_i64(x) ((int64_t) (int16_t) x)
#define sext_i32_i8(x) ((int8_t) (int32_t) x)
#define sext_i32_i16(x) ((int16_t) (int32_t) x)
#define sext_i32_i32(x) ((int32_t) (int32_t) x)
#define sext_i32_i64(x) ((int64_t) (int32_t) x)
#define sext_i64_i8(x) ((int8_t) (int64_t) x)
#define sext_i64_i16(x) ((int16_t) (int64_t) x)
#define sext_i64_i32(x) ((int32_t) (int64_t) x)
#define sext_i64_i64(x) ((int64_t) (int64_t) x)
#define zext_i8_i8(x) ((int8_t) (uint8_t) x)
#define zext_i8_i16(x) ((int16_t) (uint8_t) x)
#define zext_i8_i32(x) ((int32_t) (uint8_t) x)
#define zext_i8_i64(x) ((int64_t) (uint8_t) x)
#define zext_i16_i8(x) ((int8_t) (uint16_t) x)
#define zext_i16_i16(x) ((int16_t) (uint16_t) x)
#define zext_i16_i32(x) ((int32_t) (uint16_t) x)
#define zext_i16_i64(x) ((int64_t) (uint16_t) x)
#define zext_i32_i8(x) ((int8_t) (uint32_t) x)
#define zext_i32_i16(x) ((int16_t) (uint32_t) x)
#define zext_i32_i32(x) ((int32_t) (uint32_t) x)
#define zext_i32_i64(x) ((int64_t) (uint32_t) x)
#define zext_i64_i8(x) ((int8_t) (uint64_t) x)
#define zext_i64_i16(x) ((int16_t) (uint64_t) x)
#define zext_i64_i32(x) ((int32_t) (uint64_t) x)
#define zext_i64_i64(x) ((int64_t) (uint64_t) x)
#if defined(__OPENCL_VERSION__)
static int32_t futrts_popc8(int8_t x)
{
    return popcount(x);
}
static int32_t futrts_popc16(int16_t x)
{
    return popcount(x);
}
static int32_t futrts_popc32(int32_t x)
{
    return popcount(x);
}
static int32_t futrts_popc64(int64_t x)
{
    return popcount(x);
}
#elif defined(__CUDA_ARCH__)
static int32_t futrts_popc8(int8_t x)
{
    return __popc(zext_i8_i32(x));
}
static int32_t futrts_popc16(int16_t x)
{
    return __popc(zext_i16_i32(x));
}
static int32_t futrts_popc32(int32_t x)
{
    return __popc(x);
}
static int32_t futrts_popc64(int64_t x)
{
    return __popcll(x);
}
#else
static int32_t futrts_popc8(int8_t x)
{
    int c = 0;
    
    for (; x; ++c)
        x &= x - 1;
    return c;
}
static int32_t futrts_popc16(int16_t x)
{
    int c = 0;
    
    for (; x; ++c)
        x &= x - 1;
    return c;
}
static int32_t futrts_popc32(int32_t x)
{
    int c = 0;
    
    for (; x; ++c)
        x &= x - 1;
    return c;
}
static int32_t futrts_popc64(int64_t x)
{
    int c = 0;
    
    for (; x; ++c)
        x &= x - 1;
    return c;
}
#endif
#if defined(__OPENCL_VERSION__)
static uint8_t futrts_mul_hi8(uint8_t a, uint8_t b)
{
    return mul_hi(a, b);
}
static uint16_t futrts_mul_hi16(uint16_t a, uint16_t b)
{
    return mul_hi(a, b);
}
static uint32_t futrts_mul_hi32(uint32_t a, uint32_t b)
{
    return mul_hi(a, b);
}
static uint64_t futrts_mul_hi64(uint64_t a, uint64_t b)
{
    return mul_hi(a, b);
}
#elif defined(__CUDA_ARCH__)
static uint8_t futrts_mul_hi8(uint8_t a, uint8_t b)
{
    uint16_t aa = a;
    uint16_t bb = b;
    
    return aa * bb >> 8;
}
static uint16_t futrts_mul_hi16(uint16_t a, uint16_t b)
{
    uint32_t aa = a;
    uint32_t bb = b;
    
    return aa * bb >> 16;
}
static uint32_t futrts_mul_hi32(uint32_t a, uint32_t b)
{
    return mulhi(a, b);
}
static uint64_t futrts_mul_hi64(uint64_t a, uint64_t b)
{
    return mul64hi(a, b);
}
#else
static uint8_t futrts_mul_hi8(uint8_t a, uint8_t b)
{
    uint16_t aa = a;
    uint16_t bb = b;
    
    return aa * bb >> 8;
}
static uint16_t futrts_mul_hi16(uint16_t a, uint16_t b)
{
    uint32_t aa = a;
    uint32_t bb = b;
    
    return aa * bb >> 16;
}
static uint32_t futrts_mul_hi32(uint32_t a, uint32_t b)
{
    uint64_t aa = a;
    uint64_t bb = b;
    
    return aa * bb >> 32;
}
static uint64_t futrts_mul_hi64(uint64_t a, uint64_t b)
{
    __uint128_t aa = a;
    __uint128_t bb = b;
    
    return aa * bb >> 64;
}
#endif
#if defined(__OPENCL_VERSION__)
static uint8_t futrts_mad_hi8(uint8_t a, uint8_t b, uint8_t c)
{
    return mad_hi(a, b, c);
}
static uint16_t futrts_mad_hi16(uint16_t a, uint16_t b, uint16_t c)
{
    return mad_hi(a, b, c);
}
static uint32_t futrts_mad_hi32(uint32_t a, uint32_t b, uint32_t c)
{
    return mad_hi(a, b, c);
}
static uint64_t futrts_mad_hi64(uint64_t a, uint64_t b, uint64_t c)
{
    return mad_hi(a, b, c);
}
#else
static uint8_t futrts_mad_hi8(uint8_t a, uint8_t b, uint8_t c)
{
    return futrts_mul_hi8(a, b) + c;
}
static uint16_t futrts_mad_hi16(uint16_t a, uint16_t b, uint16_t c)
{
    return futrts_mul_hi16(a, b) + c;
}
static uint32_t futrts_mad_hi32(uint32_t a, uint32_t b, uint32_t c)
{
    return futrts_mul_hi32(a, b) + c;
}
static uint64_t futrts_mad_hi64(uint64_t a, uint64_t b, uint64_t c)
{
    return futrts_mul_hi64(a, b) + c;
}
#endif
#if defined(__OPENCL_VERSION__)
static int32_t futrts_clzz8(int8_t x)
{
    return clz(x);
}
static int32_t futrts_clzz16(int16_t x)
{
    return clz(x);
}
static int32_t futrts_clzz32(int32_t x)
{
    return clz(x);
}
static int32_t futrts_clzz64(int64_t x)
{
    return clz(x);
}
#elif defined(__CUDA_ARCH__)
static int32_t futrts_clzz8(int8_t x)
{
    return __clz(zext_i8_i32(x)) - 24;
}
static int32_t futrts_clzz16(int16_t x)
{
    return __clz(zext_i16_i32(x)) - 16;
}
static int32_t futrts_clzz32(int32_t x)
{
    return __clz(x);
}
static int32_t futrts_clzz64(int64_t x)
{
    return __clzll(x);
}
#else
static int32_t futrts_clzz8(int8_t x)
{
    int n = 0;
    int bits = sizeof(x) * 8;
    
    for (int i = 0; i < bits; i++) {
        if (x < 0)
            break;
        n++;
        x <<= 1;
    }
    return n;
}
static int32_t futrts_clzz16(int16_t x)
{
    int n = 0;
    int bits = sizeof(x) * 8;
    
    for (int i = 0; i < bits; i++) {
        if (x < 0)
            break;
        n++;
        x <<= 1;
    }
    return n;
}
static int32_t futrts_clzz32(int32_t x)
{
    int n = 0;
    int bits = sizeof(x) * 8;
    
    for (int i = 0; i < bits; i++) {
        if (x < 0)
            break;
        n++;
        x <<= 1;
    }
    return n;
}
static int32_t futrts_clzz64(int64_t x)
{
    int n = 0;
    int bits = sizeof(x) * 8;
    
    for (int i = 0; i < bits; i++) {
        if (x < 0)
            break;
        n++;
        x <<= 1;
    }
    return n;
}
#endif
#if defined(__OPENCL_VERSION__)
static int32_t futrts_ctzz8(int8_t x)
{
    int i = 0;
    
    for (; i < 8 && (x & 1) == 0; i++, x >>= 1)
        ;
    return i;
}
static int32_t futrts_ctzz16(int16_t x)
{
    int i = 0;
    
    for (; i < 16 && (x & 1) == 0; i++, x >>= 1)
        ;
    return i;
}
static int32_t futrts_ctzz32(int32_t x)
{
    int i = 0;
    
    for (; i < 32 && (x & 1) == 0; i++, x >>= 1)
        ;
    return i;
}
static int32_t futrts_ctzz64(int64_t x)
{
    int i = 0;
    
    for (; i < 64 && (x & 1) == 0; i++, x >>= 1)
        ;
    return i;
}
#elif defined(__CUDA_ARCH__)
static int32_t futrts_ctzz8(int8_t x)
{
    int y = __ffs(x);
    
    return y == 0 ? 8 : y - 1;
}
static int32_t futrts_ctzz16(int16_t x)
{
    int y = __ffs(x);
    
    return y == 0 ? 16 : y - 1;
}
static int32_t futrts_ctzz32(int32_t x)
{
    int y = __ffs(x);
    
    return y == 0 ? 32 : y - 1;
}
static int32_t futrts_ctzz64(int64_t x)
{
    int y = __ffsll(x);
    
    return y == 0 ? 64 : y - 1;
}
#else
static int32_t futrts_ctzz8(int8_t x)
{
    return x == 0 ? 8 : __builtin_ctz((uint32_t) x);
}
static int32_t futrts_ctzz16(int16_t x)
{
    return x == 0 ? 16 : __builtin_ctz((uint32_t) x);
}
static int32_t futrts_ctzz32(int32_t x)
{
    return x == 0 ? 32 : __builtin_ctz(x);
}
static int32_t futrts_ctzz64(int64_t x)
{
    return x == 0 ? 64 : __builtin_ctzll(x);
}
#endif
static inline float fdiv32(float x, float y)
{
    return x / y;
}
static inline float fadd32(float x, float y)
{
    return x + y;
}
static inline float fsub32(float x, float y)
{
    return x - y;
}
static inline float fmul32(float x, float y)
{
    return x * y;
}
static inline float fmin32(float x, float y)
{
    return fmin(x, y);
}
static inline float fmax32(float x, float y)
{
    return fmax(x, y);
}
static inline float fpow32(float x, float y)
{
    return pow(x, y);
}
static inline bool cmplt32(float x, float y)
{
    return x < y;
}
static inline bool cmple32(float x, float y)
{
    return x <= y;
}
// Signed integer to float conversions, one per source width (plain C casts).
static inline float sitofp_i8_f32(int8_t x)
{
    const float converted = (float) x;

    return converted;
}
static inline float sitofp_i16_f32(int16_t x)
{
    const float converted = (float) x;

    return converted;
}
static inline float sitofp_i32_f32(int32_t x)
{
    const float converted = (float) x;

    return converted;
}
static inline float sitofp_i64_f32(int64_t x)
{
    const float converted = (float) x;

    return converted;
}
// Unsigned integer to float conversions, one per source width (plain C casts).
static inline float uitofp_i8_f32(uint8_t x)
{
    const float converted = (float) x;

    return converted;
}
static inline float uitofp_i16_f32(uint16_t x)
{
    const float converted = (float) x;

    return converted;
}
static inline float uitofp_i32_f32(uint32_t x)
{
    const float converted = (float) x;

    return converted;
}
static inline float uitofp_i64_f32(uint64_t x)
{
    const float converted = (float) x;

    return converted;
}
// Float to signed integer conversions, one per target width (plain C casts).
static inline int8_t fptosi_f32_i8(float x)
{
    const int8_t converted = (int8_t) x;

    return converted;
}
static inline int16_t fptosi_f32_i16(float x)
{
    const int16_t converted = (int16_t) x;

    return converted;
}
static inline int32_t fptosi_f32_i32(float x)
{
    const int32_t converted = (int32_t) x;

    return converted;
}
static inline int64_t fptosi_f32_i64(float x)
{
    const int64_t converted = (int64_t) x;

    return converted;
}
// Float to unsigned integer conversions, one per target width (plain C casts).
static inline uint8_t fptoui_f32_i8(float x)
{
    const uint8_t converted = (uint8_t) x;

    return converted;
}
static inline uint16_t fptoui_f32_i16(float x)
{
    const uint16_t converted = (uint16_t) x;

    return converted;
}
static inline uint32_t fptoui_f32_i32(float x)
{
    const uint32_t converted = (uint32_t) x;

    return converted;
}
static inline uint64_t fptoui_f32_i64(float x)
{
    const uint64_t converted = (uint64_t) x;

    return converted;
}
static inline float futrts_log32(float x)
{
    return log(x);
}
static inline float futrts_log2_32(float x)
{
    return log2(x);
}
static inline float futrts_log10_32(float x)
{
    return log10(x);
}
static inline float futrts_sqrt32(float x)
{
    return sqrt(x);
}
static inline float futrts_exp32(float x)
{
    return exp(x);
}
static inline float futrts_cos32(float x)
{
    return cos(x);
}
static inline float futrts_sin32(float x)
{
    return sin(x);
}
static inline float futrts_tan32(float x)
{
    return tan(x);
}
static inline float futrts_acos32(float x)
{
    return acos(x);
}
static inline float futrts_asin32(float x)
{
    return asin(x);
}
static inline float futrts_atan32(float x)
{
    return atan(x);
}
static inline float futrts_cosh32(float x)
{
    return cosh(x);
}
static inline float futrts_sinh32(float x)
{
    return sinh(x);
}
static inline float futrts_tanh32(float x)
{
    return tanh(x);
}
static inline float futrts_acosh32(float x)
{
    return acosh(x);
}
static inline float futrts_asinh32(float x)
{
    return asinh(x);
}
static inline float futrts_atanh32(float x)
{
    return atanh(x);
}
static inline float futrts_atan2_32(float x, float y)
{
    return atan2(x, y);
}
static inline float futrts_gamma32(float x)
{
    return tgamma(x);
}
static inline float futrts_lgamma32(float x)
{
    return lgamma(x);
}
// True when isnan reports x as a NaN.
static inline bool futrts_isnan32(float x)
{
    const bool is_nan = isnan(x);

    return is_nan;
}
// True when isinf reports x as an infinity.
static inline bool futrts_isinf32(float x)
{
    const bool is_inf = isinf(x);

    return is_inf;
}
// Reinterprets the storage of a float as an int32_t by writing one union
// member and reading the other; no numeric conversion takes place.
static inline int32_t futrts_to_bits32(float x)
{
    union {
        float as_float;
        int32_t as_bits;
    } pun;

    pun.as_float = x;
    return pun.as_bits;
}
// Inverse of the above: reinterprets an int32_t bit pattern as a float.
static inline float futrts_from_bits32(int32_t x)
{
    union {
        int32_t as_bits;
        float as_float;
    } pun;

    pun.as_bits = x;
    return pun.as_float;
}
// Sign of a single-precision value: 1 for positive, -1 for negative, 0 for
// zero. A NaN input is handed back unchanged.
//
// The parameter and return type are float, matching the rest of the f32
// helpers. The previous double signature pulled fp64 into a single-precision
// code path, which cannot compile on devices lacking double support.
static inline float fsignum32(float x)
{
    // (x > 0) - (x < 0) is an int in {-1, 0, 1}, converted to float on return.
    return futrts_isnan32(x) ? x : (x > 0) - (x < 0);
}
#ifdef __OPENCL_VERSION__
static inline float fmod32(float x, float y)
{
    return fmod(x, y);
}
static inline float futrts_round32(float x)
{
    return rint(x);
}
static inline float futrts_floor32(float x)
{
    return floor(x);
}
static inline float futrts_ceil32(float x)
{
    return ceil(x);
}
// OpenCL-branch helpers built on the device built-ins mix, mad and fma.
// Interpolation between v0 and v1 with weight t, via mix.
static inline float futrts_lerp32(float v0, float v1, float t)
{
    const float blended = mix(v0, v1, t);

    return blended;
}
// a * b + c via the mad built-in.
static inline float futrts_mad32(float a, float b, float c)
{
    const float result = mad(a, b, c);

    return result;
}
// a * b + c via the fma built-in.
static inline float futrts_fma32(float a, float b, float c)
{
    const float result = fma(a, b, c);

    return result;
}
#else
static inline float fmod32(float x, float y)
{
    return fmodf(x, y);
}
static inline float futrts_round32(float x)
{
    return rintf(x);
}
static inline float futrts_floor32(float x)
{
    return floorf(x);
}
static inline float futrts_ceil32(float x)
{
    return ceilf(x);
}
static inline float futrts_lerp32(float v0, float v1, float t)
{
    return v0 + (v1 - v0) * t;
}
static inline float futrts_mad32(float a, float b, float c)
{
    return a * b + c;
}
static inline float futrts_fma32(float a, float b, float c)
{
    return fmaf(a, b, c);
}
#endif
// Start of atomics.h

// Atomically replaces the 32-bit integer at p (global memory) with x and
// returns the intrinsic's result (per the OpenCL / CUDA atomics documentation,
// the value held at p before the exchange).
//
// The OpenCL branch uses atomic_xchg. It previously called atomic_xor, which
// stores (old ^ x) rather than x and so only behaved like an exchange when the
// old value was zero.
inline int32_t atomic_xchg_i32_global(volatile __global int32_t *p, int32_t x) {
#ifdef FUTHARK_CUDA
  return atomicExch((int32_t*)p, x);
#else
  return atomic_xchg(p, x);
#endif
}

// Same exchange for a 32-bit integer in local memory.
inline int32_t atomic_xchg_i32_local(volatile __local int32_t *p, int32_t x) {
#ifdef FUTHARK_CUDA
  return atomicExch((int32_t*)p, x);
#else
  return atomic_xchg(p, x);
#endif
}

// Atomic compare-and-swap on a 32-bit integer in global memory: forwards
// (p, cmp, val) to atomicCAS under FUTHARK_CUDA and to atomic_cmpxchg
// otherwise, and returns the intrinsic's result.
inline int32_t atomic_cmpxchg_i32_global(volatile __global int32_t *p,
                                         int32_t cmp, int32_t val) {
  int32_t seen;

#ifdef FUTHARK_CUDA
  seen = atomicCAS((int32_t*)p, cmp, val);
#else
  seen = atomic_cmpxchg(p, cmp, val);
#endif
  return seen;
}

// Same compare-and-swap for a 32-bit integer in local memory.
inline int32_t atomic_cmpxchg_i32_local(volatile __local int32_t *p,
                                        int32_t cmp, int32_t val) {
  int32_t seen;

#ifdef FUTHARK_CUDA
  seen = atomicCAS((int32_t*)p, cmp, val);
#else
  seen = atomic_cmpxchg(p, cmp, val);
#endif
  return seen;
}

// Atomic add of x to a 32-bit integer in global memory: atomicAdd under
// FUTHARK_CUDA, atomic_add otherwise. Returns the intrinsic's result.
inline int32_t atomic_add_i32_global(volatile __global int32_t *p, int32_t x) {
  int32_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicAdd((int32_t*)p, x);
#else
  prev = atomic_add(p, x);
#endif
  return prev;
}

// Same atomic add for a 32-bit integer in local memory.
inline int32_t atomic_add_i32_local(volatile __local int32_t *p, int32_t x) {
  int32_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicAdd((int32_t*)p, x);
#else
  prev = atomic_add(p, x);
#endif
  return prev;
}

// Atomically adds x to the float at p (global memory).
// Under FUTHARK_CUDA the native atomicAdd is used. Otherwise the add is
// emulated with a compare-and-swap retry loop over the value's 32-bit pattern.
inline float atomic_fadd_f32_global(volatile __global float *p, float x) {
#ifdef FUTHARK_CUDA
  return atomicAdd((float*)p, x);
#else
  // The unions let the same 32 bits be read as a float (for the addition) and
  // as an int32_t (for the integer compare-and-swap).
  union { int32_t i; float f; } old;
  union { int32_t i; float f; } assumed;
  old.f = *p;
  do {
    // Snapshot the value the sum is based on.
    assumed.f = old.f;
    old.f = old.f + x;
    // Ask for the sum to be stored only if *p still holds the snapshot;
    // old.i receives whatever the compare-and-swap reports back.
    old.i = atomic_cmpxchg_i32_global((volatile __global int32_t*)p, assumed.i, old.i);
  } while (assumed.i != old.i); // retry while the reported bits differ from the snapshot
  return old.f;
#endif
}

// Local-memory counterpart of the function above; same structure, using the
// local compare-and-swap helper.
inline float atomic_fadd_f32_local(volatile __local float *p, float x) {
#ifdef FUTHARK_CUDA
  return atomicAdd((float*)p, x);
#else
  union { int32_t i; float f; } old;
  union { int32_t i; float f; } assumed;
  old.f = *p;
  do {
    // Snapshot, compute the sum, then attempt to publish it.
    assumed.f = old.f;
    old.f = old.f + x;
    old.i = atomic_cmpxchg_i32_local((volatile __local int32_t*)p, assumed.i, old.i);
  } while (assumed.i != old.i); // retry while the reported bits differ from the snapshot
  return old.f;
#endif
}

// Atomic signed maximum on a 32-bit integer in global memory: atomicMax under
// FUTHARK_CUDA, atomic_max otherwise. Returns the intrinsic's result.
inline int32_t atomic_smax_i32_global(volatile __global int32_t *p, int32_t x) {
  int32_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicMax((int32_t*)p, x);
#else
  prev = atomic_max(p, x);
#endif
  return prev;
}

// Same signed maximum for a 32-bit integer in local memory.
inline int32_t atomic_smax_i32_local(volatile __local int32_t *p, int32_t x) {
  int32_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicMax((int32_t*)p, x);
#else
  prev = atomic_max(p, x);
#endif
  return prev;
}

// Atomic signed minimum on a 32-bit integer in global memory: atomicMin under
// FUTHARK_CUDA, atomic_min otherwise. Returns the intrinsic's result.
inline int32_t atomic_smin_i32_global(volatile __global int32_t *p, int32_t x) {
  int32_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicMin((int32_t*)p, x);
#else
  prev = atomic_min(p, x);
#endif
  return prev;
}

// Same signed minimum for a 32-bit integer in local memory.
inline int32_t atomic_smin_i32_local(volatile __local int32_t *p, int32_t x) {
  int32_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicMin((int32_t*)p, x);
#else
  prev = atomic_min(p, x);
#endif
  return prev;
}

// Atomic unsigned maximum on a 32-bit integer in global memory: atomicMax
// under FUTHARK_CUDA, atomic_max otherwise. Returns the intrinsic's result.
inline uint32_t atomic_umax_i32_global(volatile __global uint32_t *p, uint32_t x) {
  uint32_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicMax((uint32_t*)p, x);
#else
  prev = atomic_max(p, x);
#endif
  return prev;
}

// Same unsigned maximum for a 32-bit integer in local memory.
inline uint32_t atomic_umax_i32_local(volatile __local uint32_t *p, uint32_t x) {
  uint32_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicMax((uint32_t*)p, x);
#else
  prev = atomic_max(p, x);
#endif
  return prev;
}

// Atomic unsigned minimum on a 32-bit integer in global memory: atomicMin
// under FUTHARK_CUDA, atomic_min otherwise. Returns the intrinsic's result.
inline uint32_t atomic_umin_i32_global(volatile __global uint32_t *p, uint32_t x) {
  uint32_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicMin((uint32_t*)p, x);
#else
  prev = atomic_min(p, x);
#endif
  return prev;
}

// Same unsigned minimum for a 32-bit integer in local memory.
inline uint32_t atomic_umin_i32_local(volatile __local uint32_t *p, uint32_t x) {
  uint32_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicMin((uint32_t*)p, x);
#else
  prev = atomic_min(p, x);
#endif
  return prev;
}

// Atomic bitwise AND of x into a 32-bit integer in global memory: atomicAnd
// under FUTHARK_CUDA, atomic_and otherwise. Returns the intrinsic's result.
inline int32_t atomic_and_i32_global(volatile __global int32_t *p, int32_t x) {
  int32_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicAnd((int32_t*)p, x);
#else
  prev = atomic_and(p, x);
#endif
  return prev;
}

// Same bitwise AND for a 32-bit integer in local memory.
inline int32_t atomic_and_i32_local(volatile __local int32_t *p, int32_t x) {
  int32_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicAnd((int32_t*)p, x);
#else
  prev = atomic_and(p, x);
#endif
  return prev;
}

// Atomic bitwise OR of x into a 32-bit integer in global memory: atomicOr
// under FUTHARK_CUDA, atomic_or otherwise. Returns the intrinsic's result.
inline int32_t atomic_or_i32_global(volatile __global int32_t *p, int32_t x) {
  int32_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicOr((int32_t*)p, x);
#else
  prev = atomic_or(p, x);
#endif
  return prev;
}

// Same bitwise OR for a 32-bit integer in local memory.
inline int32_t atomic_or_i32_local(volatile __local int32_t *p, int32_t x) {
  int32_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicOr((int32_t*)p, x);
#else
  prev = atomic_or(p, x);
#endif
  return prev;
}

// Atomic bitwise XOR of x into a 32-bit integer in global memory: atomicXor
// under FUTHARK_CUDA, atomic_xor otherwise. Returns the intrinsic's result.
inline int32_t atomic_xor_i32_global(volatile __global int32_t *p, int32_t x) {
  int32_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicXor((int32_t*)p, x);
#else
  prev = atomic_xor(p, x);
#endif
  return prev;
}

// Same bitwise XOR for a 32-bit integer in local memory.
inline int32_t atomic_xor_i32_local(volatile __local int32_t *p, int32_t x) {
  int32_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicXor((int32_t*)p, x);
#else
  prev = atomic_xor(p, x);
#endif
  return prev;
}

// Start of 64 bit atomics

// Atomically replaces the 64-bit integer at p (global memory) with x and
// returns the intrinsic's result (per the OpenCL / CUDA atomics documentation,
// the value held at p before the exchange).
//
// The OpenCL branch uses atom_xchg. It previously called atom_xor, which
// stores (old ^ x) rather than x and so only behaved like an exchange when the
// old value was zero.
inline int64_t atomic_xchg_i64_global(volatile __global int64_t *p, int64_t x) {
#ifdef FUTHARK_CUDA
  return atomicExch((uint64_t*)p, x);
#else
  return atom_xchg(p, x);
#endif
}

// Same exchange for a 64-bit integer in local memory.
inline int64_t atomic_xchg_i64_local(volatile __local int64_t *p, int64_t x) {
#ifdef FUTHARK_CUDA
  return atomicExch((uint64_t*)p, x);
#else
  return atom_xchg(p, x);
#endif
}

// Atomic compare-and-swap on a 64-bit integer in global memory: forwards
// (p, cmp, val) to atomicCAS under FUTHARK_CUDA and to atom_cmpxchg
// otherwise, and returns the intrinsic's result.
inline int64_t atomic_cmpxchg_i64_global(volatile __global int64_t *p,
                                         int64_t cmp, int64_t val) {
  int64_t seen;

#ifdef FUTHARK_CUDA
  seen = atomicCAS((uint64_t*)p, cmp, val);
#else
  seen = atom_cmpxchg(p, cmp, val);
#endif
  return seen;
}

// Same compare-and-swap for a 64-bit integer in local memory.
inline int64_t atomic_cmpxchg_i64_local(volatile __local int64_t *p,
                                        int64_t cmp, int64_t val) {
  int64_t seen;

#ifdef FUTHARK_CUDA
  seen = atomicCAS((uint64_t*)p, cmp, val);
#else
  seen = atom_cmpxchg(p, cmp, val);
#endif
  return seen;
}

// Atomic add of x to a 64-bit integer in global memory: atomicAdd under
// FUTHARK_CUDA, atom_add otherwise. Returns the intrinsic's result.
inline int64_t atomic_add_i64_global(volatile __global int64_t *p, int64_t x) {
  int64_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicAdd((uint64_t*)p, x);
#else
  prev = atom_add(p, x);
#endif
  return prev;
}

// Same atomic add for a 64-bit integer in local memory.
inline int64_t atomic_add_i64_local(volatile __local int64_t *p, int64_t x) {
  int64_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicAdd((uint64_t*)p, x);
#else
  prev = atom_add(p, x);
#endif
  return prev;
}

// Atomically adds x to the double at p (global memory).
// The native atomicAdd is used only when FUTHARK_CUDA is defined and
// __CUDA_ARCH__ is at least 600. In every other configuration the add is
// emulated with a compare-and-swap retry loop over the 64-bit pattern.
inline double atomic_fadd_f64_global(volatile __global double *p, double x) {
#if defined(FUTHARK_CUDA) && __CUDA_ARCH__ >= 600
  return atomicAdd((double*)p, x);
#else
  // The unions let the same 64 bits be read as a double (for the addition)
  // and as an int64_t (for the integer compare-and-swap).
  union { int64_t i; double f; } old;
  union { int64_t i; double f; } assumed;
  old.f = *p;
  do {
    // Snapshot the value the sum is based on.
    assumed.f = old.f;
    old.f = old.f + x;
    // Ask for the sum to be stored only if *p still holds the snapshot;
    // old.i receives whatever the compare-and-swap reports back.
    old.i = atomic_cmpxchg_i64_global((volatile __global int64_t*)p, assumed.i, old.i);
  } while (assumed.i != old.i); // retry while the reported bits differ from the snapshot
  return old.f;
#endif
}

// Local-memory counterpart of the function above; same structure, using the
// local 64-bit compare-and-swap helper.
inline double atomic_fadd_f64_local(volatile __local double *p, double x) {
#if defined(FUTHARK_CUDA) && __CUDA_ARCH__ >= 600
  return atomicAdd((double*)p, x);
#else
  union { int64_t i; double f; } old;
  union { int64_t i; double f; } assumed;
  old.f = *p;
  do {
    // Snapshot, compute the sum, then attempt to publish it.
    assumed.f = old.f;
    old.f = old.f + x;
    old.i = atomic_cmpxchg_i64_local((volatile __local int64_t*)p, assumed.i, old.i);
  } while (assumed.i != old.i); // retry while the reported bits differ from the snapshot
  return old.f;
#endif
}

// Atomic signed maximum on a 64-bit integer in global memory: atomicMax under
// FUTHARK_CUDA, atom_max otherwise. Returns the intrinsic's result.
inline int64_t atomic_smax_i64_global(volatile __global int64_t *p, int64_t x) {
  int64_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicMax((int64_t*)p, x);
#else
  prev = atom_max(p, x);
#endif
  return prev;
}

// Same signed maximum for a 64-bit integer in local memory.
inline int64_t atomic_smax_i64_local(volatile __local int64_t *p, int64_t x) {
  int64_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicMax((int64_t*)p, x);
#else
  prev = atom_max(p, x);
#endif
  return prev;
}

// Atomic signed minimum on a 64-bit integer in global memory: atomicMin under
// FUTHARK_CUDA, atom_min otherwise. Returns the intrinsic's result.
inline int64_t atomic_smin_i64_global(volatile __global int64_t *p, int64_t x) {
  int64_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicMin((int64_t*)p, x);
#else
  prev = atom_min(p, x);
#endif
  return prev;
}

// Same signed minimum for a 64-bit integer in local memory.
inline int64_t atomic_smin_i64_local(volatile __local int64_t *p, int64_t x) {
  int64_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicMin((int64_t*)p, x);
#else
  prev = atom_min(p, x);
#endif
  return prev;
}

// Atomic unsigned maximum on a 64-bit integer in global memory: atomicMax
// under FUTHARK_CUDA, atom_max otherwise. Returns the intrinsic's result.
inline uint64_t atomic_umax_i64_global(volatile __global uint64_t *p, uint64_t x) {
  uint64_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicMax((uint64_t*)p, x);
#else
  prev = atom_max(p, x);
#endif
  return prev;
}

// Same unsigned maximum for a 64-bit integer in local memory.
inline uint64_t atomic_umax_i64_local(volatile __local uint64_t *p, uint64_t x) {
  uint64_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicMax((uint64_t*)p, x);
#else
  prev = atom_max(p, x);
#endif
  return prev;
}

// Atomic unsigned minimum on a 64-bit integer in global memory: atomicMin
// under FUTHARK_CUDA, atom_min otherwise. Returns the intrinsic's result.
inline uint64_t atomic_umin_i64_global(volatile __global uint64_t *p, uint64_t x) {
  uint64_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicMin((uint64_t*)p, x);
#else
  prev = atom_min(p, x);
#endif
  return prev;
}

// Same unsigned minimum for a 64-bit integer in local memory.
inline uint64_t atomic_umin_i64_local(volatile __local uint64_t *p, uint64_t x) {
  uint64_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicMin((uint64_t*)p, x);
#else
  prev = atom_min(p, x);
#endif
  return prev;
}

// Atomic bitwise AND of x into a 64-bit integer in global memory: atomicAnd
// under FUTHARK_CUDA, atom_and otherwise. Returns the intrinsic's result.
inline int64_t atomic_and_i64_global(volatile __global int64_t *p, int64_t x) {
  int64_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicAnd((int64_t*)p, x);
#else
  prev = atom_and(p, x);
#endif
  return prev;
}

// Same bitwise AND for a 64-bit integer in local memory.
inline int64_t atomic_and_i64_local(volatile __local int64_t *p, int64_t x) {
  int64_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicAnd((int64_t*)p, x);
#else
  prev = atom_and(p, x);
#endif
  return prev;
}

// Atomic bitwise OR of x into a 64-bit integer in global memory: atomicOr
// under FUTHARK_CUDA, atom_or otherwise. Returns the intrinsic's result.
inline int64_t atomic_or_i64_global(volatile __global int64_t *p, int64_t x) {
  int64_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicOr((int64_t*)p, x);
#else
  prev = atom_or(p, x);
#endif
  return prev;
}

// Same bitwise OR for a 64-bit integer in local memory.
inline int64_t atomic_or_i64_local(volatile __local int64_t *p, int64_t x) {
  int64_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicOr((int64_t*)p, x);
#else
  prev = atom_or(p, x);
#endif
  return prev;
}

// Atomic bitwise XOR of x into a 64-bit integer in global memory: atomicXor
// under FUTHARK_CUDA, atom_xor otherwise. Returns the intrinsic's result.
inline int64_t atomic_xor_i64_global(volatile __global int64_t *p, int64_t x) {
  int64_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicXor((int64_t*)p, x);
#else
  prev = atom_xor(p, x);
#endif
  return prev;
}

// Same bitwise XOR for a 64-bit integer in local memory.
inline int64_t atomic_xor_i64_local(volatile __local int64_t *p, int64_t x) {
  int64_t prev;

#ifdef FUTHARK_CUDA
  prev = atomicXor((int64_t*)p, x);
#else
  prev = atom_xor(p, x);
#endif
  return prev;
}

// End of atomics.h




// Fills a float buffer with one value: every work-item whose global id is
// below num_elems_46315 stores val_46316 at its own index in mem_46314.
__kernel void builtinzhreplicate_f32zireplicate_46318(int32_t num_elems_46315,
                                                      float val_46316, __global
                                                      unsigned char *mem_46314)
{
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    // Work-item coordinates; only the global id is used below.
    const int32_t gtid = get_global_id(0);
    const int32_t ltid = get_local_id(0);
    const int32_t gid = get_group_id(0);
    __global float *dest = (__global float *) mem_46314;

    if (slt64(gtid, num_elems_46315)) {
        dest[sext_i32_i64(gtid)] = val_46316;
    }

  error_0:
    return;
}
// Fills an int32 buffer with one value: every work-item whose global id is
// below num_elems_46324 stores val_46325 at its own index in mem_46323.
__kernel void builtinzhreplicate_i32zireplicate_46327(int32_t num_elems_46324,
                                                      int32_t val_46325,
                                                      __global
                                                      unsigned char *mem_46323)
{
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    // Work-item coordinates; only the global id is used below.
    const int32_t gtid = get_global_id(0);
    const int32_t ltid = get_local_id(0);
    const int32_t gid = get_group_id(0);
    __global int32_t *dest = (__global int32_t *) mem_46323;

    if (slt64(gtid, num_elems_46324)) {
        dest[sext_i32_i64(gtid)] = val_46325;
    }

  error_0:
    return;
}
// For each (i, j, k) inside an m x n x p index space, reads the int16 element
// of images_mem_44380 and stores it as a float at the same position of
// mem_44385. Elements equal to nan_value_27760 are stored as NAN instead.
// Returns immediately when *global_failure is already non-negative.
__kernel void convertToFloatzisegmap_29816(__global int *global_failure,
                                           int64_t m_27757, int64_t n_27758,
                                           int64_t p_27759,
                                           int16_t nan_value_27760, __global
                                           unsigned char *images_mem_44380,
                                           __global unsigned char *mem_44385)
{
    #define segmap_group_sizze_29877 (convertToFloatzisegmap_group_sizze_29820)

    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;

    if (*global_failure >= 0)
        return;

    const int32_t global_tid = get_global_id(0);
    const int32_t local_tid = get_local_id(0);
    const int64_t group_size = get_local_size(0);
    const int32_t wave_size = LOCKSTEP_WIDTH;
    const int32_t group_tid = get_group_id(0);
    const int32_t phys_tid = global_tid;

    // Flat work-item index rebuilt from the group id and the local id.
    const int64_t flat = sext_i32_i64(group_tid) * segmap_group_sizze_29877 +
        sext_i32_i64(local_tid);
    const int64_t plane = n_27758 * p_27759;

    // Split the flat index into the three coordinates.
    const int64_t i = squot64(flat, plane);
    const int64_t in_plane = flat - i * plane;
    const int64_t j = squot64(in_plane, p_27759);
    const int64_t k = in_plane - j * p_27759;

    if ((slt64(i, m_27757) && slt64(j, n_27758)) && slt64(k, p_27759)) {
        const int64_t elem = i * (p_27759 * n_27758) + j * p_27759 + k;
        const int16_t raw = ((__global int16_t *) images_mem_44380)[elem];
        float converted;

        if (raw == nan_value_27760)
            converted = NAN;
        else
            converted = sitofp_i16_f32(raw);
        ((__global float *) mem_44385)[elem] = converted;
    }

  error_0:
    return;
    #undef segmap_group_sizze_29877
}
// Copies floats from srcmem_2 to destmem_0 through the local buffer block_9,
// swapping the roles of the x and y indices between the read phase and the
// write phase.
//  - Group id 2 selects which x_elems_5 * y_elems_6 slab is processed.
//  - destoffset_1 and srcoffset_3 are divided by 4 to become float indices
//    (presumably they are byte offsets; confirm against the host code).
//  - num_arrays_4, mulx_7 and muly_8 are not read by this kernel.
// Each work-item handles four rows spaced 8 apart, and the local buffer is
// indexed with a row stride of 33 floats.
__kernel void gpu_map_transpose_f32(__local volatile
                                    int64_t *block_9_backing_aligned_0,
                                    int32_t destoffset_1, int32_t srcoffset_3,
                                    int32_t num_arrays_4, int32_t x_elems_5,
                                    int32_t y_elems_6, int32_t mulx_7,
                                    int32_t muly_8, __global
                                    unsigned char *destmem_0, __global
                                    unsigned char *srcmem_2)
{
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict block_9_backing_0 = (__local volatile
                                                         char *) block_9_backing_aligned_0;
    __local char *block_9;
    
    block_9 = (__local char *) block_9_backing_0;
    
    int32_t get_global_id_0_37;
    
    get_global_id_0_37 = get_global_id(0);
    
    int32_t get_local_id_0_38;
    
    get_local_id_0_38 = get_local_id(0);
    
    int32_t get_local_id_1_39;
    
    get_local_id_1_39 = get_local_id(1);
    
    int32_t get_group_id_0_40;
    
    get_group_id_0_40 = get_group_id(0);
    
    int32_t get_group_id_1_41;
    
    get_group_id_1_41 = get_group_id(1);
    
    int32_t get_group_id_2_42;
    
    get_group_id_2_42 = get_group_id(2);
    
    // Element offset of the slab this group works on, plus the caller offsets.
    int32_t our_array_offset_30 = get_group_id_2_42 * x_elems_5 * y_elems_6;
    int32_t odata_offset_33 = squot32(destoffset_1, 4) + our_array_offset_30;
    int32_t idata_offset_34 = squot32(srcoffset_3, 4) + our_array_offset_30;
    int32_t x_index_31 = get_global_id_0_37;
    int32_t y_index_32 = get_group_id_1_41 * 32 + get_local_id_1_39;
    
    // Read phase: load up to four in-range source elements into the local tile.
    if (slt32(x_index_31, x_elems_5)) {
        for (int32_t j_43 = 0; j_43 < 4; j_43++) {
            int32_t index_in_35 = (y_index_32 + j_43 * 8) * x_elems_5 +
                    x_index_31;
            
            if (slt32(y_index_32 + j_43 * 8, y_elems_6)) {
                ((__local float *) block_9)[sext_i32_i64((get_local_id_1_39 +
                                                          j_43 * 8) * 33 +
                                            get_local_id_0_38)] = ((__global
                                                                    float *) srcmem_2)[sext_i32_i64(idata_offset_34 +
                                                                                       index_in_35)];
            }
        }
    }
    // The barrier sits outside both conditionals, so every work-item of the
    // group reaches it before any of them reads the tile back.
    barrier(CLK_LOCAL_MEM_FENCE);
    // Write phase: the group ids swap roles, and the tile is read with the
    // two local ids exchanged relative to the read phase.
    x_index_31 = get_group_id_1_41 * 32 + get_local_id_0_38;
    y_index_32 = get_group_id_0_40 * 32 + get_local_id_1_39;
    if (slt32(x_index_31, y_elems_6)) {
        for (int32_t j_43 = 0; j_43 < 4; j_43++) {
            int32_t index_out_36 = (y_index_32 + j_43 * 8) * y_elems_6 +
                    x_index_31;
            
            if (slt32(y_index_32 + j_43 * 8, x_elems_5)) {
                ((__global float *) destmem_0)[sext_i32_i64(odata_offset_33 +
                                               index_out_36)] = ((__local
                                                                  float *) block_9)[sext_i32_i64(get_local_id_0_38 *
                                                                                    33 +
                                                                                    get_local_id_1_39 +
                                                                                    j_43 *
                                                                                    8)];
            }
        }
    }
    
  error_0:
    return;
}
// Variant of the float transpose kernel whose index math is scaled by mulx_7.
// It copies one float per work-item from srcmem_2 to destmem_0 through the
// local buffer block_9, which is indexed with a row stride of 17 floats.
//  - Group id 2 selects which x_elems_5 * y_elems_6 slab is processed.
//  - destoffset_1 and srcoffset_3 are divided by 4 to become float indices
//    (presumably they are byte offsets; confirm against the host code).
//  - num_arrays_4 and muly_8 are not read by this kernel.
__kernel void gpu_map_transpose_f32_low_height(__local volatile
                                               int64_t *block_9_backing_aligned_0,
                                               int32_t destoffset_1,
                                               int32_t srcoffset_3,
                                               int32_t num_arrays_4,
                                               int32_t x_elems_5,
                                               int32_t y_elems_6,
                                               int32_t mulx_7, int32_t muly_8,
                                               __global
                                               unsigned char *destmem_0,
                                               __global unsigned char *srcmem_2)
{
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict block_9_backing_0 = (__local volatile
                                                         char *) block_9_backing_aligned_0;
    __local char *block_9;
    
    block_9 = (__local char *) block_9_backing_0;
    
    int32_t get_global_id_0_37;
    
    get_global_id_0_37 = get_global_id(0);
    
    int32_t get_local_id_0_38;
    
    get_local_id_0_38 = get_local_id(0);
    
    int32_t get_local_id_1_39;
    
    get_local_id_1_39 = get_local_id(1);
    
    int32_t get_group_id_0_40;
    
    get_group_id_0_40 = get_group_id(0);
    
    int32_t get_group_id_1_41;
    
    get_group_id_1_41 = get_group_id(1);
    
    int32_t get_group_id_2_42;
    
    get_group_id_2_42 = get_group_id(2);
    
    // Element offset of the slab this group works on, plus the caller offsets.
    int32_t our_array_offset_30 = get_group_id_2_42 * x_elems_5 * y_elems_6;
    int32_t odata_offset_33 = squot32(destoffset_1, 4) + our_array_offset_30;
    int32_t idata_offset_34 = squot32(srcoffset_3, 4) + our_array_offset_30;
    // Source coordinates: local id 1 is split by mulx_7, its remainder widening
    // the x range in steps of 16 and its quotient selecting the row.
    int32_t x_index_31 = get_group_id_0_40 * 16 * mulx_7 + get_local_id_0_38 +
            srem32(get_local_id_1_39, mulx_7) * 16;
    int32_t y_index_32 = get_group_id_1_41 * 16 + squot32(get_local_id_1_39,
                                                          mulx_7);
    int32_t index_in_35 = y_index_32 * x_elems_5 + x_index_31;
    
    // Read phase: stage the in-range source element in the local tile.
    if (slt32(x_index_31, x_elems_5) && slt32(y_index_32, y_elems_6)) {
        ((__local float *) block_9)[sext_i32_i64(get_local_id_1_39 * 17 +
                                    get_local_id_0_38)] = ((__global
                                                            float *) srcmem_2)[sext_i32_i64(idata_offset_34 +
                                                                               index_in_35)];
    }
    // The barrier sits outside the conditionals, so every work-item of the
    // group reaches it before the tile is read back.
    barrier(CLK_LOCAL_MEM_FENCE);
    // Destination coordinates: same formulas with the two local ids exchanged.
    x_index_31 = get_group_id_1_41 * 16 + squot32(get_local_id_0_38, mulx_7);
    y_index_32 = get_group_id_0_40 * 16 * mulx_7 + get_local_id_1_39 +
        srem32(get_local_id_0_38, mulx_7) * 16;
    
    int32_t index_out_36 = y_index_32 * y_elems_6 + x_index_31;
    
    // Write phase: read the tile with the local ids swapped.
    if (slt32(x_index_31, y_elems_6) && slt32(y_index_32, x_elems_5)) {
        ((__global float *) destmem_0)[sext_i32_i64(odata_offset_33 +
                                       index_out_36)] = ((__local
                                                          float *) block_9)[sext_i32_i64(get_local_id_0_38 *
                                                                            17 +
                                                                            get_local_id_1_39)];
    }
    
  error_0:
    return;
}
// Variant of the float transpose kernel whose index math is scaled by muly_8.
// It copies one float per work-item from srcmem_2 to destmem_0 through the
// local buffer block_9, which is indexed with a row stride of 17 floats.
//  - Group id 2 selects which x_elems_5 * y_elems_6 slab is processed.
//  - destoffset_1 and srcoffset_3 are divided by 4 to become float indices
//    (presumably they are byte offsets; confirm against the host code).
//  - num_arrays_4 and mulx_7 are not read by this kernel.
__kernel void gpu_map_transpose_f32_low_width(__local volatile
                                              int64_t *block_9_backing_aligned_0,
                                              int32_t destoffset_1,
                                              int32_t srcoffset_3,
                                              int32_t num_arrays_4,
                                              int32_t x_elems_5,
                                              int32_t y_elems_6, int32_t mulx_7,
                                              int32_t muly_8, __global
                                              unsigned char *destmem_0, __global
                                              unsigned char *srcmem_2)
{
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict block_9_backing_0 = (__local volatile
                                                         char *) block_9_backing_aligned_0;
    __local char *block_9;
    
    block_9 = (__local char *) block_9_backing_0;
    
    int32_t get_global_id_0_37;
    
    get_global_id_0_37 = get_global_id(0);
    
    int32_t get_local_id_0_38;
    
    get_local_id_0_38 = get_local_id(0);
    
    int32_t get_local_id_1_39;
    
    get_local_id_1_39 = get_local_id(1);
    
    int32_t get_group_id_0_40;
    
    get_group_id_0_40 = get_group_id(0);
    
    int32_t get_group_id_1_41;
    
    get_group_id_1_41 = get_group_id(1);
    
    int32_t get_group_id_2_42;
    
    get_group_id_2_42 = get_group_id(2);
    
    // Element offset of the slab this group works on, plus the caller offsets.
    int32_t our_array_offset_30 = get_group_id_2_42 * x_elems_5 * y_elems_6;
    int32_t odata_offset_33 = squot32(destoffset_1, 4) + our_array_offset_30;
    int32_t idata_offset_34 = squot32(srcoffset_3, 4) + our_array_offset_30;
    // Source coordinates: local id 0 is split by muly_8, its quotient selecting
    // the column and its remainder widening the y range in steps of 16.
    int32_t x_index_31 = get_group_id_0_40 * 16 + squot32(get_local_id_0_38,
                                                          muly_8);
    int32_t y_index_32 = get_group_id_1_41 * 16 * muly_8 + get_local_id_1_39 +
            srem32(get_local_id_0_38, muly_8) * 16;
    int32_t index_in_35 = y_index_32 * x_elems_5 + x_index_31;
    
    // Read phase: stage the in-range source element in the local tile.
    if (slt32(x_index_31, x_elems_5) && slt32(y_index_32, y_elems_6)) {
        ((__local float *) block_9)[sext_i32_i64(get_local_id_1_39 * 17 +
                                    get_local_id_0_38)] = ((__global
                                                            float *) srcmem_2)[sext_i32_i64(idata_offset_34 +
                                                                               index_in_35)];
    }
    // The barrier sits outside the conditionals, so every work-item of the
    // group reaches it before the tile is read back.
    barrier(CLK_LOCAL_MEM_FENCE);
    // Destination coordinates: same formulas with the two local ids exchanged.
    x_index_31 = get_group_id_1_41 * 16 * muly_8 + get_local_id_0_38 +
        srem32(get_local_id_1_39, muly_8) * 16;
    y_index_32 = get_group_id_0_40 * 16 + squot32(get_local_id_1_39, muly_8);
    
    int32_t index_out_36 = y_index_32 * y_elems_6 + x_index_31;
    
    // Write phase: read the tile with the local ids swapped.
    if (slt32(x_index_31, y_elems_6) && slt32(y_index_32, x_elems_5)) {
        ((__global float *) destmem_0)[sext_i32_i64(odata_offset_33 +
                                       index_out_36)] = ((__local
                                                          float *) block_9)[sext_i32_i64(get_local_id_0_38 *
                                                                            17 +
                                                                            get_local_id_1_39)];
    }
    
  error_0:
    return;
}
// Transpose variant that copies directly from srcmem_2 to destmem_0 without
// staging through local memory: one float per work-item, addressed purely
// from the global id. The local buffer argument is accepted but its contents
// are never touched; mulx_7 and muly_8 are not read.
__kernel void gpu_map_transpose_f32_small(__local volatile
                                          int64_t *block_9_backing_aligned_0,
                                          int32_t destoffset_1,
                                          int32_t srcoffset_3,
                                          int32_t num_arrays_4,
                                          int32_t x_elems_5, int32_t y_elems_6,
                                          int32_t mulx_7, int32_t muly_8,
                                          __global unsigned char *destmem_0,
                                          __global unsigned char *srcmem_2)
{
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict block_9_backing_0 = (__local volatile
                                                         char *) block_9_backing_aligned_0;
    __local char *block_9 = (__local char *) block_9_backing_0;

    const int32_t gid0 = get_global_id(0);
    const int32_t lid0 = get_local_id(0);
    const int32_t lid1 = get_local_id(1);
    const int32_t grp0 = get_group_id(0);
    const int32_t grp1 = get_group_id(1);
    const int32_t grp2 = get_group_id(2);

    // Number of elements in one array, and where this item's array starts.
    const int32_t per_array = y_elems_6 * x_elems_5;
    const int32_t array_base = squot32(gid0, per_array) * per_array;

    // Position of this work-item inside its array.
    const int32_t x_index = squot32(srem32(gid0, per_array), y_elems_6);
    const int32_t y_index = srem32(gid0, y_elems_6);

    const int32_t out_base = squot32(destoffset_1, 4) + array_base;
    const int32_t in_base = squot32(srcoffset_3, 4) + array_base;
    const int32_t index_in = y_index * x_elems_5 + x_index;
    const int32_t index_out = x_index * y_elems_6 + y_index;

    if (slt32(gid0, x_elems_5 * y_elems_6 * num_arrays_4)) {
        __global float *dst = (__global float *) destmem_0;
        __global float *src = (__global float *) srcmem_2;

        dst[sext_i32_i64(out_base + index_out)] =
            src[sext_i32_i64(in_base + index_in)];
    }

  error_0:
    return;
}
// Strided copy of floats from mem_param_44585 into mem_44590. A work-item
// with flat index g < m_29166 * nm_29314 is split into outer = g / nm_29314
// and inner = g - outer * nm_29314. It reads the source at
// ctx_param_ext_44580 + outer * ctx_param_ext_44581 + inner * ctx_param_ext_44583
// and writes the destination at inner * m_29166 + outer.
__kernel void mainzicopy_45849(int64_t m_29166, int64_t nm_29314,
                               int64_t ctx_param_ext_44580,
                               int64_t ctx_param_ext_44581,
                               int64_t ctx_param_ext_44583, __global
                               unsigned char *mem_param_44585, __global
                               unsigned char *mem_44590)
{
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    // Work-item coordinates; only the global id is used below.
    const int32_t copy_gtid = get_global_id(0);
    const int32_t copy_ltid = get_local_id(0);
    const int32_t copy_gid = get_group_id(0);
    const int64_t flat = sext_i32_i64(copy_gtid);

    if (slt64(flat, m_29166 * nm_29314)) {
        // The division stays inside the bounds check, as in the original.
        const int64_t outer = squot64(flat, nm_29314);
        const int64_t inner = flat - outer * nm_29314;
        __global float *dst = (__global float *) mem_44590;
        __global float *src = (__global float *) mem_param_44585;

        dst[inner * m_29166 + outer] =
            src[ctx_param_ext_44580 + (outer * ctx_param_ext_44581 +
                                       inner * ctx_param_ext_44583)];
    }

  error_0:
    return;
}
// Stage 1 of a segmented scan using int64 addition over a flat index space of
// m_29166 * N_29165 elements, with segments of length N_29165.
//
// Fused map: for every in-bounds element the kernel reads a float from
// images_mem_44381.  If it is not NaN, the matching float from
// defunc_3_map_res_mem_45140 is subtracted from it; otherwise the value
// becomes NAN.  That float is stored to mem_45166, and 1 (not NaN) or 0 (NaN)
// is the value fed to the scan, so the scan is a running count of non-NaN
// values that restarts at every segment boundary.
//
// The scan result for each element is written to mem_45163 as int64.  The
// carry starts at 0 in every work-group and is only ever reloaded from that
// group's own local memory, so these results do not include contributions
// from elements handled by other groups.
// NOTE(review): presumably later stages combine the per-group results --
// confirm against the host code.
//
// Parameters:
//   global_failure     - the kernel returns immediately if the pointed-to
//                        value is >= 0.
//   scan_arr_mem_46261_backing_aligned_0
//                      - local scratch, indexed as int64 by local thread id.
//   N_29165            - segment length (inner dimension).
//   m_29166            - number of segments (outer dimension).
//   num_threads_46255  - divisor used to compute how many chunks each group
//                        processes.  NOTE(review): presumably the total
//                        number of launched work-items -- confirm.
//   images_mem_44381, defunc_3_map_res_mem_45140
//                      - inputs, read as float at [seg * N_29165 + pos].
//   mem_45163          - output, scan results as int64.
//   mem_45166          - output, mapped float values.
//
// NOTE(review): the scan works on blocks of 32 threads and scans the
// per-block totals with a single block, so it presumably requires the local
// size to equal segscan_group_sizze_41059 and be at most 32 * 32 -- confirm.
__kernel void mainziscan_stage1_41042(__global int *global_failure,
                                      __local volatile
                                      int64_t *scan_arr_mem_46261_backing_aligned_0,
                                      int64_t N_29165, int64_t m_29166,
                                      int32_t num_threads_46255, __global
                                      unsigned char *images_mem_44381, __global
                                      unsigned char *defunc_3_map_res_mem_45140,
                                      __global unsigned char *mem_45163,
                                      __global unsigned char *mem_45166)
{
    // the group size macro on the right-hand side is defined outside this kernel
    #define segscan_group_sizze_41059 (mainzisegscan_group_sizze_41036)
    
    // not referenced in this kernel body
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict scan_arr_mem_46261_backing_0 =
                          (__local volatile
                           char *) scan_arr_mem_46261_backing_aligned_0;
    
    // a non-negative value means a failure has already been recorded; do nothing
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_46256;
    int32_t local_tid_46257;
    int64_t group_sizze_46260;
    int32_t wave_sizze_46259;
    int32_t group_tid_46258;
    
    global_tid_46256 = get_global_id(0);
    local_tid_46257 = get_local_id(0);
    // group_sizze_46260 is read here but not used again in this kernel
    group_sizze_46260 = get_local_size(0);
    wave_sizze_46259 = LOCKSTEP_WIDTH;
    group_tid_46258 = get_group_id(0);
    
    // phys_tid_41042 is assigned but not used again in this kernel
    int32_t phys_tid_41042;
    
    phys_tid_41042 = global_tid_46256;
    
    __local char *scan_arr_mem_46261;
    
    scan_arr_mem_46261 = (__local char *) scan_arr_mem_46261_backing_0;
    
    // x_41064 is the carry-in from the previous chunk, x_41065 this thread's
    // to-scan value for the current chunk
    int64_t x_41064;
    int64_t x_41065;
    
    // the carry starts at the neutral element of addition
    x_41064 = (int64_t) 0;
    // each group processes sdiv_up64(m * N, num_threads) consecutive chunks of
    // segscan_group_sizze_41059 elements, one element per thread per chunk
    for (int64_t j_46263 = 0; j_46263 < sdiv_up64(m_29166 * N_29165,
                                                  sext_i32_i64(num_threads_46255));
         j_46263++) {
        // flat index of the first element of this group's j'th chunk
        int64_t chunk_offset_46264 = segscan_group_sizze_41059 * j_46263 +
                sext_i32_i64(group_tid_46258) * (segscan_group_sizze_41059 *
                                                 sdiv_up64(m_29166 * N_29165,
                                                           sext_i32_i64(num_threads_46255)));
        int64_t flat_idx_46265 = chunk_offset_46264 +
                sext_i32_i64(local_tid_46257);
        // gtid_41033 is the segment index, gtid_41041 the position within it
        int64_t gtid_41033 = squot64(flat_idx_46265, N_29165);
        int64_t gtid_41041 = flat_idx_46265 - squot64(flat_idx_46265, N_29165) *
                N_29165;
        
        // threads in bounds read input
        {
            if (slt64(gtid_41033, m_29166) && slt64(gtid_41041, N_29165)) {
                float x_41069 = ((__global
                                  float *) images_mem_44381)[gtid_41033 *
                                                             N_29165 +
                                                             gtid_41041];
                bool isnan_res_41071;
                
                isnan_res_41071 = futrts_isnan32(x_41069);
                
                bool cond_41072 = !isnan_res_41071;
                float defunc_1_f_res_41073;
                
                // the second input is only read when the first one is not NaN
                if (cond_41072) {
                    float x_41070 = ((__global
                                      float *) defunc_3_map_res_mem_45140)[gtid_41033 *
                                                                           N_29165 +
                                                                           gtid_41041];
                    float defunc_1_f_res_t_res_41074 = x_41069 - x_41070;
                    
                    defunc_1_f_res_41073 = defunc_1_f_res_t_res_41074;
                } else {
                    defunc_1_f_res_41073 = NAN;
                }
                
                bool isnan_res_41075;
                
                isnan_res_41075 = futrts_isnan32(defunc_1_f_res_41073);
                
                // the scan input is 1 for a non-NaN mapped value and 0 otherwise
                bool defunc_0_p_res_41076 = !isnan_res_41075;
                int64_t defunc_0_f_res_41077 =
                        btoi_bool_i64(defunc_0_p_res_41076);
                
                // write to-scan values to parameters
                {
                    x_41065 = defunc_0_f_res_41077;
                }
                // write mapped values results to global memory
                {
                    ((__global float *) mem_45166)[gtid_41033 * N_29165 +
                                                   gtid_41041] =
                        defunc_1_f_res_41073;
                }
            }
        }
        // do one intra-group scan operation
        {
            // maybe restore some to-scan values to parameters, or read neutral
            {
                // out-of-bounds threads contribute the neutral element 0
                if (!(slt64(gtid_41033, m_29166) && slt64(gtid_41041,
                                                          N_29165))) {
                    x_41065 = (int64_t) 0;
                }
            }
            // combine with carry and write to local memory
            {
                // x_41064 is non-zero only in local thread 0 (see the carry
                // handling at the end of the loop body)
                int64_t defunc_1_op_res_41066 = add64(x_41064, x_41065);
                
                ((__local
                  int64_t *) scan_arr_mem_46261)[sext_i32_i64(local_tid_46257)] =
                    defunc_1_op_res_41066;
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            
            // x_46266/x_46267 are the accumulator and own value for the
            // per-block scan; x_46269/x_46270 play the same roles when the
            // per-block totals are scanned
            int64_t x_46266;
            int64_t x_46267;
            int64_t x_46269;
            int64_t x_46270;
            bool ltid_in_bounds_46272;
            
            ltid_in_bounds_46272 = slt64(sext_i32_i64(local_tid_46257),
                                         segscan_group_sizze_41059);
            
            int32_t skip_threads_46273;
            
            // read input for in-block scan
            {
                if (ltid_in_bounds_46272) {
                    x_46267 = ((volatile __local
                                int64_t *) scan_arr_mem_46261)[sext_i32_i64(local_tid_46257)];
                    // the first thread of each block of 32 never enters the
                    // combine step below, so its accumulator is seeded here
                    if ((local_tid_46257 - squot32(local_tid_46257, 32) * 32) ==
                        0) {
                        x_46266 = x_46267;
                    }
                }
            }
            // in-block scan (hopefully no barriers needed)
            {
                // the stride doubles each round: 1, 2, 4, 8, 16
                skip_threads_46273 = 1;
                while (slt32(skip_threads_46273, 32)) {
                    // only threads at least skip_threads into their block of
                    // 32 have an element that far back within the block
                    if (sle32(skip_threads_46273, local_tid_46257 -
                              squot32(local_tid_46257, 32) * 32) &&
                        ltid_in_bounds_46272) {
                        // read operands
                        {
                            x_46266 = ((volatile __local
                                        int64_t *) scan_arr_mem_46261)[sext_i32_i64(local_tid_46257) -
                                                                       sext_i32_i64(skip_threads_46273)];
                        }
                        // perform operation
                        {
                            // the comparison reduces to
                            // (flat index mod N) < skip_threads, i.e. the
                            // element skip_threads back is in an earlier
                            // segment and must not be combined
                            bool inactive_46274 =
                                 slt64(srem64(sext_i32_i64(local_tid_46257) +
                                              chunk_offset_46264, N_29165),
                                       sext_i32_i64(local_tid_46257) +
                                       chunk_offset_46264 -
                                       (sext_i32_i64(local_tid_46257 -
                                        skip_threads_46273) +
                                        chunk_offset_46264));
                            
                            if (inactive_46274) {
                                x_46266 = x_46267;
                            }
                            if (!inactive_46274) {
                                int64_t defunc_1_op_res_46268 = add64(x_46266,
                                                                      x_46267);
                                
                                x_46266 = defunc_1_op_res_46268;
                            }
                        }
                    }
                    // synchronise only when the stride is at least
                    // LOCKSTEP_WIDTH
                    if (sle32(wave_sizze_46259, skip_threads_46273)) {
                        barrier(CLK_LOCAL_MEM_FENCE);
                    }
                    if (sle32(skip_threads_46273, local_tid_46257 -
                              squot32(local_tid_46257, 32) * 32) &&
                        ltid_in_bounds_46272) {
                        // write result
                        {
                            ((volatile __local
                              int64_t *) scan_arr_mem_46261)[sext_i32_i64(local_tid_46257)] =
                                x_46266;
                            x_46267 = x_46266;
                        }
                    }
                    if (sle32(wave_sizze_46259, skip_threads_46273)) {
                        barrier(CLK_LOCAL_MEM_FENCE);
                    }
                    skip_threads_46273 *= 2;
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            // last thread of block 'i' writes its result to offset 'i'
            {
                // this overwrites the low entries of the scratch array; the
                // first block puts its own results back further down
                if ((local_tid_46257 - squot32(local_tid_46257, 32) * 32) ==
                    31 && ltid_in_bounds_46272) {
                    ((volatile __local
                      int64_t *) scan_arr_mem_46261)[sext_i32_i64(squot32(local_tid_46257,
                                                                          32))] =
                        x_46266;
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            // scan the first block, after which offset 'i' contains carry-in for block 'i+1'
            {
                int32_t skip_threads_46275;
                
                // read input for in-block scan
                {
                    if (squot32(local_tid_46257, 32) == 0 &&
                        ltid_in_bounds_46272) {
                        x_46270 = ((volatile __local
                                    int64_t *) scan_arr_mem_46261)[sext_i32_i64(local_tid_46257)];
                        if ((local_tid_46257 - squot32(local_tid_46257, 32) *
                             32) == 0) {
                            x_46269 = x_46270;
                        }
                    }
                }
                // in-block scan (hopefully no barriers needed)
                {
                    skip_threads_46275 = 1;
                    while (slt32(skip_threads_46275, 32)) {
                        if (sle32(skip_threads_46275, local_tid_46257 -
                                  squot32(local_tid_46257, 32) * 32) &&
                            (squot32(local_tid_46257, 32) == 0 &&
                             ltid_in_bounds_46272)) {
                            // read operands
                            {
                                x_46269 = ((volatile __local
                                            int64_t *) scan_arr_mem_46261)[sext_i32_i64(local_tid_46257) -
                                                                           sext_i32_i64(skip_threads_46275)];
                            }
                            // perform operation
                            {
                                // entry t stands for the last element of
                                // block t (chunk position t * 32 + 31); the
                                // comparison reduces to
                                // (that flat index mod N) < skip_threads * 32
                                bool inactive_46276 =
                                     slt64(srem64(sext_i32_i64(local_tid_46257 *
                                                  32 + 32 - 1) +
                                                  chunk_offset_46264, N_29165),
                                           sext_i32_i64(local_tid_46257 * 32 +
                                           32 - 1) + chunk_offset_46264 -
                                           (sext_i32_i64((local_tid_46257 -
                                                          skip_threads_46275) *
                                            32 + 32 - 1) + chunk_offset_46264));
                                
                                if (inactive_46276) {
                                    x_46269 = x_46270;
                                }
                                if (!inactive_46276) {
                                    int64_t defunc_1_op_res_46271 =
                                            add64(x_46269, x_46270);
                                    
                                    x_46269 = defunc_1_op_res_46271;
                                }
                            }
                        }
                        if (sle32(wave_sizze_46259, skip_threads_46275)) {
                            barrier(CLK_LOCAL_MEM_FENCE);
                        }
                        if (sle32(skip_threads_46275, local_tid_46257 -
                                  squot32(local_tid_46257, 32) * 32) &&
                            (squot32(local_tid_46257, 32) == 0 &&
                             ltid_in_bounds_46272)) {
                            // write result
                            {
                                ((volatile __local
                                  int64_t *) scan_arr_mem_46261)[sext_i32_i64(local_tid_46257)] =
                                    x_46269;
                                x_46270 = x_46269;
                            }
                        }
                        if (sle32(wave_sizze_46259, skip_threads_46275)) {
                            barrier(CLK_LOCAL_MEM_FENCE);
                        }
                        skip_threads_46275 *= 2;
                    }
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            // carry-in for every block except the first
            {
                if (!(squot32(local_tid_46257, 32) == 0 ||
                      !ltid_in_bounds_46272)) {
                    // read operands
                    {
                        // keep the in-block result in x_46267 and fetch the
                        // scanned total of the preceding block
                        x_46267 = x_46266;
                        x_46266 = ((__local
                                    int64_t *) scan_arr_mem_46261)[sext_i32_i64(squot32(local_tid_46257,
                                                                                        32)) -
                                                                   (int64_t) 1];
                    }
                    // perform operation
                    {
                        // the comparison reduces to
                        // (flat index mod N) < (local id mod 32) + 1, i.e. the
                        // last element of the preceding block is in an earlier
                        // segment, so its total must not be added
                        bool inactive_46277 =
                             slt64(srem64(sext_i32_i64(local_tid_46257) +
                                          chunk_offset_46264, N_29165),
                                   sext_i32_i64(local_tid_46257) +
                                   chunk_offset_46264 -
                                   (sext_i32_i64(squot32(local_tid_46257, 32) *
                                    32 - 1) + chunk_offset_46264));
                        
                        if (inactive_46277) {
                            x_46266 = x_46267;
                        }
                        if (!inactive_46277) {
                            int64_t defunc_1_op_res_46268 = add64(x_46266,
                                                                  x_46267);
                            
                            x_46266 = defunc_1_op_res_46268;
                        }
                    }
                    // write final result
                    {
                        ((__local
                          int64_t *) scan_arr_mem_46261)[sext_i32_i64(local_tid_46257)] =
                            x_46266;
                    }
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            // restore correct values for first block
            {
                // the low entries were reused for the block totals above, so
                // the first block writes back the results it kept in x_46267
                if (squot32(local_tid_46257, 32) == 0) {
                    ((__local
                      int64_t *) scan_arr_mem_46261)[sext_i32_i64(local_tid_46257)] =
                        x_46267;
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            // threads in bounds write partial scan result
            {
                if (slt64(gtid_41033, m_29166) && slt64(gtid_41041, N_29165)) {
                    ((__global int64_t *) mem_45163)[gtid_41033 * N_29165 +
                                                     gtid_41041] = ((__local
                                                                     int64_t *) scan_arr_mem_46261)[sext_i32_i64(local_tid_46257)];
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            // first thread reads last element as carry-in for next iteration
            {
                // the comparison reduces to
                // ((chunk_offset + group size) mod N) < 1, i.e. the next chunk
                // begins exactly at the start of a segment, so nothing is
                // carried over
                bool crosses_segment_46278 = slt64(srem64(chunk_offset_46264 +
                                                          segscan_group_sizze_41059,
                                                          N_29165),
                                                   chunk_offset_46264 +
                                                   segscan_group_sizze_41059 -
                                                   (chunk_offset_46264 +
                                                    segscan_group_sizze_41059 -
                                                    (int64_t) 1));
                bool should_load_carry_46279 = local_tid_46257 == 0 &&
                     !crosses_segment_46278;
                
                if (should_load_carry_46279) {
                    x_41064 = ((__local
                                int64_t *) scan_arr_mem_46261)[segscan_group_sizze_41059 -
                                                               (int64_t) 1];
                }
                // every other thread resets its carry to the neutral element
                if (!should_load_carry_46279) {
                    x_41064 = (int64_t) 0;
                }
            }
            // keep the next iteration from overwriting local memory before
            // the carry has been read
            barrier(CLK_LOCAL_MEM_FENCE);
        }
    }
    
  error_1:
    return;
    #undef segscan_group_sizze_41059
}
__kernel void mainziscan_stage1_42114(__global int *global_failure,
                                      int failure_is_an_option, __global
                                      int64_t *global_failure_args,
                                      __local volatile
                                      int64_t *scan_arr_mem_46644_backing_aligned_0,
                                      int64_t N_29165, int64_t m_29166,
                                      int64_t iota32_arg_29597,
                                      int32_t num_threads_46638, __global
                                      unsigned char *defunc_4_map_res_mem_45178,
                                      __global
                                      unsigned char *defunc_3_map_res_mem_45244,
                                      __global
                                      unsigned char *defunc_3_map_res_mem_45245,
                                      __global
                                      unsigned char *defunc_0_f_res_mem_45279,
                                      __global unsigned char *mem_45298,
                                      __global unsigned char *mem_45302)
{
    #define segscan_group_sizze_42200 (mainzisegscan_group_sizze_42108)
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict scan_arr_mem_46644_backing_0 =
                          (__local volatile
                           char *) scan_arr_mem_46644_backing_aligned_0;
    volatile __local bool local_failure;
    
    if (failure_is_an_option) {
        int failed = *global_failure >= 0;
        
        if (failed)
            return;
    }
    local_failure = false;
    barrier(CLK_LOCAL_MEM_FENCE);
    
    int32_t global_tid_46639;
    int32_t local_tid_46640;
    int64_t group_sizze_46643;
    int32_t wave_sizze_46642;
    int32_t group_tid_46641;
    
    global_tid_46639 = get_global_id(0);
    local_tid_46640 = get_local_id(0);
    group_sizze_46643 = get_local_size(0);
    wave_sizze_46642 = LOCKSTEP_WIDTH;
    group_tid_46641 = get_group_id(0);
    
    int32_t phys_tid_42114;
    
    phys_tid_42114 = global_tid_46639;
    
    __local char *scan_arr_mem_46644;
    
    scan_arr_mem_46644 = (__local char *) scan_arr_mem_46644_backing_0;
    
    float x_42204;
    float x_42205;
    
    x_42204 = 0.0F;
    for (int64_t j_46646 = 0; j_46646 < sdiv_up64(m_29166 * iota32_arg_29597,
                                                  sext_i32_i64(num_threads_46638));
         j_46646++) {
        int64_t chunk_offset_46647 = segscan_group_sizze_42200 * j_46646 +
                sext_i32_i64(group_tid_46641) * (segscan_group_sizze_42200 *
                                                 sdiv_up64(m_29166 *
                                                           iota32_arg_29597,
                                                           sext_i32_i64(num_threads_46638)));
        int64_t flat_idx_46648 = chunk_offset_46647 +
                sext_i32_i64(local_tid_46640);
        int64_t gtid_42105 = squot64(flat_idx_46648, iota32_arg_29597);
        int64_t gtid_42113 = flat_idx_46648 - squot64(flat_idx_46648,
                                                      iota32_arg_29597) *
                iota32_arg_29597;
        
        // threads in bounds read input
        {
            if (slt64(gtid_42105, m_29166) && slt64(gtid_42113,
                                                    iota32_arg_29597)) {
                int32_t y_42211 = ((__global int32_t *) mem_45298)[gtid_42105];
                int32_t index_primexp_42401 = sext_i64_i32(gtid_42113);
                bool cond_42214 = sle32(y_42211, index_primexp_42401);
                float defunc_0_f_res_42215;
                
                if (cond_42214) {
                    defunc_0_f_res_42215 = 0.0F;
                } else {
                    int32_t x_42207 = ((__global
                                        int32_t *) defunc_3_map_res_mem_45245)[gtid_42105];
                    int32_t x_42208 = ((__global
                                        int32_t *) defunc_3_map_res_mem_45244)[gtid_42105];
                    float x_42209 = ((__global
                                      float *) defunc_0_f_res_mem_45279)[gtid_42105];
                    bool cond_42216 = index_primexp_42401 == 0;
                    float defunc_0_f_res_f_res_42217;
                    
                    if (cond_42216) {
                        defunc_0_f_res_f_res_42217 = x_42209;
                    } else {
                        int32_t i_42218 = add32(x_42207, index_primexp_42401);
                        int64_t i_42219 = sext_i32_i64(i_42218);
                        bool x_42220 = sle64((int64_t) 0, i_42219);
                        bool y_42221 = slt64(i_42219, N_29165);
                        bool bounds_check_42222 = x_42220 && y_42221;
                        bool index_certs_42223;
                        
                        if (!bounds_check_42222) {
                            {
                                if (atomic_cmpxchg_i32_global(global_failure,
                                                              -1, 28) == -1) {
                                    global_failure_args[0] = i_42219;
                                    global_failure_args[1] = N_29165;
                                    ;
                                }
                                local_failure = true;
                                goto error_0;
                            }
                        }
                        
                        float x_42224 = ((__global
                                          float *) defunc_4_map_res_mem_45178)[gtid_42105 *
                                                                               N_29165 +
                                                                               i_42219];
                        int32_t x_42225 = sub32(x_42207, x_42208);
                        int32_t i_42226 = add32(x_42225, index_primexp_42401);
                        int64_t i_42227 = sext_i32_i64(i_42226);
                        bool x_42228 = sle64((int64_t) 0, i_42227);
                        bool y_42229 = slt64(i_42227, N_29165);
                        bool bounds_check_42230 = x_42228 && y_42229;
                        bool index_certs_42231;
                        
                        if (!bounds_check_42230) {
                            {
                                if (atomic_cmpxchg_i32_global(global_failure,
                                                              -1, 29) == -1) {
                                    global_failure_args[0] = i_42227;
                                    global_failure_args[1] = N_29165;
                                    ;
                                }
                                local_failure = true;
                                goto error_0;
                            }
                        }
                        
                        float y_42232 = ((__global
                                          float *) defunc_4_map_res_mem_45178)[gtid_42105 *
                                                                               N_29165 +
                                                                               i_42227];
                        float defunc_0_f_res_f_res_f_res_42233 = x_42224 -
                              y_42232;
                        
                        defunc_0_f_res_f_res_42217 =
                            defunc_0_f_res_f_res_f_res_42233;
                    }
                    defunc_0_f_res_42215 = defunc_0_f_res_f_res_42217;
                }
                // write to-scan values to parameters
                {
                    x_42205 = defunc_0_f_res_42215;
                }
                // write mapped values results to global memory
                { }
            }
        }
        // do one intra-group scan operation
        {
            // maybe restore some to-scan values to parameters, or read neutral
            {
                if (!(slt64(gtid_42105, m_29166) && slt64(gtid_42113,
                                                          iota32_arg_29597))) {
                    x_42205 = 0.0F;
                }
            }
            // combine with carry and write to local memory
            {
                float defunc_1_op_res_42206 = x_42204 + x_42205;
                
                ((__local
                  float *) scan_arr_mem_46644)[sext_i32_i64(local_tid_46640)] =
                    defunc_1_op_res_42206;
            }
            
          error_0:
            barrier(CLK_LOCAL_MEM_FENCE);
            if (local_failure)
                return;
            barrier(CLK_LOCAL_MEM_FENCE);
            
            float x_46649;
            float x_46650;
            float x_46652;
            float x_46653;
            bool ltid_in_bounds_46655;
            
            ltid_in_bounds_46655 = slt64(sext_i32_i64(local_tid_46640),
                                         segscan_group_sizze_42200);
            
            int32_t skip_threads_46656;
            
            // read input for in-block scan
            {
                if (ltid_in_bounds_46655) {
                    x_46650 = ((volatile __local
                                float *) scan_arr_mem_46644)[sext_i32_i64(local_tid_46640)];
                    if ((local_tid_46640 - squot32(local_tid_46640, 32) * 32) ==
                        0) {
                        x_46649 = x_46650;
                    }
                }
            }
            // in-block scan (hopefully no barriers needed)
            {
                skip_threads_46656 = 1;
                while (slt32(skip_threads_46656, 32)) {
                    if (sle32(skip_threads_46656, local_tid_46640 -
                              squot32(local_tid_46640, 32) * 32) &&
                        ltid_in_bounds_46655) {
                        // read operands
                        {
                            x_46649 = ((volatile __local
                                        float *) scan_arr_mem_46644)[sext_i32_i64(local_tid_46640) -
                                                                     sext_i32_i64(skip_threads_46656)];
                        }
                        // perform operation
                        {
                            bool inactive_46657 =
                                 slt64(srem64(sext_i32_i64(local_tid_46640) +
                                              chunk_offset_46647,
                                              iota32_arg_29597),
                                       sext_i32_i64(local_tid_46640) +
                                       chunk_offset_46647 -
                                       (sext_i32_i64(local_tid_46640 -
                                        skip_threads_46656) +
                                        chunk_offset_46647));
                            
                            if (inactive_46657) {
                                x_46649 = x_46650;
                            }
                            if (!inactive_46657) {
                                float defunc_1_op_res_46651 = x_46649 + x_46650;
                                
                                x_46649 = defunc_1_op_res_46651;
                            }
                        }
                    }
                    if (sle32(wave_sizze_46642, skip_threads_46656)) {
                        barrier(CLK_LOCAL_MEM_FENCE);
                    }
                    if (sle32(skip_threads_46656, local_tid_46640 -
                              squot32(local_tid_46640, 32) * 32) &&
                        ltid_in_bounds_46655) {
                        // write result
                        {
                            ((volatile __local
                              float *) scan_arr_mem_46644)[sext_i32_i64(local_tid_46640)] =
                                x_46649;
                            x_46650 = x_46649;
                        }
                    }
                    if (sle32(wave_sizze_46642, skip_threads_46656)) {
                        barrier(CLK_LOCAL_MEM_FENCE);
                    }
                    skip_threads_46656 *= 2;
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            // last thread of block 'i' writes its result to offset 'i'
            {
                if ((local_tid_46640 - squot32(local_tid_46640, 32) * 32) ==
                    31 && ltid_in_bounds_46655) {
                    ((volatile __local
                      float *) scan_arr_mem_46644)[sext_i32_i64(squot32(local_tid_46640,
                                                                        32))] =
                        x_46649;
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            // scan the first block, after which offset 'i' contains carry-in for block 'i+1'
            {
                int32_t skip_threads_46658;
                
                // read input for in-block scan
                {
                    if (squot32(local_tid_46640, 32) == 0 &&
                        ltid_in_bounds_46655) {
                        x_46653 = ((volatile __local
                                    float *) scan_arr_mem_46644)[sext_i32_i64(local_tid_46640)];
                        if ((local_tid_46640 - squot32(local_tid_46640, 32) *
                             32) == 0) {
                            x_46652 = x_46653;
                        }
                    }
                }
                // in-block scan (hopefully no barriers needed)
                {
                    skip_threads_46658 = 1;
                    while (slt32(skip_threads_46658, 32)) {
                        if (sle32(skip_threads_46658, local_tid_46640 -
                                  squot32(local_tid_46640, 32) * 32) &&
                            (squot32(local_tid_46640, 32) == 0 &&
                             ltid_in_bounds_46655)) {
                            // read operands
                            {
                                x_46652 = ((volatile __local
                                            float *) scan_arr_mem_46644)[sext_i32_i64(local_tid_46640) -
                                                                         sext_i32_i64(skip_threads_46658)];
                            }
                            // perform operation
                            {
                                bool inactive_46659 =
                                     slt64(srem64(sext_i32_i64(local_tid_46640 *
                                                  32 + 32 - 1) +
                                                  chunk_offset_46647,
                                                  iota32_arg_29597),
                                           sext_i32_i64(local_tid_46640 * 32 +
                                           32 - 1) + chunk_offset_46647 -
                                           (sext_i32_i64((local_tid_46640 -
                                                          skip_threads_46658) *
                                            32 + 32 - 1) + chunk_offset_46647));
                                
                                if (inactive_46659) {
                                    x_46652 = x_46653;
                                }
                                if (!inactive_46659) {
                                    float defunc_1_op_res_46654 = x_46652 +
                                          x_46653;
                                    
                                    x_46652 = defunc_1_op_res_46654;
                                }
                            }
                        }
                        if (sle32(wave_sizze_46642, skip_threads_46658)) {
                            barrier(CLK_LOCAL_MEM_FENCE);
                        }
                        if (sle32(skip_threads_46658, local_tid_46640 -
                                  squot32(local_tid_46640, 32) * 32) &&
                            (squot32(local_tid_46640, 32) == 0 &&
                             ltid_in_bounds_46655)) {
                            // write result
                            {
                                ((volatile __local
                                  float *) scan_arr_mem_46644)[sext_i32_i64(local_tid_46640)] =
                                    x_46652;
                                x_46653 = x_46652;
                            }
                        }
                        if (sle32(wave_sizze_46642, skip_threads_46658)) {
                            barrier(CLK_LOCAL_MEM_FENCE);
                        }
                        skip_threads_46658 *= 2;
                    }
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            // carry-in for every block except the first
            {
                if (!(squot32(local_tid_46640, 32) == 0 ||
                      !ltid_in_bounds_46655)) {
                    // read operands
                    {
                        x_46650 = x_46649;
                        x_46649 = ((__local
                                    float *) scan_arr_mem_46644)[sext_i32_i64(squot32(local_tid_46640,
                                                                                      32)) -
                                                                 (int64_t) 1];
                    }
                    // perform operation
                    {
                        bool inactive_46660 =
                             slt64(srem64(sext_i32_i64(local_tid_46640) +
                                          chunk_offset_46647, iota32_arg_29597),
                                   sext_i32_i64(local_tid_46640) +
                                   chunk_offset_46647 -
                                   (sext_i32_i64(squot32(local_tid_46640, 32) *
                                    32 - 1) + chunk_offset_46647));
                        
                        if (inactive_46660) {
                            x_46649 = x_46650;
                        }
                        if (!inactive_46660) {
                            float defunc_1_op_res_46651 = x_46649 + x_46650;
                            
                            x_46649 = defunc_1_op_res_46651;
                        }
                    }
                    // write final result
                    {
                        ((__local
                          float *) scan_arr_mem_46644)[sext_i32_i64(local_tid_46640)] =
                            x_46649;
                    }
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            // restore correct values for first block
            {
                if (squot32(local_tid_46640, 32) == 0) {
                    ((__local
                      float *) scan_arr_mem_46644)[sext_i32_i64(local_tid_46640)] =
                        x_46650;
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            // threads in bounds write partial scan result
            {
                if (slt64(gtid_42105, m_29166) && slt64(gtid_42113,
                                                        iota32_arg_29597)) {
                    ((__global float *) mem_45302)[gtid_42105 *
                                                   iota32_arg_29597 +
                                                   gtid_42113] = ((__local
                                                                   float *) scan_arr_mem_46644)[sext_i32_i64(local_tid_46640)];
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            // first thread reads last element as carry-in for next iteration
            {
                bool crosses_segment_46661 = slt64(srem64(chunk_offset_46647 +
                                                          segscan_group_sizze_42200,
                                                          iota32_arg_29597),
                                                   chunk_offset_46647 +
                                                   segscan_group_sizze_42200 -
                                                   (chunk_offset_46647 +
                                                    segscan_group_sizze_42200 -
                                                    (int64_t) 1));
                bool should_load_carry_46662 = local_tid_46640 == 0 &&
                     !crosses_segment_46661;
                
                if (should_load_carry_46662) {
                    x_42204 = ((__local
                                float *) scan_arr_mem_46644)[segscan_group_sizze_42200 -
                                                             (int64_t) 1];
                }
                if (!should_load_carry_46662) {
                    x_42204 = 0.0F;
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
        }
    }
    
  error_1:
    return;
    #undef segscan_group_sizze_42200
}
// Stage-two scan kernel over int64 values stored in mem_45163.
//
// Each work-item picks one element of the m_29166 x N_29165 array: the one at
// flat index (local_id + 1) * (segscan_group_sizze_41059 *
// sdiv_up64(m * N, num_threads)) - 1.  The picked elements are copied to
// local memory, combined with an inclusive scan using add64 that restarts
// whenever two operands lie in different length-N rows, and the scanned
// values are stored back to the positions they were read from.
//
// Parameters:
//   global_failure  - flag read on entry; the kernel returns at once when it
//                     is >= 0.
//   scan_arr_mem_46285_backing_aligned_0
//                   - local scratch buffer, accessed as an int64 array
//                     indexed by local id.
//   N_29165         - length of the inner dimension (row length).
//   m_29166         - length of the outer dimension.
//   stage1_num_groups_46254
//                   - work-items with a local id below this value take part
//                     in the scan (presumably the group count of the
//                     preceding stage-one kernel; confirm in the host code).
//   num_threads_46255
//                   - divisor used to derive the per-work-item stride.
//   mem_45163       - global buffer that is read and updated in place.
//
// NOTE(review): the element index depends only on the local id, so the
// kernel is presumably launched with a single work-group; confirm at the
// launch site.
__kernel void mainziscan_stage2_41042(__global int *global_failure,
                                      __local volatile
                                      int64_t *scan_arr_mem_46285_backing_aligned_0,
                                      int64_t N_29165, int64_t m_29166,
                                      int64_t stage1_num_groups_46254,
                                      int32_t num_threads_46255, __global
                                      unsigned char *mem_45163)
{
    // Alias for a group-size constant that is defined outside this kernel.
    #define segscan_group_sizze_41059 (mainzisegscan_group_sizze_41036)
    
    // These three constants are not referenced again in this kernel.
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict scan_arr_mem_46285_backing_0 =
                          (__local volatile
                           char *) scan_arr_mem_46285_backing_aligned_0;
    
    // Skip all work when the failure flag is already non-negative.
    if (*global_failure >= 0)
        return;
    
    // Work-item identifiers.  Only local_tid_46281 and wave_sizze_46283 are
    // read below; the others are assigned but not used again.
    int32_t global_tid_46280;
    int32_t local_tid_46281;
    int64_t group_sizze_46284;
    int32_t wave_sizze_46283;
    int32_t group_tid_46282;
    
    global_tid_46280 = get_global_id(0);
    local_tid_46281 = get_local_id(0);
    group_sizze_46284 = get_local_size(0);
    wave_sizze_46283 = LOCKSTEP_WIDTH;
    group_tid_46282 = get_group_id(0);
    
    // Copy of the global id; not read again in this kernel.
    int32_t phys_tid_41042;
    
    phys_tid_41042 = global_tid_46280;
    
    // Local scratch viewed as bytes; it is cast to int64 at every access.
    __local char *scan_arr_mem_46285;
    
    scan_arr_mem_46285 = (__local char *) scan_arr_mem_46285_backing_0;
    
    // Flat index of the element handled by this work-item: the last position
    // of the (local_tid + 1)-th stretch of
    // segscan_group_sizze_41059 * sdiv_up64(m * N, num_threads) elements.
    int64_t flat_idx_46287;
    
    flat_idx_46287 = (sext_i32_i64(local_tid_46281) + (int64_t) 1) *
        (segscan_group_sizze_41059 * sdiv_up64(m_29166 * N_29165,
                                               sext_i32_i64(num_threads_46255))) -
        (int64_t) 1;
    
    // Outer index: flat index divided by the row length N.
    int64_t gtid_41033;
    
    gtid_41033 = squot64(flat_idx_46287, N_29165);
    
    // Inner index: what is left of the flat index after removing whole rows.
    int64_t gtid_41041;
    
    gtid_41041 = flat_idx_46287 - squot64(flat_idx_46287, N_29165) * N_29165;
    // threads in bound read carries; others get neutral element
    // (the neutral element stored for out-of-bounds positions is 0)
    {
        if (slt64(gtid_41033, m_29166) && slt64(gtid_41041, N_29165)) {
            ((__local
              int64_t *) scan_arr_mem_46285)[sext_i32_i64(local_tid_46281)] =
                ((__global int64_t *) mem_45163)[gtid_41033 * N_29165 +
                                                 gtid_41041];
        } else {
            ((__local
              int64_t *) scan_arr_mem_46285)[sext_i32_i64(local_tid_46281)] =
                (int64_t) 0;
        }
    }
    // Make every loaded value visible to the whole work-group before scanning.
    barrier(CLK_LOCAL_MEM_FENCE);
    
    // x_41064 / x_41065: accumulator and own value of the first-level scan.
    // x_46288 / x_46289: the same two roles in the scan over block carries.
    int64_t x_41064;
    int64_t x_41065;
    int64_t x_46288;
    int64_t x_46289;
    bool ltid_in_bounds_46291;
    
    // Only work-items with local id < stage1_num_groups_46254 take part.
    ltid_in_bounds_46291 = slt64(sext_i32_i64(local_tid_46281),
                                 stage1_num_groups_46254);
    
    int32_t skip_threads_46292;
    
    // read input for in-block scan
    // (lane 0 of each block of 32 also seeds its accumulator, because it never
    // enters the combining branch of the loop below)
    {
        if (ltid_in_bounds_46291) {
            x_41065 = ((volatile __local
                        int64_t *) scan_arr_mem_46285)[sext_i32_i64(local_tid_46281)];
            if ((local_tid_46281 - squot32(local_tid_46281, 32) * 32) == 0) {
                x_41064 = x_41065;
            }
        }
    }
    // in-block scan (hopefully no barriers needed)
    // Inclusive scan inside each block of 32 consecutive work-items with a
    // stride that doubles every round (1, 2, 4, 8, 16): a work-item whose lane
    // is at least skip_threads combines the value skip_threads slots to its
    // left with its own.  The barriers inside the loop only execute when
    // wave_sizze_46283 <= skip_threads, so none of them run when
    // LOCKSTEP_WIDTH is 32 or more.
    {
        skip_threads_46292 = 1;
        while (slt32(skip_threads_46292, 32)) {
            if (sle32(skip_threads_46292, local_tid_46281 -
                      squot32(local_tid_46281, 32) * 32) &&
                ltid_in_bounds_46291) {
                // read operands
                {
                    x_41064 = ((volatile __local
                                int64_t *) scan_arr_mem_46285)[sext_i32_i64(local_tid_46281) -
                                                               sext_i32_i64(skip_threads_46292)];
                }
                // perform operation
                // inactive is set when the remainder of this work-item's flat
                // index modulo N is smaller than the distance to the flat
                // index of the work-item that produced the operand, i.e. the
                // operand lies in an earlier row.  The operand is then
                // dropped and the own value is kept.  (Assumes srem64 and
                // slt64, defined elsewhere, are signed remainder and signed
                // less-than.)
                {
                    bool inactive_46293 =
                         slt64(srem64((sext_i32_i64(local_tid_46281) +
                                       (int64_t) 1) *
                                      (segscan_group_sizze_41059 *
                                       sdiv_up64(m_29166 * N_29165,
                                                 sext_i32_i64(num_threads_46255))) -
                                      (int64_t) 1, N_29165),
                               (sext_i32_i64(local_tid_46281) + (int64_t) 1) *
                               (segscan_group_sizze_41059 * sdiv_up64(m_29166 *
                                                                      N_29165,
                                                                      sext_i32_i64(num_threads_46255))) -
                               (int64_t) 1 - ((sext_i32_i64(local_tid_46281 -
                                               skip_threads_46292) +
                                               (int64_t) 1) *
                                              (segscan_group_sizze_41059 *
                                               sdiv_up64(m_29166 * N_29165,
                                                         sext_i32_i64(num_threads_46255))) -
                                              (int64_t) 1));
                    
                    if (inactive_46293) {
                        x_41064 = x_41065;
                    }
                    if (!inactive_46293) {
                        int64_t defunc_1_op_res_41066 = add64(x_41064, x_41065);
                        
                        x_41064 = defunc_1_op_res_41066;
                    }
                }
            }
            if (sle32(wave_sizze_46283, skip_threads_46292)) {
                barrier(CLK_LOCAL_MEM_FENCE);
            }
            if (sle32(skip_threads_46292, local_tid_46281 -
                      squot32(local_tid_46281, 32) * 32) &&
                ltid_in_bounds_46291) {
                // write result
                // (and keep it as this work-item's own value for the next round)
                {
                    ((volatile __local
                      int64_t *) scan_arr_mem_46285)[sext_i32_i64(local_tid_46281)] =
                        x_41064;
                    x_41065 = x_41064;
                }
            }
            if (sle32(wave_sizze_46283, skip_threads_46292)) {
                barrier(CLK_LOCAL_MEM_FENCE);
            }
            skip_threads_46292 *= 2;
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // last thread of block 'i' writes its result to offset 'i'
    // (this overwrites the low slots of the scratch array; the first block's
    // own values are put back further down)
    {
        if ((local_tid_46281 - squot32(local_tid_46281, 32) * 32) == 31 &&
            ltid_in_bounds_46291) {
            ((volatile __local
              int64_t *) scan_arr_mem_46285)[sext_i32_i64(squot32(local_tid_46281,
                                                                  32))] =
                x_41064;
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // scan the first block, after which offset 'i' contains carry-in for block 'i+1'
    // Only work-items of the first block of 32 take part.  Slot t now stands
    // for block t, so the row-boundary test below uses the flat index of that
    // block's last work-item (local id t * 32 + 32 - 1).
    {
        int32_t skip_threads_46294;
        
        // read input for in-block scan
        {
            if (squot32(local_tid_46281, 32) == 0 && ltid_in_bounds_46291) {
                x_46289 = ((volatile __local
                            int64_t *) scan_arr_mem_46285)[sext_i32_i64(local_tid_46281)];
                if ((local_tid_46281 - squot32(local_tid_46281, 32) * 32) ==
                    0) {
                    x_46288 = x_46289;
                }
            }
        }
        // in-block scan (hopefully no barriers needed)
        {
            skip_threads_46294 = 1;
            while (slt32(skip_threads_46294, 32)) {
                if (sle32(skip_threads_46294, local_tid_46281 -
                          squot32(local_tid_46281, 32) * 32) &&
                    (squot32(local_tid_46281, 32) == 0 &&
                     ltid_in_bounds_46291)) {
                    // read operands
                    {
                        x_46288 = ((volatile __local
                                    int64_t *) scan_arr_mem_46285)[sext_i32_i64(local_tid_46281) -
                                                                   sext_i32_i64(skip_threads_46294)];
                    }
                    // perform operation
                    // Same row-boundary test as in the first-level scan, with
                    // the flat indices of the last work-items of block
                    // local_tid and block local_tid - skip_threads.
                    {
                        bool inactive_46295 =
                             slt64(srem64((sext_i32_i64(local_tid_46281 * 32 +
                                           32 - 1) + (int64_t) 1) *
                                          (segscan_group_sizze_41059 *
                                           sdiv_up64(m_29166 * N_29165,
                                                     sext_i32_i64(num_threads_46255))) -
                                          (int64_t) 1, N_29165),
                                   (sext_i32_i64(local_tid_46281 * 32 + 32 -
                                    1) + (int64_t) 1) *
                                   (segscan_group_sizze_41059 *
                                    sdiv_up64(m_29166 * N_29165,
                                              sext_i32_i64(num_threads_46255))) -
                                   (int64_t) 1 -
                                   ((sext_i32_i64((local_tid_46281 -
                                                   skip_threads_46294) * 32 +
                                     32 - 1) + (int64_t) 1) *
                                    (segscan_group_sizze_41059 *
                                     sdiv_up64(m_29166 * N_29165,
                                               sext_i32_i64(num_threads_46255))) -
                                    (int64_t) 1));
                        
                        if (inactive_46295) {
                            x_46288 = x_46289;
                        }
                        if (!inactive_46295) {
                            int64_t defunc_1_op_res_46290 = add64(x_46288,
                                                                  x_46289);
                            
                            x_46288 = defunc_1_op_res_46290;
                        }
                    }
                }
                if (sle32(wave_sizze_46283, skip_threads_46294)) {
                    barrier(CLK_LOCAL_MEM_FENCE);
                }
                if (sle32(skip_threads_46294, local_tid_46281 -
                          squot32(local_tid_46281, 32) * 32) &&
                    (squot32(local_tid_46281, 32) == 0 &&
                     ltid_in_bounds_46291)) {
                    // write result
                    {
                        ((volatile __local
                          int64_t *) scan_arr_mem_46285)[sext_i32_i64(local_tid_46281)] =
                            x_46288;
                        x_46289 = x_46288;
                    }
                }
                if (sle32(wave_sizze_46283, skip_threads_46294)) {
                    barrier(CLK_LOCAL_MEM_FENCE);
                }
                skip_threads_46294 *= 2;
            }
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // carry-in for every block except the first
    // Each in-bounds work-item outside the first block combines the scanned
    // carry stored at slot (block number - 1) with its own in-block result.
    {
        if (!(squot32(local_tid_46281, 32) == 0 || !ltid_in_bounds_46291)) {
            // read operands
            // (own in-block result moves to x_41065, the carry goes to x_41064)
            {
                x_41065 = x_41064;
                x_41064 = ((__local
                            int64_t *) scan_arr_mem_46285)[sext_i32_i64(squot32(local_tid_46281,
                                                                                32)) -
                                                           (int64_t) 1];
            }
            // perform operation
            // The carry is dropped when a row boundary lies between this
            // work-item's flat index and that of the last work-item of the
            // previous block (local id squot32(local_tid, 32) * 32 - 1).
            {
                bool inactive_46296 =
                     slt64(srem64((sext_i32_i64(local_tid_46281) +
                                   (int64_t) 1) * (segscan_group_sizze_41059 *
                                                   sdiv_up64(m_29166 * N_29165,
                                                             sext_i32_i64(num_threads_46255))) -
                                  (int64_t) 1, N_29165),
                           (sext_i32_i64(local_tid_46281) + (int64_t) 1) *
                           (segscan_group_sizze_41059 * sdiv_up64(m_29166 *
                                                                  N_29165,
                                                                  sext_i32_i64(num_threads_46255))) -
                           (int64_t) 1 - ((sext_i32_i64(squot32(local_tid_46281,
                                                                32) * 32 - 1) +
                                           (int64_t) 1) *
                                          (segscan_group_sizze_41059 *
                                           sdiv_up64(m_29166 * N_29165,
                                                     sext_i32_i64(num_threads_46255))) -
                                          (int64_t) 1));
                
                if (inactive_46296) {
                    x_41064 = x_41065;
                }
                if (!inactive_46296) {
                    int64_t defunc_1_op_res_41066 = add64(x_41064, x_41065);
                    
                    x_41064 = defunc_1_op_res_41066;
                }
            }
            // write final result
            {
                ((__local
                  int64_t *) scan_arr_mem_46285)[sext_i32_i64(local_tid_46281)] =
                    x_41064;
            }
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // restore correct values for first block
    // The first 32 slots were reused for block carries above, so first-block
    // work-items store their own in-block results again.
    // NOTE(review): this store is not guarded by ltid_in_bounds_46291, while
    // every assignment to x_41065 is.  A first-block work-item that is out of
    // bounds therefore stores a value it never assigned.  Whether that slot
    // can reach mem_45163 depends on the gtid bounds test below; confirm.
    {
        if (squot32(local_tid_46281, 32) == 0) {
            ((__local
              int64_t *) scan_arr_mem_46285)[sext_i32_i64(local_tid_46281)] =
                x_41065;
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // threads in bounds write scanned carries
    // (same bounds test and same global position as the initial load)
    {
        if (slt64(gtid_41033, m_29166) && slt64(gtid_41041, N_29165)) {
            ((__global int64_t *) mem_45163)[gtid_41033 * N_29165 +
                                             gtid_41041] = ((__local
                                                             int64_t *) scan_arr_mem_46285)[sext_i32_i64(local_tid_46281)];
        }
    }
    
    // No goto inside this kernel targets the label below.
  error_0:
    return;
    #undef segscan_group_sizze_41059
}
__kernel void mainziscan_stage2_42114(__global int *global_failure,
                                      __local volatile
                                      int64_t *scan_arr_mem_46668_backing_aligned_0,
                                      int64_t m_29166, int64_t iota32_arg_29597,
                                      int64_t stage1_num_groups_46637,
                                      int32_t num_threads_46638, __global
                                      unsigned char *mem_45302)
{
    #define segscan_group_sizze_42200 (mainzisegscan_group_sizze_42108)
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict scan_arr_mem_46668_backing_0 =
                          (__local volatile
                           char *) scan_arr_mem_46668_backing_aligned_0;
    
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_46663;
    int32_t local_tid_46664;
    int64_t group_sizze_46667;
    int32_t wave_sizze_46666;
    int32_t group_tid_46665;
    
    global_tid_46663 = get_global_id(0);
    local_tid_46664 = get_local_id(0);
    group_sizze_46667 = get_local_size(0);
    wave_sizze_46666 = LOCKSTEP_WIDTH;
    group_tid_46665 = get_group_id(0);
    
    int32_t phys_tid_42114;
    
    phys_tid_42114 = global_tid_46663;
    
    __local char *scan_arr_mem_46668;
    
    scan_arr_mem_46668 = (__local char *) scan_arr_mem_46668_backing_0;
    
    int64_t flat_idx_46670;
    
    flat_idx_46670 = (sext_i32_i64(local_tid_46664) + (int64_t) 1) *
        (segscan_group_sizze_42200 * sdiv_up64(m_29166 * iota32_arg_29597,
                                               sext_i32_i64(num_threads_46638))) -
        (int64_t) 1;
    
    int64_t gtid_42105;
    
    gtid_42105 = squot64(flat_idx_46670, iota32_arg_29597);
    
    int64_t gtid_42113;
    
    gtid_42113 = flat_idx_46670 - squot64(flat_idx_46670, iota32_arg_29597) *
        iota32_arg_29597;
    // threads in bound read carries; others get neutral element
    {
        if (slt64(gtid_42105, m_29166) && slt64(gtid_42113, iota32_arg_29597)) {
            ((__local
              float *) scan_arr_mem_46668)[sext_i32_i64(local_tid_46664)] =
                ((__global float *) mem_45302)[gtid_42105 * iota32_arg_29597 +
                                               gtid_42113];
        } else {
            ((__local
              float *) scan_arr_mem_46668)[sext_i32_i64(local_tid_46664)] =
                0.0F;
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    
    float x_42204;
    float x_42205;
    float x_46671;
    float x_46672;
    bool ltid_in_bounds_46674;
    
    ltid_in_bounds_46674 = slt64(sext_i32_i64(local_tid_46664),
                                 stage1_num_groups_46637);
    
    int32_t skip_threads_46675;
    
    // read input for in-block scan
    {
        if (ltid_in_bounds_46674) {
            x_42205 = ((volatile __local
                        float *) scan_arr_mem_46668)[sext_i32_i64(local_tid_46664)];
            if ((local_tid_46664 - squot32(local_tid_46664, 32) * 32) == 0) {
                x_42204 = x_42205;
            }
        }
    }
    // in-block scan (hopefully no barriers needed)
    {
        skip_threads_46675 = 1;
        while (slt32(skip_threads_46675, 32)) {
            if (sle32(skip_threads_46675, local_tid_46664 -
                      squot32(local_tid_46664, 32) * 32) &&
                ltid_in_bounds_46674) {
                // read operands
                {
                    x_42204 = ((volatile __local
                                float *) scan_arr_mem_46668)[sext_i32_i64(local_tid_46664) -
                                                             sext_i32_i64(skip_threads_46675)];
                }
                // perform operation
                {
                    bool inactive_46676 =
                         slt64(srem64((sext_i32_i64(local_tid_46664) +
                                       (int64_t) 1) *
                                      (segscan_group_sizze_42200 *
                                       sdiv_up64(m_29166 * iota32_arg_29597,
                                                 sext_i32_i64(num_threads_46638))) -
                                      (int64_t) 1, iota32_arg_29597),
                               (sext_i32_i64(local_tid_46664) + (int64_t) 1) *
                               (segscan_group_sizze_42200 * sdiv_up64(m_29166 *
                                                                      iota32_arg_29597,
                                                                      sext_i32_i64(num_threads_46638))) -
                               (int64_t) 1 - ((sext_i32_i64(local_tid_46664 -
                                               skip_threads_46675) +
                                               (int64_t) 1) *
                                              (segscan_group_sizze_42200 *
                                               sdiv_up64(m_29166 *
                                                         iota32_arg_29597,
                                                         sext_i32_i64(num_threads_46638))) -
                                              (int64_t) 1));
                    
                    if (inactive_46676) {
                        x_42204 = x_42205;
                    }
                    if (!inactive_46676) {
                        float defunc_1_op_res_42206 = x_42204 + x_42205;
                        
                        x_42204 = defunc_1_op_res_42206;
                    }
                }
            }
            if (sle32(wave_sizze_46666, skip_threads_46675)) {
                barrier(CLK_LOCAL_MEM_FENCE);
            }
            if (sle32(skip_threads_46675, local_tid_46664 -
                      squot32(local_tid_46664, 32) * 32) &&
                ltid_in_bounds_46674) {
                // write result
                {
                    ((volatile __local
                      float *) scan_arr_mem_46668)[sext_i32_i64(local_tid_46664)] =
                        x_42204;
                    x_42205 = x_42204;
                }
            }
            if (sle32(wave_sizze_46666, skip_threads_46675)) {
                barrier(CLK_LOCAL_MEM_FENCE);
            }
            skip_threads_46675 *= 2;
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // last thread of block 'i' writes its result to offset 'i'
    {
        if ((local_tid_46664 - squot32(local_tid_46664, 32) * 32) == 31 &&
            ltid_in_bounds_46674) {
            ((volatile __local
              float *) scan_arr_mem_46668)[sext_i32_i64(squot32(local_tid_46664,
                                                                32))] = x_42204;
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // scan the first block, after which offset 'i' contains carry-in for block 'i+1'
    {
        int32_t skip_threads_46677;
        
        // read input for in-block scan
        {
            if (squot32(local_tid_46664, 32) == 0 && ltid_in_bounds_46674) {
                x_46672 = ((volatile __local
                            float *) scan_arr_mem_46668)[sext_i32_i64(local_tid_46664)];
                if ((local_tid_46664 - squot32(local_tid_46664, 32) * 32) ==
                    0) {
                    x_46671 = x_46672;
                }
            }
        }
        // in-block scan (hopefully no barriers needed)
        {
            skip_threads_46677 = 1;
            while (slt32(skip_threads_46677, 32)) {
                if (sle32(skip_threads_46677, local_tid_46664 -
                          squot32(local_tid_46664, 32) * 32) &&
                    (squot32(local_tid_46664, 32) == 0 &&
                     ltid_in_bounds_46674)) {
                    // read operands
                    {
                        x_46671 = ((volatile __local
                                    float *) scan_arr_mem_46668)[sext_i32_i64(local_tid_46664) -
                                                                 sext_i32_i64(skip_threads_46677)];
                    }
                    // perform operation
                    {
                        bool inactive_46678 =
                             slt64(srem64((sext_i32_i64(local_tid_46664 * 32 +
                                           32 - 1) + (int64_t) 1) *
                                          (segscan_group_sizze_42200 *
                                           sdiv_up64(m_29166 * iota32_arg_29597,
                                                     sext_i32_i64(num_threads_46638))) -
                                          (int64_t) 1, iota32_arg_29597),
                                   (sext_i32_i64(local_tid_46664 * 32 + 32 -
                                    1) + (int64_t) 1) *
                                   (segscan_group_sizze_42200 *
                                    sdiv_up64(m_29166 * iota32_arg_29597,
                                              sext_i32_i64(num_threads_46638))) -
                                   (int64_t) 1 -
                                   ((sext_i32_i64((local_tid_46664 -
                                                   skip_threads_46677) * 32 +
                                     32 - 1) + (int64_t) 1) *
                                    (segscan_group_sizze_42200 *
                                     sdiv_up64(m_29166 * iota32_arg_29597,
                                               sext_i32_i64(num_threads_46638))) -
                                    (int64_t) 1));
                        
                        if (inactive_46678) {
                            x_46671 = x_46672;
                        }
                        if (!inactive_46678) {
                            float defunc_1_op_res_46673 = x_46671 + x_46672;
                            
                            x_46671 = defunc_1_op_res_46673;
                        }
                    }
                }
                if (sle32(wave_sizze_46666, skip_threads_46677)) {
                    barrier(CLK_LOCAL_MEM_FENCE);
                }
                if (sle32(skip_threads_46677, local_tid_46664 -
                          squot32(local_tid_46664, 32) * 32) &&
                    (squot32(local_tid_46664, 32) == 0 &&
                     ltid_in_bounds_46674)) {
                    // write result
                    {
                        ((volatile __local
                          float *) scan_arr_mem_46668)[sext_i32_i64(local_tid_46664)] =
                            x_46671;
                        x_46672 = x_46671;
                    }
                }
                if (sle32(wave_sizze_46666, skip_threads_46677)) {
                    barrier(CLK_LOCAL_MEM_FENCE);
                }
                skip_threads_46677 *= 2;
            }
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // carry-in for every block except the first
    {
        if (!(squot32(local_tid_46664, 32) == 0 || !ltid_in_bounds_46674)) {
            // read operands
            {
                x_42205 = x_42204;
                x_42204 = ((__local
                            float *) scan_arr_mem_46668)[sext_i32_i64(squot32(local_tid_46664,
                                                                              32)) -
                                                         (int64_t) 1];
            }
            // perform operation
            {
                bool inactive_46679 =
                     slt64(srem64((sext_i32_i64(local_tid_46664) +
                                   (int64_t) 1) * (segscan_group_sizze_42200 *
                                                   sdiv_up64(m_29166 *
                                                             iota32_arg_29597,
                                                             sext_i32_i64(num_threads_46638))) -
                                  (int64_t) 1, iota32_arg_29597),
                           (sext_i32_i64(local_tid_46664) + (int64_t) 1) *
                           (segscan_group_sizze_42200 * sdiv_up64(m_29166 *
                                                                  iota32_arg_29597,
                                                                  sext_i32_i64(num_threads_46638))) -
                           (int64_t) 1 - ((sext_i32_i64(squot32(local_tid_46664,
                                                                32) * 32 - 1) +
                                           (int64_t) 1) *
                                          (segscan_group_sizze_42200 *
                                           sdiv_up64(m_29166 * iota32_arg_29597,
                                                     sext_i32_i64(num_threads_46638))) -
                                          (int64_t) 1));
                
                if (inactive_46679) {
                    x_42204 = x_42205;
                }
                if (!inactive_46679) {
                    float defunc_1_op_res_42206 = x_42204 + x_42205;
                    
                    x_42204 = defunc_1_op_res_42206;
                }
            }
            // write final result
            {
                ((__local
                  float *) scan_arr_mem_46668)[sext_i32_i64(local_tid_46664)] =
                    x_42204;
            }
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // restore correct values for first block
    {
        if (squot32(local_tid_46664, 32) == 0) {
            ((__local
              float *) scan_arr_mem_46668)[sext_i32_i64(local_tid_46664)] =
                x_42205;
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // threads in bounds write scanned carries
    {
        if (slt64(gtid_42105, m_29166) && slt64(gtid_42113, iota32_arg_29597)) {
            ((__global float *) mem_45302)[gtid_42105 * iota32_arg_29597 +
                                           gtid_42113] = ((__local
                                                           float *) scan_arr_mem_46668)[sext_i32_i64(local_tid_46664)];
        }
    }
    
  error_0:
    return;
    #undef segscan_group_sizze_42200
}
// Final pass of a scan over the int64 buffer mem_45163, addressed as
// m_29166 rows of N_29165 elements (index = row * N_29165 + col).
//
// The flat index space is cut into chunks of
//   segscan_group_sizze_41059 * sdiv_up64(m * N, num_threads)
// consecutive elements (sdiv_up64 is defined elsewhere; judging by its name
// it is a rounding-up division - confirm).  An in-bounds element gets the
// value stored just before its chunk added to it (add64) unless
//   * it lies in chunk 0,
//   * it is the last element of its chunk, or
//   * the element before the chunk belongs to an earlier row.
//
// Work-groups are virtualised: each physical group loops over the virtual
// group ids phys, phys + num_groups, ... below required_groups_46297.
__kernel void mainziscan_stage3_41042(__global int *global_failure,
                                      int64_t N_29165, int64_t m_29166,
                                      int64_t num_groups_41060,
                                      int32_t num_threads_46255,
                                      int32_t required_groups_46297, __global
                                      unsigned char *mem_45163)
{
    #define segscan_group_sizze_41059 (mainzisegscan_group_sizze_41036)
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    
    // A non-negative failure code means an earlier kernel already failed.
    if (*global_failure >= 0)
        return;
    
    // Work-item identification (several of these are not used below).
    const int32_t global_tid = get_global_id(0);
    const int32_t local_tid = get_local_id(0);
    const int64_t group_size = get_local_size(0);
    const int32_t wave_size = LOCKSTEP_WIDTH;
    const int32_t group_tid = get_group_id(0);
    const int32_t phys_tid_41042 = global_tid;
    const int32_t phys_group = get_group_id(0);
    
    // Both values are loop-invariant: the stride between the virtual groups
    // served by this physical group, and how many of them it serves.
    const int32_t group_stride = sext_i64_i32(num_groups_41060);
    const int32_t rounds = sdiv_up32(required_groups_46297 - phys_group,
                                     group_stride);
    
    for (int32_t round = 0; round < rounds; round++) {
        const int32_t virt_group = phys_group + round * group_stride;
        const int64_t flat = sext_i32_i64(virt_group) *
                segscan_group_sizze_41059 + sext_i32_i64(local_tid);
        const int64_t row = squot64(flat, N_29165);
        const int64_t col = flat - row * N_29165;
        
        // Chunk geometry and the flat index of the element right before the
        // chunk that contains this element.
        const int64_t chunk_len = segscan_group_sizze_41059 *
                sdiv_up64(m_29166 * N_29165,
                          sext_i32_i64(num_threads_46255));
        const int64_t chunk = squot64(flat, chunk_len);
        const int64_t carry_idx = chunk * chunk_len - (int64_t) 1;
        
        if (slt64(row, m_29166) && slt64(col, N_29165)) {
            // Same three tests as documented above, in the same order:
            // not chunk 0, not the chunk's last element, and the carry
            // element is not in an earlier row (position within the row is
            // at least the distance back to carry_idx).
            if (chunk != (int64_t) 0 &&
                flat != (chunk + (int64_t) 1) * chunk_len - (int64_t) 1 &&
                !slt64(srem64(flat, N_29165), flat - carry_idx)) {
                __global int64_t *data = (__global int64_t *) mem_45163;
                const int64_t carry_row = squot64(carry_idx, N_29165);
                const int64_t carry_col = carry_idx - carry_row * N_29165;
                const int64_t carry = data[carry_row * N_29165 + carry_col];
                const int64_t own = data[row * N_29165 + col];
                
                data[row * N_29165 + col] = add64(carry, own);
            }
        }
        // Outside every conditional, so all work-items of the group reach
        // it in every round.
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }
    
  error_0:
    return;
    #undef segscan_group_sizze_41059
}
// Final pass of a scan over the float buffer mem_45302, addressed as
// m_29166 rows of iota32_arg_29597 elements (index = row * width + col).
//
// The flat index space is cut into chunks of
//   segscan_group_sizze_42200 * sdiv_up64(m * width, num_threads)
// consecutive elements (sdiv_up64 is defined elsewhere; judging by its name
// it is a rounding-up division - confirm).  An in-bounds element gets the
// value stored just before its chunk added to it unless
//   * it lies in chunk 0,
//   * it is the last element of its chunk, or
//   * the element before the chunk belongs to an earlier row.
//
// Work-groups are virtualised: each physical group loops over the virtual
// group ids phys, phys + num_groups, ... below required_groups_46680.
__kernel void mainziscan_stage3_42114(__global int *global_failure,
                                      int64_t m_29166, int64_t iota32_arg_29597,
                                      int64_t num_groups_42201,
                                      int32_t num_threads_46638,
                                      int32_t required_groups_46680, __global
                                      unsigned char *mem_45302)
{
    #define segscan_group_sizze_42200 (mainzisegscan_group_sizze_42108)
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    
    // A non-negative failure code means an earlier kernel already failed.
    if (*global_failure >= 0)
        return;
    
    // Work-item identification (several of these are not used below).
    const int32_t global_tid = get_global_id(0);
    const int32_t local_tid = get_local_id(0);
    const int64_t group_size = get_local_size(0);
    const int32_t wave_size = LOCKSTEP_WIDTH;
    const int32_t group_tid = get_group_id(0);
    const int32_t phys_tid_42114 = global_tid;
    const int32_t phys_group = get_group_id(0);
    
    // Both values are loop-invariant: the stride between the virtual groups
    // served by this physical group, and how many of them it serves.
    const int32_t group_stride = sext_i64_i32(num_groups_42201);
    const int32_t rounds = sdiv_up32(required_groups_46680 - phys_group,
                                     group_stride);
    
    for (int32_t round = 0; round < rounds; round++) {
        const int32_t virt_group = phys_group + round * group_stride;
        const int64_t flat = sext_i32_i64(virt_group) *
                segscan_group_sizze_42200 + sext_i32_i64(local_tid);
        const int64_t row = squot64(flat, iota32_arg_29597);
        const int64_t col = flat - row * iota32_arg_29597;
        
        // Chunk geometry and the flat index of the element right before the
        // chunk that contains this element.
        const int64_t chunk_len = segscan_group_sizze_42200 *
                sdiv_up64(m_29166 * iota32_arg_29597,
                          sext_i32_i64(num_threads_46638));
        const int64_t chunk = squot64(flat, chunk_len);
        const int64_t carry_idx = chunk * chunk_len - (int64_t) 1;
        
        if (slt64(row, m_29166) && slt64(col, iota32_arg_29597)) {
            // Same three tests as documented above, in the same order:
            // not chunk 0, not the chunk's last element, and the carry
            // element is not in an earlier row (position within the row is
            // at least the distance back to carry_idx).
            if (chunk != (int64_t) 0 &&
                flat != (chunk + (int64_t) 1) * chunk_len - (int64_t) 1 &&
                !slt64(srem64(flat, iota32_arg_29597), flat - carry_idx)) {
                __global float *data = (__global float *) mem_45302;
                const int64_t carry_row = squot64(carry_idx,
                                                  iota32_arg_29597);
                const int64_t carry_col = carry_idx - carry_row *
                        iota32_arg_29597;
                const float carry = data[carry_row * iota32_arg_29597 +
                                         carry_col];
                const float own = data[row * iota32_arg_29597 + col];
                const float combined = carry + own;
                
                data[row * iota32_arg_29597 + col] = combined;
            }
        }
        // Outside every conditional, so all work-items of the group reach
        // it in every round.
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }
    
  error_0:
    return;
    #undef segscan_group_sizze_42200
}
// Fills the float buffer mem_44385, addressed as i32_res_29181 rows of
// N_29165 elements, one element per work-item.  With r the row and t the
// int32 value mappingindices_mem_44380[col]:
//   r == 0          -> 1.0
//   r == 1          -> (float) t
//   r >= 2, r even  -> sin(t * (6.2831855 * (r / 2)) / freq_29170)
//   r >= 2, r odd   -> cos(t * (6.2831855 * (r / 2)) / freq_29170)
// where r / 2 is sdiv32(r, 2) and 6.2831855F is 2*pi rounded to float.
__kernel void mainzisegmap_38661(__global int *global_failure, int64_t N_29165,
                                 float freq_29170, int64_t i32_res_29181,
                                 __global
                                 unsigned char *mappingindices_mem_44380,
                                 __global unsigned char *mem_44385)
{
    #define segmap_group_sizze_38734 (mainzisegmap_group_sizze_38664)
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    
    // A non-negative failure code means an earlier kernel already failed.
    if (*global_failure >= 0)
        return;
    
    // Work-item identification (several of these are not used below).
    const int32_t global_tid = get_global_id(0);
    const int32_t local_tid = get_local_id(0);
    const int64_t group_size = get_local_size(0);
    const int32_t wave_size = LOCKSTEP_WIDTH;
    const int32_t group_tid = get_group_id(0);
    const int32_t phys_tid_38661 = global_tid;
    
    // Split the flat work-item index into (row, col) of the output.
    const int64_t flat = sext_i32_i64(group_tid) * segmap_group_sizze_38734 +
            sext_i32_i64(local_tid);
    const int64_t row = squot64(flat, N_29165);
    const int64_t col = flat - row * N_29165;
    
    if (slt64(row, i32_res_29181) && slt64(col, N_29165)) {
        const int32_t basis = sext_i64_i32(row);
        float value;
        
        if (basis == 0) {
            value = 1.0F;
        } else {
            const int32_t raw_index = ((__global
                                        int32_t *) mappingindices_mem_44380)[col];
            
            if (basis == 1) {
                value = sitofp_i32_f32(raw_index);
            } else {
                // Each float operation stays its own statement, in the
                // original order.
                const float harmonic = sitofp_i32_f32(sdiv32(basis, 2));
                const float t = sitofp_i32_f32(raw_index);
                const float scaled = 6.2831855F * harmonic;
                const float phase = t * scaled;
                const float angle = phase / freq_29170;
                
                value = (smod32(basis, 2) == 0) ? futrts_sin32(angle) :
                    futrts_cos32(angle);
            }
        }
        ((__global float *) mem_44385)[row * N_29165 + col] = value;
    }
    
  error_0:
    return;
    #undef segmap_group_sizze_38734
}
// Fills the float buffer mem_44389, addressed as i32_res_29181 rows of
// N_29165 elements, one element per work-item.  With r the row,
// i = r + 1 (add32) and t the int32 value mappingindices_mem_44380[col]:
//   r == 0            -> 1.0
//   r != 0, i even    -> sin(t * (6.2831855 * (i / 2)) / freq_29170)
//   r != 0, i odd     -> cos(t * (6.2831855 * (i / 2)) / freq_29170)
// where i / 2 is sdiv32(i, 2) and 6.2831855F is 2*pi rounded to float.
__kernel void mainzisegmap_38839(__global int *global_failure, int64_t N_29165,
                                 float freq_29170, int64_t i32_res_29181,
                                 __global
                                 unsigned char *mappingindices_mem_44380,
                                 __global unsigned char *mem_44389)
{
    #define segmap_group_sizze_38908 (mainzisegmap_group_sizze_38842)
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    
    // A non-negative failure code means an earlier kernel already failed.
    if (*global_failure >= 0)
        return;
    
    // Work-item identification (several of these are not used below).
    const int32_t global_tid = get_global_id(0);
    const int32_t local_tid = get_local_id(0);
    const int64_t group_size = get_local_size(0);
    const int32_t wave_size = LOCKSTEP_WIDTH;
    const int32_t group_tid = get_group_id(0);
    const int32_t phys_tid_38839 = global_tid;
    
    // Split the flat work-item index into (row, col) of the output.
    const int64_t flat = sext_i32_i64(group_tid) * segmap_group_sizze_38908 +
            sext_i32_i64(local_tid);
    const int64_t row = squot64(flat, N_29165);
    const int64_t col = flat - row * N_29165;
    
    if (slt64(row, i32_res_29181) && slt64(col, N_29165)) {
        const int32_t basis = sext_i64_i32(row);
        float value;
        
        if (basis == 0) {
            value = 1.0F;
        } else {
            const int32_t raw_index = ((__global
                                        int32_t *) mappingindices_mem_44380)[col];
            const int32_t shifted = add32(1, basis);
            
            // Each float operation stays its own statement, in the
            // original order.
            const float harmonic = sitofp_i32_f32(sdiv32(shifted, 2));
            const float t = sitofp_i32_f32(raw_index);
            const float scaled = 6.2831855F * harmonic;
            const float phase = t * scaled;
            const float angle = phase / freq_29170;
            
            value = (smod32(shifted, 2) == 0) ? futrts_sin32(angle) :
                futrts_cos32(angle);
        }
        ((__global float *) mem_44389)[row * N_29165 + col] = value;
    }
    
  error_0:
    return;
    #undef segmap_group_sizze_38908
}
// Element-wise offset: for every (row, col) with row < N_29165 and
// col < i32_res_29181, writes i32_res_29246 + mem_44393[row, col] to
// mem_44397[row, col].  Both buffers are read/written as float arrays with
// index row * i32_res_29181 + col; one work-item handles one element.
__kernel void mainzisegmap_38967(__global int *global_failure, int64_t N_29165,
                                 int64_t i32_res_29181, float i32_res_29246,
                                 __global unsigned char *mem_44393, __global
                                 unsigned char *mem_44397)
{
    #define segmap_group_sizze_38991 (mainzisegmap_group_sizze_38970)
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    
    // A non-negative failure code means an earlier kernel already failed.
    if (*global_failure >= 0)
        return;
    
    // Work-item identification (several of these are not used below).
    const int32_t global_tid = get_global_id(0);
    const int32_t local_tid = get_local_id(0);
    const int64_t group_size = get_local_size(0);
    const int32_t wave_size = LOCKSTEP_WIDTH;
    const int32_t group_tid = get_group_id(0);
    const int32_t phys_tid_38967 = global_tid;
    
    // Split the flat work-item index into (row, col).
    const int64_t flat = sext_i32_i64(group_tid) * segmap_group_sizze_38991 +
            sext_i32_i64(local_tid);
    const int64_t row = squot64(flat, i32_res_29181);
    const int64_t col = flat - row * i32_res_29181;
    
    if (slt64(row, N_29165) && slt64(col, i32_res_29181)) {
        const int64_t offset = row * i32_res_29181 + col;
        const float input = ((__global float *) mem_44393)[offset];
        const float shifted = i32_res_29246 + input;
        
        ((__global float *) mem_44397)[offset] = shifted;
    }
    
  error_0:
    return;
    #undef segmap_group_sizze_38991
}
// For every index p < m_29166 (one work-item per p, with virtualised
// work-groups) computes a k2p2zq_29179 x k2p2zq_29179 block of floats:
//
//   block[i][j] = sum over k < n_29169 of
//                 binop_p_mem_44390[i * N + k] * mem_44397[k * i32_res + j]
//                 * (isnan(mem_44400[k * m + p]) ? 0 : 1)
//
// The block is first written to the per-thread scratch buffer mem_44404
// (indexed by phys_tid with strides num_groups * group_size), then copied to
// mem_44446 at [i * (m * i32_res) + j * m + p].
// NOTE(review): the compute loops run to k2p2zq_29179 while the copy loops
// run to i32_res_29181; presumably the two are equal - confirm at the caller.
__kernel void mainzisegmap_39000(__global int *global_failure, int64_t N_29165,
                                 int64_t m_29166, int32_t n_29169,
                                 int32_t k2p2zq_29179, int64_t i32_res_29181,
                                 int64_t num_groups_39025, __global
                                 unsigned char *binop_p_mem_44390, __global
                                 unsigned char *mem_44397, __global
                                 unsigned char *mem_44400, __global
                                 unsigned char *mem_44404, __global
                                 unsigned char *mem_44446)
{
    #define segmap_group_sizze_39024 (mainzisegmap_group_sizze_39002)
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    
    // A non-negative failure code means an earlier kernel already failed.
    if (*global_failure >= 0)
        return;
    
    // Work-item identification (some of these are not used below).
    const int32_t global_tid = get_global_id(0);
    const int32_t local_tid = get_local_id(0);
    const int64_t group_size = get_local_size(0);
    const int32_t wave_size = LOCKSTEP_WIDTH;
    const int32_t group_tid = get_group_id(0);
    const int32_t phys_tid_39000 = global_tid;
    const int32_t phys_group = get_group_id(0);
    
    // Loop-invariant: stride between the virtual groups served by this
    // physical group, and how many of them it serves.
    const int32_t group_stride = sext_i64_i32(num_groups_39025);
    const int32_t rounds =
        sdiv_up32(sext_i64_i32(sdiv_up64(m_29166, segmap_group_sizze_39024)) -
                  phys_group, group_stride);
    
    for (int32_t round = 0; round < rounds; round++) {
        const int32_t virt_group = phys_group + round * group_stride;
        const int64_t p = sext_i32_i64(virt_group) *
                segmap_group_sizze_39024 + sext_i32_i64(local_tid);
        
        if (slt64(p, m_29166)) {
            __global float *mask_src = (__global float *) mem_44400;
            __global float *lhs = (__global float *) binop_p_mem_44390;
            __global float *rhs = (__global float *) mem_44397;
            __global float *scratch = (__global float *) mem_44404;
            __global float *out = (__global float *) mem_44446;
            
            // Scratch layout: element (i, j) of this thread's block lives at
            // phys_tid + i * i_stride + j * j_stride.
            const int64_t j_stride = num_groups_39025 *
                    segmap_group_sizze_39024;
            const int64_t i_stride = j_stride * i32_res_29181;
            
            for (int32_t i32 = 0; i32 < k2p2zq_29179; i32++) {
                const int64_t i = sext_i32_i64(i32);
                
                for (int32_t j32 = 0; j32 < k2p2zq_29179; j32++) {
                    const int64_t j = sext_i32_i64(j32);
                    float acc = 0.0F;
                    
                    for (int32_t k32 = 0; k32 < n_29169; k32++) {
                        const int64_t k = sext_i32_i64(k32);
                        const float sample = mask_src[k * m_29166 + p];
                        const float left = lhs[i * N_29165 + k];
                        const float right = rhs[k * i32_res_29181 + j];
                        const float prod = left * right;
                        // 0 where the sample is NaN, 1 otherwise.
                        const float keep = futrts_isnan32(sample) ? 0.0F :
                            1.0F;
                        // Multiply and accumulate remain two statements, as
                        // in the original.
                        const float masked = prod * keep;
                        
                        acc = masked + acc;
                    }
                    scratch[phys_tid_39000 + (i * i_stride + j * j_stride)] =
                        acc;
                }
            }
            // Copy this thread's block from scratch to its final location.
            for (int64_t i = 0; i < i32_res_29181; i++) {
                for (int64_t j = 0; j < i32_res_29181; j++) {
                    out[i * (m_29166 * i32_res_29181) + j * m_29166 + p] =
                        scratch[phys_tid_39000 + (i * i_stride + j *
                                                  j_stride)];
                }
            }
        }
        // Outside the bounds check, so all work-items of the group reach it
        // in every round.
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }
    
  error_0:
    return;
    #undef segmap_group_sizze_39024
}
// Kernel mainzisegmap_39046.
//
// Each work item is mapped to a (row, col) pair with row < m_29166 and
// col < i32_res_29181.  For every j < k2p2zq_29179 it accumulates, over
// t < n_29169, the product
//     mem_44393[t * i32_res_29181 + col] * mem_44397[t * i32_res_29181 + j]
// multiplied by 0.0 when images_mem_44381[row * N_29165 + t] is NaN
// (according to futrts_isnan32) and by 1.0 otherwise.  The sums are staged
// in a per-thread slot of mem_44449 and then copied to mem_44465.
//
// A physical work-group processes the virtual groups
// phys_group, phys_group + num_groups_39202, ... in turn.
__kernel void mainzisegmap_39046(__global int *global_failure, int64_t N_29165,
                                 int64_t m_29166, int32_t n_29169,
                                 int32_t k2p2zq_29179, int64_t i32_res_29181,
                                 int64_t num_groups_39202,
                                 __global unsigned char *images_mem_44381,
                                 __global unsigned char *mem_44393,
                                 __global unsigned char *mem_44397,
                                 __global unsigned char *mem_44449,
                                 __global unsigned char *mem_44465)
{
    #define segmap_group_sizze_39201 (mainzisegmap_group_sizze_39049)

    // Nothing to do once a failure code has been stored.
    if (*global_failure >= 0)
        return;

    const int32_t phys_tid = get_global_id(0);
    const int32_t local_id = get_local_id(0);
    const int32_t phys_group = get_group_id(0);

    // Float views of the byte buffers.
    __global float *images = (__global float *) images_mem_44381;
    __global float *lhs = (__global float *) mem_44393;
    __global float *rhs = (__global float *) mem_44397;
    __global float *scratch = (__global float *) mem_44449;
    __global float *dst = (__global float *) mem_44465;

    // Loop-invariant quantities of the virtual-group loop.
    const int32_t group_stride = sext_i64_i32(num_groups_39202);
    const int32_t virt_group_count =
        sext_i64_i32(sdiv_up64(m_29166 * i32_res_29181,
                               segmap_group_sizze_39201));
    const int32_t passes = sdiv_up32(virt_group_count - phys_group,
                                     group_stride);
    // Distance between consecutive staged values of one thread.
    const int64_t scratch_stride = num_groups_39202 *
        segmap_group_sizze_39201;

    for (int32_t pass_no = 0; pass_no < passes; pass_no++) {
        const int32_t virt_group = phys_group + pass_no * group_stride;
        const int64_t flat = sext_i32_i64(virt_group) *
            segmap_group_sizze_39201 + sext_i32_i64(local_id);
        // Split the flat index into (row, col) with i32_res_29181 columns.
        const int64_t row = squot64(flat, i32_res_29181);
        const int64_t col = flat - row * i32_res_29181;

        if (slt64(row, m_29166) && slt64(col, i32_res_29181)) {
            for (int32_t j32 = 0; j32 < k2p2zq_29179; j32++) {
                const int64_t j = sext_i32_i64(j32);
                float acc = 0.0F;

                for (int32_t t32 = 0; t32 < n_29169; t32++) {
                    const int64_t t = sext_i32_i64(t32);
                    const float pixel = images[row * N_29165 + t];
                    const float a = lhs[t * i32_res_29181 + col];
                    const float b = rhs[t * i32_res_29181 + j];
                    const float prod = a * b;
                    const float mask = futrts_isnan32(pixel) ? 0.0F : 1.0F;
                    // Product, masking and accumulation are kept as separate
                    // statements, in this operand order.
                    const float term = prod * mask;

                    acc = term + acc;
                }
                scratch[phys_tid + j * scratch_stride] = acc;
            }
            // Copy the staged values of this thread to their final place.
            for (int64_t j = 0; j < i32_res_29181; j++) {
                dst[j * (i32_res_29181 * m_29166) + row * i32_res_29181 +
                    col] = scratch[phys_tid + j * scratch_stride];
            }
        }
        // Reached by every work item of the group on every pass, because it
        // sits outside the range check.
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }

    #undef segmap_group_sizze_39201
}
// Kernel mainzisegmap_39638.
//
// Splits the flat work-item index into three coordinates
// (outer, mid, inner) with extents
// (m_29166, i32_res_29181, j_m_i_29333), reads one float from
// gauss_jordan_res_r_mem_44622 through the offset/stride triple
// (ixfn_44617, ixfn_44618, ixfn_44620) and stores it in mem_44627 at
// outer * (j_m_i_29333 * i32_res_29181) + mid * j_m_i_29333 + inner.
__kernel void mainzisegmap_39638(__global int *global_failure, int64_t m_29166,
                                 int64_t i32_res_29181, int64_t nm_29314,
                                 int64_t i32_res_29329, int64_t x_29330,
                                 int64_t j_m_i_29333,
                                 int64_t gauss_jordan_res_r_ixfn_44617,
                                 int64_t gauss_jordan_res_r_ixfn_44618,
                                 int64_t gauss_jordan_res_r_ixfn_44620,
                                 __global unsigned char *gauss_jordan_res_r_mem_44622,
                                 __global unsigned char *mem_44627)
{
    #define segmap_group_sizze_40385 (mainzisegmap_group_sizze_39642)

    // Nothing to do once a failure code has been stored.
    if (*global_failure >= 0)
        return;

    const int32_t group_id = get_group_id(0);
    const int32_t local_id = get_local_id(0);

    __global float *src = (__global float *) gauss_jordan_res_r_mem_44622;
    __global float *dst = (__global float *) mem_44627;

    // Flat index -> (outer, mid, inner).
    const int64_t flat = sext_i32_i64(group_id) * segmap_group_sizze_40385 +
        sext_i32_i64(local_id);
    const int64_t plane = i32_res_29181 * j_m_i_29333;
    const int64_t outer = squot64(flat, plane);
    const int64_t within = flat - outer * plane;
    const int64_t mid = squot64(within, j_m_i_29333);
    const int64_t inner = within - mid * j_m_i_29333;

    if ((slt64(outer, m_29166) && slt64(mid, i32_res_29181)) &&
        slt64(inner, j_m_i_29333)) {
        // Linear source position, then split by nm_29314 into two indices.
        const int64_t shifted = i32_res_29181 + inner;
        const int64_t base = x_29330 * outer + i32_res_29329 * mid;
        const int64_t pos = shifted + base;
        const int64_t src_hi = squot64(pos, nm_29314);
        const int64_t src_lo = pos - nm_29314 * src_hi;

        const float v = src[gauss_jordan_res_r_ixfn_44617 +
                            (src_hi * gauss_jordan_res_r_ixfn_44618 +
                             src_lo * gauss_jordan_res_r_ixfn_44620)];

        dst[outer * (j_m_i_29333 * i32_res_29181) + mid * j_m_i_29333 +
            inner] = v;
    }

    #undef segmap_group_sizze_40385
}
// Kernel mainzisegmap_39868.
//
// For each (row, col) with row < m_29166 and col < nm_29314, copies the
// float at mem_44605[row * nm_29314 + col] into mem_param_44585 at
// ctx_param_ext_44580 + row * ctx_param_ext_44581 + col * ctx_param_ext_44583.
__kernel void mainzisegmap_39868(__global int *global_failure, int64_t m_29166,
                                 int64_t nm_29314, int64_t ctx_param_ext_44580,
                                 int64_t ctx_param_ext_44581,
                                 int64_t ctx_param_ext_44583,
                                 __global unsigned char *mem_param_44585,
                                 __global unsigned char *mem_44605)
{
    #define segmap_group_sizze_40373 (mainzisegmap_group_sizze_39871)

    // Nothing to do once a failure code has been stored.
    if (*global_failure >= 0)
        return;

    const int32_t group_id = get_group_id(0);
    const int32_t local_id = get_local_id(0);

    __global float *dst = (__global float *) mem_param_44585;
    __global float *src = (__global float *) mem_44605;

    // Flat index -> (row, col) with nm_29314 columns.
    const int64_t flat = sext_i32_i64(group_id) * segmap_group_sizze_40373 +
        sext_i32_i64(local_id);
    const int64_t row = squot64(flat, nm_29314);
    const int64_t col = flat - row * nm_29314;

    if (slt64(row, m_29166) && slt64(col, nm_29314)) {
        // The load happens before the write-side range test, as originally.
        const float v = src[row * nm_29314 + col];
        const bool row_ok = sle64((int64_t) 0, row) && slt64(row, m_29166);
        const bool col_ok = sle64((int64_t) 0, col) && slt64(col, nm_29314);

        if (row_ok && col_ok) {
            dst[ctx_param_ext_44580 + (row * ctx_param_ext_44581 +
                                       col * ctx_param_ext_44583)] = v;
        }
    }

    #undef segmap_group_sizze_40373
}
// Kernel mainzisegmap_39938.
//
// One work item per (gtid_39936, gtid_39937) pair with
// gtid_39936 < m_29166 and gtid_39937 < nm_29314.  Each item computes one
// float from row gtid_39936 of mem_param_44585 (addressed through the
// offset/stride triple ctx_param_ext_44580/44581/44583) and stores it in
// mem_44605[gtid_39936 * nm_29314 + gtid_39937].
//
// The column index is split by m_29312 into a quotient and a remainder
// (sdiv32/smod32 are defined elsewhere; presumably signed division and
// modulus -- confirm).  The per-row flag read from mem_44601 selects between
// a plain copy and a divide/subtract update.
//
// Every computed column index is range-checked against nm_29314.  On a
// failed check the item tries to replace -1 in *global_failure with a
// distinct code (11..14); if that exchange returned -1 it stores the
// offending index and the bound in global_failure_args, and in either case
// it returns without writing a result.
//
// failure_is_an_option is not referenced in this kernel body.
__kernel void mainzisegmap_39938(__global int *global_failure,
                                 int failure_is_an_option, __global
                                 int64_t *global_failure_args, int64_t m_29166,
                                 int32_t k2p2zq_29179, int32_t m_29312,
                                 int64_t nm_29314, int32_t i_40240,
                                 int64_t i32_res_40242,
                                 int64_t ctx_param_ext_44580,
                                 int64_t ctx_param_ext_44581,
                                 int64_t ctx_param_ext_44583, __global
                                 unsigned char *mem_param_44585, __global
                                 unsigned char *mem_44601, __global
                                 unsigned char *mem_44605)
{
    #define segmap_group_sizze_40323 (mainzisegmap_group_sizze_39941)
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    
    // Skip all work once a failure code has been stored.
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_45866;
    int32_t local_tid_45867;
    int64_t group_sizze_45870;
    int32_t wave_sizze_45869;
    int32_t group_tid_45868;
    
    global_tid_45866 = get_global_id(0);
    local_tid_45867 = get_local_id(0);
    group_sizze_45870 = get_local_size(0);
    wave_sizze_45869 = LOCKSTEP_WIDTH;
    group_tid_45868 = get_group_id(0);
    
    int32_t phys_tid_39938;
    
    phys_tid_39938 = global_tid_45866;
    
    // Flat index (group * group size + local id) divided by nm_29314 ...
    int64_t gtid_39936;
    
    gtid_39936 = squot64(sext_i32_i64(group_tid_45868) *
                         segmap_group_sizze_40323 +
                         sext_i32_i64(local_tid_45867), nm_29314);
    
    // ... and the part of the flat index left over after that division.
    int64_t gtid_39937;
    
    gtid_39937 = sext_i32_i64(group_tid_45868) * segmap_group_sizze_40323 +
        sext_i32_i64(local_tid_45867) - squot64(sext_i32_i64(group_tid_45868) *
                                                segmap_group_sizze_40323 +
                                                sext_i32_i64(local_tid_45867),
                                                nm_29314) * nm_29314;
    if (slt64(gtid_39936, m_29166) && slt64(gtid_39937, nm_29314)) {
        // Per-row flag selecting the branch below.
        bool cond_40328 = ((__global bool *) mem_44601)[gtid_39936];
        int32_t defunc_0_f_res_40330 = sext_i64_i32(gtid_39937);
        // Quotient and remainder of the column index with respect to m_29312.
        int32_t defunc_0_f_res_40331 = sdiv32(defunc_0_f_res_40330, m_29312);
        int32_t defunc_0_f_res_40332 = smod32(defunc_0_f_res_40330, m_29312);
        float defunc_0_f_res_40333;
        
        if (cond_40328) {
            // Flag set: copy the element at column
            // remainder + m_29312 * quotient of the same row.
            int32_t x_40334 = mul32(m_29312, defunc_0_f_res_40331);
            int32_t i32_arg_40335 = add32(defunc_0_f_res_40332, x_40334);
            int64_t i32_res_40336 = sext_i32_i64(i32_arg_40335);
            bool x_40337 = sle64((int64_t) 0, i32_res_40336);
            bool y_40338 = slt64(i32_res_40336, nm_29314);
            bool bounds_check_40339 = x_40337 && y_40338;
            bool index_certs_40340;
            
            if (!bounds_check_40339) {
                {
                    // Failure code 11: index outside [0, nm_29314).
                    if (atomic_cmpxchg_i32_global(global_failure, -1, 11) ==
                        -1) {
                        global_failure_args[0] = i32_res_40336;
                        global_failure_args[1] = nm_29314;
                        ;
                    }
                    return;
                }
            }
            
            float defunc_0_f_res_t_res_40341 = ((__global
                                                 float *) mem_param_44585)[ctx_param_ext_44580 +
                                                                           (gtid_39936 *
                                                                            ctx_param_ext_44581 +
                                                                            i32_res_40336 *
                                                                            ctx_param_ext_44583)];
            
            defunc_0_f_res_40333 = defunc_0_f_res_t_res_40341;
        } else {
            // Flag clear: v1 is the row element at column i32_res_40242.
            // This load is not preceded by a range check in this kernel.
            float v1_40327 = ((__global
                               float *) mem_param_44585)[ctx_param_ext_44580 +
                                                         (gtid_39936 *
                                                          ctx_param_ext_44581 +
                                                          i32_res_40242 *
                                                          ctx_param_ext_44583)];
            int64_t i32_res_40342 = sext_i32_i64(defunc_0_f_res_40332);
            bool x_40343 = sle64((int64_t) 0, i32_res_40342);
            bool y_40344 = slt64(i32_res_40342, nm_29314);
            bool bounds_check_40345 = x_40343 && y_40344;
            bool index_certs_40346;
            
            if (!bounds_check_40345) {
                {
                    // Failure code 12: remainder column outside [0, nm_29314).
                    if (atomic_cmpxchg_i32_global(global_failure, -1, 12) ==
                        -1) {
                        global_failure_args[0] = i32_res_40342;
                        global_failure_args[1] = nm_29314;
                        ;
                    }
                    return;
                }
            }
            
            // Row element at the remainder column, divided by v1.
            float x_40347 = ((__global
                              float *) mem_param_44585)[ctx_param_ext_44580 +
                                                        (gtid_39936 *
                                                         ctx_param_ext_44581 +
                                                         i32_res_40342 *
                                                         ctx_param_ext_44583)];
            float x_40348 = x_40347 / v1_40327;
            int32_t y_40349 = sub32(k2p2zq_29179, 1);
            bool cond_40350 = slt32(defunc_0_f_res_40331, y_40349);
            float defunc_0_f_res_f_res_40351;
            
            if (cond_40350) {
                // quotient < k2p2zq_29179 - 1: combine two elements taken at
                // columns offset by m_29312 * (quotient + 1).
                int32_t x_40352 = add32(1, defunc_0_f_res_40331);
                int32_t x_40353 = mul32(m_29312, x_40352);
                int32_t i32_arg_40354 = add32(defunc_0_f_res_40332, x_40353);
                int64_t i32_res_40355 = sext_i32_i64(i32_arg_40354);
                bool x_40356 = sle64((int64_t) 0, i32_res_40355);
                bool y_40357 = slt64(i32_res_40355, nm_29314);
                bool bounds_check_40358 = x_40356 && y_40357;
                bool index_certs_40359;
                
                if (!bounds_check_40358) {
                    {
                        // Failure code 13.
                        if (atomic_cmpxchg_i32_global(global_failure, -1, 13) ==
                            -1) {
                            global_failure_args[0] = i32_res_40355;
                            global_failure_args[1] = nm_29314;
                            ;
                        }
                        return;
                    }
                }
                
                // Element at column remainder + m_29312 * (quotient + 1).
                float x_40360 = ((__global
                                  float *) mem_param_44585)[ctx_param_ext_44580 +
                                                            (gtid_39936 *
                                                             ctx_param_ext_44581 +
                                                             i32_res_40355 *
                                                             ctx_param_ext_44583)];
                int32_t i32_arg_40361 = add32(i_40240, x_40353);
                int64_t i32_res_40362 = sext_i32_i64(i32_arg_40361);
                bool x_40363 = sle64((int64_t) 0, i32_res_40362);
                bool y_40364 = slt64(i32_res_40362, nm_29314);
                bool bounds_check_40365 = x_40363 && y_40364;
                bool index_certs_40366;
                
                if (!bounds_check_40365) {
                    {
                        // Failure code 14.
                        if (atomic_cmpxchg_i32_global(global_failure, -1, 14) ==
                            -1) {
                            global_failure_args[0] = i32_res_40362;
                            global_failure_args[1] = nm_29314;
                            ;
                        }
                        return;
                    }
                }
                
                // Element at column i_40240 + m_29312 * (quotient + 1).
                float x_40367 = ((__global
                                  float *) mem_param_44585)[ctx_param_ext_44580 +
                                                            (gtid_39936 *
                                                             ctx_param_ext_44581 +
                                                             i32_res_40362 *
                                                             ctx_param_ext_44583)];
                // result = x_40360 - (x_40347 / v1) * x_40367
                float y_40368 = x_40348 * x_40367;
                float defunc_0_f_res_f_res_t_res_40369 = x_40360 - y_40368;
                
                defunc_0_f_res_f_res_40351 = defunc_0_f_res_f_res_t_res_40369;
            } else {
                // Otherwise the result is just x_40347 / v1.
                defunc_0_f_res_f_res_40351 = x_40348;
            }
            defunc_0_f_res_40333 = defunc_0_f_res_f_res_40351;
        }
        // Store the result for this (gtid_39936, gtid_39937) pair.
        ((__global float *) mem_44605)[gtid_39936 * nm_29314 + gtid_39937] =
            defunc_0_f_res_40333;
    }
    
  error_0:
    return;
    #undef segmap_group_sizze_40323
}
// Kernel mainzisegmap_40038.
//
// For each row < m_29166, reads the float of mem_param_44585 at
// ctx_param_ext_44580 + row * ctx_param_ext_44581 +
// i32_res_40242 * ctx_param_ext_44583 and stores in mem_44601[row] whether
// that value compares equal to 0.0F.
__kernel void mainzisegmap_40038(__global int *global_failure, int64_t m_29166,
                                 int64_t i32_res_40242,
                                 int64_t ctx_param_ext_44580,
                                 int64_t ctx_param_ext_44581,
                                 int64_t ctx_param_ext_44583,
                                 __global unsigned char *mem_param_44585,
                                 __global unsigned char *mem_44601)
{
    #define segmap_group_sizze_40307 (mainzisegmap_group_sizze_40040)

    // Nothing to do once a failure code has been stored.
    if (*global_failure >= 0)
        return;

    const int32_t group_id = get_group_id(0);
    const int32_t local_id = get_local_id(0);

    __global float *src = (__global float *) mem_param_44585;
    __global bool *flags = (__global bool *) mem_44601;

    const int64_t row = sext_i32_i64(group_id) * segmap_group_sizze_40307 +
        sext_i32_i64(local_id);

    if (slt64(row, m_29166)) {
        const float v1 = src[ctx_param_ext_44580 +
                             (row * ctx_param_ext_44581 +
                              i32_res_40242 * ctx_param_ext_44583)];

        flags[row] = (v1 == 0.0F);
    }

    #undef segmap_group_sizze_40307
}
// Kernel mainzisegmap_40155.
//
// Fills mem_44577[row * nm_29314 + col] for row < m_29166, col < nm_29314.
// The column is split by m_29312 into quot32 and rem32 (sdiv32/smod32):
//   * rem32 < k2p2zq_29179: copy
//     defunc_3_map_res_mem_44549[row][quot32][rem32], a layout with inner
//     extents i32_res_29181 x i32_res_29181, after checking both indices
//     against [0, i32_res_29181);
//   * otherwise: 1.0F if rem32 == k2p2zq_29179 + quot32, else 0.0F.
//
// On a failed index check the item tries to replace -1 in *global_failure
// with code 6; if that exchange returned -1 it records both indices and both
// bounds in global_failure_args.  Either way it returns without storing.
// failure_is_an_option is not referenced in the body.
__kernel void mainzisegmap_40155(__global int *global_failure,
                                 int failure_is_an_option,
                                 __global int64_t *global_failure_args,
                                 int64_t m_29166, int32_t k2p2zq_29179,
                                 int64_t i32_res_29181, int32_t m_29312,
                                 int64_t nm_29314,
                                 __global unsigned char *defunc_3_map_res_mem_44549,
                                 __global unsigned char *mem_44577)
{
    #define segmap_group_sizze_40215 (mainzisegmap_group_sizze_40158)

    // Nothing to do once a failure code has been stored.
    if (*global_failure >= 0)
        return;

    const int32_t group_id = get_group_id(0);
    const int32_t local_id = get_local_id(0);

    __global float *src = (__global float *) defunc_3_map_res_mem_44549;
    __global float *dst = (__global float *) mem_44577;

    // Flat index -> (row, col) with nm_29314 columns.
    const int64_t flat = sext_i32_i64(group_id) * segmap_group_sizze_40215 +
        sext_i32_i64(local_id);
    const int64_t row = squot64(flat, nm_29314);
    const int64_t col = flat - row * nm_29314;

    if (slt64(row, m_29166) && slt64(col, nm_29314)) {
        const int32_t col32 = sext_i64_i32(col);
        const int32_t quot32 = sdiv32(col32, m_29312);
        const int32_t rem32 = smod32(col32, m_29312);
        float value;

        if (slt32(rem32, k2p2zq_29179)) {
            const int64_t i = sext_i32_i64(quot32);
            const int64_t j = sext_i32_i64(rem32);
            const bool i_ok = sle64((int64_t) 0, i) &&
                slt64(i, i32_res_29181);
            const bool j_ok = sle64((int64_t) 0, j) &&
                slt64(j, i32_res_29181);

            if (!(i_ok && j_ok)) {
                // Report the failure (code 6) unless one is already stored.
                if (atomic_cmpxchg_i32_global(global_failure, -1, 6) == -1) {
                    global_failure_args[0] = i;
                    global_failure_args[1] = j;
                    global_failure_args[2] = i32_res_29181;
                    global_failure_args[3] = i32_res_29181;
                }
                return;
            }
            value = src[row * (i32_res_29181 * i32_res_29181) +
                        i * i32_res_29181 + j];
        } else {
            const int32_t target = add32(k2p2zq_29179, quot32);

            value = (rem32 == target) ? 1.0F : 0.0F;
        }
        dst[row * nm_29314 + col] = value;
    }

    #undef segmap_group_sizze_40215
}
// Kernel mainzisegmap_40396.
//
// Each work item handles one index elem < m_29166.  For every
// j < k2p2zq_29179 it accumulates, over t < n_29169,
//     binop_p_mem_44390[j * N_29165 + t] * mem_44632[t * m_29166 + elem]
// where a term is 0.0F (and the first factor is not loaded) whenever the
// second factor is NaN according to futrts_isnan32.  The sums are staged in
// a per-thread slot of mem_44635 and then copied to
// mem_44650[j * m_29166 + elem] for j < i32_res_29181.
//
// A physical work-group processes the virtual groups
// phys_group, phys_group + num_groups_40417, ... in turn.
__kernel void mainzisegmap_40396(__global int *global_failure, int64_t N_29165,
                                 int64_t m_29166, int32_t n_29169,
                                 int32_t k2p2zq_29179, int64_t i32_res_29181,
                                 int64_t num_groups_40417,
                                 __global unsigned char *binop_p_mem_44390,
                                 __global unsigned char *mem_44632,
                                 __global unsigned char *mem_44635,
                                 __global unsigned char *mem_44650)
{
    #define segmap_group_sizze_40416 (mainzisegmap_group_sizze_40398)

    // Nothing to do once a failure code has been stored.
    if (*global_failure >= 0)
        return;

    const int32_t phys_tid = get_global_id(0);
    const int32_t local_id = get_local_id(0);
    const int32_t phys_group = get_group_id(0);

    // Float views of the byte buffers.
    __global float *lhs = (__global float *) binop_p_mem_44390;
    __global float *rhs = (__global float *) mem_44632;
    __global float *scratch = (__global float *) mem_44635;
    __global float *dst = (__global float *) mem_44650;

    // Loop-invariant quantities of the virtual-group loop.
    const int32_t group_stride = sext_i64_i32(num_groups_40417);
    const int32_t virt_group_count =
        sext_i64_i32(sdiv_up64(m_29166, segmap_group_sizze_40416));
    const int32_t passes = sdiv_up32(virt_group_count - phys_group,
                                     group_stride);
    // Distance between consecutive staged values of one thread.
    const int64_t scratch_stride = num_groups_40417 *
        segmap_group_sizze_40416;

    for (int32_t pass_no = 0; pass_no < passes; pass_no++) {
        const int32_t virt_group = phys_group + pass_no * group_stride;
        const int64_t elem = sext_i32_i64(virt_group) *
            segmap_group_sizze_40416 + sext_i32_i64(local_id);

        if (slt64(elem, m_29166)) {
            for (int32_t j32 = 0; j32 < k2p2zq_29179; j32++) {
                const int64_t j = sext_i32_i64(j32);
                float acc = 0.0F;

                for (int32_t t32 = 0; t32 < n_29169; t32++) {
                    const int64_t t = sext_i32_i64(t32);
                    const float b = rhs[t * m_29166 + elem];
                    float term;

                    if (futrts_isnan32(b)) {
                        term = 0.0F;
                    } else {
                        // Loaded only on the non-NaN path, as originally.
                        const float a = lhs[j * N_29165 + t];

                        term = a * b;
                    }
                    acc = term + acc;
                }
                scratch[phys_tid + j * scratch_stride] = acc;
            }
            // Copy the staged values of this thread to their final place.
            for (int64_t j = 0; j < i32_res_29181; j++) {
                dst[j * m_29166 + elem] =
                    scratch[phys_tid + j * scratch_stride];
            }
        }
        // Reached by every work item of the group on every pass, because it
        // sits outside the range check.
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }

    #undef segmap_group_sizze_40416
}
// For every index gtid < m_29166, computes k2p2zq_29179 sums of products:
//   sum over k in [0, k2p2zq_29179) of
//     mem_44857[k * m + gtid] * mem_44854[row * (m * i32_res) + k * m + gtid]
// Each sum is first staged in mem_44860 (addressed by the physical global id)
// and then copied to mem_44875[row * m + gtid].
// Physical work-groups iterate over several virtual groups so that all
// sdiv_up64(m, group size) virtual groups are covered by num_groups_40557
// launched groups.
// NOTE(review): the staging loop fills k2p2zq_29179 rows while the copy loop
// reads i32_res_29181 rows; presumably the two are equal - confirm on the host.
__kernel void mainzisegmap_40537(__global int *global_failure, int64_t m_29166,
                                 int32_t k2p2zq_29179, int64_t i32_res_29181,
                                 int64_t num_groups_40557, __global
                                 unsigned char *mem_44854, __global
                                 unsigned char *mem_44857, __global
                                 unsigned char *mem_44860, __global
                                 unsigned char *mem_44875)
{
    #define segmap_group_sizze_40556 (mainzisegmap_group_sizze_40539)

    // Not referenced in this kernel; retained from the generated prologue.
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;

    // A non-negative value means a failure code has already been stored.
    if (*global_failure >= 0)
        return;

    // Typed views of the untyped byte buffers.
    __global float *factors = (__global float *) mem_44857;
    __global float *slabs = (__global float *) mem_44854;
    __global float *scratch = (__global float *) mem_44860;
    __global float *result = (__global float *) mem_44875;

    const int32_t phys_tid = get_global_id(0);
    const int32_t local_tid = get_local_id(0);
    const int32_t phys_group = get_group_id(0);

    // Loop-invariant quantities, computed once up front.
    const int32_t launched_groups = sext_i64_i32(num_groups_40557);
    const int32_t chunk_count =
        sdiv_up32(sext_i64_i32(sdiv_up64(m_29166, segmap_group_sizze_40556)) -
                  phys_group, launched_groups);
    const int64_t scratch_stride = num_groups_40557 * segmap_group_sizze_40556;

    for (int32_t chunk = 0; chunk < chunk_count; chunk++) {
        const int32_t virt_group = phys_group + chunk * launched_groups;
        const int64_t gtid = sext_i32_i64(virt_group) *
            segmap_group_sizze_40556 + sext_i32_i64(local_tid);

        if (slt64(gtid, m_29166)) {
            // Stage one accumulated sum per row in the scratch buffer.
            for (int32_t row32 = 0; row32 < k2p2zq_29179; row32++) {
                const int64_t row = sext_i32_i64(row32);
                const int64_t slab_base = row * (m_29166 * i32_res_29181);
                float acc = 0.0F;

                for (int32_t k32 = 0; k32 < k2p2zq_29179; k32++) {
                    const int64_t k = sext_i32_i64(k32);
                    const float lhs = factors[k * m_29166 + gtid];
                    const float rhs = slabs[slab_base + k * m_29166 + gtid];
                    // Product kept as its own statement, as in the
                    // generated code.
                    const float prod = lhs * rhs;

                    acc = prod + acc;
                }
                scratch[phys_tid + row * scratch_stride] = acc;
            }
            // Copy the staged rows to their final position.
            for (int64_t row = 0; row < i32_res_29181; row++) {
                result[row * m_29166 + gtid] =
                    scratch[phys_tid + row * scratch_stride];
            }
        }
        // Executed by every work-item of the group, whether or not it had
        // an in-range gtid in this iteration.
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }

    #undef segmap_group_sizze_40556
}
// For every index gtid < m_29166 and every row in [0, N_29165), computes
//   sum over k in [0, k2p2zq_29179) of
//     mem_44919[k * m + gtid] * mem_44397[row * i32_res + k]
// The sums are staged in mem_44922 (addressed by the physical global id) and
// then copied to mem_44937[row * m + gtid].
// Physical work-groups iterate over several virtual groups so that all
// sdiv_up64(m, group size) virtual groups are covered by num_groups_40688
// launched groups.
__kernel void mainzisegmap_40669(__global int *global_failure, int64_t N_29165,
                                 int64_t m_29166, int32_t k2p2zq_29179,
                                 int64_t i32_res_29181,
                                 int64_t num_groups_40688, __global
                                 unsigned char *mem_44397, __global
                                 unsigned char *mem_44919, __global
                                 unsigned char *mem_44922, __global
                                 unsigned char *mem_44937)
{
    #define segmap_group_sizze_40687 (mainzisegmap_group_sizze_40671)

    // Not referenced in this kernel; retained from the generated prologue.
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;

    // A non-negative value means a failure code has already been stored.
    if (*global_failure >= 0)
        return;

    // Typed views of the untyped byte buffers.
    __global float *per_gtid = (__global float *) mem_44919;
    __global float *per_row = (__global float *) mem_44397;
    __global float *scratch = (__global float *) mem_44922;
    __global float *result = (__global float *) mem_44937;

    const int32_t phys_tid = get_global_id(0);
    const int32_t local_tid = get_local_id(0);
    const int32_t phys_group = get_group_id(0);

    // Loop-invariant quantities, computed once up front.
    const int32_t launched_groups = sext_i64_i32(num_groups_40688);
    const int32_t chunk_count =
        sdiv_up32(sext_i64_i32(sdiv_up64(m_29166, segmap_group_sizze_40687)) -
                  phys_group, launched_groups);
    const int64_t scratch_stride = num_groups_40688 * segmap_group_sizze_40687;

    for (int32_t chunk = 0; chunk < chunk_count; chunk++) {
        const int32_t virt_group = phys_group + chunk * launched_groups;
        const int64_t gtid = sext_i32_i64(virt_group) *
            segmap_group_sizze_40687 + sext_i32_i64(local_tid);

        if (slt64(gtid, m_29166)) {
            // Stage one accumulated sum per row in the scratch buffer.
            for (int64_t row = 0; row < N_29165; row++) {
                float acc = 0.0F;

                for (int32_t k32 = 0; k32 < k2p2zq_29179; k32++) {
                    const int64_t k = sext_i32_i64(k32);
                    const float lhs = per_gtid[k * m_29166 + gtid];
                    const float rhs = per_row[row * i32_res_29181 + k];
                    // Product kept as its own statement, as in the
                    // generated code.
                    const float prod = lhs * rhs;

                    acc = prod + acc;
                }
                scratch[phys_tid + row * scratch_stride] = acc;
            }
            // Copy the staged rows to their final position.
            for (int64_t row = 0; row < N_29165; row++) {
                result[row * m_29166 + gtid] =
                    scratch[phys_tid + row * scratch_stride];
            }
        }
        // Executed by every work-item of the group, whether or not it had
        // an in-range gtid in this iteration.
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }

    #undef segmap_group_sizze_40687
}
// Scatter kernel over an m_29166 x N_29165 index space, one work-item per
// (row, col) pair derived from the flat work-item index.
// For element (row, col):
//   - value x is read from mem_45166[row * N + col];
//   - if x is not NaN the target column is mem_45163[row * N + col] - 1,
//     otherwise it is -1 (which fails the range test below);
//   - when the target column lies in [0, N) the column index is written to
//     mem_45175[row * N + target] and x to mem_45172[row * N + target].
__kernel void mainzisegmap_40949(__global int *global_failure, int64_t N_29165,
                                 int64_t m_29166, __global
                                 unsigned char *mem_45163, __global
                                 unsigned char *mem_45166, __global
                                 unsigned char *mem_45172, __global
                                 unsigned char *mem_45175)
{
    #define segmap_group_sizze_41115 (mainzisegmap_group_sizze_40952)

    // Not referenced in this kernel; retained from the generated prologue.
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;

    // A non-negative value means a failure code has already been stored.
    if (*global_failure >= 0)
        return;

    const int32_t local_tid = get_local_id(0);
    const int32_t group_tid = get_group_id(0);

    // Split the flat index into quotient (row) and remainder (col) by N.
    const int64_t flat = sext_i32_i64(group_tid) * segmap_group_sizze_41115 +
        sext_i32_i64(local_tid);
    const int64_t row = squot64(flat, N_29165);
    const int64_t col = flat - row * N_29165;

    if (slt64(row, m_29166) && slt64(col, N_29165)) {
        const int64_t src = row * N_29165 + col;
        const float value = ((__global float *) mem_45166)[src];
        const int32_t col_i32 = sext_i64_i32(col);
        const bool is_nan = futrts_isnan32(value);
        int64_t target;

        // The offset table is only read for non-NaN values.
        if (!is_nan) {
            const int64_t offset = ((__global int64_t *) mem_45163)[src];

            target = sub64(offset, (int64_t) 1);
        } else {
            target = (int64_t) -1;
        }

        // Both stores share the same range test, so it is evaluated once;
        // the index store still precedes the value store.
        const bool row_ok = sle64((int64_t) 0, row) && slt64(row, m_29166);
        const bool target_ok = sle64((int64_t) 0, target) &&
            slt64(target, N_29165);

        if (row_ok && target_ok) {
            const int64_t dst = row * N_29165 + target;

            ((__global int32_t *) mem_45175)[dst] = col_i32;
            ((__global float *) mem_45172)[dst] = value;
        }
    }

    #undef segmap_group_sizze_41115
}
// For every row gtid < m_29166, reads the 64-bit value at column i_29469 of
// the row-major m x N_29165 table in mem_45163, converts it to 32 bits with
// sext_i64_i32 and stores it in mem_45169[gtid].
__kernel void mainzisegmap_41025(__global int *global_failure, int64_t N_29165,
                                 int64_t m_29166, int64_t i_29469, __global
                                 unsigned char *mem_45163, __global
                                 unsigned char *mem_45169)
{
    #define segmap_group_sizze_41079 (mainzisegmap_group_sizze_41027)

    // Not referenced in this kernel; retained from the generated prologue.
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;

    // A non-negative value means a failure code has already been stored.
    if (*global_failure >= 0)
        return;

    const int32_t local_tid = get_local_id(0);
    const int32_t group_tid = get_group_id(0);
    const int64_t gtid = sext_i32_i64(group_tid) * segmap_group_sizze_41079 +
        sext_i32_i64(local_tid);

    // Work-items past the end of the index space have nothing to do.
    if (!slt64(gtid, m_29166))
        return;

    __global int64_t *table = (__global int64_t *) mem_45163;
    __global int32_t *out = (__global int32_t *) mem_45169;
    const int64_t picked = table[gtid * N_29165 + i_29469];

    out[gtid] = sext_i64_i32(picked);

    #undef segmap_group_sizze_41079
}
// For every index gtid < m_29166, with n = mem_45232[gtid] (int32) and
// s = mem_45235[gtid] (float), writes
//   mem_45238[gtid] = fptosi_f32_i32(hfrac_29171 * float(n))
//   mem_45240[gtid] = futrts_sqrt32(s / float(n - k2p2_29177))
// No guard against n == k2p2_29177 is present; the division is performed
// as-is.
__kernel void mainzisegmap_41288(__global int *global_failure, int64_t m_29166,
                                 float hfrac_29171, int32_t k2p2_29177, __global
                                 unsigned char *mem_45232, __global
                                 unsigned char *mem_45235, __global
                                 unsigned char *mem_45238, __global
                                 unsigned char *mem_45240)
{
    #define segmap_group_sizze_41381 (mainzisegmap_group_sizze_41290)

    // Not referenced in this kernel; retained from the generated prologue.
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;

    // A non-negative value means a failure code has already been stored.
    if (*global_failure >= 0)
        return;

    const int32_t local_tid = get_local_id(0);
    const int32_t group_tid = get_group_id(0);
    const int64_t gtid = sext_i32_i64(group_tid) * segmap_group_sizze_41381 +
        sext_i32_i64(local_tid);

    // Work-items past the end of the index space have nothing to do.
    if (!slt64(gtid, m_29166))
        return;

    const int32_t n_val = ((__global int32_t *) mem_45232)[gtid];
    const float s_val = ((__global float *) mem_45235)[gtid];

    // Square root of s divided by (n - k2p2).
    const float denom = sitofp_i32_f32(sub32(n_val, k2p2_29177));
    const float quotient = s_val / denom;
    const float root = futrts_sqrt32(quotient);

    // hfrac * n, converted back to a signed 32-bit integer.
    const float scaled = hfrac_29171 * sitofp_i32_f32(n_val);
    const int32_t scaled_i32 = fptosi_f32_i32(scaled);

    ((__global int32_t *) mem_45238)[gtid] = scaled_i32;
    ((__global float *) mem_45240)[gtid] = root;

    #undef segmap_group_sizze_41381
}
// For every index gtid < iota32_arg_29597:
//   - idx = n_29169 + gtid (32-bit add via add32) must lie in [0, N_29165);
//     otherwise failure code 24 is recorded (first writer wins) together
//     with the offending index and N, and the work-item returns;
//   - t = float(mappingindices[idx]) / i32_res_29609;
//   - lp = log(t) when 2.7182817F (approximately e) < t, else 1.0F;
//   - mem_45282[gtid] = gtid as int32, mem_45284[gtid] = lam_29172 * sqrt(lp).
// failure_is_an_option is not read by this kernel.
__kernel void mainzisegmap_41589(__global int *global_failure,
                                 int failure_is_an_option, __global
                                 int64_t *global_failure_args, int64_t N_29165,
                                 int32_t n_29169, float lam_29172,
                                 int64_t iota32_arg_29597, float i32_res_29609,
                                 __global
                                 unsigned char *mappingindices_mem_44380,
                                 __global unsigned char *mem_45282, __global
                                 unsigned char *mem_45284)
{
    #define segmap_group_sizze_41611 (mainzisegmap_group_sizze_41591)

    // Not referenced in this kernel; retained from the generated prologue.
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;

    // A non-negative value means a failure code has already been stored.
    if (*global_failure >= 0)
        return;

    const int32_t local_tid = get_local_id(0);
    const int32_t group_tid = get_group_id(0);
    const int64_t gtid = sext_i32_i64(group_tid) * segmap_group_sizze_41611 +
        sext_i32_i64(local_tid);

    // Work-items past the end of the index space have nothing to do.
    if (!slt64(gtid, iota32_arg_29597))
        return;

    const int32_t offset = sext_i64_i32(gtid);
    const int64_t idx = sext_i32_i64(add32(n_29169, offset));

    // Bounds check on the lookup index; only the first failing work-item
    // (the one that swaps -1 out of *global_failure) records the arguments.
    if (!sle64((int64_t) 0, idx) || !slt64(idx, N_29165)) {
        if (atomic_cmpxchg_i32_global(global_failure, -1, 24) == -1) {
            global_failure_args[0] = idx;
            global_failure_args[1] = N_29165;
        }
        return;
    }

    const int32_t stamp = ((__global int32_t *) mappingindices_mem_44380)[idx];
    const float ratio = sitofp_i32_f32(stamp) / i32_res_29609;

    // The logarithm is only evaluated when ratio exceeds the threshold.
    const float log_plus = (2.7182817F < ratio) ? futrts_log32(ratio) : 1.0F;
    const float root = futrts_sqrt32(log_plus);
    const float bound = lam_29172 * root;

    ((__global int32_t *) mem_45282)[gtid] = offset;
    ((__global float *) mem_45284)[gtid] = bound;

    #undef segmap_group_sizze_41611
}
// For every index gtid < m_29166, with
//   x    = defunc_3_map_res_mem_45245[gtid]   (int32)
//   y    = mem_45298[gtid]                    (int32)
//   flag = mem_45305[gtid]                    (bool)
// the kernel writes two results:
//   mem_45314[gtid] = 0 when y == 0, else mem_45309[gtid] / float(y)
//   mem_45312[gtid] =
//       -2                      when x <= 5 or y <= 5,
//       v - n_29169             when flag is set and c < y, where
//                               c = mem_45307[gtid] and
//                               v = defunc_4_map_res_mem_45179[gtid * N + x + c],
//       -1                      otherwise.
// The lookup index x + c is bounds-checked against [0, N_29165); on
// violation failure code 30 is recorded (first writer wins) and the
// work-item returns without writing either result.
// failure_is_an_option is not read by this kernel.
__kernel void mainzisegmap_42001(__global int *global_failure,
                                 int failure_is_an_option, __global
                                 int64_t *global_failure_args, int64_t N_29165,
                                 int64_t m_29166, int32_t n_29169, __global
                                 unsigned char *defunc_4_map_res_mem_45179,
                                 __global
                                 unsigned char *defunc_3_map_res_mem_45245,
                                 __global unsigned char *mem_45298, __global
                                 unsigned char *mem_45305, __global
                                 unsigned char *mem_45307, __global
                                 unsigned char *mem_45309, __global
                                 unsigned char *mem_45312, __global
                                 unsigned char *mem_45314)
{
    #define segmap_group_sizze_42285 (mainzisegmap_group_sizze_42003)

    // Not referenced in this kernel; retained from the generated prologue.
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;

    // A non-negative value means a failure code has already been stored.
    if (*global_failure >= 0)
        return;

    const int32_t local_tid = get_local_id(0);
    const int32_t group_tid = get_group_id(0);
    const int64_t gtid = sext_i32_i64(group_tid) * segmap_group_sizze_42285 +
        sext_i32_i64(local_tid);

    // Work-items past the end of the index space have nothing to do.
    if (!slt64(gtid, m_29166))
        return;

    const int32_t x_val =
        ((__global int32_t *) defunc_3_map_res_mem_45245)[gtid];
    const int32_t y_val = ((__global int32_t *) mem_45298)[gtid];
    const bool flag = ((__global bool *) mem_45305)[gtid];

    // Candidate offset; only loaded when the flag is set, -1 otherwise.
    int32_t cand = -1;

    if (flag)
        cand = ((__global int32_t *) mem_45307)[gtid];

    // Quotient result; the numerator is only loaded when y is non-zero.
    float quotient = 0.0F;

    if (!(y_val == 0)) {
        const float numer = ((__global float *) mem_45309)[gtid];
        const float denom = sitofp_i32_f32(y_val);

        quotient = numer / denom;
    }

    // Adjusted lookup value; stays -1 unless the flag is set and cand < y.
    int32_t adjusted = -1;

    if (flag && slt32(cand, y_val)) {
        const int64_t idx = sext_i32_i64(add32(x_val, cand));

        // Only the first failing work-item (the one that swaps -1 out of
        // *global_failure) records the arguments.
        if (!sle64((int64_t) 0, idx) || !slt64(idx, N_29165)) {
            if (atomic_cmpxchg_i32_global(global_failure, -1, 30) == -1) {
                global_failure_args[0] = idx;
                global_failure_args[1] = N_29165;
            }
            return;
        }

        const int32_t looked_up =
            ((__global int32_t *) defunc_4_map_res_mem_45179)[gtid * N_29165 +
                                                              idx];

        adjusted = sub32(looked_up, n_29169);
    }

    // Equivalent to the generated a || (b && !a).
    const bool any_le_5 = sle32(x_val, 5) || sle32(y_val, 5);
    int32_t first_out;

    if (any_le_5) {
        first_out = -2;
    } else {
        first_out = adjusted;
    }
    ((__global int32_t *) mem_45312)[gtid] = first_out;
    ((__global float *) mem_45314)[gtid] = quotient;

    #undef segmap_group_sizze_42285
}
// For every index gtid < m_29166, with
//   a = defunc_4_map_res_mem_45177[gtid]   (int32)
//   b = defunc_3_map_res_mem_45245[gtid]   (int32)
//   c = defunc_3_map_res_mem_45246[gtid]   (float)
// writes mem_45296[gtid] = c * sqrt(float(b)) and mem_45298[gtid] = a - b.
// Physical work-groups iterate over several virtual groups so that all
// sdiv_up64(m, group size) virtual groups are covered by num_groups_42178
// launched groups.
__kernel void mainzisegmap_42155(__global int *global_failure, int64_t m_29166,
                                 int64_t num_groups_42178, __global
                                 unsigned char *defunc_4_map_res_mem_45177,
                                 __global
                                 unsigned char *defunc_3_map_res_mem_45245,
                                 __global
                                 unsigned char *defunc_3_map_res_mem_45246,
                                 __global unsigned char *mem_45296, __global
                                 unsigned char *mem_45298)
{
    #define segmap_group_sizze_42177 (mainzisegmap_group_sizze_42157)

    // Not referenced in this kernel; retained from the generated prologue.
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;

    // A non-negative value means a failure code has already been stored.
    if (*global_failure >= 0)
        return;

    // Typed views of the untyped byte buffers.
    __global int32_t *in_a = (__global int32_t *) defunc_4_map_res_mem_45177;
    __global int32_t *in_b = (__global int32_t *) defunc_3_map_res_mem_45245;
    __global float *in_c = (__global float *) defunc_3_map_res_mem_45246;
    __global float *out_scaled = (__global float *) mem_45296;
    __global int32_t *out_diff = (__global int32_t *) mem_45298;

    const int32_t local_tid = get_local_id(0);
    const int32_t phys_group = get_group_id(0);

    // Loop-invariant quantities, computed once up front.
    const int32_t launched_groups = sext_i64_i32(num_groups_42178);
    const int32_t chunk_count =
        sdiv_up32(sext_i64_i32(sdiv_up64(m_29166, segmap_group_sizze_42177)) -
                  phys_group, launched_groups);

    for (int32_t chunk = 0; chunk < chunk_count; chunk++) {
        const int32_t virt_group = phys_group + chunk * launched_groups;
        const int64_t gtid = sext_i32_i64(virt_group) *
            segmap_group_sizze_42177 + sext_i32_i64(local_tid);

        if (slt64(gtid, m_29166)) {
            const int32_t a_val = in_a[gtid];
            const int32_t b_val = in_b[gtid];
            const float c_val = in_c[gtid];
            const int32_t diff = sub32(a_val, b_val);
            const float root = futrts_sqrt32(sitofp_i32_f32(b_val));
            const float scaled = c_val * root;

            out_scaled[gtid] = scaled;
            out_diff[gtid] = diff;
        }
        // Executed by every work-item of the group, whether or not it had
        // an in-range gtid in this iteration.
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }

    #undef segmap_group_sizze_42177
}
__kernel void mainzisegmap_intragroup_39374(__global int *global_failure,
                                            int failure_is_an_option, __global
                                            int64_t *global_failure_args,
                                            __local volatile
                                            int64_t *mem_44563_backing_aligned_0,
                                            __local volatile
                                            int64_t *mem_44553_backing_aligned_1,
                                            int32_t k2p2zq_29179,
                                            int64_t i32_res_29181,
                                            int32_t m_29312, int64_t nm_29314,
                                            int64_t i32_res_29329, __global
                                            unsigned char *defunc_3_map_res_mem_44549,
                                            __global unsigned char *mem_44573)
{
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict mem_44563_backing_1 = (__local volatile
                                                           char *) mem_44563_backing_aligned_0;
    __local volatile char *restrict mem_44553_backing_0 = (__local volatile
                                                           char *) mem_44553_backing_aligned_1;
    volatile __local bool local_failure;
    
    if (failure_is_an_option) {
        int failed = *global_failure >= 0;
        
        if (failed)
            return;
    }
    local_failure = false;
    barrier(CLK_LOCAL_MEM_FENCE);
    
    int32_t global_tid_45820;
    int32_t local_tid_45821;
    int64_t group_sizze_45824;
    int32_t wave_sizze_45823;
    int32_t group_tid_45822;
    
    global_tid_45820 = get_global_id(0);
    local_tid_45821 = get_local_id(0);
    group_sizze_45824 = get_local_size(0);
    wave_sizze_45823 = LOCKSTEP_WIDTH;
    group_tid_45822 = get_group_id(0);
    
    int32_t phys_tid_39374;
    
    phys_tid_39374 = group_tid_45822;
    
    int32_t ltid_pre_45825;
    
    ltid_pre_45825 = local_tid_45821;
    
    int64_t gtid_39305;
    
    gtid_39305 = sext_i32_i64(group_tid_45822);
    
    __local char *mem_44553;
    
    mem_44553 = (__local char *) mem_44553_backing_0;
    
    int64_t gtid_39308 = sext_i32_i64(ltid_pre_45825);
    int32_t phys_tid_39309 = local_tid_45821;
    int32_t index_primexp_42354 = sext_i64_i32(gtid_39308);
    int32_t defunc_0_f_res_39555 = sdiv32(index_primexp_42354, m_29312);
    int32_t defunc_0_f_res_39556 = smod32(index_primexp_42354, m_29312);
    bool cond_39557 = slt32(defunc_0_f_res_39556, k2p2zq_29179);
    float defunc_0_f_res_39558;
    
    if (cond_39557) {
        int64_t i_39559 = sext_i32_i64(defunc_0_f_res_39555);
        bool x_39560 = sle64((int64_t) 0, i_39559);
        bool y_39561 = slt64(i_39559, i32_res_29181);
        bool bounds_check_39562 = x_39560 && y_39561;
        int64_t j_39563 = sext_i32_i64(defunc_0_f_res_39556);
        bool x_39564 = sle64((int64_t) 0, j_39563);
        bool y_39565 = slt64(j_39563, i32_res_29181);
        bool bounds_check_39566 = x_39564 && y_39565;
        bool index_ok_39567 = bounds_check_39562 && bounds_check_39566;
        bool index_certs_39568;
        
        if (!index_ok_39567) {
            {
                if (atomic_cmpxchg_i32_global(global_failure, -1, 0) == -1) {
                    global_failure_args[0] = i_39559;
                    global_failure_args[1] = j_39563;
                    global_failure_args[2] = i32_res_29181;
                    global_failure_args[3] = i32_res_29181;
                    ;
                }
                local_failure = true;
                goto error_0;
            }
        }
        
        float defunc_0_f_res_t_res_39569 = ((__global
                                             float *) defunc_3_map_res_mem_44549)[gtid_39305 *
                                                                                  (i32_res_29181 *
                                                                                   i32_res_29181) +
                                                                                  i_39559 *
                                                                                  i32_res_29181 +
                                                                                  j_39563];
        
        defunc_0_f_res_39558 = defunc_0_f_res_t_res_39569;
    } else {
        int32_t y_39570 = add32(k2p2zq_29179, defunc_0_f_res_39555);
        bool cond_39571 = defunc_0_f_res_39556 == y_39570;
        float defunc_0_f_res_f_res_39572;
        
        if (cond_39571) {
            defunc_0_f_res_f_res_39572 = 1.0F;
        } else {
            defunc_0_f_res_f_res_39572 = 0.0F;
        }
        defunc_0_f_res_39558 = defunc_0_f_res_f_res_39572;
    }
    ((__local float *) mem_44553)[gtid_39308] = defunc_0_f_res_39558;
    
  error_0:
    barrier(CLK_LOCAL_MEM_FENCE);
    if (local_failure)
        return;
    barrier(CLK_LOCAL_MEM_FENCE);
    
    __local char *mem_44563;
    
    mem_44563 = (__local char *) mem_44563_backing_1;
    for (int32_t i_39574 = 0; i_39574 < k2p2zq_29179; i_39574++) {
        int64_t i32_res_39576 = sext_i32_i64(i_39574);
        bool x_39577 = sle64((int64_t) 0, i32_res_39576);
        bool y_39578 = slt64(i32_res_39576, nm_29314);
        bool bounds_check_39579 = x_39577 && y_39578;
        bool index_certs_39580;
        
        if (!bounds_check_39579) {
            {
                if (atomic_cmpxchg_i32_global(global_failure, -1, 1) == -1) {
                    global_failure_args[0] = i32_res_39576;
                    global_failure_args[1] = nm_29314;
                    ;
                }
                local_failure = true;
                goto error_1;
            }
        }
        
        float v1_39581 = ((__local float *) mem_44553)[i32_res_39576];
        bool cond_39582 = v1_39581 == 0.0F;
        int64_t gtid_39329 = sext_i32_i64(ltid_pre_45825);
        int32_t phys_tid_39330 = local_tid_45821;
        int32_t defunc_0_f_res_39585 = sext_i64_i32(gtid_39329);
        int32_t defunc_0_f_res_39586 = sdiv32(defunc_0_f_res_39585, m_29312);
        int32_t defunc_0_f_res_39587 = smod32(defunc_0_f_res_39585, m_29312);
        float defunc_0_f_res_39588;
        
        if (cond_39582) {
            int32_t x_39589 = mul32(m_29312, defunc_0_f_res_39586);
            int32_t i32_arg_39590 = add32(defunc_0_f_res_39587, x_39589);
            int64_t i32_res_39591 = sext_i32_i64(i32_arg_39590);
            bool x_39592 = sle64((int64_t) 0, i32_res_39591);
            bool y_39593 = slt64(i32_res_39591, nm_29314);
            bool bounds_check_39594 = x_39592 && y_39593;
            bool index_certs_39595;
            
            if (!bounds_check_39594) {
                {
                    if (atomic_cmpxchg_i32_global(global_failure, -1, 2) ==
                        -1) {
                        global_failure_args[0] = i32_res_39591;
                        global_failure_args[1] = nm_29314;
                        ;
                    }
                    local_failure = true;
                    goto error_1;
                }
            }
            
            float defunc_0_f_res_t_res_39596 = ((__local
                                                 float *) mem_44553)[i32_res_39591];
            
            defunc_0_f_res_39588 = defunc_0_f_res_t_res_39596;
        } else {
            int64_t i32_res_39597 = sext_i32_i64(defunc_0_f_res_39587);
            bool x_39598 = sle64((int64_t) 0, i32_res_39597);
            bool y_39599 = slt64(i32_res_39597, nm_29314);
            bool bounds_check_39600 = x_39598 && y_39599;
            bool index_certs_39601;
            
            if (!bounds_check_39600) {
                {
                    if (atomic_cmpxchg_i32_global(global_failure, -1, 3) ==
                        -1) {
                        global_failure_args[0] = i32_res_39597;
                        global_failure_args[1] = nm_29314;
                        ;
                    }
                    local_failure = true;
                    goto error_1;
                }
            }
            
            float x_39602 = ((__local float *) mem_44553)[i32_res_39597];
            float x_39603 = x_39602 / v1_39581;
            int32_t y_39604 = sub32(k2p2zq_29179, 1);
            bool cond_39605 = slt32(defunc_0_f_res_39586, y_39604);
            float defunc_0_f_res_f_res_39606;
            
            if (cond_39605) {
                int32_t x_39607 = add32(1, defunc_0_f_res_39586);
                int32_t x_39608 = mul32(m_29312, x_39607);
                int32_t i32_arg_39609 = add32(defunc_0_f_res_39587, x_39608);
                int64_t i32_res_39610 = sext_i32_i64(i32_arg_39609);
                bool x_39611 = sle64((int64_t) 0, i32_res_39610);
                bool y_39612 = slt64(i32_res_39610, nm_29314);
                bool bounds_check_39613 = x_39611 && y_39612;
                bool index_certs_39614;
                
                if (!bounds_check_39613) {
                    {
                        if (atomic_cmpxchg_i32_global(global_failure, -1, 4) ==
                            -1) {
                            global_failure_args[0] = i32_res_39610;
                            global_failure_args[1] = nm_29314;
                            ;
                        }
                        local_failure = true;
                        goto error_1;
                    }
                }
                
                float x_39615 = ((__local float *) mem_44553)[i32_res_39610];
                int32_t i32_arg_39616 = add32(i_39574, x_39608);
                int64_t i32_res_39617 = sext_i32_i64(i32_arg_39616);
                bool x_39618 = sle64((int64_t) 0, i32_res_39617);
                bool y_39619 = slt64(i32_res_39617, nm_29314);
                bool bounds_check_39620 = x_39618 && y_39619;
                bool index_certs_39621;
                
                if (!bounds_check_39620) {
                    {
                        if (atomic_cmpxchg_i32_global(global_failure, -1, 5) ==
                            -1) {
                            global_failure_args[0] = i32_res_39617;
                            global_failure_args[1] = nm_29314;
                            ;
                        }
                        local_failure = true;
                        goto error_1;
                    }
                }
                
                float x_39622 = ((__local float *) mem_44553)[i32_res_39617];
                float y_39623 = x_39603 * x_39622;
                float defunc_0_f_res_f_res_t_res_39624 = x_39615 - y_39623;
                
                defunc_0_f_res_f_res_39606 = defunc_0_f_res_f_res_t_res_39624;
            } else {
                defunc_0_f_res_f_res_39606 = x_39603;
            }
            defunc_0_f_res_39588 = defunc_0_f_res_f_res_39606;
        }
        ((__local float *) mem_44563)[gtid_39329] = defunc_0_f_res_39588;
        
      error_1:
        barrier(CLK_LOCAL_MEM_FENCE);
        if (local_failure)
            return;
        barrier(CLK_LOCAL_MEM_FENCE);
        
        int64_t write_i_39372 = sext_i32_i64(ltid_pre_45825);
        int32_t phys_tid_39373 = local_tid_45821;
        float write_value_39627 = ((__local float *) mem_44563)[write_i_39372];
        
        if (sle64((int64_t) 0, write_i_39372) && slt64(write_i_39372,
                                                       nm_29314)) {
            ((__local float *) mem_44553)[write_i_39372] = write_value_39627;
        }
        barrier(CLK_LOCAL_MEM_FENCE);
    }
    for (int64_t i_45827 = 0; i_45827 < sdiv_up64(i32_res_29181 *
                                                  i32_res_29181 -
                                                  sext_i32_i64(local_tid_45821),
                                                  nm_29314); i_45827++) {
        ((__global float *) mem_44573)[gtid_39305 * (i32_res_29181 *
                                                     i32_res_29181) +
                                       squot64(i_45827 * nm_29314 +
                                               sext_i32_i64(local_tid_45821),
                                               i32_res_29181) * i32_res_29181 +
                                       (i_45827 * nm_29314 +
                                        sext_i32_i64(local_tid_45821) -
                                        squot64(i_45827 * nm_29314 +
                                                sext_i32_i64(local_tid_45821),
                                                i32_res_29181) *
                                        i32_res_29181)] = ((__local
                                                            float *) mem_44553)[i32_res_29181 +
                                                                                (squot64(i_45827 *
                                                                                         nm_29314 +
                                                                                         sext_i32_i64(local_tid_45821),
                                                                                         i32_res_29181) *
                                                                                 i32_res_29329 +
                                                                                 (i_45827 *
                                                                                  nm_29314 +
                                                                                  sext_i32_i64(local_tid_45821) -
                                                                                  squot64(i_45827 *
                                                                                          nm_29314 +
                                                                                          sext_i32_i64(local_tid_45821),
                                                                                          i32_res_29181) *
                                                                                  i32_res_29181))];
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    
  error_3:
    return;
}
// Intra-group kernel: the work-group id selects gtid_39653 and the local id
// selects one output element per work-item.
//
// Each work-item:
//   1. reads a shared value v1_40259 from mem_param_44585 (same address for
//      the whole group),
//   2. computes one float from up to three further bounds-checked reads of
//      mem_param_44585 and stages it in the local buffer mem_44594,
//   3. stores it to the global buffer mem_44590 at
//      gtid_39653 + local_id * m_29166 (only when local_id < nm_29314).
// Work-item 0 then copies the nm_29314 values of this group from mem_44590
// into mem_44598 at gtid_39653 * nm_29314 + i.
//
// NOTE(review): the arithmetic (divide by v1, subtract a scaled element of the
// following row) looks like one elimination step of a Gauss-Jordan style
// solver; confirm against the Futhark source.
//
// Failure protocol: a failed bounds check tries to install its code (7..10)
// in *global_failure via atomic_cmpxchg_i32_global(..., -1, code); the
// work-item that sees -1 returned also records the offending index and
// nm_29314 in global_failure_args[0..1]. local_failure then makes the whole
// group return after the next barrier.
//
// Parameters:
//   global_failure, global_failure_args - failure code slot and its arguments
//   failure_is_an_option - when non-zero, return early if a failure is
//                          already recorded
//   mem_44594_backing_aligned_0 - local-memory backing store for mem_44594
//   m_29166 - stride (in floats) used when indexing mem_44590
//   k2p2zq_29179, m_29312, nm_29314, i_40240, i32_res_40242 - scalars used in
//                          the index computations below
//   ctx_param_ext_44580/44581/44583 - offset and two strides (in floats) for
//                          indexing mem_param_44585
//   mem_param_44585 - global input, mem_44590 / mem_44598 - global outputs
__kernel void mainzisegmap_intragroup_39701(__global int *global_failure,
                                            int failure_is_an_option, __global
                                            int64_t *global_failure_args,
                                            __local volatile
                                            int64_t *mem_44594_backing_aligned_0,
                                            int64_t m_29166,
                                            int32_t k2p2zq_29179,
                                            int32_t m_29312, int64_t nm_29314,
                                            int32_t i_40240,
                                            int64_t i32_res_40242,
                                            int64_t ctx_param_ext_44580,
                                            int64_t ctx_param_ext_44581,
                                            int64_t ctx_param_ext_44583,
                                            __global
                                            unsigned char *mem_param_44585,
                                            __global unsigned char *mem_44590,
                                            __global unsigned char *mem_44598)
{
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict mem_44594_backing_0 = (__local volatile
                                                           char *) mem_44594_backing_aligned_0;
    volatile __local bool local_failure;
    
    // Skip all work if an earlier kernel already reported a failure.
    if (failure_is_an_option) {
        int failed = *global_failure >= 0;
        
        if (failed)
            return;
    }
    local_failure = false;
    barrier(CLK_LOCAL_MEM_FENCE);
    
    int32_t global_tid_45854;
    int32_t local_tid_45855;
    int64_t group_sizze_45858;
    int32_t wave_sizze_45857;
    int32_t group_tid_45856;
    
    global_tid_45854 = get_global_id(0);
    local_tid_45855 = get_local_id(0);
    group_sizze_45858 = get_local_size(0);
    wave_sizze_45857 = LOCKSTEP_WIDTH;
    group_tid_45856 = get_group_id(0);
    
    int32_t phys_tid_39701;
    
    phys_tid_39701 = group_tid_45856;
    
    int32_t ltid_pre_45859;
    
    ltid_pre_45859 = local_tid_45855;
    
    int64_t gtid_39653;
    
    gtid_39653 = sext_i32_i64(group_tid_45856);
    
    // Value shared by the whole group: element i32_res_40242 of this group's
    // slice of mem_param_44585. It is the divisor in the non-zero branch.
    float v1_40259 = ((__global float *) mem_param_44585)[ctx_param_ext_44580 +
                                                          (gtid_39653 *
                                                           ctx_param_ext_44581 +
                                                           i32_res_40242 *
                                                           ctx_param_ext_44583)];
    bool cond_40260 = v1_40259 == 0.0F;
    __local char *mem_44594;
    
    mem_44594 = (__local char *) mem_44594_backing_0;
    
    // Split the local id into a quotient and remainder with respect to m_29312.
    int64_t gtid_39656 = sext_i32_i64(ltid_pre_45859);
    int32_t phys_tid_39657 = local_tid_45855;
    int32_t defunc_0_f_res_40263 = sext_i64_i32(gtid_39656);
    int32_t defunc_0_f_res_40264 = sdiv32(defunc_0_f_res_40263, m_29312);
    int32_t defunc_0_f_res_40265 = smod32(defunc_0_f_res_40263, m_29312);
    float defunc_0_f_res_40266;
    
    if (cond_40260) {
        // Shared value is zero: pass through the element at
        // m_29312 * quotient + remainder unchanged.
        int32_t x_40267 = mul32(m_29312, defunc_0_f_res_40264);
        int32_t i32_arg_40268 = add32(defunc_0_f_res_40265, x_40267);
        int64_t i32_res_40269 = sext_i32_i64(i32_arg_40268);
        bool x_40270 = sle64((int64_t) 0, i32_res_40269);
        bool y_40271 = slt64(i32_res_40269, nm_29314);
        bool bounds_check_40272 = x_40270 && y_40271;
        bool index_certs_40273;
        
        if (!bounds_check_40272) {
            {
                if (atomic_cmpxchg_i32_global(global_failure, -1, 7) == -1) {
                    global_failure_args[0] = i32_res_40269;
                    global_failure_args[1] = nm_29314;
                    ;
                }
                local_failure = true;
                goto error_0;
            }
        }
        
        float defunc_0_f_res_t_res_40274 = ((__global
                                             float *) mem_param_44585)[ctx_param_ext_44580 +
                                                                       (gtid_39653 *
                                                                        ctx_param_ext_44581 +
                                                                        i32_res_40269 *
                                                                        ctx_param_ext_44583)];
        
        defunc_0_f_res_40266 = defunc_0_f_res_t_res_40274;
    } else {
        // Shared value is non-zero: scale the element at index `remainder`
        // by 1 / v1_40259.
        int64_t i32_res_40275 = sext_i32_i64(defunc_0_f_res_40265);
        bool x_40276 = sle64((int64_t) 0, i32_res_40275);
        bool y_40277 = slt64(i32_res_40275, nm_29314);
        bool bounds_check_40278 = x_40276 && y_40277;
        bool index_certs_40279;
        
        if (!bounds_check_40278) {
            {
                if (atomic_cmpxchg_i32_global(global_failure, -1, 8) == -1) {
                    global_failure_args[0] = i32_res_40275;
                    global_failure_args[1] = nm_29314;
                    ;
                }
                local_failure = true;
                goto error_0;
            }
        }
        
        float x_40280 = ((__global
                          float *) mem_param_44585)[ctx_param_ext_44580 +
                                                    (gtid_39653 *
                                                     ctx_param_ext_44581 +
                                                     i32_res_40275 *
                                                     ctx_param_ext_44583)];
        float x_40281 = x_40280 / v1_40259;
        int32_t y_40282 = sub32(k2p2zq_29179, 1);
        bool cond_40283 = slt32(defunc_0_f_res_40264, y_40282);
        float defunc_0_f_res_f_res_40284;
        
        if (cond_40283) {
            // quotient < k2p2zq_29179 - 1: result is
            //   elem[(quotient + 1) * m + remainder]
            //     - x_40281 * elem[(quotient + 1) * m + i_40240]
            int32_t x_40285 = add32(1, defunc_0_f_res_40264);
            int32_t x_40286 = mul32(m_29312, x_40285);
            int32_t i32_arg_40287 = add32(defunc_0_f_res_40265, x_40286);
            int64_t i32_res_40288 = sext_i32_i64(i32_arg_40287);
            bool x_40289 = sle64((int64_t) 0, i32_res_40288);
            bool y_40290 = slt64(i32_res_40288, nm_29314);
            bool bounds_check_40291 = x_40289 && y_40290;
            bool index_certs_40292;
            
            if (!bounds_check_40291) {
                {
                    if (atomic_cmpxchg_i32_global(global_failure, -1, 9) ==
                        -1) {
                        global_failure_args[0] = i32_res_40288;
                        global_failure_args[1] = nm_29314;
                        ;
                    }
                    local_failure = true;
                    goto error_0;
                }
            }
            
            float x_40293 = ((__global
                              float *) mem_param_44585)[ctx_param_ext_44580 +
                                                        (gtid_39653 *
                                                         ctx_param_ext_44581 +
                                                         i32_res_40288 *
                                                         ctx_param_ext_44583)];
            int32_t i32_arg_40294 = add32(i_40240, x_40286);
            int64_t i32_res_40295 = sext_i32_i64(i32_arg_40294);
            bool x_40296 = sle64((int64_t) 0, i32_res_40295);
            bool y_40297 = slt64(i32_res_40295, nm_29314);
            bool bounds_check_40298 = x_40296 && y_40297;
            bool index_certs_40299;
            
            if (!bounds_check_40298) {
                {
                    if (atomic_cmpxchg_i32_global(global_failure, -1, 10) ==
                        -1) {
                        global_failure_args[0] = i32_res_40295;
                        global_failure_args[1] = nm_29314;
                        ;
                    }
                    local_failure = true;
                    goto error_0;
                }
            }
            
            float x_40300 = ((__global
                              float *) mem_param_44585)[ctx_param_ext_44580 +
                                                        (gtid_39653 *
                                                         ctx_param_ext_44581 +
                                                         i32_res_40295 *
                                                         ctx_param_ext_44583)];
            float y_40301 = x_40281 * x_40300;
            float defunc_0_f_res_f_res_t_res_40302 = x_40293 - y_40301;
            
            defunc_0_f_res_f_res_40284 = defunc_0_f_res_f_res_t_res_40302;
        } else {
            // Last quotient: the scaled element is the result.
            defunc_0_f_res_f_res_40284 = x_40281;
        }
        defunc_0_f_res_40266 = defunc_0_f_res_f_res_40284;
    }
    // Stage the per-work-item result in local memory.
    ((__local float *) mem_44594)[gtid_39656] = defunc_0_f_res_40266;
    
  error_0:
    // Every work-item reaches this barrier, including those that jumped here
    // after a failed bounds check, so the whole group agrees on local_failure
    // before deciding to return.
    barrier(CLK_LOCAL_MEM_FENCE);
    if (local_failure)
        return;
    barrier(CLK_LOCAL_MEM_FENCE);
    
    int64_t write_i_39699 = sext_i32_i64(ltid_pre_45859);
    int32_t phys_tid_39700 = local_tid_45855;
    float write_value_40305 = ((__local float *) mem_44594)[write_i_39699];
    
    if (sle64((int64_t) 0, write_i_39699) && slt64(write_i_39699, nm_29314)) {
        ((__global float *) mem_44590)[gtid_39653 + write_i_39699 * m_29166] =
            write_value_40305;
    }
    // The stores above go to GLOBAL memory and are read back below by
    // work-item 0, i.e. by a different work-item than the one that wrote
    // them. A local-only fence does not order global accesses, so the barrier
    // must also carry CLK_GLOBAL_MEM_FENCE for the copy to be well defined.
    barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
    if (local_tid_45855 == 0) {
        // Gather this group's strided values into a contiguous run.
        for (int64_t i_45860 = 0; i_45860 < nm_29314; i_45860++) {
            ((__global float *) mem_44598)[gtid_39653 * nm_29314 + i_45860] =
                ((__global float *) mem_44590)[gtid_39653 + i_45860 * m_29166];
        }
    }
    
  error_2:
    return;
}
// Intra-group kernel: the work-group id selects row gtid_40825 and the local
// id selects one element of that row (rows are N_29165 floats long).
//
// Steps performed by each work-group:
//   1. Per element: if the input in images_mem_44381 is not NaN, subtract the
//      matching element of defunc_3_map_res_mem_45140; otherwise produce NaN.
//      The value goes to local mem_45146, and a 0/1 "is not NaN" flag goes to
//      local mem_45144.
//   2. Work-group prefix sum (add64) over the flags in mem_45144, done in
//      blocks of 32 work-items with a second pass over the per-block totals.
//   3. mem_45144[i_29469] is read as the scalar result of the scan.
//   4. Local mem_45148 is filled with NaN and local mem_45150 with 0; every
//      non-NaN value is then written to slot (prefix sum - 1) of mem_45148 and
//      its original index to the same slot of mem_45150.
//   5. Results are stored to global memory: the scalar to mem_45153[row], the
//      compacted values to mem_45156 and the compacted indices to mem_45159.
//
// NOTE(review): the kernel indexes the local buffers with the raw local id
// without a bounds test in several places, so it presumably relies on the
// work-group size being N_29165 - confirm at the launch site.
//
// Parameters:
//   global_failure - failure code slot; the kernel returns immediately if a
//                    failure is already recorded (value >= 0)
//   mem_45150/45148/45146/45144_backing_aligned_* - local-memory backing
//                    stores for the four local buffers used below
//   N_29165 - row length, i_29469 - index of the scan element to report
//   images_mem_44381, defunc_3_map_res_mem_45140 - global inputs
//   mem_45153, mem_45156, mem_45159 - global outputs
__kernel void mainzisegmap_intragroup_40832(__global int *global_failure,
                                            __local volatile
                                            int64_t *mem_45150_backing_aligned_0,
                                            __local volatile
                                            int64_t *mem_45148_backing_aligned_1,
                                            __local volatile
                                            int64_t *mem_45146_backing_aligned_2,
                                            __local volatile
                                            int64_t *mem_45144_backing_aligned_3,
                                            int64_t N_29165, int64_t i_29469,
                                            __global
                                            unsigned char *images_mem_44381,
                                            __global
                                            unsigned char *defunc_3_map_res_mem_45140,
                                            __global unsigned char *mem_45153,
                                            __global unsigned char *mem_45156,
                                            __global unsigned char *mem_45159)
{
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict mem_45150_backing_3 = (__local volatile
                                                           char *) mem_45150_backing_aligned_0;
    __local volatile char *restrict mem_45148_backing_2 = (__local volatile
                                                           char *) mem_45148_backing_aligned_1;
    __local volatile char *restrict mem_45146_backing_1 = (__local volatile
                                                           char *) mem_45146_backing_aligned_2;
    __local volatile char *restrict mem_45144_backing_0 = (__local volatile
                                                           char *) mem_45144_backing_aligned_3;
    
    // Skip all work if an earlier kernel already reported a failure.
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_46236;
    int32_t local_tid_46237;
    int64_t group_sizze_46240;
    int32_t wave_sizze_46239;
    int32_t group_tid_46238;
    
    global_tid_46236 = get_global_id(0);
    local_tid_46237 = get_local_id(0);
    group_sizze_46240 = get_local_size(0);
    wave_sizze_46239 = LOCKSTEP_WIDTH;
    group_tid_46238 = get_group_id(0);
    
    int32_t phys_tid_40832;
    
    phys_tid_40832 = group_tid_46238;
    
    int32_t ltid_pre_46241;
    
    ltid_pre_46241 = local_tid_46237;
    
    int64_t gtid_40825;
    
    gtid_40825 = sext_i32_i64(group_tid_46238);
    
    // mem_45144 holds int64 flags (later their prefix sums);
    // mem_45146 holds the per-element float values.
    __local char *mem_45144;
    
    mem_45144 = (__local char *) mem_45144_backing_0;
    
    __local char *mem_45146;
    
    mem_45146 = (__local char *) mem_45146_backing_1;
    
    // Step 1: per-element difference, with NaN inputs propagated as NaN.
    int64_t gtid_40828 = sext_i32_i64(ltid_pre_46241);
    int32_t phys_tid_40829 = local_tid_46237;
    float x_40921 = ((__global float *) images_mem_44381)[gtid_40825 * N_29165 +
                                                          gtid_40828];
    bool isnan_res_40923;
    
    isnan_res_40923 = futrts_isnan32(x_40921);
    
    bool cond_40924 = !isnan_res_40923;
    float defunc_1_f_res_40925;
    
    if (cond_40924) {
        float x_40922 = ((__global
                          float *) defunc_3_map_res_mem_45140)[gtid_40825 *
                                                               N_29165 +
                                                               gtid_40828];
        float defunc_1_f_res_t_res_40926 = x_40921 - x_40922;
        
        defunc_1_f_res_40925 = defunc_1_f_res_t_res_40926;
    } else {
        defunc_1_f_res_40925 = NAN;
    }
    
    bool isnan_res_40927;
    
    isnan_res_40927 = futrts_isnan32(defunc_1_f_res_40925);
    
    // Flag is 1 when the result is a usable (non-NaN) value, 0 otherwise.
    bool defunc_0_p_res_40928 = !isnan_res_40927;
    int64_t defunc_0_f_res_40929 = btoi_bool_i64(defunc_0_p_res_40928);
    
    ((__local int64_t *) mem_45144)[gtid_40828] = defunc_0_f_res_40929;
    ((__local float *) mem_45146)[gtid_40828] = defunc_1_f_res_40925;
    barrier(CLK_LOCAL_MEM_FENCE);
    
    // Step 2: work-group prefix sum of the flags, in place in mem_45144.
    int64_t dims_flat_46242;
    
    dims_flat_46242 = N_29165;
    
    // x_40918 / x_40919: accumulator and own value for the per-block pass.
    // x_46244 / x_46245: the same pair for the pass over block totals.
    int64_t x_40918;
    int64_t x_40919;
    int64_t x_46244;
    int64_t x_46245;
    bool ltid_in_bounds_46247;
    
    ltid_in_bounds_46247 = slt64(sext_i32_i64(local_tid_46237), N_29165);
    
    int32_t skip_threads_46248;
    
    // read input for in-block scan
    {
        if (ltid_in_bounds_46247) {
            x_40919 = ((volatile __local
                        int64_t *) mem_45144)[sext_i32_i64(local_tid_46237)];
            // First work-item of each block of 32 has nothing to its left,
            // so its accumulator starts as its own value.
            if ((local_tid_46237 - squot32(local_tid_46237, 32) * 32) == 0) {
                x_40918 = x_40919;
            }
        }
    }
    // in-block scan (hopefully no barriers needed)
    // The stride doubles each round (1, 2, 4, 8, 16). Barriers are only issued
    // once the stride reaches wave_sizze_46239 (LOCKSTEP_WIDTH); below that the
    // code relies on work-items executing in lock step.
    {
        skip_threads_46248 = 1;
        while (slt32(skip_threads_46248, 32)) {
            if (sle32(skip_threads_46248, local_tid_46237 -
                      squot32(local_tid_46237, 32) * 32) &&
                ltid_in_bounds_46247) {
                // read operands
                {
                    x_40918 = ((volatile __local
                                int64_t *) mem_45144)[sext_i32_i64(local_tid_46237) -
                                                      sext_i32_i64(skip_threads_46248)];
                }
                // perform operation
                {
                    // NOTE(review): looks like a segment-boundary guard from
                    // the scan template; when set, the neighbour's value is
                    // discarded instead of added - confirm.
                    bool inactive_46249 =
                         slt64(srem64(sext_i32_i64(local_tid_46237), N_29165),
                               sext_i32_i64(local_tid_46237) -
                               sext_i32_i64(local_tid_46237 -
                               skip_threads_46248));
                    
                    if (inactive_46249) {
                        x_40918 = x_40919;
                    }
                    if (!inactive_46249) {
                        int64_t defunc_1_op_res_40920 = add64(x_40918, x_40919);
                        
                        x_40918 = defunc_1_op_res_40920;
                    }
                }
            }
            if (sle32(wave_sizze_46239, skip_threads_46248)) {
                barrier(CLK_LOCAL_MEM_FENCE);
            }
            if (sle32(skip_threads_46248, local_tid_46237 -
                      squot32(local_tid_46237, 32) * 32) &&
                ltid_in_bounds_46247) {
                // write result
                {
                    ((volatile __local
                      int64_t *) mem_45144)[sext_i32_i64(local_tid_46237)] =
                        x_40918;
                    x_40919 = x_40918;
                }
            }
            if (sle32(wave_sizze_46239, skip_threads_46248)) {
                barrier(CLK_LOCAL_MEM_FENCE);
            }
            skip_threads_46248 *= 2;
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // last thread of block 'i' writes its result to offset 'i'
    // (this overwrites the first entries of mem_45144; they are restored from
    // x_40919 further down)
    {
        if ((local_tid_46237 - squot32(local_tid_46237, 32) * 32) == 31 &&
            ltid_in_bounds_46247) {
            ((volatile __local
              int64_t *) mem_45144)[sext_i32_i64(squot32(local_tid_46237,
                                                         32))] = x_40918;
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // scan the first block, after which offset 'i' contains carry-in for block 'i+1'
    {
        int32_t skip_threads_46250;
        
        // read input for in-block scan
        {
            if (squot32(local_tid_46237, 32) == 0 && ltid_in_bounds_46247) {
                x_46245 = ((volatile __local
                            int64_t *) mem_45144)[sext_i32_i64(local_tid_46237)];
                if ((local_tid_46237 - squot32(local_tid_46237, 32) * 32) ==
                    0) {
                    x_46244 = x_46245;
                }
            }
        }
        // in-block scan (hopefully no barriers needed)
        // Same doubling-stride loop as above, restricted to the first 32
        // work-items and operating on the per-block totals.
        {
            skip_threads_46250 = 1;
            while (slt32(skip_threads_46250, 32)) {
                if (sle32(skip_threads_46250, local_tid_46237 -
                          squot32(local_tid_46237, 32) * 32) &&
                    (squot32(local_tid_46237, 32) == 0 &&
                     ltid_in_bounds_46247)) {
                    // read operands
                    {
                        x_46244 = ((volatile __local
                                    int64_t *) mem_45144)[sext_i32_i64(local_tid_46237) -
                                                          sext_i32_i64(skip_threads_46250)];
                    }
                    // perform operation
                    {
                        // The guard is evaluated on the index of the last
                        // element of each block (local id * 32 + 31).
                        bool inactive_46251 =
                             slt64(srem64(sext_i32_i64(local_tid_46237 * 32 +
                                          32 - 1), N_29165),
                                   sext_i32_i64(local_tid_46237 * 32 + 32 - 1) -
                                   sext_i32_i64((local_tid_46237 -
                                                 skip_threads_46250) * 32 + 32 -
                                   1));
                        
                        if (inactive_46251) {
                            x_46244 = x_46245;
                        }
                        if (!inactive_46251) {
                            int64_t defunc_1_op_res_46246 = add64(x_46244,
                                                                  x_46245);
                            
                            x_46244 = defunc_1_op_res_46246;
                        }
                    }
                }
                if (sle32(wave_sizze_46239, skip_threads_46250)) {
                    barrier(CLK_LOCAL_MEM_FENCE);
                }
                if (sle32(skip_threads_46250, local_tid_46237 -
                          squot32(local_tid_46237, 32) * 32) &&
                    (squot32(local_tid_46237, 32) == 0 &&
                     ltid_in_bounds_46247)) {
                    // write result
                    {
                        ((volatile __local
                          int64_t *) mem_45144)[sext_i32_i64(local_tid_46237)] =
                            x_46244;
                        x_46245 = x_46244;
                    }
                }
                if (sle32(wave_sizze_46239, skip_threads_46250)) {
                    barrier(CLK_LOCAL_MEM_FENCE);
                }
                skip_threads_46250 *= 2;
            }
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // carry-in for every block except the first
    {
        if (!(squot32(local_tid_46237, 32) == 0 || !ltid_in_bounds_46247)) {
            // read operands
            {
                // Own in-block result becomes the right operand; the scanned
                // total of all preceding blocks becomes the left operand.
                x_40919 = x_40918;
                x_40918 = ((__local
                            int64_t *) mem_45144)[sext_i32_i64(squot32(local_tid_46237,
                                                                       32)) -
                                                  (int64_t) 1];
            }
            // perform operation
            {
                bool inactive_46252 =
                     slt64(srem64(sext_i32_i64(local_tid_46237), N_29165),
                           sext_i32_i64(local_tid_46237) -
                           sext_i32_i64(squot32(local_tid_46237, 32) * 32 - 1));
                
                if (inactive_46252) {
                    x_40918 = x_40919;
                }
                if (!inactive_46252) {
                    int64_t defunc_1_op_res_40920 = add64(x_40918, x_40919);
                    
                    x_40918 = defunc_1_op_res_40920;
                }
            }
            // write final result
            {
                ((__local int64_t *) mem_45144)[sext_i32_i64(local_tid_46237)] =
                    x_40918;
            }
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // restore correct values for first block
    // (its slots were reused for the block totals; x_40919 still holds each
    // work-item's own in-block result)
    {
        if (squot32(local_tid_46237, 32) == 0) {
            ((__local int64_t *) mem_45144)[sext_i32_i64(local_tid_46237)] =
                x_40919;
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    
    // Step 3: scalar result of the scan, taken at index i_29469 and narrowed
    // to 32 bits. NOTE(review): presumably i_29469 is the last valid index, so
    // this is the number of non-NaN elements in the row - confirm at caller.
    int64_t last_res_40930 = ((__local int64_t *) mem_45144)[i_29469];
    int32_t defunc_0_f_res_40931 = sext_i64_i32(last_res_40930);
    // Step 4: destination buffers, pre-filled so unused slots read as NaN
    // (values) and 0 (indices).
    __local char *mem_45148;
    
    mem_45148 = (__local char *) mem_45148_backing_2;
    ((__local float *) mem_45148)[sext_i32_i64(local_tid_46237)] = NAN;
    barrier(CLK_LOCAL_MEM_FENCE);
    
    __local char *mem_45150;
    
    mem_45150 = (__local char *) mem_45150_backing_3;
    ((__local int32_t *) mem_45150)[sext_i32_i64(local_tid_46237)] = 0;
    barrier(CLK_LOCAL_MEM_FENCE);
    
    int64_t write_i_40830 = sext_i32_i64(ltid_pre_46241);
    int32_t phys_tid_40831 = local_tid_46237;
    float x_40936 = ((__local float *) mem_45146)[write_i_40830];
    int32_t index_primexp_42374 = sext_i64_i32(write_i_40830);
    bool isnan_res_40939;
    
    isnan_res_40939 = futrts_isnan32(x_40936);
    
    // Target slot: (prefix sum - 1) for usable values, -1 for NaN so that the
    // bounds tests below drop the write.
    bool defunc_0_p_res_40940 = !isnan_res_40939;
    int64_t defunc_1_f_res_40941;
    
    if (defunc_0_p_res_40940) {
        int64_t x_40937 = ((__local int64_t *) mem_45144)[write_i_40830];
        int64_t defunc_1_f_res_t_res_40942 = sub64(x_40937, (int64_t) 1);
        
        defunc_1_f_res_40941 = defunc_1_f_res_t_res_40942;
    } else {
        defunc_1_f_res_40941 = (int64_t) -1;
    }
    if (sle64((int64_t) 0, defunc_1_f_res_40941) && slt64(defunc_1_f_res_40941,
                                                          N_29165)) {
        ((__local int32_t *) mem_45150)[defunc_1_f_res_40941] =
            index_primexp_42374;
    }
    if (sle64((int64_t) 0, defunc_1_f_res_40941) && slt64(defunc_1_f_res_40941,
                                                          N_29165)) {
        ((__local float *) mem_45148)[defunc_1_f_res_40941] = x_40936;
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // Step 5: publish results. Only work-item 0 writes the per-row scalar;
    // every work-item copies its own slot of the two compacted buffers.
    if (local_tid_46237 == 0) {
        ((__global int32_t *) mem_45153)[gtid_40825] = defunc_0_f_res_40931;
    }
    ((__global float *) mem_45156)[gtid_40825 * N_29165 +
                                   sext_i32_i64(local_tid_46237)] = ((__local
                                                                      float *) mem_45148)[sext_i32_i64(local_tid_46237)];
    barrier(CLK_LOCAL_MEM_FENCE);
    ((__global int32_t *) mem_45159)[gtid_40825 * N_29165 +
                                     sext_i32_i64(local_tid_46237)] = ((__local
                                                                        int32_t *) mem_45150)[sext_i32_i64(local_tid_46237)];
    barrier(CLK_LOCAL_MEM_FENCE);
    
  error_2:
    return;
}
// Kernel mainzisegmap_intragroup_41172.
//
// Each work-group handles one row (row index = group id) and each work-item
// handles one position in that row (position = local id). Rows are addressed
// with a stride of N_29165 elements. The kernel performs three steps:
//
//   1. Every work-item loads one float from images_mem_44381 and stores
//      btoi_bool_i32(!isnan) for it in local memory; the group then reduces
//      these int32 values with add32 into defunc_0_f_res_41257 (referred to
//      as "count" below).
//   2. Every work-item whose position is below count loads the float at the
//      same row/position from defunc_4_map_res_mem_45178 and squares it (all
//      other work-items contribute 0.0F); the group reduces these with float
//      addition into defunc_0_f_res_41265 (referred to as "sum" below).
//   3. Work-item 0 writes three per-row results:
//        mem_45225[row] = fptosi_f32_i32(hfrac_29171 * count)   (int32)
//        mem_45227[row] = count                                 (int32)
//        mem_45229[row] = futrts_sqrt32(sum / (count - k2p2_29177)) (float)
//
// Parameters:
//   global_failure, failure_is_an_option, global_failure_args
//       Failure reporting. A failed bounds check records code 17 in
//       *global_failure and the offending index and N_29165 in
//       global_failure_args[0..1].
//   red_arr_mem_46361_backing_aligned_0
//       Local scratch buffer used for the float reduction (step 2).
//   red_arr_mem_46357_backing_aligned_1
//       Local scratch buffer used for the int32 reduction (step 1).
//   N_29165       Row stride of the global arrays and bound for the index check.
//   hfrac_29171   Factor multiplied with count for the mem_45225 output.
//   i32_res_29175 Number of local elements taking part in each reduction.
//                 NOTE(review): presumably equal to the work-group size;
//                 confirm against the host-side launch code.
//   k2p2_29177    Value subtracted from count before the division in step 3.
//   images_mem_44381, defunc_4_map_res_mem_45178
//       Global input buffers, read as float.
//   mem_45225, mem_45227, mem_45229
//       Global output buffers, one element per work-group.
__kernel void mainzisegmap_intragroup_41172(__global int *global_failure,
                                            int failure_is_an_option, __global
                                            int64_t *global_failure_args,
                                            __local volatile
                                            int64_t *red_arr_mem_46361_backing_aligned_0,
                                            __local volatile
                                            int64_t *red_arr_mem_46357_backing_aligned_1,
                                            int64_t N_29165, float hfrac_29171,
                                            int64_t i32_res_29175,
                                            int32_t k2p2_29177, __global
                                            unsigned char *images_mem_44381,
                                            __global
                                            unsigned char *defunc_4_map_res_mem_45178,
                                            __global unsigned char *mem_45225,
                                            __global unsigned char *mem_45227,
                                            __global unsigned char *mem_45229)
{
    // These three constants are not referenced anywhere in this kernel.
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    // View the int64_t-typed local backing buffers as byte pointers; they are
    // cast to the actual element type at each access below.
    __local volatile char *restrict red_arr_mem_46361_backing_1 =
                          (__local volatile
                           char *) red_arr_mem_46361_backing_aligned_0;
    __local volatile char *restrict red_arr_mem_46357_backing_0 =
                          (__local volatile
                           char *) red_arr_mem_46357_backing_aligned_1;
    // Per-group failure flag; set by a work-item whose bounds check fails.
    volatile __local bool local_failure;
    
    // When failure reporting is enabled, do nothing if a failure code
    // (any value >= 0) has already been recorded in *global_failure.
    if (failure_is_an_option) {
        int failed = *global_failure >= 0;
        
        if (failed)
            return;
    }
    local_failure = false;
    barrier(CLK_LOCAL_MEM_FENCE);
    
    int32_t global_tid_46351;
    int32_t local_tid_46352;
    int64_t group_sizze_46355;
    int32_t wave_sizze_46354;
    int32_t group_tid_46353;
    
    // Work-item identification. Only the local id, the group id and the wave
    // size are read later; global_tid and group_sizze are assigned but unused.
    global_tid_46351 = get_global_id(0);
    local_tid_46352 = get_local_id(0);
    group_sizze_46355 = get_local_size(0);
    wave_sizze_46354 = LOCKSTEP_WIDTH;
    group_tid_46353 = get_group_id(0);
    
    int32_t phys_tid_41172;
    
    phys_tid_41172 = group_tid_46353;
    
    int32_t ltid_pre_46356;
    
    ltid_pre_46356 = local_tid_46352;
    
    // Row index handled by this work-group.
    int64_t gtid_41165;
    
    gtid_41165 = sext_i32_i64(group_tid_46353);
    
    // ---- Step 1: per-row sum of the not-NaN indicator ----
    int32_t defunc_0_f_res_41257;
    // Position within the row handled by this work-item.
    int64_t gtid_41168 = sext_i32_i64(ltid_pre_46356);
    int32_t phys_tid_41169 = local_tid_46352;
    __local char *red_arr_mem_46357;
    
    red_arr_mem_46357 = (__local char *) red_arr_mem_46357_backing_0;
    
    float x_41261;
    
    x_41261 = ((__global float *) images_mem_44381)[gtid_41165 * N_29165 +
                                                    gtid_41168];
    
    bool isnan_res_41262;
    
    isnan_res_41262 = futrts_isnan32(x_41261);
    
    // Indicator for "element is not NaN", converted to int32 by
    // btoi_bool_i32 (helper defined elsewhere; presumably yields 0 or 1).
    bool cond_41263 = !isnan_res_41262;
    int32_t defunc_0_f_res_41264 = btoi_bool_i32(cond_41263);
    
    // Publish this work-item's indicator to the local reduction array.
    ((__local int32_t *) red_arr_mem_46357)[gtid_41168] = defunc_0_f_res_41264;
    barrier(CLK_LOCAL_MEM_FENCE);
    
    int32_t offset_46359;
    int32_t skip_waves_46360;
    
    skip_waves_46360 = 1;
    
    // x_41258 is this work-item's running accumulator; x_41259 holds the
    // partner element read in each reduction round.
    int32_t x_41258;
    int32_t x_41259;
    
    offset_46359 = 0;
    // participating threads read initial accumulator
    {
        if (slt32(local_tid_46352, sext_i64_i32(i32_res_29175))) {
            x_41258 = ((__local
                        int32_t *) red_arr_mem_46357)[sext_i32_i64(local_tid_46352 +
                                                      offset_46359)];
        }
    }
    // Reduction within a wave: offsets 1, 2, 4, ... below the wave size. In
    // each round a work-item takes part when its partner index is inside the
    // reduction width and its lane index within the wave (local id minus
    // wave start) has the low bits selected by (2 * offset - 1) all clear.
    // No barrier is issued inside this loop; the accesses are volatile.
    // NOTE(review): presumably relies on LOCKSTEP_WIDTH work-items executing
    // in lockstep - confirm for the target device.
    offset_46359 = 1;
    while (slt32(offset_46359, wave_sizze_46354)) {
        if (slt32(local_tid_46352 + offset_46359,
                  sext_i64_i32(i32_res_29175)) && ((local_tid_46352 -
                                                    squot32(local_tid_46352,
                                                            wave_sizze_46354) *
                                                    wave_sizze_46354) & (2 *
                                                                         offset_46359 -
                                                                         1)) ==
            0) {
            // read array element
            {
                x_41259 = ((volatile __local
                            int32_t *) red_arr_mem_46357)[sext_i32_i64(local_tid_46352 +
                                                          offset_46359)];
            }
            // apply reduction operation
            {
                int32_t defunc_1_op_res_41260 = add32(x_41258, x_41259);
                
                x_41258 = defunc_1_op_res_41260;
            }
            // write result of operation
            {
                ((volatile __local
                  int32_t *) red_arr_mem_46357)[sext_i32_i64(local_tid_46352)] =
                    x_41258;
            }
        }
        offset_46359 *= 2;
    }
    // Reduction across waves: the loop bound is the reduction width divided
    // by the wave size, rounded up. In each round only lane 0 of a wave whose
    // wave index has the low bits selected by (2 * skip_waves - 1) clear
    // combines its value with the element skip_waves * wave size further on.
    // A barrier is executed at the start of every round.
    while (slt32(skip_waves_46360, squot32(sext_i64_i32(i32_res_29175) +
                                           wave_sizze_46354 - 1,
                                           wave_sizze_46354))) {
        barrier(CLK_LOCAL_MEM_FENCE);
        offset_46359 = skip_waves_46360 * wave_sizze_46354;
        if (slt32(local_tid_46352 + offset_46359,
                  sext_i64_i32(i32_res_29175)) && ((local_tid_46352 -
                                                    squot32(local_tid_46352,
                                                            wave_sizze_46354) *
                                                    wave_sizze_46354) == 0 &&
                                                   (squot32(local_tid_46352,
                                                            wave_sizze_46354) &
                                                    (2 * skip_waves_46360 -
                                                     1)) == 0)) {
            // read array element
            {
                x_41259 = ((__local
                            int32_t *) red_arr_mem_46357)[sext_i32_i64(local_tid_46352 +
                                                          offset_46359)];
            }
            // apply reduction operation
            {
                int32_t defunc_1_op_res_41260 = add32(x_41258, x_41259);
                
                x_41258 = defunc_1_op_res_41260;
            }
            // write result of operation
            {
                ((__local
                  int32_t *) red_arr_mem_46357)[sext_i32_i64(local_tid_46352)] =
                    x_41258;
            }
        }
        skip_waves_46360 *= 2;
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // Every work-item reads the reduced value ("count") from element 0.
    defunc_0_f_res_41257 = ((__local int32_t *) red_arr_mem_46357)[(int64_t) 0];
    
    // ---- Step 2: per-row sum of squares over the first count positions ----
    float defunc_0_f_res_41265;
    int64_t gtid_41170 = sext_i32_i64(ltid_pre_46356);
    int32_t phys_tid_41171 = local_tid_46352;
    __local char *red_arr_mem_46361;
    
    red_arr_mem_46361 = (__local char *) red_arr_mem_46361_backing_1;
    
    int32_t index_primexp_42382;
    
    index_primexp_42382 = sext_i64_i32(gtid_41170);
    
    // Only positions below count read from global memory; the rest use 0.0F.
    bool cond_41270 = slt32(index_primexp_42382, defunc_0_f_res_41257);
    float defunc_0_f_res_41271;
    
    if (cond_41270) {
        int64_t i_41272 = sext_i32_i64(index_primexp_42382);
        bool x_41273 = sle64((int64_t) 0, i_41272);
        bool y_41274 = slt64(i_41272, N_29165);
        bool bounds_check_41275 = x_41273 && y_41274;
        bool index_certs_41276;
        
        // Index must satisfy 0 <= i < N_29165. On failure, the work-item
        // whose atomic_cmpxchg_i32_global call returns -1 (presumably the
        // first to replace the "no failure" value -1 with code 17) stores
        // the offending index and the bound; every failing work-item then
        // sets the group flag and jumps to the shared barrier at error_2.
        if (!bounds_check_41275) {
            {
                if (atomic_cmpxchg_i32_global(global_failure, -1, 17) == -1) {
                    global_failure_args[0] = i_41272;
                    global_failure_args[1] = N_29165;
                    ;
                }
                local_failure = true;
                goto error_2;
            }
        }
        
        float defunc_0_f_res_t_res_41277 = ((__global
                                             float *) defunc_4_map_res_mem_45178)[gtid_41165 *
                                                                                  N_29165 +
                                                                                  i_41272];
        
        defunc_0_f_res_41271 = defunc_0_f_res_t_res_41277;
    } else {
        defunc_0_f_res_41271 = 0.0F;
    }
    
    // Square the loaded value and publish it to the float reduction array.
    float defunc_0_f_res_41278 = defunc_0_f_res_41271 * defunc_0_f_res_41271;
    
    ((__local float *) red_arr_mem_46361)[gtid_41170] = defunc_0_f_res_41278;
    
    // Reached both by normal fall-through and by the goto above, so all
    // work-items arrive at the same barrier before local_failure is tested.
  error_2:
    barrier(CLK_LOCAL_MEM_FENCE);
    if (local_failure)
        return;
    barrier(CLK_LOCAL_MEM_FENCE);
    
    int32_t offset_46363;
    int32_t skip_waves_46364;
    
    skip_waves_46364 = 1;
    
    // x_41266 is this work-item's running float accumulator; x_41267 holds
    // the partner element read in each reduction round.
    float x_41266;
    float x_41267;
    
    offset_46363 = 0;
    // participating threads read initial accumulator
    {
        if (slt32(local_tid_46352, sext_i64_i32(i32_res_29175))) {
            x_41266 = ((__local
                        float *) red_arr_mem_46361)[sext_i32_i64(local_tid_46352 +
                                                    offset_46363)];
        }
    }
    // Reduction within a wave, same structure as the int32 reduction above
    // but combining with float addition; no barrier inside this loop.
    offset_46363 = 1;
    while (slt32(offset_46363, wave_sizze_46354)) {
        if (slt32(local_tid_46352 + offset_46363,
                  sext_i64_i32(i32_res_29175)) && ((local_tid_46352 -
                                                    squot32(local_tid_46352,
                                                            wave_sizze_46354) *
                                                    wave_sizze_46354) & (2 *
                                                                         offset_46363 -
                                                                         1)) ==
            0) {
            // read array element
            {
                x_41267 = ((volatile __local
                            float *) red_arr_mem_46361)[sext_i32_i64(local_tid_46352 +
                                                        offset_46363)];
            }
            // apply reduction operation
            {
                float defunc_1_op_res_41268 = x_41266 + x_41267;
                
                x_41266 = defunc_1_op_res_41268;
            }
            // write result of operation
            {
                ((volatile __local
                  float *) red_arr_mem_46361)[sext_i32_i64(local_tid_46352)] =
                    x_41266;
            }
        }
        offset_46363 *= 2;
    }
    // Reduction across waves, same structure as the int32 reduction above:
    // lane 0 of selected waves combines partial sums, with a barrier at the
    // start of every round.
    while (slt32(skip_waves_46364, squot32(sext_i64_i32(i32_res_29175) +
                                           wave_sizze_46354 - 1,
                                           wave_sizze_46354))) {
        barrier(CLK_LOCAL_MEM_FENCE);
        offset_46363 = skip_waves_46364 * wave_sizze_46354;
        if (slt32(local_tid_46352 + offset_46363,
                  sext_i64_i32(i32_res_29175)) && ((local_tid_46352 -
                                                    squot32(local_tid_46352,
                                                            wave_sizze_46354) *
                                                    wave_sizze_46354) == 0 &&
                                                   (squot32(local_tid_46352,
                                                            wave_sizze_46354) &
                                                    (2 * skip_waves_46364 -
                                                     1)) == 0)) {
            // read array element
            {
                x_41267 = ((__local
                            float *) red_arr_mem_46361)[sext_i32_i64(local_tid_46352 +
                                                        offset_46363)];
            }
            // apply reduction operation
            {
                float defunc_1_op_res_41268 = x_41266 + x_41267;
                
                x_41266 = defunc_1_op_res_41268;
            }
            // write result of operation
            {
                ((__local
                  float *) red_arr_mem_46361)[sext_i32_i64(local_tid_46352)] =
                    x_41266;
            }
        }
        skip_waves_46364 *= 2;
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // Every work-item reads the reduced value ("sum") from element 0.
    defunc_0_f_res_41265 = ((__local float *) red_arr_mem_46361)[(int64_t) 0];
    
    // ---- Step 3: derive the per-row results ----
    // sqrt_res = sqrt(sum / (count - k2p2_29177)).
    // NOTE(review): no guard for count <= k2p2_29177 is visible in this
    // kernel; confirm that a non-finite result is acceptable to the caller.
    int32_t r32_arg_41279 = sub32(defunc_0_f_res_41257, k2p2_29177);
    float i32_res_41280 = sitofp_i32_f32(r32_arg_41279);
    float sqrt_arg_41281 = defunc_0_f_res_41265 / i32_res_41280;
    float sqrt_res_41282;
    
    sqrt_res_41282 = futrts_sqrt32(sqrt_arg_41281);
    
    // f32_res = hfrac_29171 * count, converted to int32 by fptosi_f32_i32.
    float i32_res_41283 = sitofp_i32_f32(defunc_0_f_res_41257);
    float t32_arg_41284 = hfrac_29171 * i32_res_41283;
    int32_t f32_res_41285 = fptosi_f32_i32(t32_arg_41284);
    
    // All work-items computed the same values; only work-item 0 stores them,
    // one element per row in each output buffer.
    if (local_tid_46352 == 0) {
        ((__global int32_t *) mem_45225)[gtid_41165] = f32_res_41285;
    }
    if (local_tid_46352 == 0) {
        ((__global int32_t *) mem_45227)[gtid_41165] = defunc_0_f_res_41257;
    }
    if (local_tid_46352 == 0) {
        ((__global float *) mem_45229)[gtid_41165] = sqrt_res_41282;
    }
    
    // No goto in this kernel targets this label.
  error_4:
    return;
}
__kernel void mainzisegmap_intragroup_41640(__global int *global_failure,
                                            int failure_is_an_option, __global
                                            int64_t *global_failure_args,
                                            __local volatile
                                            int64_t *red_arr_mem_46624_backing_aligned_0,
                                            __local volatile
                                            int64_t *red_arr_mem_46622_backing_aligned_1,
                                            __local volatile
                                            int64_t *red_arr_mem_46620_backing_aligned_2,
                                            __local volatile
                                            int64_t *mem_45288_backing_aligned_3,
                                            int64_t N_29165, int32_t n_29169,
                                            int64_t iota32_arg_29597, __global
                                            unsigned char *defunc_4_map_res_mem_45177,
                                            __global
                                            unsigned char *defunc_4_map_res_mem_45178,
                                            __global
                                            unsigned char *defunc_4_map_res_mem_45179,
                                            __global
                                            unsigned char *defunc_3_map_res_mem_45244,
                                            __global
                                            unsigned char *defunc_3_map_res_mem_45245,
                                            __global
                                            unsigned char *defunc_3_map_res_mem_45246,
                                            __global
                                            unsigned char *defunc_0_f_res_mem_45279,
                                            __global unsigned char *mem_45284,
                                            __global unsigned char *mem_45291,
                                            __global unsigned char *mem_45293)
{
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict red_arr_mem_46624_backing_3 =
                          (__local volatile
                           char *) red_arr_mem_46624_backing_aligned_0;
    __local volatile char *restrict red_arr_mem_46622_backing_2 =
                          (__local volatile
                           char *) red_arr_mem_46622_backing_aligned_1;
    __local volatile char *restrict red_arr_mem_46620_backing_1 =
                          (__local volatile
                           char *) red_arr_mem_46620_backing_aligned_2;
    __local volatile char *restrict mem_45288_backing_0 = (__local volatile
                                                           char *) mem_45288_backing_aligned_3;
    volatile __local bool local_failure;
    
    if (failure_is_an_option) {
        int failed = *global_failure >= 0;
        
        if (failed)
            return;
    }
    local_failure = false;
    barrier(CLK_LOCAL_MEM_FENCE);
    
    int32_t global_tid_46603;
    int32_t local_tid_46604;
    int64_t group_sizze_46607;
    int32_t wave_sizze_46606;
    int32_t group_tid_46605;
    
    global_tid_46603 = get_global_id(0);
    local_tid_46604 = get_local_id(0);
    group_sizze_46607 = get_local_size(0);
    wave_sizze_46606 = LOCKSTEP_WIDTH;
    group_tid_46605 = get_group_id(0);
    
    int32_t phys_tid_41640;
    
    phys_tid_41640 = group_tid_46605;
    
    int32_t ltid_pre_46608;
    
    ltid_pre_46608 = local_tid_46604;
    
    int64_t gtid_41633;
    
    gtid_41633 = sext_i32_i64(group_tid_46605);
    
    int32_t x_41883;
    
    x_41883 = ((__global int32_t *) defunc_4_map_res_mem_45177)[gtid_41633];
    
    int32_t x_41884 = ((__global
                        int32_t *) defunc_3_map_res_mem_45245)[gtid_41633];
    float x_41885 = ((__global float *) defunc_3_map_res_mem_45246)[gtid_41633];
    int32_t x_41886 = ((__global
                        int32_t *) defunc_3_map_res_mem_45244)[gtid_41633];
    float x_41887 = ((__global float *) defunc_0_f_res_mem_45279)[gtid_41633];
    int32_t y_41890 = sub32(x_41883, x_41884);
    float i32_res_41891 = sitofp_i32_f32(x_41884);
    float sqrt_res_41892;
    
    sqrt_res_41892 = futrts_sqrt32(i32_res_41891);
    
    float y_41893 = x_41885 * sqrt_res_41892;
    __local char *mem_45288;
    
    mem_45288 = (__local char *) mem_45288_backing_0;
    
    int64_t gtid_41636 = sext_i32_i64(ltid_pre_46608);
    int32_t phys_tid_41637 = local_tid_46604;
    int32_t index_primexp_42395 = sext_i64_i32(gtid_41636);
    bool cond_41906 = sle32(y_41890, index_primexp_42395);
    float defunc_0_f_res_41907;
    
    if (cond_41906) {
        defunc_0_f_res_41907 = 0.0F;
    } else {
        bool cond_41908 = index_primexp_42395 == 0;
        float defunc_0_f_res_f_res_41909;
        
        if (cond_41908) {
            defunc_0_f_res_f_res_41909 = x_41887;
        } else {
            int32_t i_41910 = add32(x_41884, index_primexp_42395);
            int64_t i_41911 = sext_i32_i64(i_41910);
            bool x_41912 = sle64((int64_t) 0, i_41911);
            bool y_41913 = slt64(i_41911, N_29165);
            bool bounds_check_41914 = x_41912 && y_41913;
            bool index_certs_41915;
            
            if (!bounds_check_41914) {
                {
                    if (atomic_cmpxchg_i32_global(global_failure, -1, 25) ==
                        -1) {
                        global_failure_args[0] = i_41911;
                        global_failure_args[1] = N_29165;
                        ;
                    }
                    local_failure = true;
                    goto error_0;
                }
            }
            
            float x_41916 = ((__global
                              float *) defunc_4_map_res_mem_45178)[gtid_41633 *
                                                                   N_29165 +
                                                                   i_41911];
            int32_t x_41917 = sub32(x_41884, x_41886);
            int32_t i_41918 = add32(x_41917, index_primexp_42395);
            int64_t i_41919 = sext_i32_i64(i_41918);
            bool x_41920 = sle64((int64_t) 0, i_41919);
            bool y_41921 = slt64(i_41919, N_29165);
            bool bounds_check_41922 = x_41920 && y_41921;
            bool index_certs_41923;
            
            if (!bounds_check_41922) {
                {
                    if (atomic_cmpxchg_i32_global(global_failure, -1, 26) ==
                        -1) {
                        global_failure_args[0] = i_41919;
                        global_failure_args[1] = N_29165;
                        ;
                    }
                    local_failure = true;
                    goto error_0;
                }
            }
            
            float y_41924 = ((__global
                              float *) defunc_4_map_res_mem_45178)[gtid_41633 *
                                                                   N_29165 +
                                                                   i_41919];
            float defunc_0_f_res_f_res_f_res_41925 = x_41916 - y_41924;
            
            defunc_0_f_res_f_res_41909 = defunc_0_f_res_f_res_f_res_41925;
        }
        defunc_0_f_res_41907 = defunc_0_f_res_f_res_41909;
    }
    ((__local float *) mem_45288)[gtid_41636] = defunc_0_f_res_41907;
    
  error_0:
    barrier(CLK_LOCAL_MEM_FENCE);
    if (local_failure)
        return;
    barrier(CLK_LOCAL_MEM_FENCE);
    
    int64_t dims_flat_46609;
    
    dims_flat_46609 = iota32_arg_29597;
    
    float x_41902;
    float x_41903;
    float x_46611;
    float x_46612;
    bool ltid_in_bounds_46614;
    
    ltid_in_bounds_46614 = slt64(sext_i32_i64(local_tid_46604),
                                 iota32_arg_29597);
    
    int32_t skip_threads_46615;
    
    // read input for in-block scan
    {
        if (ltid_in_bounds_46614) {
            x_41903 = ((volatile __local
                        float *) mem_45288)[sext_i32_i64(local_tid_46604)];
            if ((local_tid_46604 - squot32(local_tid_46604, 32) * 32) == 0) {
                x_41902 = x_41903;
            }
        }
    }
    // in-block scan (hopefully no barriers needed)
    {
        skip_threads_46615 = 1;
        while (slt32(skip_threads_46615, 32)) {
            if (sle32(skip_threads_46615, local_tid_46604 -
                      squot32(local_tid_46604, 32) * 32) &&
                ltid_in_bounds_46614) {
                // read operands
                {
                    x_41902 = ((volatile __local
                                float *) mem_45288)[sext_i32_i64(local_tid_46604) -
                                                    sext_i32_i64(skip_threads_46615)];
                }
                // perform operation
                {
                    bool inactive_46616 =
                         slt64(srem64(sext_i32_i64(local_tid_46604),
                                      iota32_arg_29597),
                               sext_i32_i64(local_tid_46604) -
                               sext_i32_i64(local_tid_46604 -
                               skip_threads_46615));
                    
                    if (inactive_46616) {
                        x_41902 = x_41903;
                    }
                    if (!inactive_46616) {
                        float defunc_1_op_res_41904 = x_41902 + x_41903;
                        
                        x_41902 = defunc_1_op_res_41904;
                    }
                }
            }
            if (sle32(wave_sizze_46606, skip_threads_46615)) {
                barrier(CLK_LOCAL_MEM_FENCE);
            }
            if (sle32(skip_threads_46615, local_tid_46604 -
                      squot32(local_tid_46604, 32) * 32) &&
                ltid_in_bounds_46614) {
                // write result
                {
                    ((volatile __local
                      float *) mem_45288)[sext_i32_i64(local_tid_46604)] =
                        x_41902;
                    x_41903 = x_41902;
                }
            }
            if (sle32(wave_sizze_46606, skip_threads_46615)) {
                barrier(CLK_LOCAL_MEM_FENCE);
            }
            skip_threads_46615 *= 2;
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // last thread of block 'i' writes its result to offset 'i'
    {
        if ((local_tid_46604 - squot32(local_tid_46604, 32) * 32) == 31 &&
            ltid_in_bounds_46614) {
            ((volatile __local
              float *) mem_45288)[sext_i32_i64(squot32(local_tid_46604, 32))] =
                x_41902;
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // scan the first block, after which offset 'i' contains carry-in for block 'i+1'
    {
        int32_t skip_threads_46617;
        
        // read input for in-block scan
        {
            if (squot32(local_tid_46604, 32) == 0 && ltid_in_bounds_46614) {
                x_46612 = ((volatile __local
                            float *) mem_45288)[sext_i32_i64(local_tid_46604)];
                if ((local_tid_46604 - squot32(local_tid_46604, 32) * 32) ==
                    0) {
                    x_46611 = x_46612;
                }
            }
        }
        // in-block scan (hopefully no barriers needed)
        {
            skip_threads_46617 = 1;
            while (slt32(skip_threads_46617, 32)) {
                if (sle32(skip_threads_46617, local_tid_46604 -
                          squot32(local_tid_46604, 32) * 32) &&
                    (squot32(local_tid_46604, 32) == 0 &&
                     ltid_in_bounds_46614)) {
                    // read operands
                    {
                        x_46611 = ((volatile __local
                                    float *) mem_45288)[sext_i32_i64(local_tid_46604) -
                                                        sext_i32_i64(skip_threads_46617)];
                    }
                    // perform operation
                    {
                        bool inactive_46618 =
                             slt64(srem64(sext_i32_i64(local_tid_46604 * 32 +
                                          32 - 1), iota32_arg_29597),
                                   sext_i32_i64(local_tid_46604 * 32 + 32 - 1) -
                                   sext_i32_i64((local_tid_46604 -
                                                 skip_threads_46617) * 32 + 32 -
                                   1));
                        
                        if (inactive_46618) {
                            x_46611 = x_46612;
                        }
                        if (!inactive_46618) {
                            float defunc_1_op_res_46613 = x_46611 + x_46612;
                            
                            x_46611 = defunc_1_op_res_46613;
                        }
                    }
                }
                if (sle32(wave_sizze_46606, skip_threads_46617)) {
                    barrier(CLK_LOCAL_MEM_FENCE);
                }
                if (sle32(skip_threads_46617, local_tid_46604 -
                          squot32(local_tid_46604, 32) * 32) &&
                    (squot32(local_tid_46604, 32) == 0 &&
                     ltid_in_bounds_46614)) {
                    // write result
                    {
                        ((volatile __local
                          float *) mem_45288)[sext_i32_i64(local_tid_46604)] =
                            x_46611;
                        x_46612 = x_46611;
                    }
                }
                if (sle32(wave_sizze_46606, skip_threads_46617)) {
                    barrier(CLK_LOCAL_MEM_FENCE);
                }
                skip_threads_46617 *= 2;
            }
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // carry-in for every block except the first
    {
        if (!(squot32(local_tid_46604, 32) == 0 || !ltid_in_bounds_46614)) {
            // read operands
            {
                x_41903 = x_41902;
                x_41902 = ((__local
                            float *) mem_45288)[sext_i32_i64(squot32(local_tid_46604,
                                                                     32)) -
                                                (int64_t) 1];
            }
            // perform operation
            {
                bool inactive_46619 =
                     slt64(srem64(sext_i32_i64(local_tid_46604),
                                  iota32_arg_29597),
                           sext_i32_i64(local_tid_46604) -
                           sext_i32_i64(squot32(local_tid_46604, 32) * 32 - 1));
                
                if (inactive_46619) {
                    x_41902 = x_41903;
                }
                if (!inactive_46619) {
                    float defunc_1_op_res_41904 = x_41902 + x_41903;
                    
                    x_41902 = defunc_1_op_res_41904;
                }
            }
            // write final result
            {
                ((__local float *) mem_45288)[sext_i32_i64(local_tid_46604)] =
                    x_41902;
            }
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // restore correct values for first block
    {
        if (squot32(local_tid_46604, 32) == 0) {
            ((__local float *) mem_45288)[sext_i32_i64(local_tid_46604)] =
                x_41903;
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    
    bool acc0_41931;
    int32_t acc0_41932;
    float acc0_41933;
    int64_t gtid_41638 = sext_i32_i64(ltid_pre_46608);
    int32_t phys_tid_41639 = local_tid_46604;
    __local char *red_arr_mem_46620;
    
    red_arr_mem_46620 = (__local char *) red_arr_mem_46620_backing_1;
    
    __local char *red_arr_mem_46622;
    
    red_arr_mem_46622 = (__local char *) red_arr_mem_46622_backing_2;
    
    __local char *red_arr_mem_46624;
    
    red_arr_mem_46624 = (__local char *) red_arr_mem_46624_backing_3;
    
    float x_41948;
    
    x_41948 = ((__local float *) mem_45288)[gtid_41638];
    
    float x_41949 = ((__global float *) mem_45284)[gtid_41638];
    int32_t index_primexp_42398 = sext_i64_i32(gtid_41638);
    float defunc_0_f_res_41952 = x_41948 / y_41893;
    bool cond_41953 = slt32(index_primexp_42398, y_41890);
    bool isnan_res_41954;
    
    isnan_res_41954 = futrts_isnan32(defunc_0_f_res_41952);
    
    bool cond_t_res_41955 = !isnan_res_41954;
    bool x_41956 = cond_41953 && cond_t_res_41955;
    float abs_res_41957 = (float) fabs(defunc_0_f_res_41952);
    bool defunc_2_f_res_t_res_41958 = x_41949 < abs_res_41957;
    bool x_41959 = x_41956 && defunc_2_f_res_t_res_41958;
    float defunc_1_f_res_41960;
    
    if (cond_41953) {
        defunc_1_f_res_41960 = defunc_0_f_res_41952;
    } else {
        defunc_1_f_res_41960 = 0.0F;
    }
    ((__local bool *) red_arr_mem_46620)[gtid_41638] = x_41959;
    ((__local int32_t *) red_arr_mem_46622)[gtid_41638] = index_primexp_42398;
    ((__local float *) red_arr_mem_46624)[gtid_41638] = defunc_1_f_res_41960;
    barrier(CLK_LOCAL_MEM_FENCE);
    
    int32_t offset_46626;
    int32_t skip_waves_46627;
    
    skip_waves_46627 = 1;
    
    bool x_41934;
    int32_t x_41935;
    float x_41936;
    bool x_41937;
    int32_t x_41938;
    float x_41939;
    
    offset_46626 = 0;
    // participating threads read initial accumulator
    {
        if (slt32(local_tid_46604, sext_i64_i32(iota32_arg_29597))) {
            x_41934 = ((__local
                        bool *) red_arr_mem_46620)[sext_i32_i64(local_tid_46604 +
                                                   offset_46626)];
            x_41935 = ((__local
                        int32_t *) red_arr_mem_46622)[sext_i32_i64(local_tid_46604 +
                                                      offset_46626)];
            x_41936 = ((__local
                        float *) red_arr_mem_46624)[sext_i32_i64(local_tid_46604 +
                                                    offset_46626)];
        }
    }
    offset_46626 = 1;
    while (slt32(offset_46626, wave_sizze_46606)) {
        if (slt32(local_tid_46604 + offset_46626,
                  sext_i64_i32(iota32_arg_29597)) && ((local_tid_46604 -
                                                       squot32(local_tid_46604,
                                                               wave_sizze_46606) *
                                                       wave_sizze_46606) & (2 *
                                                                            offset_46626 -
                                                                            1)) ==
            0) {
            // read array element
            {
                x_41937 = ((volatile __local
                            bool *) red_arr_mem_46620)[sext_i32_i64(local_tid_46604 +
                                                       offset_46626)];
                x_41938 = ((volatile __local
                            int32_t *) red_arr_mem_46622)[sext_i32_i64(local_tid_46604 +
                                                          offset_46626)];
                x_41939 = ((volatile __local
                            float *) red_arr_mem_46624)[sext_i32_i64(local_tid_46604 +
                                                        offset_46626)];
            }
            // apply reduction operation
            {
                bool defunc_1_op_res_41940;
                int32_t defunc_1_op_res_41941;
                
                if (x_41934) {
                    defunc_1_op_res_41940 = x_41934;
                    defunc_1_op_res_41941 = x_41935;
                } else {
                    bool x_41942 = x_41937 && x_41937;
                    bool x_41943 = !x_41937;
                    bool y_41944 = x_41934 && x_41943;
                    bool defunc_1_op_res_f_res_41945 = x_41942 || y_41944;
                    int32_t defunc_1_op_res_f_res_41946;
                    
                    if (x_41937) {
                        defunc_1_op_res_f_res_41946 = x_41938;
                    } else {
                        defunc_1_op_res_f_res_41946 = x_41935;
                    }
                    defunc_1_op_res_41940 = defunc_1_op_res_f_res_41945;
                    defunc_1_op_res_41941 = defunc_1_op_res_f_res_41946;
                }
                
                float defunc_1_op_res_41947 = x_41936 + x_41939;
                
                x_41934 = defunc_1_op_res_41940;
                x_41935 = defunc_1_op_res_41941;
                x_41936 = defunc_1_op_res_41947;
            }
            // write result of operation
            {
                ((volatile __local
                  bool *) red_arr_mem_46620)[sext_i32_i64(local_tid_46604)] =
                    x_41934;
                ((volatile __local
                  int32_t *) red_arr_mem_46622)[sext_i32_i64(local_tid_46604)] =
                    x_41935;
                ((volatile __local
                  float *) red_arr_mem_46624)[sext_i32_i64(local_tid_46604)] =
                    x_41936;
            }
        }
        offset_46626 *= 2;
    }
    while (slt32(skip_waves_46627, squot32(sext_i64_i32(iota32_arg_29597) +
                                           wave_sizze_46606 - 1,
                                           wave_sizze_46606))) {
        barrier(CLK_LOCAL_MEM_FENCE);
        offset_46626 = skip_waves_46627 * wave_sizze_46606;
        if (slt32(local_tid_46604 + offset_46626,
                  sext_i64_i32(iota32_arg_29597)) && ((local_tid_46604 -
                                                       squot32(local_tid_46604,
                                                               wave_sizze_46606) *
                                                       wave_sizze_46606) == 0 &&
                                                      (squot32(local_tid_46604,
                                                               wave_sizze_46606) &
                                                       (2 * skip_waves_46627 -
                                                        1)) == 0)) {
            // read array element
            {
                x_41937 = ((__local
                            bool *) red_arr_mem_46620)[sext_i32_i64(local_tid_46604 +
                                                       offset_46626)];
                x_41938 = ((__local
                            int32_t *) red_arr_mem_46622)[sext_i32_i64(local_tid_46604 +
                                                          offset_46626)];
                x_41939 = ((__local
                            float *) red_arr_mem_46624)[sext_i32_i64(local_tid_46604 +
                                                        offset_46626)];
            }
            // apply reduction operation
            {
                bool defunc_1_op_res_41940;
                int32_t defunc_1_op_res_41941;
                
                if (x_41934) {
                    defunc_1_op_res_41940 = x_41934;
                    defunc_1_op_res_41941 = x_41935;
                } else {
                    bool x_41942 = x_41937 && x_41937;
                    bool x_41943 = !x_41937;
                    bool y_41944 = x_41934 && x_41943;
                    bool defunc_1_op_res_f_res_41945 = x_41942 || y_41944;
                    int32_t defunc_1_op_res_f_res_41946;
                    
                    if (x_41937) {
                        defunc_1_op_res_f_res_41946 = x_41938;
                    } else {
                        defunc_1_op_res_f_res_41946 = x_41935;
                    }
                    defunc_1_op_res_41940 = defunc_1_op_res_f_res_41945;
                    defunc_1_op_res_41941 = defunc_1_op_res_f_res_41946;
                }
                
                float defunc_1_op_res_41947 = x_41936 + x_41939;
                
                x_41934 = defunc_1_op_res_41940;
                x_41935 = defunc_1_op_res_41941;
                x_41936 = defunc_1_op_res_41947;
            }
            // write result of operation
            {
                ((__local
                  bool *) red_arr_mem_46620)[sext_i32_i64(local_tid_46604)] =
                    x_41934;
                ((__local
                  int32_t *) red_arr_mem_46622)[sext_i32_i64(local_tid_46604)] =
                    x_41935;
                ((__local
                  float *) red_arr_mem_46624)[sext_i32_i64(local_tid_46604)] =
                    x_41936;
            }
        }
        skip_waves_46627 *= 2;
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    acc0_41931 = ((__local bool *) red_arr_mem_46620)[(int64_t) 0];
    acc0_41932 = ((__local int32_t *) red_arr_mem_46622)[(int64_t) 0];
    acc0_41933 = ((__local float *) red_arr_mem_46624)[(int64_t) 0];
    
    bool x_41963 = acc0_41931 && acc0_41931;
    int32_t defunc_1_op_res_f_res_41967;
    
    if (acc0_41931) {
        defunc_1_op_res_f_res_41967 = acc0_41932;
    } else {
        defunc_1_op_res_f_res_41967 = -1;
    }
    
    bool cond_41973 = y_41890 == 0;
    float defunc_0_f_res_41974;
    
    if (cond_41973) {
        defunc_0_f_res_41974 = 0.0F;
    } else {
        float i32_res_41975 = sitofp_i32_f32(y_41890);
        float defunc_0_f_res_f_res_41976 = acc0_41933 / i32_res_41975;
        
        defunc_0_f_res_41974 = defunc_0_f_res_f_res_41976;
    }
    
    bool cond_41977 = !x_41963;
    int32_t fst_breakzq_41978;
    
    if (cond_41977) {
        fst_breakzq_41978 = -1;
    } else {
        bool cond_41979 = slt32(defunc_1_op_res_f_res_41967, y_41890);
        int32_t adjustValInds_res_41980;
        
        if (cond_41979) {
            int32_t i_41981 = add32(x_41884, defunc_1_op_res_f_res_41967);
            int64_t i_41982 = sext_i32_i64(i_41981);
            bool x_41983 = sle64((int64_t) 0, i_41982);
            bool y_41984 = slt64(i_41982, N_29165);
            bool bounds_check_41985 = x_41983 && y_41984;
            bool index_certs_41986;
            
            if (!bounds_check_41985) {
                {
                    if (atomic_cmpxchg_i32_global(global_failure, -1, 27) ==
                        -1) {
                        global_failure_args[0] = i_41982;
                        global_failure_args[1] = N_29165;
                        ;
                    }
                    local_failure = true;
                    goto error_3;
                }
            }
            
            int32_t x_41987 = ((__global
                                int32_t *) defunc_4_map_res_mem_45179)[gtid_41633 *
                                                                       N_29165 +
                                                                       i_41982];
            int32_t adjustValInds_res_t_res_41988 = sub32(x_41987, n_29169);
            
            adjustValInds_res_41980 = adjustValInds_res_t_res_41988;
        } else {
            adjustValInds_res_41980 = -1;
        }
        fst_breakzq_41978 = adjustValInds_res_41980;
    }
    
    bool cond_41989 = sle32(x_41884, 5);
    bool cond_f_res_41990 = sle32(y_41890, 5);
    bool x_41991 = !cond_41989;
    bool y_41992 = cond_f_res_41990 && x_41991;
    bool cond_41993 = cond_41989 || y_41992;
    int32_t fst_breakzq_41994;
    
    if (cond_41993) {
        fst_breakzq_41994 = -2;
    } else {
        fst_breakzq_41994 = fst_breakzq_41978;
    }
    if (local_tid_46604 == 0) {
        ((__global int32_t *) mem_45291)[gtid_41633] = fst_breakzq_41994;
    }
    if (local_tid_46604 == 0) {
        ((__global float *) mem_45293)[gtid_41633] = defunc_0_f_res_41974;
    }
    
  error_3:
    return;
}
// Intra-group kernel that accumulates, for each output element (z, y, x),
//
//     sum over i in [0, n_29169) of
//         mem_44393[i * i32_res_29181 + y] * mem_44397[i * i32_res_29181 + x]
//         * (0.0F if mem_44468[i * m_29166 + z] is NaN, else 1.0F)
//
// and stores it at mem_44528[z * i32_res_29181 * i32_res_29181
//                            + y * i32_res_29181 + x].
//
// Work decomposition, as visible in the body:
//   * the flat group id is split into (gid_zz, gid_y, gid_x) using
//     gridDim_y_42532 and gridDim_x_42531;
//   * a group accumulates 30 consecutive z indices starting at 30 * gid_zz
//     and a patch of (y, x) indices starting at (Ty * gid_y, Tx * gid_x);
//   * every work-item owns one (y, x) pair and keeps 30 partial sums (one per
//     z index of the tile) in a private array;
//   * per outer iteration i, the 30 values of mem_44468 needed by the group
//     are first staged in local memory, then consumed by all work-items.
//
// Parameters:
//   global_failure              - read once on entry; the kernel returns
//                                 immediately when the value is >= 0.
//   mem_44480_backing_aligned_0 - local memory; used as a float array of which
//                                 only indices 0..29 are written and read.
//   m_29166                     - exclusive upper bound for z indices and the
//                                 row stride used to index mem_44468.
//   n_29169                     - trip count of the outer accumulation loop.
//   i32_res_29181               - exclusive upper bound for y and x indices;
//                                 row stride for mem_44393 / mem_44397 and
//                                 the stride base for mem_44528.
//   Ty_42529, Tx_42530          - divisors used to split the local id and to
//                                 scale the group coordinates.
//   gridDim_x_42531,
//   gridDim_y_42532             - divisors used to split the flat group id.
//   group_sizze_tile3d_42536    - stride between successive staging chunks.
//   count_shmem_42537           - number of staging chunks per outer iteration.
//   mem_44393, mem_44397,
//   mem_44468                   - buffers that are only read here (as float).
//   mem_44528                   - buffer that is only written here (as float).
//
// NOTE(review): the accumulation phase derives (y, x) from local_tid / Tx and
// the z origin from 30 * gid_zz, while the write-back phase uses a 3D split of
// local_tid and adds local_tid / (Ty * Tx) to gid_zz. The two agree only when
// local_tid / (Ty * Tx) is 0 - presumably the group size is Ty * Tx; confirm
// against the host-side launch code.
__kernel void mainzisegmap_intragroup_42541(__global int *global_failure,
                                            __local volatile
                                            int64_t *mem_44480_backing_aligned_0,
                                            int64_t m_29166, int32_t n_29169,
                                            int64_t i32_res_29181,
                                            int64_t Ty_42529, int64_t Tx_42530,
                                            int64_t gridDim_x_42531,
                                            int64_t gridDim_y_42532,
                                            int64_t group_sizze_tile3d_42536,
                                            int64_t count_shmem_42537, __global
                                            unsigned char *mem_44393, __global
                                            unsigned char *mem_44397, __global
                                            unsigned char *mem_44468, __global
                                            unsigned char *mem_44528)
{
    // These three constants are not referenced anywhere below.
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    // Byte-addressed view of the local memory argument.
    __local volatile char *restrict mem_44480_backing_0 = (__local volatile
                                                           char *) mem_44480_backing_aligned_0;
    
    // Skip all work when a failure code has already been recorded.
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_45721;
    int32_t local_tid_45722;
    int64_t group_sizze_45725;
    int32_t wave_sizze_45724;
    int32_t group_tid_45723;
    
    // Work-item identification; only local_tid and group_tid are used later.
    global_tid_45721 = get_global_id(0);
    local_tid_45722 = get_local_id(0);
    group_sizze_45725 = get_local_size(0);
    wave_sizze_45724 = LOCKSTEP_WIDTH;
    group_tid_45723 = get_group_id(0);
    
    int32_t gid_flat_42541;
    
    gid_flat_42541 = group_tid_45723;
    
    // 3D split of the local id with divisors (Ty * Tx) and Tx:
    //   ltid_pre_45726 = local_tid quot (Ty * Tx)
    //   ltid_pre_45727 = (local_tid minus the part above) quot Tx
    //   ltid_pre_45728 = what remains after removing both parts
    // squot32 is a helper defined outside this chunk - presumably signed
    // truncating division.
    int32_t ltid_pre_45726;
    
    ltid_pre_45726 = squot32(local_tid_45722, sext_i64_i32(Ty_42529) *
                             sext_i64_i32(Tx_42530));
    
    int32_t ltid_pre_45727;
    
    ltid_pre_45727 = squot32(local_tid_45722 - squot32(local_tid_45722,
                                                       sext_i64_i32(Ty_42529) *
                                                       sext_i64_i32(Tx_42530)) *
                             (sext_i64_i32(Ty_42529) * sext_i64_i32(Tx_42530)),
                             sext_i64_i32(Tx_42530));
    
    int32_t ltid_pre_45728;
    
    ltid_pre_45728 = local_tid_45722 - squot32(local_tid_45722,
                                               sext_i64_i32(Ty_42529) *
                                               sext_i64_i32(Tx_42530)) *
        (sext_i64_i32(Ty_42529) * sext_i64_i32(Tx_42530)) -
        squot32(local_tid_45722 - squot32(local_tid_45722,
                                          sext_i64_i32(Ty_42529) *
                                          sext_i64_i32(Tx_42530)) *
                (sext_i64_i32(Ty_42529) * sext_i64_i32(Tx_42530)),
                sext_i64_i32(Tx_42530)) * sext_i64_i32(Tx_42530);
    
    // 2D split of the local id with divisor Tx:
    //   ltid_pre_45729 = local_tid quot Tx, ltid_pre_45730 = the remainder.
    // These are the y / x coordinates used by the accumulation phase.
    int32_t ltid_pre_45729;
    
    ltid_pre_45729 = squot32(local_tid_45722, sext_i64_i32(Tx_42530));
    
    int32_t ltid_pre_45730;
    
    ltid_pre_45730 = local_tid_45722 - squot32(local_tid_45722,
                                               sext_i64_i32(Tx_42530)) *
        sext_i64_i32(Tx_42530);
    
    // Flat (1D) local id, used when staging data into local memory.
    int32_t ltid_pre_45731;
    
    ltid_pre_45731 = local_tid_45722;
    
    // Split of the flat group id into (gid_zz, gid_y, gid_x) with divisors
    // (gridDim_y * gridDim_x) and gridDim_x, mirroring the local id split.
    int64_t gid_zz_42540;
    
    gid_zz_42540 = squot64(sext_i32_i64(group_tid_45723), gridDim_y_42532 *
                           gridDim_x_42531);
    
    int64_t gid_y_42539;
    
    gid_y_42539 = squot64(sext_i32_i64(group_tid_45723) -
                          squot64(sext_i32_i64(group_tid_45723),
                                  gridDim_y_42532 * gridDim_x_42531) *
                          (gridDim_y_42532 * gridDim_x_42531), gridDim_x_42531);
    
    int64_t gid_x_42538;
    
    gid_x_42538 = sext_i32_i64(group_tid_45723) -
        squot64(sext_i32_i64(group_tid_45723), gridDim_y_42532 *
                gridDim_x_42531) * (gridDim_y_42532 * gridDim_x_42531) -
        squot64(sext_i32_i64(group_tid_45723) -
                squot64(sext_i32_i64(group_tid_45723), gridDim_y_42532 *
                        gridDim_x_42531) * (gridDim_y_42532 * gridDim_x_42531),
                gridDim_x_42531) * gridDim_x_42531;
    
    // Origin of this group's tile: 30 z indices per group, Ty and Tx steps in
    // the y and x directions.
    int64_t ii_42542;
    
    ii_42542 = (int64_t) 30 * gid_zz_42540;
    
    int64_t jj1_42543 = Ty_42529 * gid_y_42539;
    int64_t jj2_42544 = Tx_42530 * gid_x_42538;
    float mem_44478[30];
    int64_t ltid_y_42547 = sext_i32_i64(ltid_pre_45729);
    int64_t ltid_x_42545 = sext_i32_i64(ltid_pre_45730);
    int32_t ltid_flat_42546 = local_tid_45722;
    float mem_44472[30];
    
    // Zero the 30 private partial sums, then copy them into mem_44478.
    for (int32_t i_44270 = 0; i_44270 < 30; i_44270++) {
        int64_t i_42555 = sext_i32_i64(i_44270);
        
        mem_44472[i_42555] = 0.0F;
    }
    for (int64_t i_45733 = 0; i_45733 < (int64_t) 30; i_45733++) {
        mem_44478[i_45733] = mem_44472[i_45733];
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    
    // Local memory buffer that holds the staged values; accessed as float.
    __local char *mem_44480;
    
    mem_44480 = (__local char *) mem_44480_backing_0;
    
    float loop_mem_44510[30];
    float mem_param_44481[30];
    
    // mem_param_44481 is the loop-carried accumulator of the outer loop.
    for (int32_t i_1 = 0; i_1 < 30; i_1++)
        mem_param_44481[i_1] = mem_44478[i_1];
    // Outer accumulation loop: one iteration per index i in [0, n_29169).
    for (int32_t i_44272 = 0; i_44272 < n_29169; i_44272++) {
        int64_t i_42560 = sext_i32_i64(i_44272);
        
        // Staging phase: cooperatively copy mem_44468[i * m + (ii + k)] for
        // k in 0..29 into local memory. Chunk number i_42563 handles local
        // indices local_tid + group_sizze_tile3d * i_42563.
        for (int64_t i_42563 = 0; i_42563 < count_shmem_42537; i_42563++) {
            int64_t offs_42576 = group_sizze_tile3d_42536 * i_42563;
            int64_t ltid_42566 = sext_i32_i64(ltid_pre_45731);
            int32_t ltid_flat_42565 = local_tid_45722;
            int64_t loc_ind_42577 = ltid_42566 + offs_42576;
            int64_t gtid_42578 = ii_42542 + loc_ind_42577;
            bool cond_42579 = slt64(gtid_42578, m_29166);
            float y_elem_42580;
            
            // Out-of-range z indices are staged as 0.0F instead of being read.
            if (cond_42579) {
                float Y_elem_42582 = ((__global float *) mem_44468)[i_42560 *
                                                                    m_29166 +
                                                                    gtid_42578];
                
                y_elem_42580 = Y_elem_42582;
            } else {
                y_elem_42580 = 0.0F;
            }
            
            // Local indices of 30 or more are mapped to -1 so that the guarded
            // store below is skipped for them.
            bool cond_42584 = slt64(loc_ind_42577, (int64_t) 30);
            int64_t y_loc_ind_42585;
            
            if (cond_42584) {
                y_loc_ind_42585 = loc_ind_42577;
            } else {
                y_loc_ind_42585 = (int64_t) -1;
            }
            if (sle64((int64_t) 0, y_loc_ind_42585) && slt64(y_loc_ind_42585,
                                                             (int64_t) 30)) {
                ((__local float *) mem_44480)[y_loc_ind_42585] = y_elem_42580;
            }
            // Work-group barrier after every chunk; the staged values are read
            // by other work-items in the compute phase below.
            barrier(CLK_LOCAL_MEM_FENCE);
        }
        
        float mem_44509[30];
        int64_t ltid_y_42591 = sext_i32_i64(ltid_pre_45729);
        int64_t ltid_x_42589 = sext_i32_i64(ltid_pre_45730);
        int32_t ltid_flat_42590 = local_tid_45722;
        // Global (y, x) pair owned by this work-item.
        int64_t gtid_42618 = jj1_42543 + ltid_y_42591;
        int64_t gtid_42619 = jj2_42544 + ltid_x_42589;
        bool binop_x_42621 = slt64(gtid_42618, i32_res_29181);
        bool binop_y_42622 = slt64(gtid_42619, i32_res_29181);
        bool cond_42623 = binop_x_42621 && binop_y_42622;
        float mem_45450[30];
        
        // Compute phase: only work-items whose (y, x) lies inside the
        // i32_res x i32_res range update their partial sums; the others carry
        // the accumulator over unchanged (else branch).
        if (cond_42623) {
            float x_42626 = ((__global float *) mem_44393)[i_42560 *
                                                           i32_res_29181 +
                                                           gtid_42618];
            float x_42628 = ((__global float *) mem_44397)[i_42560 *
                                                           i32_res_29181 +
                                                           gtid_42619];
            
            // One update per z slot of the tile, skipping slots whose global z
            // index is not below m_29166.
            for (int32_t i_44271 = 0; i_44271 < 30; i_44271++) {
                int64_t i_42630 = sext_i32_i64(i_44271);
                int64_t gtid_42632 = ii_42542 + i_42630;
                bool cond_42633 = slt64(gtid_42632, m_29166);
                
                if (cond_42633) {
                    float inp_reg_var2zz_42635 = ((__local
                                                   float *) mem_44480)[i_42630];
                    float res_reg_var2zz_42636 = mem_param_44481[i_42630];
                    float x_42640 = x_42626 * x_42628;
                    bool isnan_res_42641;
                    
                    // futrts_isnan32 is defined outside this chunk; presumably
                    // a NaN test on a 32-bit float.
                    isnan_res_42641 = futrts_isnan32(inp_reg_var2zz_42635);
                    
                    // Weight of this term: 0.0F when the staged value tests as
                    // NaN, 1.0F otherwise.
                    float y_42642;
                    
                    if (isnan_res_42641) {
                        y_42642 = 0.0F;
                    } else {
                        y_42642 = 1.0F;
                    }
                    
                    float defunc_2_f_res_42643 = x_42640 * y_42642;
                    float defunc_1_op_res_42647 = res_reg_var2zz_42636 +
                          defunc_2_f_res_42643;
                    
                    mem_param_44481[i_42630] = defunc_1_op_res_42647;
                }
            }
            for (int64_t i_45739 = 0; i_45739 < (int64_t) 30; i_45739++) {
                mem_45450[i_45739] = mem_param_44481[i_45739];
            }
        } else {
            for (int64_t i_45740 = 0; i_45740 < (int64_t) 30; i_45740++) {
                mem_45450[i_45740] = mem_param_44481[i_45740];
            }
        }
        for (int64_t i_45741 = 0; i_45741 < (int64_t) 30; i_45741++) {
            mem_44509[i_45741] = mem_45450[i_45741];
        }
        // Work-group barrier between the local memory reads above and the
        // staging writes of the next outer iteration.
        barrier(CLK_LOCAL_MEM_FENCE);
        
        // Copy the updated sums back into the loop-carried accumulator via a
        // temporary array.
        float mem_param_tmp_45734[30];
        
        for (int32_t i_2 = 0; i_2 < 30; i_2++)
            mem_param_tmp_45734[i_2] = mem_44509[i_2];
        for (int32_t i_3 = 0; i_3 < 30; i_3++)
            mem_param_44481[i_3] = mem_param_tmp_45734[i_3];
    }
    // Final value of the accumulator after the outer loop.
    for (int32_t i_4 = 0; i_4 < 30; i_4++)
        loop_mem_44510[i_4] = mem_param_44481[i_4];
    
    float mem_44524[30 * 1 * 1];
    int64_t ltid_zz_42656 = sext_i32_i64(ltid_pre_45726);
    int64_t ltid_y_42655 = sext_i32_i64(ltid_pre_45727);
    int64_t ltid_x_42653 = sext_i32_i64(ltid_pre_45728);
    int32_t ltid_flat_42654 = local_tid_45722;
    float mem_44518[30 * 1 * 1];
    
    // Two element-wise copies of the 30 results (loop_mem_44510 to mem_44518
    // to mem_44524). The inner loops have a trip count of 1, so the summed
    // indices reduce to the outermost index.
    for (int32_t i_44274 = 0; i_44274 < 30; i_44274++) {
        int64_t i_42665 = sext_i32_i64(i_44274);
        
        for (int64_t i_45743 = 0; i_45743 < (int64_t) 1; i_45743++) {
            mem_44518[i_42665 + i_45743] = loop_mem_44510[i_42665 + i_45743];
        }
    }
    for (int64_t i_45744 = 0; i_45744 < (int64_t) 30; i_45744++) {
        for (int64_t i_45745 = 0; i_45745 < (int64_t) 1; i_45745++) {
            for (int64_t i_45746 = 0; i_45746 < (int64_t) 1; i_45746++) {
                mem_44524[i_45744 + i_45745 + i_45746] = mem_44518[i_45744 +
                                                                   i_45745 +
                                                                   i_45746];
            }
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    
    // Write-back coordinates: the same 3D split of the local id as for
    // ltid_pre_45726..45728 above, recomputed in 64-bit arithmetic.
    int64_t reg_tile_i_45747 = squot64(sext_i32_i64(local_tid_45722), Ty_42529 *
                                       Tx_42530);
    int64_t reg_tile_i_45748 = squot64(sext_i32_i64(local_tid_45722) -
                                       squot64(sext_i32_i64(local_tid_45722),
                                               Ty_42529 * Tx_42530) *
                                       (Ty_42529 * Tx_42530), Tx_42530);
    int64_t reg_tile_i_45749 = sext_i32_i64(local_tid_45722) -
            squot64(sext_i32_i64(local_tid_45722), Ty_42529 * Tx_42530) *
            (Ty_42529 * Tx_42530) - squot64(sext_i32_i64(local_tid_45722) -
                                            squot64(sext_i32_i64(local_tid_45722),
                                                    Ty_42529 * Tx_42530) *
                                            (Ty_42529 * Tx_42530), Tx_42530) *
            Tx_42530;
    // First global (z, y, x) index this work-item writes.
    int64_t tile_dim_start_45750 = (int64_t) 30 * (gid_zz_42540 +
                                                   reg_tile_i_45747);
    int64_t tile_dim_start_45751 = Ty_42529 * gid_y_42539 + reg_tile_i_45748;
    int64_t tile_dim_start_45752 = Tx_42530 * gid_x_42538 + reg_tile_i_45749;
    
    // Store the 30 private results to mem_44528. Each store is guarded so that
    // z stays below m_29166 and both y and x stay below i32_res_29181. The two
    // inner loops run exactly once.
    for (int64_t nest_i_45753 = 0; nest_i_45753 < (int64_t) 30;
         nest_i_45753++) {
        for (int64_t nest_i_45754 = 0; nest_i_45754 < (int64_t) 1;
             nest_i_45754++) {
            for (int64_t nest_i_45755 = 0; nest_i_45755 < (int64_t) 1;
                 nest_i_45755++) {
                if ((slt64(tile_dim_start_45750 + nest_i_45753, m_29166) &&
                     slt64(tile_dim_start_45751 + nest_i_45754,
                           i32_res_29181)) && slt64(tile_dim_start_45752 +
                                                    nest_i_45755,
                                                    i32_res_29181)) {
                    ((__global float *) mem_44528)[(tile_dim_start_45750 +
                                                    nest_i_45753) *
                                                   (i32_res_29181 *
                                                    i32_res_29181) +
                                                   (tile_dim_start_45751 +
                                                    nest_i_45754) *
                                                   i32_res_29181 +
                                                   (tile_dim_start_45752 +
                                                    nest_i_45755)] =
                        mem_44524[nest_i_45753 + nest_i_45754 + nest_i_45755];
                }
            }
        }
    }
    
    // No goto in this kernel targets this label.
  error_4:
    return;
}
__kernel void mainzisegmap_intragroup_42694(__global int *global_failure,
                                            __local volatile
                                            int64_t *mem_44668_backing_aligned_0,
                                            __local volatile
                                            int64_t *mem_44666_backing_aligned_1,
                                            int64_t N_29165, int64_t m_29166,
                                            int64_t i32_res_29175,
                                            int64_t i32_res_29181,
                                            int64_t gridDim_x_42688,
                                            int64_t full_tiles_42719,
                                            int64_t kk_42926, __global
                                            unsigned char *images_mem_44381,
                                            __global unsigned char *mem_44393,
                                            __global unsigned char *mem_44840)
{
    #define Ty_42675 (mainziTy_42672)
    #define Ry_42676 (mainziRy_42674)
    #define Tx_42677 (mainziTx_42671)
    #define Rx_42678 (mainziRx_42673)
    #define Tk_42679 (mainziTk_42670)
    #define tk_div_tx_42680 (sdiv_up64(mainziTk_42670, mainziTx_42671))
    #define tk_div_ty_42681 (sdiv_up64(mainziTk_42670, mainziTy_42672))
    #define TxRx_42682 (mainziTx_42671 * mainziRx_42673)
    #define TyRy_42683 (mainziTy_42672 * mainziRy_42674)
    #define a_loc_szz_42685 (mainziTk_42670 * (mainziTy_42672 * mainziRy_42674))
    #define b_loc_szz_42687 (mainziRx_42673 * (mainziTx_42671 * mainziTk_42670))
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict mem_44668_backing_1 = (__local volatile
                                                           char *) mem_44668_backing_aligned_0;
    __local volatile char *restrict mem_44666_backing_0 = (__local volatile
                                                           char *) mem_44666_backing_aligned_1;
    
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_45896;
    int32_t local_tid_45897;
    int64_t group_sizze_45900;
    int32_t wave_sizze_45899;
    int32_t group_tid_45898;
    
    global_tid_45896 = get_global_id(0);
    local_tid_45897 = get_local_id(0);
    group_sizze_45900 = get_local_size(0);
    wave_sizze_45899 = LOCKSTEP_WIDTH;
    group_tid_45898 = get_group_id(0);
    
    int32_t gid_flat_42694;
    
    gid_flat_42694 = group_tid_45898;
    
    int32_t ltid_pre_45901;
    
    ltid_pre_45901 = squot32(local_tid_45897, sext_i64_i32(Tx_42677));
    
    int32_t ltid_pre_45902;
    
    ltid_pre_45902 = local_tid_45897 - squot32(local_tid_45897,
                                               sext_i64_i32(Tx_42677)) *
        sext_i64_i32(Tx_42677);
    
    int64_t gid_y_42693;
    
    gid_y_42693 = squot64(sext_i32_i64(group_tid_45898), gridDim_x_42688);
    
    int64_t gid_x_42692;
    
    gid_x_42692 = sext_i32_i64(group_tid_45898) -
        squot64(sext_i32_i64(group_tid_45898), gridDim_x_42688) *
        gridDim_x_42688;
    
    int64_t iii_42695;
    
    iii_42695 = TyRy_42683 * gid_y_42693;
    
    int64_t jjj_42696 = TxRx_42682 * gid_x_42692;
    float mem_44664[Ry_42676 * Rx_42678];
    int64_t ltid_y_42699 = sext_i32_i64(ltid_pre_45901);
    int64_t ltid_x_42697 = sext_i32_i64(ltid_pre_45902);
    int32_t ltid_flat_42698 = local_tid_45897;
    float mem_44655[Ry_42676 * Rx_42678];
    
    for (int64_t i_42710 = 0; i_42710 < Ry_42676; i_42710++) {
        for (int64_t i_42713 = 0; i_42713 < Rx_42678; i_42713++) {
            mem_44655[i_42710 * Rx_42678 + i_42713] = 0.0F;
        }
    }
    for (int64_t i_45905 = 0; i_45905 < Ry_42676; i_45905++) {
        for (int64_t i_45906 = 0; i_45906 < Rx_42678; i_45906++) {
            mem_44664[i_45905 * Rx_42678 + i_45906] = mem_44655[i_45905 *
                                                                Rx_42678 +
                                                                i_45906];
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    
    __local char *mem_44666;
    
    mem_44666 = (__local char *) mem_44666_backing_0;
    
    __local char *mem_44668;
    
    mem_44668 = (__local char *) mem_44668_backing_1;
    
    float mem_44739[Ry_42676];
    float mem_44743[Rx_42678];
    float loop_mem_44755[Ry_42676 * Rx_42678];
    float mem_param_44669[Ry_42676 * Rx_42678];
    
    for (int32_t i_2 = 0; i_2 < Ry_42676 * Rx_42678; i_2++)
        mem_param_44669[i_2] = mem_44664[i_2];
    for (int64_t i_42720 = 0; i_42720 < full_tiles_42719; i_42720++) {
        int64_t kk_42724 = Tk_42679 * i_42720;
        
        for (int64_t i_42725 = 0; i_42725 < Ry_42676; i_42725++) {
            int64_t binop_y_42748 = Ty_42675 * i_42725;
            
            for (int64_t i_42727 = 0; i_42727 < tk_div_tx_42680; i_42727++) {
                int64_t binop_y_42746 = Tx_42677 * i_42727;
                int64_t ltid_x_42729 = sext_i32_i64(ltid_pre_45901);
                int64_t ltid_y_42730 = sext_i32_i64(ltid_pre_45902);
                int32_t ltid_flat_42731 = local_tid_45897;
                int64_t k_42747 = ltid_y_42730 + binop_y_42746;
                int64_t i_42749 = ltid_x_42729 + binop_y_42748;
                int64_t gtid_42750 = iii_42695 + i_42749;
                int64_t A_col_idx_42751 = kk_42724 + k_42747;
                bool cond_42752 = slt64(gtid_42750, m_29166);
                float A_elem_42753;
                
                if (cond_42752) {
                    float A_elem_42755 = ((__global
                                           float *) images_mem_44381)[gtid_42750 *
                                                                      N_29165 +
                                                                      A_col_idx_42751];
                    
                    A_elem_42753 = A_elem_42755;
                } else {
                    A_elem_42753 = 0.0F;
                }
                
                bool cond_42757 = slt64(k_42747, Tk_42679);
                int64_t a_loc_ind_42758;
                
                if (cond_42757) {
                    int64_t binop_y_42759 = Tk_42679 * i_42749;
                    int64_t loc_fi_42760 = k_42747 + binop_y_42759;
                    
                    a_loc_ind_42758 = loc_fi_42760;
                } else {
                    a_loc_ind_42758 = (int64_t) -1;
                }
                if (sle64((int64_t) 0, a_loc_ind_42758) &&
                    slt64(a_loc_ind_42758, a_loc_szz_42685)) {
                    ((__local float *) mem_44666)[a_loc_ind_42758] =
                        A_elem_42753;
                }
                barrier(CLK_LOCAL_MEM_FENCE);
            }
        }
        for (int64_t i_42765 = 0; i_42765 < tk_div_ty_42681; i_42765++) {
            int64_t binop_y_42786 = Ty_42675 * i_42765;
            
            for (int64_t i_42767 = 0; i_42767 < Rx_42678; i_42767++) {
                int64_t binop_y_42788 = Tx_42677 * i_42767;
                int64_t ltid_x_42769 = sext_i32_i64(ltid_pre_45901);
                int64_t ltid_y_42770 = sext_i32_i64(ltid_pre_45902);
                int32_t ltid_flat_42771 = local_tid_45897;
                int64_t k_42787 = ltid_x_42769 + binop_y_42786;
                int64_t j_42789 = ltid_y_42770 + binop_y_42788;
                int64_t gtid_42790 = jjj_42696 + j_42789;
                int64_t B_row_idx_42791 = kk_42724 + k_42787;
                bool cond_42792 = slt64(gtid_42790, i32_res_29181);
                float B_elem_42793;
                
                if (cond_42792) {
                    float B_elem_42795 = ((__global
                                           float *) mem_44393)[B_row_idx_42791 *
                                                               i32_res_29181 +
                                                               gtid_42790];
                    
                    B_elem_42793 = B_elem_42795;
                } else {
                    B_elem_42793 = 0.0F;
                }
                
                bool cond_42797 = slt64(k_42787, Tk_42679);
                int64_t b_loc_ind_42798;
                
                if (cond_42797) {
                    int64_t binop_y_42799 = TxRx_42682 * k_42787;
                    int64_t loc_fi_42800 = j_42789 + binop_y_42799;
                    
                    b_loc_ind_42798 = loc_fi_42800;
                } else {
                    b_loc_ind_42798 = (int64_t) -1;
                }
                if (sle64((int64_t) 0, b_loc_ind_42798) &&
                    slt64(b_loc_ind_42798, b_loc_szz_42687)) {
                    ((__local float *) mem_44668)[b_loc_ind_42798] =
                        B_elem_42793;
                }
                barrier(CLK_LOCAL_MEM_FENCE);
            }
        }
        
        float loop_mem_44754[Ry_42676 * Rx_42678];
        float mem_param_44726[Ry_42676 * Rx_42678];
        
        for (int32_t i_3 = 0; i_3 < Ry_42676 * Rx_42678; i_3++)
            mem_param_44726[i_3] = mem_param_44669[i_3];
        for (int64_t i_42805 = 0; i_42805 < Tk_42679; i_42805++) {
            int64_t binop_y_42844 = TxRx_42682 * i_42805;
            int64_t ltid_y_42809 = sext_i32_i64(ltid_pre_45901);
            int64_t ltid_x_42807 = sext_i32_i64(ltid_pre_45902);
            int32_t ltid_flat_42808 = local_tid_45897;
            float mem_44729[Ry_42676];
            float mem_44731[Rx_42678];
            int64_t binop_x_42835 = Ry_42676 * ltid_y_42809;
            
            for (int64_t i_42833 = 0; i_42833 < Ry_42676; i_42833++) {
                int64_t binop_x_42836 = i_42833 + binop_x_42835;
                int64_t binop_y_42837 = Tk_42679 * binop_x_42836;
                int64_t a_loc_ind_42838 = i_42805 + binop_y_42837;
                
                for (int64_t i_45918 = 0; i_45918 < (int64_t) 1; i_45918++) {
                    mem_44729[i_42833 + i_45918] = ((__local
                                                     float *) mem_44666)[a_loc_ind_42838 +
                                                                         i_45918];
                }
            }
            
            int64_t binop_y_42846 = Rx_42678 * ltid_x_42807;
            
            for (int64_t i_42842 = 0; i_42842 < Rx_42678; i_42842++) {
                int64_t binop_x_42845 = i_42842 + binop_y_42844;
                int64_t b_loc_ind_42847 = binop_x_42845 + binop_y_42846;
                
                for (int64_t i_45920 = 0; i_45920 < (int64_t) 1; i_45920++) {
                    mem_44731[i_42842 + i_45920] = ((__local
                                                     float *) mem_44668)[b_loc_ind_42847 +
                                                                         i_45920];
                }
            }
            for (int64_t i_45921 = 0; i_45921 < Ry_42676; i_45921++) {
                mem_44739[i_45921] = mem_44729[i_45921];
            }
            for (int64_t i_45922 = 0; i_45922 < Rx_42678; i_45922++) {
                mem_44743[i_45922] = mem_44731[i_45922];
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            
            float mem_44753[Ry_42676 * Rx_42678];
            int64_t ltid_y_42854 = sext_i32_i64(ltid_pre_45901);
            int64_t ltid_x_42852 = sext_i32_i64(ltid_pre_45902);
            int32_t ltid_flat_42853 = local_tid_45897;
            int64_t binop_y_42897 = Ry_42676 * ltid_y_42854;
            int64_t binop_y_42901 = Rx_42678 * ltid_x_42852;
            
            for (int64_t i_42891 = 0; i_42891 < Ry_42676; i_42891++) {
                int64_t binop_x_42896 = iii_42695 + i_42891;
                int64_t cmpop_x_42898 = binop_x_42896 + binop_y_42897;
                bool binop_x_42899 = slt64(cmpop_x_42898, m_29166);
                
                for (int64_t i_42894 = 0; i_42894 < Rx_42678; i_42894++) {
                    int64_t binop_x_42900 = jjj_42696 + i_42894;
                    int64_t cmpop_x_42902 = binop_x_42900 + binop_y_42901;
                    bool binop_y_42903 = slt64(cmpop_x_42902, i32_res_29181);
                    bool cond_42904 = binop_x_42899 && binop_y_42903;
                    
                    if (cond_42904) {
                        float a_42906 = mem_44739[i_42891];
                        float c_42908 = mem_param_44726[i_42891 * Rx_42678 +
                                                        i_42894];
                        bool isnan_res_42911;
                        
                        isnan_res_42911 = futrts_isnan32(a_42906);
                        
                        float defunc_1_f_res_42912;
                        
                        if (isnan_res_42911) {
                            defunc_1_f_res_42912 = 0.0F;
                        } else {
                            float b_42907 = mem_44743[i_42894];
                            float defunc_1_f_res_f_res_42913 = a_42906 *
                                  b_42907;
                            
                            defunc_1_f_res_42912 = defunc_1_f_res_f_res_42913;
                        }
                        
                        float defunc_1_op_res_42917 = c_42908 +
                              defunc_1_f_res_42912;
                        
                        mem_param_44726[i_42891 * Rx_42678 + i_42894] =
                            defunc_1_op_res_42917;
                    }
                }
            }
            for (int64_t i_45925 = 0; i_45925 < Ry_42676; i_45925++) {
                for (int64_t i_45926 = 0; i_45926 < Rx_42678; i_45926++) {
                    mem_44753[i_45925 * Rx_42678 + i_45926] =
                        mem_param_44726[i_45925 * Rx_42678 + i_45926];
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            
            float mem_param_tmp_45915[Ry_42676 * Rx_42678];
            
            for (int32_t i_4 = 0; i_4 < Ry_42676 * Rx_42678; i_4++)
                mem_param_tmp_45915[i_4] = mem_44753[i_4];
            for (int32_t i_5 = 0; i_5 < Ry_42676 * Rx_42678; i_5++)
                mem_param_44726[i_5] = mem_param_tmp_45915[i_5];
        }
        for (int32_t i_6 = 0; i_6 < Ry_42676 * Rx_42678; i_6++)
            loop_mem_44754[i_6] = mem_param_44726[i_6];
        
        float mem_param_tmp_45907[Ry_42676 * Rx_42678];
        
        for (int32_t i_7 = 0; i_7 < Ry_42676 * Rx_42678; i_7++)
            mem_param_tmp_45907[i_7] = loop_mem_44754[i_7];
        for (int32_t i_8 = 0; i_8 < Ry_42676 * Rx_42678; i_8++)
            mem_param_44669[i_8] = mem_param_tmp_45907[i_8];
    }
    for (int32_t i_9 = 0; i_9 < Ry_42676 * Rx_42678; i_9++)
        loop_mem_44755[i_9] = mem_param_44669[i_9];
    for (int64_t i_42927 = 0; i_42927 < Ry_42676; i_42927++) {
        int64_t binop_y_42952 = Ty_42675 * i_42927;
        
        for (int64_t i_42929 = 0; i_42929 < tk_div_tx_42680; i_42929++) {
            int64_t binop_y_42950 = Tx_42677 * i_42929;
            int64_t ltid_x_42931 = sext_i32_i64(ltid_pre_45901);
            int64_t ltid_y_42932 = sext_i32_i64(ltid_pre_45902);
            int32_t ltid_flat_42933 = local_tid_45897;
            int64_t k_42951 = ltid_y_42932 + binop_y_42950;
            int64_t i_42953 = ltid_x_42931 + binop_y_42952;
            int64_t gtid_42954 = iii_42695 + i_42953;
            int64_t A_col_idx_42955 = kk_42926 + k_42951;
            bool binop_x_42956 = slt64(gtid_42954, m_29166);
            bool binop_y_42957 = slt64(A_col_idx_42955, i32_res_29175);
            bool cond_42958 = binop_x_42956 && binop_y_42957;
            float A_elem_42959;
            
            if (cond_42958) {
                float A_elem_42961 = ((__global
                                       float *) images_mem_44381)[gtid_42954 *
                                                                  N_29165 +
                                                                  A_col_idx_42955];
                
                A_elem_42959 = A_elem_42961;
            } else {
                A_elem_42959 = 0.0F;
            }
            
            bool cond_42963 = slt64(k_42951, Tk_42679);
            int64_t a_loc_ind_42964;
            
            if (cond_42963) {
                int64_t binop_y_42965 = Tk_42679 * i_42953;
                int64_t loc_fi_42966 = k_42951 + binop_y_42965;
                
                a_loc_ind_42964 = loc_fi_42966;
            } else {
                a_loc_ind_42964 = (int64_t) -1;
            }
            if (sle64((int64_t) 0, a_loc_ind_42964) && slt64(a_loc_ind_42964,
                                                             a_loc_szz_42685)) {
                ((__local float *) mem_44666)[a_loc_ind_42964] = A_elem_42959;
            }
            barrier(CLK_LOCAL_MEM_FENCE);
        }
    }
    for (int64_t i_42971 = 0; i_42971 < tk_div_ty_42681; i_42971++) {
        int64_t binop_y_42994 = Ty_42675 * i_42971;
        
        for (int64_t i_42973 = 0; i_42973 < Rx_42678; i_42973++) {
            int64_t binop_y_42996 = Tx_42677 * i_42973;
            int64_t ltid_x_42975 = sext_i32_i64(ltid_pre_45901);
            int64_t ltid_y_42976 = sext_i32_i64(ltid_pre_45902);
            int32_t ltid_flat_42977 = local_tid_45897;
            int64_t k_42995 = ltid_x_42975 + binop_y_42994;
            int64_t j_42997 = ltid_y_42976 + binop_y_42996;
            int64_t gtid_42998 = jjj_42696 + j_42997;
            int64_t B_row_idx_42999 = kk_42926 + k_42995;
            bool binop_x_43000 = slt64(gtid_42998, i32_res_29181);
            bool binop_y_43001 = slt64(B_row_idx_42999, i32_res_29175);
            bool cond_43002 = binop_x_43000 && binop_y_43001;
            float B_elem_43003;
            
            if (cond_43002) {
                float B_elem_43005 = ((__global
                                       float *) mem_44393)[B_row_idx_42999 *
                                                           i32_res_29181 +
                                                           gtid_42998];
                
                B_elem_43003 = B_elem_43005;
            } else {
                B_elem_43003 = 0.0F;
            }
            
            bool cond_43007 = slt64(k_42995, Tk_42679);
            int64_t b_loc_ind_43008;
            
            if (cond_43007) {
                int64_t binop_y_43009 = TxRx_42682 * k_42995;
                int64_t loc_fi_43010 = j_42997 + binop_y_43009;
                
                b_loc_ind_43008 = loc_fi_43010;
            } else {
                b_loc_ind_43008 = (int64_t) -1;
            }
            if (sle64((int64_t) 0, b_loc_ind_43008) && slt64(b_loc_ind_43008,
                                                             b_loc_szz_42687)) {
                ((__local float *) mem_44668)[b_loc_ind_43008] = B_elem_43003;
            }
            barrier(CLK_LOCAL_MEM_FENCE);
        }
    }
    
    float mem_44821[Ry_42676];
    float mem_44825[Rx_42678];
    float mem_44835[Ry_42676 * Rx_42678];
    float loop_mem_44837[Ry_42676 * Rx_42678];
    float mem_param_44808[Ry_42676 * Rx_42678];
    
    for (int32_t i_10 = 0; i_10 < Ry_42676 * Rx_42678; i_10++)
        mem_param_44808[i_10] = loop_mem_44755[i_10];
    for (int64_t i_43015 = 0; i_43015 < Tk_42679; i_43015++) {
        int64_t cmpop_x_43017 = kk_42926 + i_43015;
        bool cond_43018 = slt64(cmpop_x_43017, i32_res_29175);
        float mem_45468[Ry_42676 * Rx_42678];
        
        if (cond_43018) {
            int64_t binop_y_43056 = TxRx_42682 * i_43015;
            int64_t bytes_44810 = (int64_t) 4 * Ry_42676;
            int64_t bytes_44812 = (int64_t) 4 * Rx_42678;
            int64_t ltid_y_43021 = sext_i32_i64(ltid_pre_45901);
            int64_t ltid_x_43019 = sext_i32_i64(ltid_pre_45902);
            int32_t ltid_flat_43020 = local_tid_45897;
            float mem_44811[Ry_42676];
            float mem_44813[Rx_42678];
            int64_t binop_x_43047 = Ry_42676 * ltid_y_43021;
            
            for (int64_t i_43045 = 0; i_43045 < Ry_42676; i_43045++) {
                int64_t binop_x_43048 = i_43045 + binop_x_43047;
                int64_t binop_y_43049 = Tk_42679 * binop_x_43048;
                int64_t a_loc_ind_43050 = i_43015 + binop_y_43049;
                
                for (int64_t i_45934 = 0; i_45934 < (int64_t) 1; i_45934++) {
                    mem_44811[i_43045 + i_45934] = ((__local
                                                     float *) mem_44666)[a_loc_ind_43050 +
                                                                         i_45934];
                }
            }
            
            int64_t binop_y_43058 = Rx_42678 * ltid_x_43019;
            
            for (int64_t i_43054 = 0; i_43054 < Rx_42678; i_43054++) {
                int64_t binop_x_43057 = i_43054 + binop_y_43056;
                int64_t b_loc_ind_43059 = binop_x_43057 + binop_y_43058;
                
                for (int64_t i_45936 = 0; i_45936 < (int64_t) 1; i_45936++) {
                    mem_44813[i_43054 + i_45936] = ((__local
                                                     float *) mem_44668)[b_loc_ind_43059 +
                                                                         i_45936];
                }
            }
            for (int64_t i_45937 = 0; i_45937 < Ry_42676; i_45937++) {
                mem_44821[i_45937] = mem_44811[i_45937];
            }
            for (int64_t i_45938 = 0; i_45938 < Rx_42678; i_45938++) {
                mem_44825[i_45938] = mem_44813[i_45938];
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            
            int64_t ltid_y_43066 = sext_i32_i64(ltid_pre_45901);
            int64_t ltid_x_43064 = sext_i32_i64(ltid_pre_45902);
            int32_t ltid_flat_43065 = local_tid_45897;
            int64_t binop_y_43109 = Ry_42676 * ltid_y_43066;
            int64_t binop_y_43113 = Rx_42678 * ltid_x_43064;
            
            for (int64_t i_43103 = 0; i_43103 < Ry_42676; i_43103++) {
                int64_t binop_x_43108 = iii_42695 + i_43103;
                int64_t cmpop_x_43110 = binop_x_43108 + binop_y_43109;
                bool binop_x_43111 = slt64(cmpop_x_43110, m_29166);
                
                for (int64_t i_43106 = 0; i_43106 < Rx_42678; i_43106++) {
                    int64_t binop_x_43112 = jjj_42696 + i_43106;
                    int64_t cmpop_x_43114 = binop_x_43112 + binop_y_43113;
                    bool binop_y_43115 = slt64(cmpop_x_43114, i32_res_29181);
                    bool cond_43116 = binop_x_43111 && binop_y_43115;
                    
                    if (cond_43116) {
                        float a_43118 = mem_44821[i_43103];
                        float c_43120 = mem_param_44808[i_43103 * Rx_42678 +
                                                        i_43106];
                        bool isnan_res_43123;
                        
                        isnan_res_43123 = futrts_isnan32(a_43118);
                        
                        float defunc_1_f_res_43124;
                        
                        if (isnan_res_43123) {
                            defunc_1_f_res_43124 = 0.0F;
                        } else {
                            float b_43119 = mem_44825[i_43106];
                            float defunc_1_f_res_f_res_43125 = a_43118 *
                                  b_43119;
                            
                            defunc_1_f_res_43124 = defunc_1_f_res_f_res_43125;
                        }
                        
                        float defunc_1_op_res_43129 = c_43120 +
                              defunc_1_f_res_43124;
                        
                        mem_param_44808[i_43103 * Rx_42678 + i_43106] =
                            defunc_1_op_res_43129;
                    }
                }
            }
            for (int64_t i_45941 = 0; i_45941 < Ry_42676; i_45941++) {
                for (int64_t i_45942 = 0; i_45942 < Rx_42678; i_45942++) {
                    mem_44835[i_45941 * Rx_42678 + i_45942] =
                        mem_param_44808[i_45941 * Rx_42678 + i_45942];
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            for (int64_t i_45943 = 0; i_45943 < Ry_42676; i_45943++) {
                for (int64_t i_45944 = 0; i_45944 < Rx_42678; i_45944++) {
                    mem_45468[i_45943 * Rx_42678 + i_45944] =
                        mem_44835[i_45943 * Rx_42678 + i_45944];
                }
            }
        } else {
            for (int64_t i_45945 = 0; i_45945 < Ry_42676; i_45945++) {
                for (int64_t i_45946 = 0; i_45946 < Rx_42678; i_45946++) {
                    mem_45468[i_45945 * Rx_42678 + i_45946] =
                        mem_param_44808[i_45945 * Rx_42678 + i_45946];
                }
            }
        }
        
        float mem_param_tmp_45931[Ry_42676 * Rx_42678];
        
        for (int32_t i_11 = 0; i_11 < Ry_42676 * Rx_42678; i_11++)
            mem_param_tmp_45931[i_11] = mem_45468[i_11];
        for (int32_t i_12 = 0; i_12 < Ry_42676 * Rx_42678; i_12++)
            mem_param_44808[i_12] = mem_param_tmp_45931[i_12];
    }
    for (int32_t i_13 = 0; i_13 < Ry_42676 * Rx_42678; i_13++)
        loop_mem_44837[i_13] = mem_param_44808[i_13];
    
    int64_t reg_tile_i_45947 = squot64(sext_i32_i64(local_tid_45897), Tx_42677);
    int64_t reg_tile_i_45948 = sext_i32_i64(local_tid_45897) -
            squot64(sext_i32_i64(local_tid_45897), Tx_42677) * Tx_42677;
    int64_t tile_dim_start_45949 = Ry_42676 * (Ty_42675 * gid_y_42693 +
                                               reg_tile_i_45947);
    int64_t tile_dim_start_45950 = Rx_42678 * (Tx_42677 * gid_x_42692 +
                                               reg_tile_i_45948);
    
    for (int64_t nest_i_45951 = 0; nest_i_45951 < Ry_42676; nest_i_45951++) {
        for (int64_t nest_i_45952 = 0; nest_i_45952 < Rx_42678;
             nest_i_45952++) {
            if (slt64(tile_dim_start_45949 + nest_i_45951, m_29166) &&
                slt64(tile_dim_start_45950 + nest_i_45952, i32_res_29181)) {
                ((__global float *) mem_44840)[(tile_dim_start_45949 +
                                                nest_i_45951) * i32_res_29181 +
                                               (tile_dim_start_45950 +
                                                nest_i_45952)] =
                    loop_mem_44837[nest_i_45951 * Rx_42678 + nest_i_45952];
            }
        }
    }
    
  error_9:
    return;
    #undef Ty_42675
    #undef Ry_42676
    #undef Tx_42677
    #undef Rx_42678
    #undef Tk_42679
    #undef tk_div_tx_42680
    #undef tk_div_ty_42681
    #undef TxRx_42682
    #undef TyRy_42683
    #undef a_loc_szz_42685
    #undef b_loc_szz_42687
}
// Tiled multiply-accumulate kernel (generated code).
//
// Each work-item produces at most one float of the output buffer mem_44906,
// at [row * i32_res_29181 + col], where
//   row = gid_x * tile_size + (local_tid / tile_size)
//   col = gid_y * tile_size + (local_tid - (local_tid / tile_size) * tile_size)
// and the store is skipped unless row < m_29166 and col < i32_res_29181.
//
// The value is a running sum, starting at 0, of products
//   defunc_3_map_res_mem_44850[row * i32_res_29181 + k]
//     * mem_44879[k * (i32_res_29181 * m_29166) + row * i32_res_29181 + col]
// where k first walks num_whole_tiles_43159 chunks of tile_size elements and
// then, unless cond_43287 is nonzero, residual_input_43286 further elements.
// The first factor is staged through local memory one tile at a time.
//
// Parameters:
//   global_failure              - the kernel returns at once if the pointed-to
//                                 value is >= 0.
//   mem_44898_backing_aligned_0 - local buffer holding the residual tile.
//   mem_44889_backing_aligned_1 - local buffer holding each whole tile.
//   m_29166, i32_res_29181      - bounds used for the row and column checks;
//                                 i32_res_29181 is also the row stride.
//   num_groups_y_43141          - divisor that splits the flat group id into
//                                 gid_x (quotient) and gid_y (remainder).
//   num_whole_tiles_43159       - trip count of the whole-tile loop.
//   residual_input_43286        - trip count of the residual reduction.
//   cond_43287                  - when nonzero the residual step is skipped.
//   defunc_3_map_res_mem_44850, mem_44879 - inputs, read as float arrays.
//   mem_44906                   - output, written as a float array.
//
// NOTE(review): the code indexes the local tiles with values up to
// tile_size * tile_size - 1 only if the work-group has tile_size * tile_size
// work-items; the launch configuration is not visible here - confirm on the
// host side.
__kernel void mainzisegmap_intragroup_43143(__global int *global_failure,
                                            __local volatile
                                            int64_t *mem_44898_backing_aligned_0,
                                            __local volatile
                                            int64_t *mem_44889_backing_aligned_1,
                                            int64_t m_29166,
                                            int64_t i32_res_29181,
                                            int64_t num_groups_y_43141,
                                            int64_t num_whole_tiles_43159,
                                            int64_t residual_input_43286,
                                            unsigned char cond_43287, __global
                                            unsigned char *defunc_3_map_res_mem_44850,
                                            __global unsigned char *mem_44879,
                                            __global unsigned char *mem_44906)
{
    // tile_size is a macro supplied from outside this kernel body.
    #define tile_sizze_43138 (mainzitile_sizze_43137)
    
    // block_dim0..2 are not read anywhere in this kernel.
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    // Byte-typed views of the two local backing buffers.
    __local volatile char *restrict mem_44898_backing_5 = (__local volatile
                                                           char *) mem_44898_backing_aligned_0;
    __local volatile char *restrict mem_44889_backing_0 = (__local volatile
                                                           char *) mem_44889_backing_aligned_1;
    
    // Bail out before doing any work if a failure code has been recorded.
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_46028;
    int32_t local_tid_46029;
    int64_t group_sizze_46032;
    int32_t wave_sizze_46031;
    int32_t group_tid_46030;
    
    // Only local_tid and group_tid are used below; the global id, group size
    // and wave size are assigned but never read in this kernel.
    global_tid_46028 = get_global_id(0);
    local_tid_46029 = get_local_id(0);
    group_sizze_46032 = get_local_size(0);
    wave_sizze_46031 = LOCKSTEP_WIDTH;
    group_tid_46030 = get_group_id(0);
    
    int32_t gid_flat_43143;
    
    gid_flat_43143 = group_tid_46030;
    
    // Split the flat local id into a 2D position inside the tile:
    // ltid_pre_46033 is the quotient by tile_size (used as the y index).
    int32_t ltid_pre_46033;
    
    ltid_pre_46033 = squot32(local_tid_46029, sext_i64_i32(tile_sizze_43138));
    
    // ltid_pre_46034 is local_tid minus quotient * tile_size (used as the
    // x index).
    int32_t ltid_pre_46034;
    
    ltid_pre_46034 = local_tid_46029 - squot32(local_tid_46029,
                                               sext_i64_i32(tile_sizze_43138)) *
        sext_i64_i32(tile_sizze_43138);
    
    // Split the flat group id the same way, using num_groups_y as divisor.
    int64_t gid_x_43135;
    
    gid_x_43135 = squot64(sext_i32_i64(group_tid_46030), num_groups_y_43141);
    
    int64_t gid_y_43136;
    
    gid_y_43136 = sext_i32_i64(group_tid_46030) -
        squot64(sext_i32_i64(group_tid_46030), num_groups_y_43141) *
        num_groups_y_43141;
    
    // Function-scope single-element array holding the initial accumulator.
    float mem_44884[1];
    int64_t ltid_y_43162 = sext_i32_i64(ltid_pre_46033);
    int64_t ltid_x_43160 = sext_i32_i64(ltid_pre_46034);
    int32_t ltid_flat_43161 = local_tid_46029;
    
    // The accumulator is zeroed only for work-items inside the tile;
    // otherwise mem_44884[0] is left unwritten here.
    if (slt64(ltid_y_43162, tile_sizze_43138) && slt64(ltid_x_43160,
                                                       tile_sizze_43138)) {
        mem_44884[(int64_t) 0] = 0.0F;
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    
    // Offsets of this group along the two tiled dimensions; the first is
    // added to ltid_y (row), the second to ltid_x (column).
    int64_t binop_x_43245 = gid_x_43135 * tile_sizze_43138;
    int64_t binop_x_43260 = gid_y_43136 * tile_sizze_43138;
    __local char *mem_44889;
    
    mem_44889 = (__local char *) mem_44889_backing_0;
    
    float accs_mem_44894[1];
    float mem_param_44885[1];
    
    // mem_param_44885 carries the accumulator across loop iterations.
    for (int32_t i_1 = 0; i_1 < 1; i_1++)
        mem_param_44885[i_1] = mem_44884[i_1];
    // Whole tiles: each iteration stages one tile in local memory and then
    // folds tile_size products into the accumulator.
    for (int64_t tile_id_43171 = 0; tile_id_43171 < num_whole_tiles_43159;
         tile_id_43171++) {
        // First index covered by this tile along the reduced dimension.
        int64_t binop_x_43243 = tile_sizze_43138 * tile_id_43171;
        int64_t ltid_y_43174 = sext_i32_i64(ltid_pre_46033);
        int64_t ltid_x_43172 = sext_i32_i64(ltid_pre_46034);
        int32_t ltid_flat_43173 = local_tid_46029;
        int64_t j_43244 = ltid_x_43172 + binop_x_43243;
        int64_t gtid_43246 = ltid_y_43174 + binop_x_43245;
        bool binop_x_43251 = slt64(j_43244, i32_res_29181);
        bool binop_y_43252 = slt64(gtid_43246, m_29166);
        bool cond_43253 = binop_x_43251 && binop_y_43252;
        float pre_43254;
        
        // Each work-item fetches one element of the first input, or 0 when
        // its (row, j) position fails the bounds check.
        if (cond_43253) {
            float x_43255 = ((__global
                              float *) defunc_3_map_res_mem_44850)[gtid_43246 *
                                                                   i32_res_29181 +
                                                                   j_43244];
            
            pre_43254 = x_43255;
        } else {
            pre_43254 = 0.0F;
        }
        // Store unconditionally into the local tile at [ltid_y][ltid_x].
        ((__local float *) mem_44889)[ltid_y_43174 * tile_sizze_43138 +
                                      ltid_x_43172] = pre_43254;
        // Every store to the tile must complete before any work-item reads it.
        barrier(CLK_LOCAL_MEM_FENCE);
        
        float mem_44893[1];
        int64_t ltid_y_43206 = sext_i32_i64(ltid_pre_46033);
        int64_t ltid_x_43204 = sext_i32_i64(ltid_pre_46034);
        int32_t ltid_flat_43205 = local_tid_46029;
        // Output coordinates owned by this work-item.
        int64_t gtid_43259 = ltid_y_43206 + binop_x_43245;
        int64_t gtid_43261 = ltid_x_43204 + binop_x_43260;
        float acc_43264 = mem_param_44885[(int64_t) 0];
        bool binop_x_43268 = slt64(gtid_43259, m_29166);
        bool binop_y_43269 = slt64(gtid_43261, i32_res_29181);
        bool cond_43270 = binop_x_43268 && binop_y_43269;
        float acc_43271;
        
        // Work-items whose output position is out of bounds keep their
        // accumulator unchanged.
        if (cond_43270) {
            float x_43272;
            float redout_44315 = acc_43264;
            
            // Walk row ltid_y of the local tile, pairing element i with
            // slice (tile start + i) of mem_44879 at this output position.
            for (int64_t i_44316 = 0; i_44316 < tile_sizze_43138; i_44316++) {
                float x_43276 = ((__local float *) mem_44889)[ltid_y_43206 *
                                                              tile_sizze_43138 +
                                                              i_44316];
                int64_t slice_44367 = binop_x_43243 + i_44316;
                float x_43277 = ((__global float *) mem_44879)[slice_44367 *
                                                               (i32_res_29181 *
                                                                m_29166) +
                                                               gtid_43259 *
                                                               i32_res_29181 +
                                                               gtid_43261];
                float defunc_1_f_res_43278 = x_43276 * x_43277;
                float defunc_1_op_res_43275 = defunc_1_f_res_43278 +
                      redout_44315;
                float redout_tmp_46037 = defunc_1_op_res_43275;
                
                redout_44315 = redout_tmp_46037;
            }
            x_43272 = redout_44315;
            acc_43271 = x_43272;
        } else {
            acc_43271 = acc_43264;
        }
        mem_44893[(int64_t) 0] = acc_43271;
        // All reads of the tile must finish before the next iteration
        // overwrites it.
        barrier(CLK_LOCAL_MEM_FENCE);
        
        float mem_param_tmp_46035[1];
        
        // Hand the updated accumulator to the next iteration via a temporary.
        for (int32_t i_2 = 0; i_2 < 1; i_2++)
            mem_param_tmp_46035[i_2] = mem_44893[i_2];
        for (int32_t i_3 = 0; i_3 < 1; i_3++)
            mem_param_44885[i_3] = mem_param_tmp_46035[i_3];
    }
    // Accumulator after all whole tiles.
    for (int32_t i_4 = 0; i_4 < 1; i_4++)
        accs_mem_44894[i_4] = mem_param_44885[i_4];
    
    __local char *mem_44898;
    
    mem_44898 = (__local char *) mem_44898_backing_5;
    
    float mem_44902[1];
    float mem_45482[1];
    
    // cond_43287 is a kernel argument, so every work-item takes the same
    // branch and the barriers in the else branch are reached by all of them.
    if (cond_43287) {
        // No residual step: the whole-tile result is final.
        mem_45482[(int64_t) 0] = accs_mem_44894[(int64_t) 0];
    } else {
        // Residual step: same scheme as one whole-tile iteration, but the
        // tile starts right after the last whole tile, uses the second local
        // buffer, and only residual_input_43286 products are accumulated.
        int64_t binop_x_43360 = tile_sizze_43138 * num_whole_tiles_43159;
        int64_t ltid_y_43290 = sext_i32_i64(ltid_pre_46033);
        int64_t ltid_x_43288 = sext_i32_i64(ltid_pre_46034);
        int32_t ltid_flat_43289 = local_tid_46029;
        int64_t j_43361 = ltid_x_43288 + binop_x_43360;
        int64_t gtid_43363 = binop_x_43245 + ltid_y_43290;
        bool binop_x_43368 = slt64(j_43361, i32_res_29181);
        bool binop_y_43369 = slt64(gtid_43363, m_29166);
        bool cond_43370 = binop_x_43368 && binop_y_43369;
        float pre_43371;
        
        // Out-of-bounds positions are filled with 0 in the local tile.
        if (cond_43370) {
            float x_43372 = ((__global
                              float *) defunc_3_map_res_mem_44850)[gtid_43363 *
                                                                   i32_res_29181 +
                                                                   j_43361];
            
            pre_43371 = x_43372;
        } else {
            pre_43371 = 0.0F;
        }
        ((__local float *) mem_44898)[ltid_y_43290 * tile_sizze_43138 +
                                      ltid_x_43288] = pre_43371;
        // Publish the residual tile to the whole work-group before reading.
        barrier(CLK_LOCAL_MEM_FENCE);
        
        int64_t ltid_y_43323 = sext_i32_i64(ltid_pre_46033);
        int64_t ltid_x_43321 = sext_i32_i64(ltid_pre_46034);
        int32_t ltid_flat_43322 = local_tid_46029;
        int64_t gtid_43377 = binop_x_43245 + ltid_y_43323;
        int64_t gtid_43379 = binop_x_43260 + ltid_x_43321;
        float acc_43382 = accs_mem_44894[(int64_t) 0];
        bool binop_x_43386 = slt64(gtid_43377, m_29166);
        bool binop_y_43387 = slt64(gtid_43379, i32_res_29181);
        bool cond_43388 = binop_x_43386 && binop_y_43387;
        float acc_43389;
        
        if (cond_43388) {
            float x_43390;
            float redout_44317 = acc_43382;
            
            // Only the first residual_input_43286 entries of the tile row
            // take part in the sum.
            for (int64_t i_44318 = 0; i_44318 < residual_input_43286;
                 i_44318++) {
                float x_43394 = ((__local float *) mem_44898)[ltid_y_43323 *
                                                              tile_sizze_43138 +
                                                              i_44318];
                int64_t slice_44368 = binop_x_43360 + i_44318;
                float x_43395 = ((__global float *) mem_44879)[slice_44368 *
                                                               (i32_res_29181 *
                                                                m_29166) +
                                                               gtid_43377 *
                                                               i32_res_29181 +
                                                               gtid_43379];
                float defunc_1_f_res_43396 = x_43394 * x_43395;
                float defunc_1_op_res_43393 = defunc_1_f_res_43396 +
                      redout_44317;
                float redout_tmp_46038 = defunc_1_op_res_43393;
                
                redout_44317 = redout_tmp_46038;
            }
            x_43390 = redout_44317;
            acc_43389 = x_43390;
        } else {
            acc_43389 = acc_43382;
        }
        mem_44902[(int64_t) 0] = acc_43389;
        barrier(CLK_LOCAL_MEM_FENCE);
        mem_45482[(int64_t) 0] = mem_44902[(int64_t) 0];
    }
    
    // Output coordinates, recomputed from the group and local positions.
    int64_t thread_out_index_46039 = gid_x_43135 * tile_sizze_43138 +
            sext_i32_i64(ltid_pre_46033);
    int64_t thread_out_index_46040 = gid_y_43136 * tile_sizze_43138 +
            sext_i32_i64(ltid_pre_46034);
    
    // Write the final accumulator only for in-bounds output positions.
    if (slt64(thread_out_index_46039, m_29166) && slt64(thread_out_index_46040,
                                                        i32_res_29181)) {
        ((__global float *) mem_44906)[thread_out_index_46039 * i32_res_29181 +
                                       thread_out_index_46040] =
            mem_45482[(int64_t) 0];
    }
    
    // Exit label; no goto inside this kernel body targets it.
  error_5:
    return;
    #undef tile_sizze_43138
}
__kernel void mainzisegmap_intragroup_43435(__global int *global_failure,
                                            __local volatile
                                            int64_t *mem_44958_backing_aligned_0,
                                            __local volatile
                                            int64_t *mem_44956_backing_aligned_1,
                                            int64_t N_29165, int64_t m_29166,
                                            int64_t i32_res_29181,
                                            int64_t gridDim_x_43429,
                                            int64_t full_tiles_43460,
                                            int64_t kk_43663, __global
                                            unsigned char *defunc_4_map_res_mem_44916,
                                            __global unsigned char *mem_44940,
                                            __global unsigned char *mem_45130)
{
    #define Ty_43416 (mainziTy_43413)
    #define Ry_43417 (mainziRy_43415)
    #define Tx_43418 (mainziTx_43412)
    #define Rx_43419 (mainziRx_43414)
    #define Tk_43420 (mainziTk_43411)
    #define tk_div_tx_43421 (sdiv_up64(mainziTk_43411, mainziTx_43412))
    #define tk_div_ty_43422 (sdiv_up64(mainziTk_43411, mainziTy_43413))
    #define TxRx_43423 (mainziTx_43412 * mainziRx_43414)
    #define TyRy_43424 (mainziTy_43413 * mainziRy_43415)
    #define a_loc_szz_43426 (mainziTk_43411 * (mainziTy_43413 * mainziRy_43415))
    #define b_loc_szz_43428 (mainziRx_43414 * (mainziTx_43412 * mainziTk_43411))
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict mem_44958_backing_1 = (__local volatile
                                                           char *) mem_44958_backing_aligned_0;
    __local volatile char *restrict mem_44956_backing_0 = (__local volatile
                                                           char *) mem_44956_backing_aligned_1;
    
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_46116;
    int32_t local_tid_46117;
    int64_t group_sizze_46120;
    int32_t wave_sizze_46119;
    int32_t group_tid_46118;
    
    global_tid_46116 = get_global_id(0);
    local_tid_46117 = get_local_id(0);
    group_sizze_46120 = get_local_size(0);
    wave_sizze_46119 = LOCKSTEP_WIDTH;
    group_tid_46118 = get_group_id(0);
    
    int32_t gid_flat_43435;
    
    gid_flat_43435 = group_tid_46118;
    
    int32_t ltid_pre_46121;
    
    ltid_pre_46121 = squot32(local_tid_46117, sext_i64_i32(Tx_43418));
    
    int32_t ltid_pre_46122;
    
    ltid_pre_46122 = local_tid_46117 - squot32(local_tid_46117,
                                               sext_i64_i32(Tx_43418)) *
        sext_i64_i32(Tx_43418);
    
    int64_t gid_y_43434;
    
    gid_y_43434 = squot64(sext_i32_i64(group_tid_46118), gridDim_x_43429);
    
    int64_t gid_x_43433;
    
    gid_x_43433 = sext_i32_i64(group_tid_46118) -
        squot64(sext_i32_i64(group_tid_46118), gridDim_x_43429) *
        gridDim_x_43429;
    
    int64_t iii_43436;
    
    iii_43436 = TyRy_43424 * gid_y_43434;
    
    int64_t jjj_43437 = TxRx_43423 * gid_x_43433;
    float mem_44954[Ry_43417 * Rx_43419];
    int64_t ltid_y_43440 = sext_i32_i64(ltid_pre_46121);
    int64_t ltid_x_43438 = sext_i32_i64(ltid_pre_46122);
    int32_t ltid_flat_43439 = local_tid_46117;
    float mem_44945[Ry_43417 * Rx_43419];
    
    for (int64_t i_43451 = 0; i_43451 < Ry_43417; i_43451++) {
        for (int64_t i_43454 = 0; i_43454 < Rx_43419; i_43454++) {
            mem_44945[i_43451 * Rx_43419 + i_43454] = 0.0F;
        }
    }
    for (int64_t i_46125 = 0; i_46125 < Ry_43417; i_46125++) {
        for (int64_t i_46126 = 0; i_46126 < Rx_43419; i_46126++) {
            mem_44954[i_46125 * Rx_43419 + i_46126] = mem_44945[i_46125 *
                                                                Rx_43419 +
                                                                i_46126];
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    
    __local char *mem_44956;
    
    mem_44956 = (__local char *) mem_44956_backing_0;
    
    __local char *mem_44958;
    
    mem_44958 = (__local char *) mem_44958_backing_1;
    
    float mem_45029[Ry_43417];
    float mem_45033[Rx_43419];
    float loop_mem_45045[Ry_43417 * Rx_43419];
    float mem_param_44959[Ry_43417 * Rx_43419];
    
    for (int32_t i_2 = 0; i_2 < Ry_43417 * Rx_43419; i_2++)
        mem_param_44959[i_2] = mem_44954[i_2];
    for (int64_t i_43461 = 0; i_43461 < full_tiles_43460; i_43461++) {
        int64_t kk_43465 = Tk_43420 * i_43461;
        
        for (int64_t i_43466 = 0; i_43466 < Ry_43417; i_43466++) {
            int64_t binop_y_43489 = Ty_43416 * i_43466;
            
            for (int64_t i_43468 = 0; i_43468 < tk_div_tx_43421; i_43468++) {
                int64_t binop_y_43487 = Tx_43418 * i_43468;
                int64_t ltid_x_43470 = sext_i32_i64(ltid_pre_46121);
                int64_t ltid_y_43471 = sext_i32_i64(ltid_pre_46122);
                int32_t ltid_flat_43472 = local_tid_46117;
                int64_t k_43488 = ltid_y_43471 + binop_y_43487;
                int64_t i_43490 = ltid_x_43470 + binop_y_43489;
                int64_t gtid_43491 = iii_43436 + i_43490;
                int64_t A_col_idx_43492 = kk_43465 + k_43488;
                bool cond_43493 = slt64(gtid_43491, m_29166);
                float A_elem_43494;
                
                if (cond_43493) {
                    float A_elem_43496 = ((__global
                                           float *) defunc_4_map_res_mem_44916)[gtid_43491 *
                                                                                i32_res_29181 +
                                                                                A_col_idx_43492];
                    
                    A_elem_43494 = A_elem_43496;
                } else {
                    A_elem_43494 = 0.0F;
                }
                
                bool cond_43498 = slt64(k_43488, Tk_43420);
                int64_t a_loc_ind_43499;
                
                if (cond_43498) {
                    int64_t binop_y_43500 = Tk_43420 * i_43490;
                    int64_t loc_fi_43501 = k_43488 + binop_y_43500;
                    
                    a_loc_ind_43499 = loc_fi_43501;
                } else {
                    a_loc_ind_43499 = (int64_t) -1;
                }
                if (sle64((int64_t) 0, a_loc_ind_43499) &&
                    slt64(a_loc_ind_43499, a_loc_szz_43426)) {
                    ((__local float *) mem_44956)[a_loc_ind_43499] =
                        A_elem_43494;
                }
                barrier(CLK_LOCAL_MEM_FENCE);
            }
        }
        for (int64_t i_43506 = 0; i_43506 < tk_div_ty_43422; i_43506++) {
            int64_t binop_y_43527 = Ty_43416 * i_43506;
            
            for (int64_t i_43508 = 0; i_43508 < Rx_43419; i_43508++) {
                int64_t binop_y_43529 = Tx_43418 * i_43508;
                int64_t ltid_x_43510 = sext_i32_i64(ltid_pre_46121);
                int64_t ltid_y_43511 = sext_i32_i64(ltid_pre_46122);
                int32_t ltid_flat_43512 = local_tid_46117;
                int64_t k_43528 = ltid_x_43510 + binop_y_43527;
                int64_t j_43530 = ltid_y_43511 + binop_y_43529;
                int64_t gtid_43531 = jjj_43437 + j_43530;
                int64_t B_row_idx_43532 = kk_43465 + k_43528;
                bool cond_43533 = slt64(gtid_43531, N_29165);
                float B_elem_43534;
                
                if (cond_43533) {
                    float B_elem_43536 = ((__global
                                           float *) mem_44940)[B_row_idx_43532 *
                                                               N_29165 +
                                                               gtid_43531];
                    
                    B_elem_43534 = B_elem_43536;
                } else {
                    B_elem_43534 = 0.0F;
                }
                
                bool cond_43538 = slt64(k_43528, Tk_43420);
                int64_t b_loc_ind_43539;
                
                if (cond_43538) {
                    int64_t binop_y_43540 = TxRx_43423 * k_43528;
                    int64_t loc_fi_43541 = j_43530 + binop_y_43540;
                    
                    b_loc_ind_43539 = loc_fi_43541;
                } else {
                    b_loc_ind_43539 = (int64_t) -1;
                }
                if (sle64((int64_t) 0, b_loc_ind_43539) &&
                    slt64(b_loc_ind_43539, b_loc_szz_43428)) {
                    ((__local float *) mem_44958)[b_loc_ind_43539] =
                        B_elem_43534;
                }
                barrier(CLK_LOCAL_MEM_FENCE);
            }
        }
        
        float loop_mem_45044[Ry_43417 * Rx_43419];
        float mem_param_45016[Ry_43417 * Rx_43419];
        
        for (int32_t i_3 = 0; i_3 < Ry_43417 * Rx_43419; i_3++)
            mem_param_45016[i_3] = mem_param_44959[i_3];
        for (int64_t i_43546 = 0; i_43546 < Tk_43420; i_43546++) {
            int64_t binop_y_43585 = TxRx_43423 * i_43546;
            int64_t ltid_y_43550 = sext_i32_i64(ltid_pre_46121);
            int64_t ltid_x_43548 = sext_i32_i64(ltid_pre_46122);
            int32_t ltid_flat_43549 = local_tid_46117;
            float mem_45019[Ry_43417];
            float mem_45021[Rx_43419];
            int64_t binop_x_43576 = Ry_43417 * ltid_y_43550;
            
            for (int64_t i_43574 = 0; i_43574 < Ry_43417; i_43574++) {
                int64_t binop_x_43577 = i_43574 + binop_x_43576;
                int64_t binop_y_43578 = Tk_43420 * binop_x_43577;
                int64_t a_loc_ind_43579 = i_43546 + binop_y_43578;
                
                for (int64_t i_46138 = 0; i_46138 < (int64_t) 1; i_46138++) {
                    mem_45019[i_43574 + i_46138] = ((__local
                                                     float *) mem_44956)[a_loc_ind_43579 +
                                                                         i_46138];
                }
            }
            
            int64_t binop_y_43587 = Rx_43419 * ltid_x_43548;
            
            for (int64_t i_43583 = 0; i_43583 < Rx_43419; i_43583++) {
                int64_t binop_x_43586 = i_43583 + binop_y_43585;
                int64_t b_loc_ind_43588 = binop_x_43586 + binop_y_43587;
                
                for (int64_t i_46140 = 0; i_46140 < (int64_t) 1; i_46140++) {
                    mem_45021[i_43583 + i_46140] = ((__local
                                                     float *) mem_44958)[b_loc_ind_43588 +
                                                                         i_46140];
                }
            }
            for (int64_t i_46141 = 0; i_46141 < Ry_43417; i_46141++) {
                mem_45029[i_46141] = mem_45019[i_46141];
            }
            for (int64_t i_46142 = 0; i_46142 < Rx_43419; i_46142++) {
                mem_45033[i_46142] = mem_45021[i_46142];
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            
            float mem_45043[Ry_43417 * Rx_43419];
            int64_t ltid_y_43595 = sext_i32_i64(ltid_pre_46121);
            int64_t ltid_x_43593 = sext_i32_i64(ltid_pre_46122);
            int32_t ltid_flat_43594 = local_tid_46117;
            int64_t binop_y_43636 = Ry_43417 * ltid_y_43595;
            int64_t binop_y_43640 = Rx_43419 * ltid_x_43593;
            
            for (int64_t i_43630 = 0; i_43630 < Ry_43417; i_43630++) {
                int64_t binop_x_43635 = iii_43436 + i_43630;
                int64_t cmpop_x_43637 = binop_x_43635 + binop_y_43636;
                bool binop_x_43638 = slt64(cmpop_x_43637, m_29166);
                
                for (int64_t i_43633 = 0; i_43633 < Rx_43419; i_43633++) {
                    int64_t binop_x_43639 = jjj_43437 + i_43633;
                    int64_t cmpop_x_43641 = binop_x_43639 + binop_y_43640;
                    bool binop_y_43642 = slt64(cmpop_x_43641, N_29165);
                    bool cond_43643 = binop_x_43638 && binop_y_43642;
                    
                    if (cond_43643) {
                        float a_43645 = mem_45029[i_43630];
                        float b_43646 = mem_45033[i_43633];
                        float c_43647 = mem_param_45016[i_43630 * Rx_43419 +
                                                        i_43633];
                        float defunc_1_f_res_43650 = a_43645 * b_43646;
                        float defunc_1_op_res_43654 = c_43647 +
                              defunc_1_f_res_43650;
                        
                        mem_param_45016[i_43630 * Rx_43419 + i_43633] =
                            defunc_1_op_res_43654;
                    }
                }
            }
            for (int64_t i_46145 = 0; i_46145 < Ry_43417; i_46145++) {
                for (int64_t i_46146 = 0; i_46146 < Rx_43419; i_46146++) {
                    mem_45043[i_46145 * Rx_43419 + i_46146] =
                        mem_param_45016[i_46145 * Rx_43419 + i_46146];
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            
            float mem_param_tmp_46135[Ry_43417 * Rx_43419];
            
            for (int32_t i_4 = 0; i_4 < Ry_43417 * Rx_43419; i_4++)
                mem_param_tmp_46135[i_4] = mem_45043[i_4];
            for (int32_t i_5 = 0; i_5 < Ry_43417 * Rx_43419; i_5++)
                mem_param_45016[i_5] = mem_param_tmp_46135[i_5];
        }
        for (int32_t i_6 = 0; i_6 < Ry_43417 * Rx_43419; i_6++)
            loop_mem_45044[i_6] = mem_param_45016[i_6];
        
        float mem_param_tmp_46127[Ry_43417 * Rx_43419];
        
        for (int32_t i_7 = 0; i_7 < Ry_43417 * Rx_43419; i_7++)
            mem_param_tmp_46127[i_7] = loop_mem_45044[i_7];
        for (int32_t i_8 = 0; i_8 < Ry_43417 * Rx_43419; i_8++)
            mem_param_44959[i_8] = mem_param_tmp_46127[i_8];
    }
    for (int32_t i_9 = 0; i_9 < Ry_43417 * Rx_43419; i_9++)
        loop_mem_45045[i_9] = mem_param_44959[i_9];
    for (int64_t i_43664 = 0; i_43664 < Ry_43417; i_43664++) {
        int64_t binop_y_43689 = Ty_43416 * i_43664;
        
        for (int64_t i_43666 = 0; i_43666 < tk_div_tx_43421; i_43666++) {
            int64_t binop_y_43687 = Tx_43418 * i_43666;
            int64_t ltid_x_43668 = sext_i32_i64(ltid_pre_46121);
            int64_t ltid_y_43669 = sext_i32_i64(ltid_pre_46122);
            int32_t ltid_flat_43670 = local_tid_46117;
            int64_t k_43688 = ltid_y_43669 + binop_y_43687;
            int64_t i_43690 = ltid_x_43668 + binop_y_43689;
            int64_t gtid_43691 = iii_43436 + i_43690;
            int64_t A_col_idx_43692 = kk_43663 + k_43688;
            bool binop_x_43693 = slt64(gtid_43691, m_29166);
            bool binop_y_43694 = slt64(A_col_idx_43692, i32_res_29181);
            bool cond_43695 = binop_x_43693 && binop_y_43694;
            float A_elem_43696;
            
            if (cond_43695) {
                float A_elem_43698 = ((__global
                                       float *) defunc_4_map_res_mem_44916)[gtid_43691 *
                                                                            i32_res_29181 +
                                                                            A_col_idx_43692];
                
                A_elem_43696 = A_elem_43698;
            } else {
                A_elem_43696 = 0.0F;
            }
            
            bool cond_43700 = slt64(k_43688, Tk_43420);
            int64_t a_loc_ind_43701;
            
            if (cond_43700) {
                int64_t binop_y_43702 = Tk_43420 * i_43690;
                int64_t loc_fi_43703 = k_43688 + binop_y_43702;
                
                a_loc_ind_43701 = loc_fi_43703;
            } else {
                a_loc_ind_43701 = (int64_t) -1;
            }
            if (sle64((int64_t) 0, a_loc_ind_43701) && slt64(a_loc_ind_43701,
                                                             a_loc_szz_43426)) {
                ((__local float *) mem_44956)[a_loc_ind_43701] = A_elem_43696;
            }
            barrier(CLK_LOCAL_MEM_FENCE);
        }
    }
    for (int64_t i_43708 = 0; i_43708 < tk_div_ty_43422; i_43708++) {
        int64_t binop_y_43731 = Ty_43416 * i_43708;
        
        for (int64_t i_43710 = 0; i_43710 < Rx_43419; i_43710++) {
            int64_t binop_y_43733 = Tx_43418 * i_43710;
            int64_t ltid_x_43712 = sext_i32_i64(ltid_pre_46121);
            int64_t ltid_y_43713 = sext_i32_i64(ltid_pre_46122);
            int32_t ltid_flat_43714 = local_tid_46117;
            int64_t k_43732 = ltid_x_43712 + binop_y_43731;
            int64_t j_43734 = ltid_y_43713 + binop_y_43733;
            int64_t gtid_43735 = jjj_43437 + j_43734;
            int64_t B_row_idx_43736 = kk_43663 + k_43732;
            bool binop_x_43737 = slt64(gtid_43735, N_29165);
            bool binop_y_43738 = slt64(B_row_idx_43736, i32_res_29181);
            bool cond_43739 = binop_x_43737 && binop_y_43738;
            float B_elem_43740;
            
            if (cond_43739) {
                float B_elem_43742 = ((__global
                                       float *) mem_44940)[B_row_idx_43736 *
                                                           N_29165 +
                                                           gtid_43735];
                
                B_elem_43740 = B_elem_43742;
            } else {
                B_elem_43740 = 0.0F;
            }
            
            bool cond_43744 = slt64(k_43732, Tk_43420);
            int64_t b_loc_ind_43745;
            
            if (cond_43744) {
                int64_t binop_y_43746 = TxRx_43423 * k_43732;
                int64_t loc_fi_43747 = j_43734 + binop_y_43746;
                
                b_loc_ind_43745 = loc_fi_43747;
            } else {
                b_loc_ind_43745 = (int64_t) -1;
            }
            if (sle64((int64_t) 0, b_loc_ind_43745) && slt64(b_loc_ind_43745,
                                                             b_loc_szz_43428)) {
                ((__local float *) mem_44958)[b_loc_ind_43745] = B_elem_43740;
            }
            barrier(CLK_LOCAL_MEM_FENCE);
        }
    }
    
    float mem_45111[Ry_43417];
    float mem_45115[Rx_43419];
    float mem_45125[Ry_43417 * Rx_43419];
    float loop_mem_45127[Ry_43417 * Rx_43419];
    float mem_param_45098[Ry_43417 * Rx_43419];
    
    for (int32_t i_10 = 0; i_10 < Ry_43417 * Rx_43419; i_10++)
        mem_param_45098[i_10] = loop_mem_45045[i_10];
    for (int64_t i_43752 = 0; i_43752 < Tk_43420; i_43752++) {
        int64_t cmpop_x_43754 = kk_43663 + i_43752;
        bool cond_43755 = slt64(cmpop_x_43754, i32_res_29181);
        float mem_45498[Ry_43417 * Rx_43419];
        
        if (cond_43755) {
            int64_t binop_y_43793 = TxRx_43423 * i_43752;
            int64_t bytes_45100 = (int64_t) 4 * Ry_43417;
            int64_t bytes_45102 = (int64_t) 4 * Rx_43419;
            int64_t ltid_y_43758 = sext_i32_i64(ltid_pre_46121);
            int64_t ltid_x_43756 = sext_i32_i64(ltid_pre_46122);
            int32_t ltid_flat_43757 = local_tid_46117;
            float mem_45101[Ry_43417];
            float mem_45103[Rx_43419];
            int64_t binop_x_43784 = Ry_43417 * ltid_y_43758;
            
            for (int64_t i_43782 = 0; i_43782 < Ry_43417; i_43782++) {
                int64_t binop_x_43785 = i_43782 + binop_x_43784;
                int64_t binop_y_43786 = Tk_43420 * binop_x_43785;
                int64_t a_loc_ind_43787 = i_43752 + binop_y_43786;
                
                for (int64_t i_46154 = 0; i_46154 < (int64_t) 1; i_46154++) {
                    mem_45101[i_43782 + i_46154] = ((__local
                                                     float *) mem_44956)[a_loc_ind_43787 +
                                                                         i_46154];
                }
            }
            
            int64_t binop_y_43795 = Rx_43419 * ltid_x_43756;
            
            for (int64_t i_43791 = 0; i_43791 < Rx_43419; i_43791++) {
                int64_t binop_x_43794 = i_43791 + binop_y_43793;
                int64_t b_loc_ind_43796 = binop_x_43794 + binop_y_43795;
                
                for (int64_t i_46156 = 0; i_46156 < (int64_t) 1; i_46156++) {
                    mem_45103[i_43791 + i_46156] = ((__local
                                                     float *) mem_44958)[b_loc_ind_43796 +
                                                                         i_46156];
                }
            }
            for (int64_t i_46157 = 0; i_46157 < Ry_43417; i_46157++) {
                mem_45111[i_46157] = mem_45101[i_46157];
            }
            for (int64_t i_46158 = 0; i_46158 < Rx_43419; i_46158++) {
                mem_45115[i_46158] = mem_45103[i_46158];
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            
            int64_t ltid_y_43803 = sext_i32_i64(ltid_pre_46121);
            int64_t ltid_x_43801 = sext_i32_i64(ltid_pre_46122);
            int32_t ltid_flat_43802 = local_tid_46117;
            int64_t binop_y_43844 = Ry_43417 * ltid_y_43803;
            int64_t binop_y_43848 = Rx_43419 * ltid_x_43801;
            
            for (int64_t i_43838 = 0; i_43838 < Ry_43417; i_43838++) {
                int64_t binop_x_43843 = iii_43436 + i_43838;
                int64_t cmpop_x_43845 = binop_x_43843 + binop_y_43844;
                bool binop_x_43846 = slt64(cmpop_x_43845, m_29166);
                
                for (int64_t i_43841 = 0; i_43841 < Rx_43419; i_43841++) {
                    int64_t binop_x_43847 = jjj_43437 + i_43841;
                    int64_t cmpop_x_43849 = binop_x_43847 + binop_y_43848;
                    bool binop_y_43850 = slt64(cmpop_x_43849, N_29165);
                    bool cond_43851 = binop_x_43846 && binop_y_43850;
                    
                    if (cond_43851) {
                        float a_43853 = mem_45111[i_43838];
                        float b_43854 = mem_45115[i_43841];
                        float c_43855 = mem_param_45098[i_43838 * Rx_43419 +
                                                        i_43841];
                        float defunc_1_f_res_43858 = a_43853 * b_43854;
                        float defunc_1_op_res_43862 = c_43855 +
                              defunc_1_f_res_43858;
                        
                        mem_param_45098[i_43838 * Rx_43419 + i_43841] =
                            defunc_1_op_res_43862;
                    }
                }
            }
            for (int64_t i_46161 = 0; i_46161 < Ry_43417; i_46161++) {
                for (int64_t i_46162 = 0; i_46162 < Rx_43419; i_46162++) {
                    mem_45125[i_46161 * Rx_43419 + i_46162] =
                        mem_param_45098[i_46161 * Rx_43419 + i_46162];
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            for (int64_t i_46163 = 0; i_46163 < Ry_43417; i_46163++) {
                for (int64_t i_46164 = 0; i_46164 < Rx_43419; i_46164++) {
                    mem_45498[i_46163 * Rx_43419 + i_46164] =
                        mem_45125[i_46163 * Rx_43419 + i_46164];
                }
            }
        } else {
            for (int64_t i_46165 = 0; i_46165 < Ry_43417; i_46165++) {
                for (int64_t i_46166 = 0; i_46166 < Rx_43419; i_46166++) {
                    mem_45498[i_46165 * Rx_43419 + i_46166] =
                        mem_param_45098[i_46165 * Rx_43419 + i_46166];
                }
            }
        }
        
        float mem_param_tmp_46151[Ry_43417 * Rx_43419];
        
        for (int32_t i_11 = 0; i_11 < Ry_43417 * Rx_43419; i_11++)
            mem_param_tmp_46151[i_11] = mem_45498[i_11];
        for (int32_t i_12 = 0; i_12 < Ry_43417 * Rx_43419; i_12++)
            mem_param_45098[i_12] = mem_param_tmp_46151[i_12];
    }
    for (int32_t i_13 = 0; i_13 < Ry_43417 * Rx_43419; i_13++)
        loop_mem_45127[i_13] = mem_param_45098[i_13];
    
    int64_t reg_tile_i_46167 = squot64(sext_i32_i64(local_tid_46117), Tx_43418);
    int64_t reg_tile_i_46168 = sext_i32_i64(local_tid_46117) -
            squot64(sext_i32_i64(local_tid_46117), Tx_43418) * Tx_43418;
    int64_t tile_dim_start_46169 = Ry_43417 * (Ty_43416 * gid_y_43434 +
                                               reg_tile_i_46167);
    int64_t tile_dim_start_46170 = Rx_43419 * (Tx_43418 * gid_x_43433 +
                                               reg_tile_i_46168);
    
    for (int64_t nest_i_46171 = 0; nest_i_46171 < Ry_43417; nest_i_46171++) {
        for (int64_t nest_i_46172 = 0; nest_i_46172 < Rx_43419;
             nest_i_46172++) {
            if (slt64(tile_dim_start_46169 + nest_i_46171, m_29166) &&
                slt64(tile_dim_start_46170 + nest_i_46172, N_29165)) {
                ((__global float *) mem_45130)[(tile_dim_start_46169 +
                                                nest_i_46171) * N_29165 +
                                               (tile_dim_start_46170 +
                                                nest_i_46172)] =
                    loop_mem_45127[nest_i_46171 * Rx_43419 + nest_i_46172];
            }
        }
    }
    
  error_9:
    return;
    #undef Ty_43416
    #undef Ry_43417
    #undef Tx_43418
    #undef Rx_43419
    #undef Tk_43420
    #undef tk_div_tx_43421
    #undef tk_div_ty_43422
    #undef TxRx_43423
    #undef TyRy_43424
    #undef a_loc_szz_43426
    #undef b_loc_szz_43428
}
__kernel void mainzisegmap_intragroup_43869(__global int *global_failure,
                                            int failure_is_an_option, __global
                                            int64_t *global_failure_args,
                                            __local volatile
                                            int64_t *mem_45203_backing_aligned_0,
                                            __local volatile
                                            int64_t *mem_45196_backing_aligned_1,
                                            int64_t N_29165, int64_t m_29166,
                                            int32_t n_29169, float hfrac_29171,
                                            int64_t i32_res_29175,
                                            int32_t k2p2_29177,
                                            int64_t num_whole_tiles_43891,
                                            int64_t residual_input_43992,
                                            unsigned char cond_43993, __global
                                            unsigned char *mem_45182, __global
                                            unsigned char *mem_45185, __global
                                            unsigned char *mem_45216, __global
                                            unsigned char *mem_45218, __global
                                            unsigned char *mem_45220)
{
    #define segmap_group_sizze_41211 (mainzisegmap_group_sizze_41176)
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict mem_45203_backing_5 = (__local volatile
                                                           char *) mem_45203_backing_aligned_0;
    __local volatile char *restrict mem_45196_backing_0 = (__local volatile
                                                           char *) mem_45196_backing_aligned_1;
    volatile __local bool local_failure;
    
    if (failure_is_an_option) {
        int failed = *global_failure >= 0;
        
        if (failed)
            return;
    }
    local_failure = false;
    barrier(CLK_LOCAL_MEM_FENCE);
    
    int32_t global_tid_46340;
    int32_t local_tid_46341;
    int64_t group_sizze_46344;
    int32_t wave_sizze_46343;
    int32_t group_tid_46342;
    
    global_tid_46340 = get_global_id(0);
    local_tid_46341 = get_local_id(0);
    group_sizze_46344 = get_local_size(0);
    wave_sizze_46343 = LOCKSTEP_WIDTH;
    group_tid_46342 = get_group_id(0);
    
    int32_t gid_flat_43869;
    
    gid_flat_43869 = group_tid_46342;
    
    int32_t ltid_pre_46345;
    
    ltid_pre_46345 = local_tid_46341;
    
    int64_t gid_43868;
    
    gid_43868 = sext_i32_i64(group_tid_46342);
    
    int64_t binop_x_43876;
    
    binop_x_43876 = segmap_group_sizze_41211 * gid_43868;
    
    int32_t mem_45189[1];
    int64_t ltid_43870 = sext_i32_i64(ltid_pre_46345);
    int32_t ltid_flat_43871 = local_tid_46341;
    int64_t gtid_43877 = ltid_43870 + binop_x_43876;
    bool cond_43878 = slt64(gtid_43877, m_29166);
    int32_t pre_43879;
    
    if (cond_43878) {
        int32_t defunc_0_f_res_43881;
        int32_t redout_44325 = 0;
        
        for (int32_t i_44370 = 0; i_44370 < n_29169; i_44370++) {
            int64_t i_44326 = sext_i32_i64(i_44370);
            float x_43885 = ((__global float *) mem_45182)[i_44326 * m_29166 +
                                                           gtid_43877];
            bool isnan_res_43886;
            
            isnan_res_43886 = futrts_isnan32(x_43885);
            
            bool cond_43887 = !isnan_res_43886;
            int32_t defunc_0_f_res_43888 = btoi_bool_i32(cond_43887);
            int32_t defunc_1_op_res_43884 = add32(defunc_0_f_res_43888,
                                                  redout_44325);
            int32_t redout_tmp_46346 = defunc_1_op_res_43884;
            
            redout_44325 = redout_tmp_46346;
        }
        defunc_0_f_res_43881 = redout_44325;
        pre_43879 = defunc_0_f_res_43881;
    } else {
        pre_43879 = 0;
    }
    mem_45189[(int64_t) 0] = pre_43879;
    barrier(CLK_LOCAL_MEM_FENCE);
    
    float mem_45192[1];
    int64_t ltid_43892 = sext_i32_i64(ltid_pre_46345);
    int32_t ltid_flat_43893 = local_tid_46341;
    
    mem_45192[(int64_t) 0] = 0.0F;
    barrier(CLK_LOCAL_MEM_FENCE);
    
    __local char *mem_45196;
    
    mem_45196 = (__local char *) mem_45196_backing_0;
    
    float accs_mem_45200[1];
    float mem_param_45193[1];
    
    for (int32_t i_1 = 0; i_1 < 1; i_1++)
        mem_param_45193[i_1] = mem_45192[i_1];
    for (int64_t tile_id_43899 = 0; tile_id_43899 < num_whole_tiles_43891;
         tile_id_43899++) {
        int64_t binop_x_43948 = segmap_group_sizze_41211 * tile_id_43899;
        int64_t ltid_43900 = sext_i32_i64(ltid_pre_46345);
        int32_t ltid_flat_43901 = local_tid_46341;
        int64_t j_43949 = ltid_43900 + binop_x_43948;
        bool cond_43953 = slt64(j_43949, i32_res_29175);
        int32_t pre_43954;
        
        if (cond_43953) {
            int32_t index_primexp_44283 = sext_i64_i32(j_43949);
            
            pre_43954 = index_primexp_44283;
        } else {
            pre_43954 = 0;
        }
        ((__local int32_t *) mem_45196)[ltid_43900] = pre_43954;
        barrier(CLK_LOCAL_MEM_FENCE);
        
        float mem_45199[1];
        int64_t ltid_43919 = sext_i32_i64(ltid_pre_46345);
        int32_t ltid_flat_43920 = local_tid_46341;
        int64_t gtid_43959 = binop_x_43876 + ltid_43919;
        float acc_43961 = mem_param_45193[(int64_t) 0];
        bool cond_43962 = slt64(gtid_43959, m_29166);
        float acc_43963;
        
        if (cond_43962) {
            int32_t defunc_0_f_res_43960 = mem_45189[(int64_t) 0];
            float x_43964;
            float redout_44327 = acc_43961;
            
            for (int64_t i_44328 = 0; i_44328 < segmap_group_sizze_41211;
                 i_44328++) {
                int32_t x_43968 = ((__local int32_t *) mem_45196)[i_44328];
                bool cond_43969 = slt32(x_43968, defunc_0_f_res_43960);
                float defunc_0_f_res_43970;
                
                if (cond_43969) {
                    int64_t i_43971 = sext_i32_i64(x_43968);
                    bool x_43972 = sle64((int64_t) 0, i_43971);
                    bool y_43973 = slt64(i_43971, N_29165);
                    bool bounds_check_43974 = x_43972 && y_43973;
                    bool index_certs_43975;
                    
                    if (!bounds_check_43974) {
                        {
                            if (atomic_cmpxchg_i32_global(global_failure, -1,
                                                          15) == -1) {
                                global_failure_args[0] = i_43971;
                                global_failure_args[1] = N_29165;
                                ;
                            }
                            local_failure = true;
                            goto error_3;
                        }
                    }
                    
                    float defunc_0_f_res_t_res_43976 = ((__global
                                                         float *) mem_45185)[i_43971 *
                                                                             m_29166 +
                                                                             gtid_43959];
                    
                    defunc_0_f_res_43970 = defunc_0_f_res_t_res_43976;
                } else {
                    defunc_0_f_res_43970 = 0.0F;
                }
                
                float defunc_0_f_res_43977 = defunc_0_f_res_43970 *
                      defunc_0_f_res_43970;
                float defunc_1_op_res_43967 = defunc_0_f_res_43977 +
                      redout_44327;
                float redout_tmp_46349 = defunc_1_op_res_43967;
                
                redout_44327 = redout_tmp_46349;
            }
            x_43964 = redout_44327;
            acc_43963 = x_43964;
        } else {
            acc_43963 = acc_43961;
        }
        mem_45199[(int64_t) 0] = acc_43963;
        
      error_3:
        barrier(CLK_LOCAL_MEM_FENCE);
        if (local_failure)
            return;
        barrier(CLK_LOCAL_MEM_FENCE);
        
        float mem_param_tmp_46347[1];
        
        for (int32_t i_2 = 0; i_2 < 1; i_2++)
            mem_param_tmp_46347[i_2] = mem_45199[i_2];
        for (int32_t i_3 = 0; i_3 < 1; i_3++)
            mem_param_45193[i_3] = mem_param_tmp_46347[i_3];
    }
    for (int32_t i_4 = 0; i_4 < 1; i_4++)
        accs_mem_45200[i_4] = mem_param_45193[i_4];
    
    __local char *mem_45203;
    
    mem_45203 = (__local char *) mem_45203_backing_5;
    
    float mem_45206[1];
    float mem_45511[1];
    
    if (cond_43993) {
        mem_45511[(int64_t) 0] = accs_mem_45200[(int64_t) 0];
    } else {
        int64_t binop_x_44003 = segmap_group_sizze_41211 *
                num_whole_tiles_43891;
        int64_t ltid_43994 = sext_i32_i64(ltid_pre_46345);
        int32_t ltid_flat_43995 = local_tid_46341;
        int64_t j_44004 = ltid_43994 + binop_x_44003;
        bool cond_44008 = slt64(j_44004, i32_res_29175);
        int32_t pre_44009;
        
        if (cond_44008) {
            int32_t index_primexp_44284 = sext_i64_i32(j_44004);
            
            pre_44009 = index_primexp_44284;
        } else {
            pre_44009 = 0;
        }
        ((__local int32_t *) mem_45203)[ltid_43994] = pre_44009;
        barrier(CLK_LOCAL_MEM_FENCE);
        
        int64_t ltid_44014 = sext_i32_i64(ltid_pre_46345);
        int32_t ltid_flat_44015 = local_tid_46341;
        int64_t gtid_44022 = binop_x_43876 + ltid_44014;
        float acc_44024 = accs_mem_45200[(int64_t) 0];
        bool cond_44025 = slt64(gtid_44022, m_29166);
        float acc_44026;
        
        if (cond_44025) {
            int32_t defunc_0_f_res_44023 = mem_45189[(int64_t) 0];
            float x_44027;
            float redout_44329 = acc_44024;
            
            for (int64_t i_44330 = 0; i_44330 < residual_input_43992;
                 i_44330++) {
                int32_t x_44031 = ((__local int32_t *) mem_45203)[i_44330];
                bool cond_44032 = slt32(x_44031, defunc_0_f_res_44023);
                float defunc_0_f_res_44033;
                
                if (cond_44032) {
                    int64_t i_44034 = sext_i32_i64(x_44031);
                    bool x_44035 = sle64((int64_t) 0, i_44034);
                    bool y_44036 = slt64(i_44034, N_29165);
                    bool bounds_check_44037 = x_44035 && y_44036;
                    bool index_certs_44038;
                    
                    if (!bounds_check_44037) {
                        {
                            if (atomic_cmpxchg_i32_global(global_failure, -1,
                                                          16) == -1) {
                                global_failure_args[0] = i_44034;
                                global_failure_args[1] = N_29165;
                                ;
                            }
                            local_failure = true;
                            goto error_5;
                        }
                    }
                    
                    float defunc_0_f_res_t_res_44039 = ((__global
                                                         float *) mem_45185)[i_44034 *
                                                                             m_29166 +
                                                                             gtid_44022];
                    
                    defunc_0_f_res_44033 = defunc_0_f_res_t_res_44039;
                } else {
                    defunc_0_f_res_44033 = 0.0F;
                }
                
                float defunc_0_f_res_44040 = defunc_0_f_res_44033 *
                      defunc_0_f_res_44033;
                float defunc_1_op_res_44030 = defunc_0_f_res_44040 +
                      redout_44329;
                float redout_tmp_46350 = defunc_1_op_res_44030;
                
                redout_44329 = redout_tmp_46350;
            }
            x_44027 = redout_44329;
            acc_44026 = x_44027;
        } else {
            acc_44026 = acc_44024;
        }
        mem_45206[(int64_t) 0] = acc_44026;
        
      error_5:
        barrier(CLK_LOCAL_MEM_FENCE);
        if (local_failure)
            return;
        barrier(CLK_LOCAL_MEM_FENCE);
        mem_45511[(int64_t) 0] = mem_45206[(int64_t) 0];
    }
    
    int32_t mem_45210[1];
    int32_t mem_45212[1];
    float mem_45214[1];
    int64_t ltid_44043 = sext_i32_i64(ltid_pre_46345);
    int32_t ltid_flat_44044 = local_tid_46341;
    int64_t gtid_44054 = binop_x_43876 + ltid_44043;
    bool cond_44056 = slt64(gtid_44054, m_29166);
    int32_t postlude_44057;
    int32_t postlude_44058;
    float postlude_44059;
    
    if (cond_44056) {
        float defunc_0_f_res_44055 = mem_45511[(int64_t) 0];
        int32_t defunc_0_f_res_44060 = mem_45189[(int64_t) 0];
        int32_t r32_arg_44061 = sub32(defunc_0_f_res_44060, k2p2_29177);
        float i32_res_44062 = sitofp_i32_f32(r32_arg_44061);
        float sqrt_arg_44063 = defunc_0_f_res_44055 / i32_res_44062;
        float sqrt_res_44064;
        
        sqrt_res_44064 = futrts_sqrt32(sqrt_arg_44063);
        
        float i32_res_44065 = sitofp_i32_f32(defunc_0_f_res_44060);
        float t32_arg_44066 = hfrac_29171 * i32_res_44065;
        int32_t f32_res_44067 = fptosi_f32_i32(t32_arg_44066);
        
        postlude_44057 = f32_res_44067;
        postlude_44058 = defunc_0_f_res_44060;
        postlude_44059 = sqrt_res_44064;
    } else {
        postlude_44057 = 0;
        postlude_44058 = 0;
        postlude_44059 = 0.0F;
    }
    mem_45210[(int64_t) 0] = postlude_44057;
    mem_45212[(int64_t) 0] = postlude_44058;
    mem_45214[(int64_t) 0] = postlude_44059;
    barrier(CLK_LOCAL_MEM_FENCE);
    if (slt64(sext_i32_i64(local_tid_46341) + segmap_group_sizze_41211 *
              sext_i32_i64(group_tid_46342), m_29166)) {
        ((__global int32_t *) mem_45216)[sext_i32_i64(local_tid_46341) +
                                         segmap_group_sizze_41211 *
                                         sext_i32_i64(group_tid_46342)] =
            mem_45210[(int64_t) 0];
    }
    if (slt64(sext_i32_i64(local_tid_46341) + segmap_group_sizze_41211 *
              sext_i32_i64(group_tid_46342), m_29166)) {
        ((__global int32_t *) mem_45218)[sext_i32_i64(local_tid_46341) +
                                         segmap_group_sizze_41211 *
                                         sext_i32_i64(group_tid_46342)] =
            mem_45212[(int64_t) 0];
    }
    if (slt64(sext_i32_i64(local_tid_46341) + segmap_group_sizze_41211 *
              sext_i32_i64(group_tid_46342), m_29166)) {
        ((__global float *) mem_45220)[sext_i32_i64(local_tid_46341) +
                                       segmap_group_sizze_41211 *
                                       sext_i32_i64(group_tid_46342)] =
            mem_45214[(int64_t) 0];
    }
    
  error_7:
    return;
    #undef segmap_group_sizze_41211
}
// Kernel mainzisegmap_intragroup_44075 (machine-generated).
//
// Each work-item computes gtid = local id + segmap_group_sizze_41468 * group id.
// Work-items with gtid < m_29166 accumulate a float sum and store it to
// mem_45275[gtid]; the remaining work-items only take part in the barriers.
//
// For every staged int32 value x (see below) with x < b, where
// a = defunc_3_map_res_mem_45245[gtid] and b = defunc_3_map_res_mem_45244[gtid],
// the term added is defunc_4_map_res_mem_45178[gtid * N_29165 + (a + x - b + 1)].
// Staged values with x >= b contribute 0.0F.
//
// The x values are staged through __local memory: every work-item stores one
// value j = local id + segmap_group_sizze_41468 * tile number (or 0 when
// j >= i32_res_29568), and after a barrier the work-items read them back.
// There are num_whole_tiles_44095 full passes, followed by one partial pass
// that consumes only the first residual_input_44206 staged values and is
// skipped entirely when cond_44207 is nonzero.
//
// Parameters:
//   global_failure        failure code cell; a value >= 0 makes the kernel
//                         return on entry when failure_is_an_option is set.
//                         Codes 20 and 21 are offered to it on a bad index.
//   failure_is_an_option  enables the entry check of global_failure.
//   global_failure_args   slots 0 and 1 receive the bad index and N_29165.
//   mem_45269_backing_aligned_0, mem_45262_backing_aligned_1
//                         __local buffers used as int32 arrays indexed by
//                         local id (partial pass and full passes, in that
//                         order).
//   N_29165               exclusive upper bound of the computed index and the
//                         multiplier of gtid in defunc_4_map_res_mem_45178.
//   m_29166               exclusive upper bound on gtid values that do work.
//   i32_res_29568         exclusive upper bound on staged values j.
//   defunc_4_map_res_mem_45178  float input.
//   defunc_3_map_res_mem_45244, defunc_3_map_res_mem_45245  int32 inputs.
//   mem_45275             float output.
//
// NOTE(review): segmap_group_sizze_41468 expands to
// mainzisegmap_group_sizze_41445, which is not defined in this kernel; the
// staging scheme presumably relies on the launch using exactly that many
// work-items per group and on both __local buffers holding at least that many
// int32 values - confirm against the host-side launch code.
// NOTE(review): slt64, slt32, sle64, add32, sub32, sext_i32_i64, sext_i64_i32
// and atomic_cmpxchg_i32_global are defined elsewhere; the comments below
// assume they are signed comparisons, 32-bit arithmetic, integer width
// conversions and a compare-and-swap returning the previous value - confirm.
__kernel void mainzisegmap_intragroup_44075(__global int *global_failure,
                                            int failure_is_an_option, __global
                                            int64_t *global_failure_args,
                                            __local volatile
                                            int64_t *mem_45269_backing_aligned_0,
                                            __local volatile
                                            int64_t *mem_45262_backing_aligned_1,
                                            int64_t N_29165, int64_t m_29166,
                                            int64_t i32_res_29568,
                                            int64_t num_whole_tiles_44095,
                                            int64_t residual_input_44206,
                                            unsigned char cond_44207, __global
                                            unsigned char *defunc_4_map_res_mem_45178,
                                            __global
                                            unsigned char *defunc_3_map_res_mem_45244,
                                            __global
                                            unsigned char *defunc_3_map_res_mem_45245,
                                            __global unsigned char *mem_45275)
{
    #define segmap_group_sizze_41468 (mainzisegmap_group_sizze_41445)
    
    // block_dim0..2 are not read anywhere in this kernel.
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    // Byte-typed views of the two __local backing buffers.
    __local volatile char *restrict mem_45269_backing_5 = (__local volatile
                                                           char *) mem_45269_backing_aligned_0;
    __local volatile char *restrict mem_45262_backing_0 = (__local volatile
                                                           char *) mem_45262_backing_aligned_1;
    // Group-shared flag set by any work-item that hits a failed bounds check.
    volatile __local bool local_failure;
    
    // Bail out before doing anything if a failure code is already recorded.
    if (failure_is_an_option) {
        int failed = *global_failure >= 0;
        
        if (failed)
            return;
    }
    local_failure = false;
    // Make the cleared flag visible to the whole group before it can be set.
    barrier(CLK_LOCAL_MEM_FENCE);
    
    // Of these five, only local_tid_46525 and group_tid_46526 are read later.
    int32_t global_tid_46524;
    int32_t local_tid_46525;
    int64_t group_sizze_46528;
    int32_t wave_sizze_46527;
    int32_t group_tid_46526;
    
    global_tid_46524 = get_global_id(0);
    local_tid_46525 = get_local_id(0);
    group_sizze_46528 = get_local_size(0);
    wave_sizze_46527 = LOCKSTEP_WIDTH;
    group_tid_46526 = get_group_id(0);
    
    int32_t gid_flat_44075;
    
    gid_flat_44075 = group_tid_46526;
    
    int32_t ltid_pre_46529;
    
    ltid_pre_46529 = local_tid_46525;
    
    int64_t gid_44074;
    
    gid_44074 = sext_i32_i64(group_tid_46526);
    
    // Offset of this group's first gtid.
    int64_t binop_x_44084;
    
    binop_x_44084 = segmap_group_sizze_41468 * gid_44074;
    
    // Prelude: fetch the two per-gtid int32 inputs into private one-element
    // arrays; work-items past m_29166 get zeros and touch no global memory.
    int32_t mem_45253[1];
    int32_t mem_45255[1];
    int64_t ltid_44076 = sext_i32_i64(ltid_pre_46529);
    int32_t ltid_flat_44077 = local_tid_46525;
    int64_t gtid_44085 = ltid_44076 + binop_x_44084;
    bool cond_44086 = slt64(gtid_44085, m_29166);
    int32_t pre_44087;
    int32_t pre_44088;
    
    if (cond_44086) {
        int32_t x_44089 = ((__global
                            int32_t *) defunc_3_map_res_mem_45245)[gtid_44085];
        int32_t x_44090 = ((__global
                            int32_t *) defunc_3_map_res_mem_45244)[gtid_44085];
        
        pre_44087 = x_44089;
        pre_44088 = x_44090;
    } else {
        pre_44087 = 0;
        pre_44088 = 0;
    }
    mem_45253[(int64_t) 0] = pre_44087;
    mem_45255[(int64_t) 0] = pre_44088;
    barrier(CLK_LOCAL_MEM_FENCE);
    
    // Initial value of the running sum.
    float mem_45258[1];
    int64_t ltid_44096 = sext_i32_i64(ltid_pre_46529);
    int32_t ltid_flat_44097 = local_tid_46525;
    
    mem_45258[(int64_t) 0] = 0.0F;
    barrier(CLK_LOCAL_MEM_FENCE);
    
    // __local staging buffer used by the full passes.
    __local char *mem_45262;
    
    mem_45262 = (__local char *) mem_45262_backing_0;
    
    float accs_mem_45266[1];
    float mem_param_45259[1];
    
    // mem_param_45259 carries the running sum from one full pass to the next.
    for (int32_t i_1 = 0; i_1 < 1; i_1++)
        mem_param_45259[i_1] = mem_45258[i_1];
    // Full passes: each one stages segmap_group_sizze_41468 values and every
    // active work-item consumes all of them.
    for (int64_t tile_id_44103 = 0; tile_id_44103 < num_whole_tiles_44095;
         tile_id_44103++) {
        int64_t binop_x_44156 = segmap_group_sizze_41468 * tile_id_44103;
        int64_t ltid_44104 = sext_i32_i64(ltid_pre_46529);
        int32_t ltid_flat_44105 = local_tid_46525;
        int64_t j_44157 = ltid_44104 + binop_x_44156;
        bool cond_44162 = slt64(j_44157, i32_res_29568);
        int32_t pre_44163;
        
        // The staged value is the index j itself, or 0 when j is out of range.
        if (cond_44162) {
            int32_t index_primexp_44285 = sext_i64_i32(j_44157);
            int32_t tile_elem_44164 = index_primexp_44285;
            
            pre_44163 = tile_elem_44164;
        } else {
            pre_44163 = 0;
        }
        ((__local int32_t *) mem_45262)[ltid_44104] = pre_44163;
        // All staged values must be written before any work-item reads them.
        barrier(CLK_LOCAL_MEM_FENCE);
        
        float mem_45265[1];
        int64_t ltid_44124 = sext_i32_i64(ltid_pre_46529);
        int32_t ltid_flat_44125 = local_tid_46525;
        int64_t gtid_44168 = binop_x_44084 + ltid_44124;
        float acc_44171 = mem_param_45259[(int64_t) 0];
        bool cond_44172 = slt64(gtid_44168, m_29166);
        float acc_44173;
        
        if (cond_44172) {
            int32_t x_44169 = mem_45253[(int64_t) 0];
            int32_t x_44170 = mem_45255[(int64_t) 0];
            float x_44174;
            float redout_44331 = acc_44171;
            
            for (int64_t i_44332 = 0; i_44332 < segmap_group_sizze_41468;
                 i_44332++) {
                int32_t x_44178 = ((__local int32_t *) mem_45262)[i_44332];
                bool cond_44179 = slt32(x_44178, x_44170);
                float defunc_0_f_res_44180;
                
                if (cond_44179) {
                    // Index into the float input: x_44169 + x_44178 - x_44170 + 1.
                    int32_t x_44181 = add32(x_44169, x_44178);
                    int32_t x_44182 = sub32(x_44181, x_44170);
                    int32_t i_44183 = add32(1, x_44182);
                    int64_t i_44184 = sext_i32_i64(i_44183);
                    bool x_44185 = sle64((int64_t) 0, i_44184);
                    bool y_44186 = slt64(i_44184, N_29165);
                    bool bounds_check_44187 = x_44185 && y_44186;
                    bool index_certs_44188;
                    
                    // Out-of-range index: offer failure code 20, record the
                    // index and bound only when the call returns -1, flag the
                    // group and jump to the barrier at error_3.
                    if (!bounds_check_44187) {
                        {
                            if (atomic_cmpxchg_i32_global(global_failure, -1,
                                                          20) == -1) {
                                global_failure_args[0] = i_44184;
                                global_failure_args[1] = N_29165;
                                ;
                            }
                            local_failure = true;
                            goto error_3;
                        }
                    }
                    
                    float defunc_0_f_res_t_res_44189 = ((__global
                                                         float *) defunc_4_map_res_mem_45178)[gtid_44168 *
                                                                                              N_29165 +
                                                                                              i_44184];
                    
                    defunc_0_f_res_44180 = defunc_0_f_res_t_res_44189;
                } else {
                    defunc_0_f_res_44180 = 0.0F;
                }
                
                float defunc_1_op_res_44177 = defunc_0_f_res_44180 +
                      redout_44331;
                float redout_tmp_46532 = defunc_1_op_res_44177;
                
                redout_44331 = redout_tmp_46532;
            }
            x_44174 = redout_44331;
            acc_44173 = x_44174;
        } else {
            // Inactive work-items carry the sum through unchanged.
            acc_44173 = acc_44171;
        }
        mem_45265[(int64_t) 0] = acc_44173;
        
        // Failing work-items arrive here via goto and the others by falling
        // through, so the whole group meets at the barrier before anyone
        // inspects local_failure and returns.
      error_3:
        barrier(CLK_LOCAL_MEM_FENCE);
        if (local_failure)
            return;
        barrier(CLK_LOCAL_MEM_FENCE);
        
        float mem_param_tmp_46530[1];
        
        // Hand this pass's result to the next iteration.
        for (int32_t i_2 = 0; i_2 < 1; i_2++)
            mem_param_tmp_46530[i_2] = mem_45265[i_2];
        for (int32_t i_3 = 0; i_3 < 1; i_3++)
            mem_param_45259[i_3] = mem_param_tmp_46530[i_3];
    }
    // Sum after all full passes.
    for (int32_t i_4 = 0; i_4 < 1; i_4++)
        accs_mem_45266[i_4] = mem_param_45259[i_4];
    
    // __local staging buffer used by the partial pass.
    __local char *mem_45269;
    
    mem_45269 = (__local char *) mem_45269_backing_5;
    
    float mem_45272[1];
    float mem_45520[1];
    
    // cond_44207 set: no partial pass, the full-pass sum is the result.
    // NOTE(review): presumably cond_44207 means residual_input_44206 == 0 -
    // confirm against the host code that computes it.
    if (cond_44207) {
        mem_45520[(int64_t) 0] = accs_mem_45266[(int64_t) 0];
    } else {
        // Partial pass: staged values start right after the last full pass.
        int64_t binop_x_44217 = segmap_group_sizze_41468 *
                num_whole_tiles_44095;
        int64_t ltid_44208 = sext_i32_i64(ltid_pre_46529);
        int32_t ltid_flat_44209 = local_tid_46525;
        int64_t j_44218 = ltid_44208 + binop_x_44217;
        bool cond_44223 = slt64(j_44218, i32_res_29568);
        int32_t pre_44224;
        
        if (cond_44223) {
            int32_t index_primexp_44286 = sext_i64_i32(j_44218);
            int32_t tile_elem_44225 = index_primexp_44286;
            
            pre_44224 = tile_elem_44225;
        } else {
            pre_44224 = 0;
        }
        ((__local int32_t *) mem_45269)[ltid_44208] = pre_44224;
        // All staged values must be written before any work-item reads them.
        barrier(CLK_LOCAL_MEM_FENCE);
        
        int64_t ltid_44229 = sext_i32_i64(ltid_pre_46529);
        int32_t ltid_flat_44230 = local_tid_46525;
        int64_t gtid_44237 = binop_x_44084 + ltid_44229;
        float acc_44240 = accs_mem_45266[(int64_t) 0];
        bool cond_44241 = slt64(gtid_44237, m_29166);
        float acc_44242;
        
        if (cond_44241) {
            int32_t x_44238 = mem_45253[(int64_t) 0];
            int32_t x_44239 = mem_45255[(int64_t) 0];
            float x_44243;
            float redout_44333 = acc_44240;
            
            // Only the first residual_input_44206 staged values are consumed.
            for (int64_t i_44334 = 0; i_44334 < residual_input_44206;
                 i_44334++) {
                int32_t x_44247 = ((__local int32_t *) mem_45269)[i_44334];
                bool cond_44248 = slt32(x_44247, x_44239);
                float defunc_0_f_res_44249;
                
                if (cond_44248) {
                    // Same index computation as in the full passes.
                    int32_t x_44250 = add32(x_44238, x_44247);
                    int32_t x_44251 = sub32(x_44250, x_44239);
                    int32_t i_44252 = add32(1, x_44251);
                    int64_t i_44253 = sext_i32_i64(i_44252);
                    bool x_44254 = sle64((int64_t) 0, i_44253);
                    bool y_44255 = slt64(i_44253, N_29165);
                    bool bounds_check_44256 = x_44254 && y_44255;
                    bool index_certs_44257;
                    
                    // Same failure protocol as above, with code 21 and the
                    // barrier at error_5.
                    if (!bounds_check_44256) {
                        {
                            if (atomic_cmpxchg_i32_global(global_failure, -1,
                                                          21) == -1) {
                                global_failure_args[0] = i_44253;
                                global_failure_args[1] = N_29165;
                                ;
                            }
                            local_failure = true;
                            goto error_5;
                        }
                    }
                    
                    float defunc_0_f_res_t_res_44258 = ((__global
                                                         float *) defunc_4_map_res_mem_45178)[gtid_44237 *
                                                                                              N_29165 +
                                                                                              i_44253];
                    
                    defunc_0_f_res_44249 = defunc_0_f_res_t_res_44258;
                } else {
                    defunc_0_f_res_44249 = 0.0F;
                }
                
                float defunc_1_op_res_44246 = defunc_0_f_res_44249 +
                      redout_44333;
                float redout_tmp_46533 = defunc_1_op_res_44246;
                
                redout_44333 = redout_tmp_46533;
            }
            x_44243 = redout_44333;
            acc_44242 = x_44243;
        } else {
            acc_44242 = acc_44240;
        }
        mem_45272[(int64_t) 0] = acc_44242;
        
        // Group-wide rendezvous before the failure flag is inspected.
      error_5:
        barrier(CLK_LOCAL_MEM_FENCE);
        if (local_failure)
            return;
        barrier(CLK_LOCAL_MEM_FENCE);
        mem_45520[(int64_t) 0] = mem_45272[(int64_t) 0];
    }
    // Write the final sum for this gtid; work-items past m_29166 store nothing.
    if (slt64(sext_i32_i64(local_tid_46525) + segmap_group_sizze_41468 *
              sext_i32_i64(group_tid_46526), m_29166)) {
        ((__global float *) mem_45275)[sext_i32_i64(local_tid_46525) +
                                       segmap_group_sizze_41468 *
                                       sext_i32_i64(group_tid_46526)] =
            mem_45520[(int64_t) 0];
    }
    
    // No goto in this kernel targets error_6.
  error_6:
    return;
    #undef segmap_group_sizze_41468
}
__kernel void mainzisegred_large_39115(__global int *global_failure,
                                       __local volatile
                                       int64_t *sync_arr_mem_45793_backing_aligned_0,
                                       __local volatile
                                       int64_t *red_arr_mem_45791_backing_aligned_1,
                                       int64_t N_29165, int64_t i32_res_29175,
                                       int64_t i32_res_29181,
                                       int64_t num_groups_39254,
                                       int64_t groups_per_segment_45777,
                                       int64_t elements_per_thread_45778,
                                       int64_t virt_num_groups_45779,
                                       int64_t threads_per_segment_45781,
                                       __global unsigned char *images_mem_44381,
                                       __global
                                       unsigned char *binop_p_mem_44390,
                                       __global unsigned char *mem_44531,
                                       __global unsigned char *mem_44536,
                                       __global
                                       unsigned char *group_res_arr_mem_45782,
                                       __global
                                       unsigned char *mainzicounter_mem_45784)
{
    #define segred_group_sizze_39253 (mainzisegred_group_sizze_39109)
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict sync_arr_mem_45793_backing_1 =
                          (__local volatile
                           char *) sync_arr_mem_45793_backing_aligned_0;
    __local volatile char *restrict red_arr_mem_45791_backing_0 =
                          (__local volatile
                           char *) red_arr_mem_45791_backing_aligned_1;
    
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_45786;
    int32_t local_tid_45787;
    int64_t group_sizze_45790;
    int32_t wave_sizze_45789;
    int32_t group_tid_45788;
    
    global_tid_45786 = get_global_id(0);
    local_tid_45787 = get_local_id(0);
    group_sizze_45790 = get_local_size(0);
    wave_sizze_45789 = LOCKSTEP_WIDTH;
    group_tid_45788 = get_group_id(0);
    
    int32_t phys_tid_39115;
    
    phys_tid_39115 = global_tid_45786;
    
    __local char *red_arr_mem_45791;
    
    red_arr_mem_45791 = (__local char *) red_arr_mem_45791_backing_0;
    
    __local char *sync_arr_mem_45793;
    
    sync_arr_mem_45793 = (__local char *) sync_arr_mem_45793_backing_1;
    
    int32_t phys_group_id_45795;
    
    phys_group_id_45795 = get_group_id(0);
    for (int32_t i_45796 = 0; i_45796 <
         sdiv_up32(sext_i64_i32(virt_num_groups_45779) - phys_group_id_45795,
                   sext_i64_i32(num_groups_39254)); i_45796++) {
        int32_t virt_group_id_45797 = phys_group_id_45795 + i_45796 *
                sext_i64_i32(num_groups_39254);
        int32_t flat_segment_id_45798 = squot32(virt_group_id_45797,
                                                sext_i64_i32(groups_per_segment_45777));
        int64_t global_tid_45799 = srem64(sext_i32_i64(virt_group_id_45797) *
                                          segred_group_sizze_39253 +
                                          sext_i32_i64(local_tid_45787),
                                          segred_group_sizze_39253 *
                                          groups_per_segment_45777);
        int64_t gtid_39102 = squot64(sext_i32_i64(flat_segment_id_45798),
                                     i32_res_29181 * i32_res_29181);
        int64_t gtid_39103 = squot64(sext_i32_i64(flat_segment_id_45798) -
                                     squot64(sext_i32_i64(flat_segment_id_45798),
                                             i32_res_29181 * i32_res_29181) *
                                     (i32_res_29181 * i32_res_29181),
                                     i32_res_29181);
        int64_t gtid_39104 = sext_i32_i64(flat_segment_id_45798) -
                squot64(sext_i32_i64(flat_segment_id_45798), i32_res_29181 *
                        i32_res_29181) * (i32_res_29181 * i32_res_29181) -
                squot64(sext_i32_i64(flat_segment_id_45798) -
                        squot64(sext_i32_i64(flat_segment_id_45798),
                                i32_res_29181 * i32_res_29181) *
                        (i32_res_29181 * i32_res_29181), i32_res_29181) *
                i32_res_29181;
        int64_t gtid_39114;
        float x_acc_45800;
        int64_t chunk_sizze_45801;
        
        chunk_sizze_45801 = smin64(elements_per_thread_45778,
                                   sdiv_up64(i32_res_29175 -
                                             sext_i32_i64(sext_i64_i32(global_tid_45799)),
                                             threads_per_segment_45781));
        
        float x_39257;
        float x_39258;
        
        // neutral-initialise the accumulators
        {
            x_acc_45800 = 0.0F;
        }
        for (int64_t i_45805 = 0; i_45805 < chunk_sizze_45801; i_45805++) {
            gtid_39114 = sext_i32_i64(sext_i64_i32(global_tid_45799)) +
                threads_per_segment_45781 * i_45805;
            // apply map function
            {
                float x_39263 = ((__global
                                  float *) images_mem_44381)[gtid_39102 *
                                                             N_29165 +
                                                             gtid_39114];
                float x_39264 = ((__global
                                  float *) binop_p_mem_44390)[gtid_39103 *
                                                              N_29165 +
                                                              gtid_39114];
                float x_39265 = ((__global float *) mem_44531)[gtid_39104 *
                                                               N_29165 +
                                                               gtid_39114];
                float x_39266 = x_39264 * x_39265;
                bool isnan_res_39267;
                
                isnan_res_39267 = futrts_isnan32(x_39263);
                
                float y_39268;
                
                if (isnan_res_39267) {
                    y_39268 = 0.0F;
                } else {
                    y_39268 = 1.0F;
                }
                
                float defunc_2_f_res_39269 = x_39266 * y_39268;
                
                // save map-out results
                { }
                // load accumulator
                {
                    x_39257 = x_acc_45800;
                }
                // load new values
                {
                    x_39258 = defunc_2_f_res_39269;
                }
                // apply reduction operator
                {
                    float defunc_1_op_res_39259 = x_39257 + x_39258;
                    
                    // store in accumulator
                    {
                        x_acc_45800 = defunc_1_op_res_39259;
                    }
                }
            }
        }
        // to reduce current chunk, first store our result in memory
        {
            x_39257 = x_acc_45800;
            ((__local
              float *) red_arr_mem_45791)[sext_i32_i64(local_tid_45787)] =
                x_39257;
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        
        int32_t offset_45806;
        int32_t skip_waves_45807;
        
        skip_waves_45807 = 1;
        
        float x_45802;
        float x_45803;
        
        offset_45806 = 0;
        // participating threads read initial accumulator
        {
            if (slt32(local_tid_45787,
                      sext_i64_i32(segred_group_sizze_39253))) {
                x_45802 = ((__local
                            float *) red_arr_mem_45791)[sext_i32_i64(local_tid_45787 +
                                                        offset_45806)];
            }
        }
        offset_45806 = 1;
        while (slt32(offset_45806, wave_sizze_45789)) {
            if (slt32(local_tid_45787 + offset_45806,
                      sext_i64_i32(segred_group_sizze_39253)) &&
                ((local_tid_45787 - squot32(local_tid_45787, wave_sizze_45789) *
                  wave_sizze_45789) & (2 * offset_45806 - 1)) == 0) {
                // read array element
                {
                    x_45803 = ((volatile __local
                                float *) red_arr_mem_45791)[sext_i32_i64(local_tid_45787 +
                                                            offset_45806)];
                }
                // apply reduction operation
                {
                    float defunc_1_op_res_45804 = x_45802 + x_45803;
                    
                    x_45802 = defunc_1_op_res_45804;
                }
                // write result of operation
                {
                    ((volatile __local
                      float *) red_arr_mem_45791)[sext_i32_i64(local_tid_45787)] =
                        x_45802;
                }
            }
            offset_45806 *= 2;
        }
        while (slt32(skip_waves_45807,
                     squot32(sext_i64_i32(segred_group_sizze_39253) +
                             wave_sizze_45789 - 1, wave_sizze_45789))) {
            barrier(CLK_LOCAL_MEM_FENCE);
            offset_45806 = skip_waves_45807 * wave_sizze_45789;
            if (slt32(local_tid_45787 + offset_45806,
                      sext_i64_i32(segred_group_sizze_39253)) &&
                ((local_tid_45787 - squot32(local_tid_45787, wave_sizze_45789) *
                  wave_sizze_45789) == 0 && (squot32(local_tid_45787,
                                                     wave_sizze_45789) & (2 *
                                                                          skip_waves_45807 -
                                                                          1)) ==
                 0)) {
                // read array element
                {
                    x_45803 = ((__local
                                float *) red_arr_mem_45791)[sext_i32_i64(local_tid_45787 +
                                                            offset_45806)];
                }
                // apply reduction operation
                {
                    float defunc_1_op_res_45804 = x_45802 + x_45803;
                    
                    x_45802 = defunc_1_op_res_45804;
                }
                // write result of operation
                {
                    ((__local
                      float *) red_arr_mem_45791)[sext_i32_i64(local_tid_45787)] =
                        x_45802;
                }
            }
            skip_waves_45807 *= 2;
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        // first thread saves the result in accumulator
        {
            if (sext_i32_i64(local_tid_45787) == (int64_t) 0) {
                x_acc_45800 = x_45802;
            }
        }
        if (groups_per_segment_45777 == (int64_t) 1) {
            // first thread in group saves final result to memory
            {
                if (local_tid_45787 == 0) {
                    ((__global float *) mem_44536)[gtid_39102 * (i32_res_29181 *
                                                                 i32_res_29181) +
                                                   gtid_39103 * i32_res_29181 +
                                                   gtid_39104] = x_acc_45800;
                }
            }
        } else {
            int32_t old_counter_45808;
            
            // first thread in group saves group result to global memory
            {
                if (local_tid_45787 == 0) {
                    ((__global
                      float *) group_res_arr_mem_45782)[sext_i32_i64(virt_group_id_45797) *
                                                        segred_group_sizze_39253] =
                        x_acc_45800;
                    mem_fence_global();
                    old_counter_45808 =
                        atomic_add_i32_global(&((volatile __global
                                                 int *) mainzicounter_mem_45784)[sext_i32_i64(srem32(flat_segment_id_45798,
                                                                                                     10240))],
                                              (int) 1);
                    ((__local bool *) sync_arr_mem_45793)[(int64_t) 0] =
                        old_counter_45808 == groups_per_segment_45777 -
                        (int64_t) 1;
                }
            }
            barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
            
            bool is_last_group_45809;
            
            is_last_group_45809 = ((__local
                                    bool *) sync_arr_mem_45793)[(int64_t) 0];
            if (is_last_group_45809) {
                if (local_tid_45787 == 0) {
                    old_counter_45808 =
                        atomic_add_i32_global(&((volatile __global
                                                 int *) mainzicounter_mem_45784)[sext_i32_i64(srem32(flat_segment_id_45798,
                                                                                                     10240))],
                                              (int) ((int64_t) 0 -
                                                     groups_per_segment_45777));
                }
                // read in the per-group-results
                {
                    int64_t read_per_thread_45810 =
                            sdiv_up64(groups_per_segment_45777,
                                      segred_group_sizze_39253);
                    
                    x_39257 = 0.0F;
                    for (int64_t i_45811 = 0; i_45811 < read_per_thread_45810;
                         i_45811++) {
                        int64_t group_res_id_45812 =
                                sext_i32_i64(local_tid_45787) *
                                read_per_thread_45810 + i_45811;
                        int64_t index_of_group_res_45813 =
                                sext_i32_i64(flat_segment_id_45798) *
                                groups_per_segment_45777 + group_res_id_45812;
                        
                        if (slt64(group_res_id_45812,
                                  groups_per_segment_45777)) {
                            x_39258 = ((__global
                                        float *) group_res_arr_mem_45782)[index_of_group_res_45813 *
                                                                          segred_group_sizze_39253];
                            
                            float defunc_1_op_res_39259;
                            
                            defunc_1_op_res_39259 = x_39257 + x_39258;
                            x_39257 = defunc_1_op_res_39259;
                        }
                    }
                }
                ((__local
                  float *) red_arr_mem_45791)[sext_i32_i64(local_tid_45787)] =
                    x_39257;
                barrier(CLK_LOCAL_MEM_FENCE);
                // reduce the per-group results
                {
                    int32_t offset_45814;
                    int32_t skip_waves_45815;
                    
                    skip_waves_45815 = 1;
                    
                    float x_45802;
                    float x_45803;
                    
                    offset_45814 = 0;
                    // participating threads read initial accumulator
                    {
                        if (slt32(local_tid_45787,
                                  sext_i64_i32(segred_group_sizze_39253))) {
                            x_45802 = ((__local
                                        float *) red_arr_mem_45791)[sext_i32_i64(local_tid_45787 +
                                                                    offset_45814)];
                        }
                    }
                    offset_45814 = 1;
                    while (slt32(offset_45814, wave_sizze_45789)) {
                        if (slt32(local_tid_45787 + offset_45814,
                                  sext_i64_i32(segred_group_sizze_39253)) &&
                            ((local_tid_45787 - squot32(local_tid_45787,
                                                        wave_sizze_45789) *
                              wave_sizze_45789) & (2 * offset_45814 - 1)) ==
                            0) {
                            // read array element
                            {
                                x_45803 = ((volatile __local
                                            float *) red_arr_mem_45791)[sext_i32_i64(local_tid_45787 +
                                                                        offset_45814)];
                            }
                            // apply reduction operation
                            {
                                float defunc_1_op_res_45804 = x_45802 + x_45803;
                                
                                x_45802 = defunc_1_op_res_45804;
                            }
                            // write result of operation
                            {
                                ((volatile __local
                                  float *) red_arr_mem_45791)[sext_i32_i64(local_tid_45787)] =
                                    x_45802;
                            }
                        }
                        offset_45814 *= 2;
                    }
                    while (slt32(skip_waves_45815,
                                 squot32(sext_i64_i32(segred_group_sizze_39253) +
                                         wave_sizze_45789 - 1,
                                         wave_sizze_45789))) {
                        barrier(CLK_LOCAL_MEM_FENCE);
                        offset_45814 = skip_waves_45815 * wave_sizze_45789;
                        if (slt32(local_tid_45787 + offset_45814,
                                  sext_i64_i32(segred_group_sizze_39253)) &&
                            ((local_tid_45787 - squot32(local_tid_45787,
                                                        wave_sizze_45789) *
                              wave_sizze_45789) == 0 &&
                             (squot32(local_tid_45787, wave_sizze_45789) & (2 *
                                                                            skip_waves_45815 -
                                                                            1)) ==
                             0)) {
                            // read array element
                            {
                                x_45803 = ((__local
                                            float *) red_arr_mem_45791)[sext_i32_i64(local_tid_45787 +
                                                                        offset_45814)];
                            }
                            // apply reduction operation
                            {
                                float defunc_1_op_res_45804 = x_45802 + x_45803;
                                
                                x_45802 = defunc_1_op_res_45804;
                            }
                            // write result of operation
                            {
                                ((__local
                                  float *) red_arr_mem_45791)[sext_i32_i64(local_tid_45787)] =
                                    x_45802;
                            }
                        }
                        skip_waves_45815 *= 2;
                    }
                    // and back to memory with the final result
                    {
                        if (local_tid_45787 == 0) {
                            ((__global float *) mem_44536)[gtid_39102 *
                                                           (i32_res_29181 *
                                                            i32_res_29181) +
                                                           gtid_39103 *
                                                           i32_res_29181 +
                                                           gtid_39104] =
                                x_45802;
                        }
                    }
                }
            }
        }
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }
    
  error_1:
    return;
    #undef segred_group_sizze_39253
}
// Segmented sum-reduction kernel in which several work-groups may cooperate on
// one segment (groups_per_segment_45974 work-groups per segment).
//
// A segment is identified by flat_segment_id, which is split into
// (gtid_40455, gtid_40456) as quotient and remainder by i32_res_29181. For
// that segment the kernel accumulates, over the reduction index gtid_40465,
//
//     binop_p[gtid_40456 * N_29165 + gtid_40465] *
//     images[gtid_40455 * N_29165 + gtid_40465]
//
// with a term counted as 0.0F when the images element is reported as NaN by
// futrts_isnan32. The float result is stored in
// mem_44844[gtid_40455 * i32_res_29181 + gtid_40456].
//
// Parameters (as used in this kernel):
//   global_failure            - the kernel returns immediately if the pointed-to
//                               value is >= 0.
//   sync_arr_mem_..._aligned_0 - __local scratch; element 0 is used as a bool
//                               "this is the last group of the segment" flag.
//   red_arr_mem_..._aligned_1  - __local scratch holding one float per
//                               work-item for the in-group reduction.
//   N_29165                   - row stride used when indexing images/binop_p.
//   i32_res_29175             - upper bound used when sizing each thread's
//                               chunk of the reduction dimension.
//   i32_res_29181             - divisor used to split flat_segment_id and row
//                               stride of the output.
//   num_groups_40519          - stride between the virtual groups handled by
//                               one physical work-group.
//   groups_per_segment_45974  - number of (virtual) groups sharing a segment.
//   elements_per_thread_45975 - cap on per-thread sequential iterations.
//   virt_num_groups_45976     - total number of virtual groups to process.
//   threads_per_segment_45978 - stride between consecutive elements read by
//                               one thread.
//   images_mem_44381, binop_p_mem_44390 - __global inputs, read as float.
//   mem_44844                 - __global output, written as float.
//   group_res_arr_mem_45979   - __global staging area for per-group partial
//                               sums (only used when groups_per_segment != 1).
//   mainzicounter_mem_45981   - __global int counters used to detect the last
//                               group to finish a segment.
__kernel void mainzisegred_large_40466(__global int *global_failure,
                                       __local volatile
                                       int64_t *sync_arr_mem_45990_backing_aligned_0,
                                       __local volatile
                                       int64_t *red_arr_mem_45988_backing_aligned_1,
                                       int64_t N_29165, int64_t i32_res_29175,
                                       int64_t i32_res_29181,
                                       int64_t num_groups_40519,
                                       int64_t groups_per_segment_45974,
                                       int64_t elements_per_thread_45975,
                                       int64_t virt_num_groups_45976,
                                       int64_t threads_per_segment_45978,
                                       __global unsigned char *images_mem_44381,
                                       __global
                                       unsigned char *binop_p_mem_44390,
                                       __global unsigned char *mem_44844,
                                       __global
                                       unsigned char *group_res_arr_mem_45979,
                                       __global
                                       unsigned char *mainzicounter_mem_45981)
{
    // Local alias for the work-group size; the right-hand macro is defined
    // outside this kernel. Undefined again at the end of the kernel.
    #define segred_group_sizze_40518 (mainzisegred_group_sizze_40460)
    
    // NOTE(review): block_dim0..2 are not referenced anywhere in this kernel.
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    // Byte-addressed views of the two __local scratch buffers.
    __local volatile char *restrict sync_arr_mem_45990_backing_1 =
                          (__local volatile
                           char *) sync_arr_mem_45990_backing_aligned_0;
    __local volatile char *restrict red_arr_mem_45988_backing_0 =
                          (__local volatile
                           char *) red_arr_mem_45988_backing_aligned_1;
    
    // Skip all work when the failure flag is already non-negative
    // (presumably set by an earlier failing kernel - confirm in host code).
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_45983;
    int32_t local_tid_45984;
    int64_t group_sizze_45987;
    int32_t wave_sizze_45986;
    int32_t group_tid_45985;
    
    global_tid_45983 = get_global_id(0);
    local_tid_45984 = get_local_id(0);
    group_sizze_45987 = get_local_size(0);
    // LOCKSTEP_WIDTH is defined outside this kernel; it bounds the first
    // (barrier-free) reduction pass below.
    wave_sizze_45986 = LOCKSTEP_WIDTH;
    group_tid_45985 = get_group_id(0);
    
    int32_t phys_tid_40466;
    
    phys_tid_40466 = global_tid_45983;
    
    __local char *red_arr_mem_45988;
    
    red_arr_mem_45988 = (__local char *) red_arr_mem_45988_backing_0;
    
    __local char *sync_arr_mem_45990;
    
    sync_arr_mem_45990 = (__local char *) sync_arr_mem_45990_backing_1;
    
    int32_t phys_group_id_45992;
    
    phys_group_id_45992 = get_group_id(0);
    // Each physical work-group handles the virtual groups
    // phys_group_id, phys_group_id + num_groups, phys_group_id + 2*num_groups,
    // ... (sdiv_up32 is presumably a rounding-up division - confirm).
    for (int32_t i_45993 = 0; i_45993 <
         sdiv_up32(sext_i64_i32(virt_num_groups_45976) - phys_group_id_45992,
                   sext_i64_i32(num_groups_40519)); i_45993++) {
        int32_t virt_group_id_45994 = phys_group_id_45992 + i_45993 *
                sext_i64_i32(num_groups_40519);
        // Segment this virtual group contributes to.
        int32_t flat_segment_id_45995 = squot32(virt_group_id_45994,
                                                sext_i64_i32(groups_per_segment_45974));
        // Index of this work-item among the
        // group_size * groups_per_segment work-items assigned to the segment.
        int64_t global_tid_45996 = srem64(sext_i32_i64(virt_group_id_45994) *
                                          segred_group_sizze_40518 +
                                          sext_i32_i64(local_tid_45984),
                                          segred_group_sizze_40518 *
                                          groups_per_segment_45974);
        // Split the flat segment id into quotient and remainder by
        // i32_res_29181; these are the two output coordinates.
        int64_t gtid_40455 = squot64(sext_i32_i64(flat_segment_id_45995),
                                     i32_res_29181);
        int64_t gtid_40456 = sext_i32_i64(flat_segment_id_45995) -
                squot64(sext_i32_i64(flat_segment_id_45995), i32_res_29181) *
                i32_res_29181;
        int64_t gtid_40465;
        float x_acc_45997;
        int64_t chunk_sizze_45998;
        
        // Number of sequential iterations for this work-item: capped by
        // elements_per_thread and by how many strided elements remain below
        // i32_res_29175 starting from this work-item's offset.
        chunk_sizze_45998 = smin64(elements_per_thread_45975,
                                   sdiv_up64(i32_res_29175 -
                                             sext_i32_i64(sext_i64_i32(global_tid_45996)),
                                             threads_per_segment_45978));
        
        float x_40522;
        float x_40523;
        
        // neutral-initialise the accumulators
        {
            x_acc_45997 = 0.0F;
        }
        // Stage 1: per-work-item sequential accumulation over a strided set of
        // reduction indices (stride threads_per_segment).
        for (int64_t i_46002 = 0; i_46002 < chunk_sizze_45998; i_46002++) {
            gtid_40465 = sext_i32_i64(sext_i64_i32(global_tid_45996)) +
                threads_per_segment_45978 * i_46002;
            // apply map function
            {
                float x_40528 = ((__global
                                  float *) images_mem_44381)[gtid_40455 *
                                                             N_29165 +
                                                             gtid_40465];
                bool isnan_res_40529;
                
                isnan_res_40529 = futrts_isnan32(x_40528);
                
                float defunc_1_f_res_40530;
                
                // NaN image values contribute 0; binop_p is only read when
                // the image value is not NaN.
                if (isnan_res_40529) {
                    defunc_1_f_res_40530 = 0.0F;
                } else {
                    float x_40527 = ((__global
                                      float *) binop_p_mem_44390)[gtid_40456 *
                                                                  N_29165 +
                                                                  gtid_40465];
                    float defunc_1_f_res_f_res_40531 = x_40527 * x_40528;
                    
                    defunc_1_f_res_40530 = defunc_1_f_res_f_res_40531;
                }
                // save map-out results
                { }
                // load accumulator
                {
                    x_40522 = x_acc_45997;
                }
                // load new values
                {
                    x_40523 = defunc_1_f_res_40530;
                }
                // apply reduction operator
                {
                    float defunc_1_op_res_40524 = x_40522 + x_40523;
                    
                    // store in accumulator
                    {
                        x_acc_45997 = defunc_1_op_res_40524;
                    }
                }
            }
        }
        // to reduce current chunk, first store our result in memory
        {
            x_40522 = x_acc_45997;
            ((__local
              float *) red_arr_mem_45988)[sext_i32_i64(local_tid_45984)] =
                x_40522;
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        
        // Stage 2: reduce the per-work-item partial sums held in red_arr.
        int32_t offset_46003;
        int32_t skip_waves_46004;
        
        skip_waves_46004 = 1;
        
        float x_45999;
        float x_46000;
        
        offset_46003 = 0;
        // participating threads read initial accumulator
        {
            if (slt32(local_tid_45984,
                      sext_i64_i32(segred_group_sizze_40518))) {
                x_45999 = ((__local
                            float *) red_arr_mem_45988)[sext_i32_i64(local_tid_45984 +
                                                        offset_46003)];
            }
        }
        offset_46003 = 1;
        // Stage 2a: reduction within each block of wave_sizze consecutive
        // work-items, with offsets 1, 2, 4, ... < wave_sizze. No barrier is
        // issued inside this loop; red_arr is accessed through volatile
        // pointers instead.
        // NOTE(review): this presumably relies on the work-items of one wave
        // executing in lockstep - confirm for the target device.
        while (slt32(offset_46003, wave_sizze_45986)) {
            if (slt32(local_tid_45984 + offset_46003,
                      sext_i64_i32(segred_group_sizze_40518)) &&
                ((local_tid_45984 - squot32(local_tid_45984, wave_sizze_45986) *
                  wave_sizze_45986) & (2 * offset_46003 - 1)) == 0) {
                // read array element
                {
                    x_46000 = ((volatile __local
                                float *) red_arr_mem_45988)[sext_i32_i64(local_tid_45984 +
                                                            offset_46003)];
                }
                // apply reduction operation
                {
                    float defunc_1_op_res_46001 = x_45999 + x_46000;
                    
                    x_45999 = defunc_1_op_res_46001;
                }
                // write result of operation
                {
                    ((volatile __local
                      float *) red_arr_mem_45988)[sext_i32_i64(local_tid_45984)] =
                        x_45999;
                }
            }
            offset_46003 *= 2;
        }
        // Stage 2b: combine the per-wave results. Only the first work-item of
        // a wave takes part, and each step is preceded by a local barrier.
        // The loop bound is the group size divided by wave_sizze, rounded up.
        while (slt32(skip_waves_46004,
                     squot32(sext_i64_i32(segred_group_sizze_40518) +
                             wave_sizze_45986 - 1, wave_sizze_45986))) {
            barrier(CLK_LOCAL_MEM_FENCE);
            offset_46003 = skip_waves_46004 * wave_sizze_45986;
            if (slt32(local_tid_45984 + offset_46003,
                      sext_i64_i32(segred_group_sizze_40518)) &&
                ((local_tid_45984 - squot32(local_tid_45984, wave_sizze_45986) *
                  wave_sizze_45986) == 0 && (squot32(local_tid_45984,
                                                     wave_sizze_45986) & (2 *
                                                                          skip_waves_46004 -
                                                                          1)) ==
                 0)) {
                // read array element
                {
                    x_46000 = ((__local
                                float *) red_arr_mem_45988)[sext_i32_i64(local_tid_45984 +
                                                            offset_46003)];
                }
                // apply reduction operation
                {
                    float defunc_1_op_res_46001 = x_45999 + x_46000;
                    
                    x_45999 = defunc_1_op_res_46001;
                }
                // write result of operation
                {
                    ((__local
                      float *) red_arr_mem_45988)[sext_i32_i64(local_tid_45984)] =
                        x_45999;
                }
            }
            skip_waves_46004 *= 2;
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        // first thread saves the result in accumulator
        {
            if (sext_i32_i64(local_tid_45984) == (int64_t) 0) {
                x_acc_45997 = x_45999;
            }
        }
        // Stage 3: publish the result. With a single group per segment the
        // group total is already the segment total; otherwise the partial
        // sums of all groups of the segment must be combined.
        if (groups_per_segment_45974 == (int64_t) 1) {
            // first thread in group saves final result to memory
            {
                if (local_tid_45984 == 0) {
                    ((__global float *) mem_44844)[gtid_40455 * i32_res_29181 +
                                                   gtid_40456] = x_acc_45997;
                }
            }
        } else {
            int32_t old_counter_46005;
            
            // first thread in group saves group result to global memory
            {
                if (local_tid_45984 == 0) {
                    // Partial sums are stored segred_group_sizze floats apart,
                    // one slot per virtual group.
                    ((__global
                      float *) group_res_arr_mem_45979)[sext_i32_i64(virt_group_id_45994) *
                                                        segred_group_sizze_40518] =
                        x_acc_45997;
                    // Fence before bumping the counter, so that the partial
                    // sum is written before this group is counted as done.
                    mem_fence_global();
                    // Count this group as finished for its segment. The
                    // counter slot is selected by flat_segment_id reduced by
                    // srem32 with 10240 (presumably the number of counter
                    // slots allocated by the host - confirm).
                    old_counter_46005 =
                        atomic_add_i32_global(&((volatile __global
                                                 int *) mainzicounter_mem_45981)[sext_i32_i64(srem32(flat_segment_id_45995,
                                                                                                     10240))],
                                              (int) 1);
                    // This group is the last one iff all other groups of the
                    // segment had already incremented the counter.
                    ((__local bool *) sync_arr_mem_45990)[(int64_t) 0] =
                        old_counter_46005 == groups_per_segment_45974 -
                        (int64_t) 1;
                }
            }
            // Make the flag written by work-item 0 visible to the whole group.
            barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
            
            bool is_last_group_46006;
            
            // Every work-item reads the same flag, so the branch below (which
            // contains barriers) is taken uniformly by the work-group.
            is_last_group_46006 = ((__local
                                    bool *) sync_arr_mem_45990)[(int64_t) 0];
            if (is_last_group_46006) {
                if (local_tid_45984 == 0) {
                    // Subtract groups_per_segment from the counter, undoing
                    // the increments made for this segment.
                    old_counter_46005 =
                        atomic_add_i32_global(&((volatile __global
                                                 int *) mainzicounter_mem_45981)[sext_i32_i64(srem32(flat_segment_id_45995,
                                                                                                     10240))],
                                              (int) ((int64_t) 0 -
                                                     groups_per_segment_45974));
                }
                // read in the per-group-results
                {
                    // Each work-item sums up to read_per_thread consecutive
                    // per-group results, starting at
                    // local_tid * read_per_thread; ids at or beyond
                    // groups_per_segment are skipped.
                    int64_t read_per_thread_46007 =
                            sdiv_up64(groups_per_segment_45974,
                                      segred_group_sizze_40518);
                    
                    x_40522 = 0.0F;
                    for (int64_t i_46008 = 0; i_46008 < read_per_thread_46007;
                         i_46008++) {
                        int64_t group_res_id_46009 =
                                sext_i32_i64(local_tid_45984) *
                                read_per_thread_46007 + i_46008;
                        int64_t index_of_group_res_46010 =
                                sext_i32_i64(flat_segment_id_45995) *
                                groups_per_segment_45974 + group_res_id_46009;
                        
                        if (slt64(group_res_id_46009,
                                  groups_per_segment_45974)) {
                            x_40523 = ((__global
                                        float *) group_res_arr_mem_45979)[index_of_group_res_46010 *
                                                                          segred_group_sizze_40518];
                            
                            float defunc_1_op_res_40524;
                            
                            defunc_1_op_res_40524 = x_40522 + x_40523;
                            x_40522 = defunc_1_op_res_40524;
                        }
                    }
                }
                ((__local
                  float *) red_arr_mem_45988)[sext_i32_i64(local_tid_45984)] =
                    x_40522;
                barrier(CLK_LOCAL_MEM_FENCE);
                // reduce the per-group results
                {
                    // Same two-pass in-group reduction as stage 2 above.
                    int32_t offset_46011;
                    int32_t skip_waves_46012;
                    
                    skip_waves_46012 = 1;
                    
                    // NOTE: these declarations shadow the outer x_45999 and
                    // x_46000 for the remainder of this block.
                    float x_45999;
                    float x_46000;
                    
                    offset_46011 = 0;
                    // participating threads read initial accumulator
                    {
                        if (slt32(local_tid_45984,
                                  sext_i64_i32(segred_group_sizze_40518))) {
                            x_45999 = ((__local
                                        float *) red_arr_mem_45988)[sext_i32_i64(local_tid_45984 +
                                                                    offset_46011)];
                        }
                    }
                    offset_46011 = 1;
                    // Barrier-free pass within each block of wave_sizze
                    // work-items (volatile accesses).
                    while (slt32(offset_46011, wave_sizze_45986)) {
                        if (slt32(local_tid_45984 + offset_46011,
                                  sext_i64_i32(segred_group_sizze_40518)) &&
                            ((local_tid_45984 - squot32(local_tid_45984,
                                                        wave_sizze_45986) *
                              wave_sizze_45986) & (2 * offset_46011 - 1)) ==
                            0) {
                            // read array element
                            {
                                x_46000 = ((volatile __local
                                            float *) red_arr_mem_45988)[sext_i32_i64(local_tid_45984 +
                                                                        offset_46011)];
                            }
                            // apply reduction operation
                            {
                                float defunc_1_op_res_46001 = x_45999 + x_46000;
                                
                                x_45999 = defunc_1_op_res_46001;
                            }
                            // write result of operation
                            {
                                ((volatile __local
                                  float *) red_arr_mem_45988)[sext_i32_i64(local_tid_45984)] =
                                    x_45999;
                            }
                        }
                        offset_46011 *= 2;
                    }
                    // Barrier-separated pass combining the per-wave results.
                    while (slt32(skip_waves_46012,
                                 squot32(sext_i64_i32(segred_group_sizze_40518) +
                                         wave_sizze_45986 - 1,
                                         wave_sizze_45986))) {
                        barrier(CLK_LOCAL_MEM_FENCE);
                        offset_46011 = skip_waves_46012 * wave_sizze_45986;
                        if (slt32(local_tid_45984 + offset_46011,
                                  sext_i64_i32(segred_group_sizze_40518)) &&
                            ((local_tid_45984 - squot32(local_tid_45984,
                                                        wave_sizze_45986) *
                              wave_sizze_45986) == 0 &&
                             (squot32(local_tid_45984, wave_sizze_45986) & (2 *
                                                                            skip_waves_46012 -
                                                                            1)) ==
                             0)) {
                            // read array element
                            {
                                x_46000 = ((__local
                                            float *) red_arr_mem_45988)[sext_i32_i64(local_tid_45984 +
                                                                        offset_46011)];
                            }
                            // apply reduction operation
                            {
                                float defunc_1_op_res_46001 = x_45999 + x_46000;
                                
                                x_45999 = defunc_1_op_res_46001;
                            }
                            // write result of operation
                            {
                                ((__local
                                  float *) red_arr_mem_45988)[sext_i32_i64(local_tid_45984)] =
                                    x_45999;
                            }
                        }
                        skip_waves_46012 *= 2;
                    }
                    // and back to memory with the final result
                    {
                        if (local_tid_45984 == 0) {
                            ((__global float *) mem_44844)[gtid_40455 *
                                                           i32_res_29181 +
                                                           gtid_40456] =
                                x_45999;
                        }
                    }
                }
            }
        }
        // Synchronise the work-group before it moves on to its next virtual
        // group, which reuses red_arr and sync_arr.
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }
    
    // NOTE(review): no goto in this kernel targets this label.
  error_1:
    return;
    #undef segred_group_sizze_40518
}
__kernel void mainzisegred_large_40603(__global int *global_failure,
                                       __local volatile
                                       int64_t *sync_arr_mem_46078_backing_aligned_0,
                                       __local volatile
                                       int64_t *red_arr_mem_46076_backing_aligned_1,
                                       int64_t i32_res_29181,
                                       int64_t num_groups_40652,
                                       int64_t groups_per_segment_46062,
                                       int64_t elements_per_thread_46063,
                                       int64_t virt_num_groups_46064,
                                       int64_t threads_per_segment_46066,
                                       __global
                                       unsigned char *defunc_3_map_res_mem_44629,
                                       __global
                                       unsigned char *defunc_3_map_res_mem_44850,
                                       __global unsigned char *mem_44910,
                                       __global
                                       unsigned char *group_res_arr_mem_46067,
                                       __global
                                       unsigned char *mainzicounter_mem_46069)
{
    #define segred_group_sizze_40651 (mainzisegred_group_sizze_40597)
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict sync_arr_mem_46078_backing_1 =
                          (__local volatile
                           char *) sync_arr_mem_46078_backing_aligned_0;
    __local volatile char *restrict red_arr_mem_46076_backing_0 =
                          (__local volatile
                           char *) red_arr_mem_46076_backing_aligned_1;
    
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_46071;
    int32_t local_tid_46072;
    int64_t group_sizze_46075;
    int32_t wave_sizze_46074;
    int32_t group_tid_46073;
    
    global_tid_46071 = get_global_id(0);
    local_tid_46072 = get_local_id(0);
    group_sizze_46075 = get_local_size(0);
    wave_sizze_46074 = LOCKSTEP_WIDTH;
    group_tid_46073 = get_group_id(0);
    
    int32_t phys_tid_40603;
    
    phys_tid_40603 = global_tid_46071;
    
    __local char *red_arr_mem_46076;
    
    red_arr_mem_46076 = (__local char *) red_arr_mem_46076_backing_0;
    
    __local char *sync_arr_mem_46078;
    
    sync_arr_mem_46078 = (__local char *) sync_arr_mem_46078_backing_1;
    
    int32_t phys_group_id_46080;
    
    phys_group_id_46080 = get_group_id(0);
    for (int32_t i_46081 = 0; i_46081 <
         sdiv_up32(sext_i64_i32(virt_num_groups_46064) - phys_group_id_46080,
                   sext_i64_i32(num_groups_40652)); i_46081++) {
        int32_t virt_group_id_46082 = phys_group_id_46080 + i_46081 *
                sext_i64_i32(num_groups_40652);
        int32_t flat_segment_id_46083 = squot32(virt_group_id_46082,
                                                sext_i64_i32(groups_per_segment_46062));
        int64_t global_tid_46084 = srem64(sext_i32_i64(virt_group_id_46082) *
                                          segred_group_sizze_40651 +
                                          sext_i32_i64(local_tid_46072),
                                          segred_group_sizze_40651 *
                                          groups_per_segment_46062);
        int64_t gtid_40592 = squot64(sext_i32_i64(flat_segment_id_46083),
                                     i32_res_29181);
        int64_t gtid_40593 = sext_i32_i64(flat_segment_id_46083) -
                squot64(sext_i32_i64(flat_segment_id_46083), i32_res_29181) *
                i32_res_29181;
        int64_t gtid_40602;
        float x_acc_46085;
        int64_t chunk_sizze_46086;
        
        chunk_sizze_46086 = smin64(elements_per_thread_46063,
                                   sdiv_up64(i32_res_29181 -
                                             sext_i32_i64(sext_i64_i32(global_tid_46084)),
                                             threads_per_segment_46066));
        
        float x_40655;
        float x_40656;
        
        // neutral-initialise the accumulators
        {
            x_acc_46085 = 0.0F;
        }
        for (int64_t i_46090 = 0; i_46090 < chunk_sizze_46086; i_46090++) {
            gtid_40602 = sext_i32_i64(sext_i64_i32(global_tid_46084)) +
                threads_per_segment_46066 * i_46090;
            // apply map function
            {
                float x_40661 = ((__global
                                  float *) defunc_3_map_res_mem_44850)[gtid_40592 *
                                                                       i32_res_29181 +
                                                                       gtid_40602];
                float x_40662 = ((__global
                                  float *) defunc_3_map_res_mem_44629)[gtid_40592 *
                                                                       (i32_res_29181 *
                                                                        i32_res_29181) +
                                                                       gtid_40593 *
                                                                       i32_res_29181 +
                                                                       gtid_40602];
                float defunc_1_f_res_40663 = x_40661 * x_40662;
                
                // save map-out results
                { }
                // load accumulator
                {
                    x_40655 = x_acc_46085;
                }
                // load new values
                {
                    x_40656 = defunc_1_f_res_40663;
                }
                // apply reduction operator
                {
                    float defunc_1_op_res_40657 = x_40655 + x_40656;
                    
                    // store in accumulator
                    {
                        x_acc_46085 = defunc_1_op_res_40657;
                    }
                }
            }
        }
        // to reduce current chunk, first store our result in memory
        {
            x_40655 = x_acc_46085;
            ((__local
              float *) red_arr_mem_46076)[sext_i32_i64(local_tid_46072)] =
                x_40655;
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        
        int32_t offset_46091;
        int32_t skip_waves_46092;
        
        skip_waves_46092 = 1;
        
        float x_46087;
        float x_46088;
        
        offset_46091 = 0;
        // participating threads read initial accumulator
        {
            if (slt32(local_tid_46072,
                      sext_i64_i32(segred_group_sizze_40651))) {
                x_46087 = ((__local
                            float *) red_arr_mem_46076)[sext_i32_i64(local_tid_46072 +
                                                        offset_46091)];
            }
        }
        offset_46091 = 1;
        while (slt32(offset_46091, wave_sizze_46074)) {
            if (slt32(local_tid_46072 + offset_46091,
                      sext_i64_i32(segred_group_sizze_40651)) &&
                ((local_tid_46072 - squot32(local_tid_46072, wave_sizze_46074) *
                  wave_sizze_46074) & (2 * offset_46091 - 1)) == 0) {
                // read array element
                {
                    x_46088 = ((volatile __local
                                float *) red_arr_mem_46076)[sext_i32_i64(local_tid_46072 +
                                                            offset_46091)];
                }
                // apply reduction operation
                {
                    float defunc_1_op_res_46089 = x_46087 + x_46088;
                    
                    x_46087 = defunc_1_op_res_46089;
                }
                // write result of operation
                {
                    ((volatile __local
                      float *) red_arr_mem_46076)[sext_i32_i64(local_tid_46072)] =
                        x_46087;
                }
            }
            offset_46091 *= 2;
        }
        while (slt32(skip_waves_46092,
                     squot32(sext_i64_i32(segred_group_sizze_40651) +
                             wave_sizze_46074 - 1, wave_sizze_46074))) {
            barrier(CLK_LOCAL_MEM_FENCE);
            offset_46091 = skip_waves_46092 * wave_sizze_46074;
            if (slt32(local_tid_46072 + offset_46091,
                      sext_i64_i32(segred_group_sizze_40651)) &&
                ((local_tid_46072 - squot32(local_tid_46072, wave_sizze_46074) *
                  wave_sizze_46074) == 0 && (squot32(local_tid_46072,
                                                     wave_sizze_46074) & (2 *
                                                                          skip_waves_46092 -
                                                                          1)) ==
                 0)) {
                // read array element
                {
                    x_46088 = ((__local
                                float *) red_arr_mem_46076)[sext_i32_i64(local_tid_46072 +
                                                            offset_46091)];
                }
                // apply reduction operation
                {
                    float defunc_1_op_res_46089 = x_46087 + x_46088;
                    
                    x_46087 = defunc_1_op_res_46089;
                }
                // write result of operation
                {
                    ((__local
                      float *) red_arr_mem_46076)[sext_i32_i64(local_tid_46072)] =
                        x_46087;
                }
            }
            skip_waves_46092 *= 2;
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        // first thread saves the result in accumulator
        {
            if (sext_i32_i64(local_tid_46072) == (int64_t) 0) {
                x_acc_46085 = x_46087;
            }
        }
        if (groups_per_segment_46062 == (int64_t) 1) {
            // first thread in group saves final result to memory
            {
                if (local_tid_46072 == 0) {
                    ((__global float *) mem_44910)[gtid_40592 * i32_res_29181 +
                                                   gtid_40593] = x_acc_46085;
                }
            }
        } else {
            int32_t old_counter_46093;
            
            // first thread in group saves group result to global memory
            {
                if (local_tid_46072 == 0) {
                    ((__global
                      float *) group_res_arr_mem_46067)[sext_i32_i64(virt_group_id_46082) *
                                                        segred_group_sizze_40651] =
                        x_acc_46085;
                    mem_fence_global();
                    old_counter_46093 =
                        atomic_add_i32_global(&((volatile __global
                                                 int *) mainzicounter_mem_46069)[sext_i32_i64(srem32(flat_segment_id_46083,
                                                                                                     10240))],
                                              (int) 1);
                    ((__local bool *) sync_arr_mem_46078)[(int64_t) 0] =
                        old_counter_46093 == groups_per_segment_46062 -
                        (int64_t) 1;
                }
            }
            barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
            
            bool is_last_group_46094;
            
            is_last_group_46094 = ((__local
                                    bool *) sync_arr_mem_46078)[(int64_t) 0];
            if (is_last_group_46094) {
                if (local_tid_46072 == 0) {
                    old_counter_46093 =
                        atomic_add_i32_global(&((volatile __global
                                                 int *) mainzicounter_mem_46069)[sext_i32_i64(srem32(flat_segment_id_46083,
                                                                                                     10240))],
                                              (int) ((int64_t) 0 -
                                                     groups_per_segment_46062));
                }
                // read in the per-group-results
                {
                    int64_t read_per_thread_46095 =
                            sdiv_up64(groups_per_segment_46062,
                                      segred_group_sizze_40651);
                    
                    x_40655 = 0.0F;
                    for (int64_t i_46096 = 0; i_46096 < read_per_thread_46095;
                         i_46096++) {
                        int64_t group_res_id_46097 =
                                sext_i32_i64(local_tid_46072) *
                                read_per_thread_46095 + i_46096;
                        int64_t index_of_group_res_46098 =
                                sext_i32_i64(flat_segment_id_46083) *
                                groups_per_segment_46062 + group_res_id_46097;
                        
                        if (slt64(group_res_id_46097,
                                  groups_per_segment_46062)) {
                            x_40656 = ((__global
                                        float *) group_res_arr_mem_46067)[index_of_group_res_46098 *
                                                                          segred_group_sizze_40651];
                            
                            float defunc_1_op_res_40657;
                            
                            defunc_1_op_res_40657 = x_40655 + x_40656;
                            x_40655 = defunc_1_op_res_40657;
                        }
                    }
                }
                ((__local
                  float *) red_arr_mem_46076)[sext_i32_i64(local_tid_46072)] =
                    x_40655;
                barrier(CLK_LOCAL_MEM_FENCE);
                // reduce the per-group results
                {
                    int32_t offset_46099;
                    int32_t skip_waves_46100;
                    
                    skip_waves_46100 = 1;
                    
                    float x_46087;
                    float x_46088;
                    
                    offset_46099 = 0;
                    // participating threads read initial accumulator
                    {
                        if (slt32(local_tid_46072,
                                  sext_i64_i32(segred_group_sizze_40651))) {
                            x_46087 = ((__local
                                        float *) red_arr_mem_46076)[sext_i32_i64(local_tid_46072 +
                                                                    offset_46099)];
                        }
                    }
                    offset_46099 = 1;
                    while (slt32(offset_46099, wave_sizze_46074)) {
                        if (slt32(local_tid_46072 + offset_46099,
                                  sext_i64_i32(segred_group_sizze_40651)) &&
                            ((local_tid_46072 - squot32(local_tid_46072,
                                                        wave_sizze_46074) *
                              wave_sizze_46074) & (2 * offset_46099 - 1)) ==
                            0) {
                            // read array element
                            {
                                x_46088 = ((volatile __local
                                            float *) red_arr_mem_46076)[sext_i32_i64(local_tid_46072 +
                                                                        offset_46099)];
                            }
                            // apply reduction operation
                            {
                                float defunc_1_op_res_46089 = x_46087 + x_46088;
                                
                                x_46087 = defunc_1_op_res_46089;
                            }
                            // write result of operation
                            {
                                ((volatile __local
                                  float *) red_arr_mem_46076)[sext_i32_i64(local_tid_46072)] =
                                    x_46087;
                            }
                        }
                        offset_46099 *= 2;
                    }
                    while (slt32(skip_waves_46100,
                                 squot32(sext_i64_i32(segred_group_sizze_40651) +
                                         wave_sizze_46074 - 1,
                                         wave_sizze_46074))) {
                        barrier(CLK_LOCAL_MEM_FENCE);
                        offset_46099 = skip_waves_46100 * wave_sizze_46074;
                        if (slt32(local_tid_46072 + offset_46099,
                                  sext_i64_i32(segred_group_sizze_40651)) &&
                            ((local_tid_46072 - squot32(local_tid_46072,
                                                        wave_sizze_46074) *
                              wave_sizze_46074) == 0 &&
                             (squot32(local_tid_46072, wave_sizze_46074) & (2 *
                                                                            skip_waves_46100 -
                                                                            1)) ==
                             0)) {
                            // read array element
                            {
                                x_46088 = ((__local
                                            float *) red_arr_mem_46076)[sext_i32_i64(local_tid_46072 +
                                                                        offset_46099)];
                            }
                            // apply reduction operation
                            {
                                float defunc_1_op_res_46089 = x_46087 + x_46088;
                                
                                x_46087 = defunc_1_op_res_46089;
                            }
                            // write result of operation
                            {
                                ((__local
                                  float *) red_arr_mem_46076)[sext_i32_i64(local_tid_46072)] =
                                    x_46087;
                            }
                        }
                        skip_waves_46100 *= 2;
                    }
                    // and back to memory with the final result
                    {
                        if (local_tid_46072 == 0) {
                            ((__global float *) mem_44910)[gtid_40592 *
                                                           i32_res_29181 +
                                                           gtid_40593] =
                                x_46087;
                        }
                    }
                }
            }
        }
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }
    
  error_1:
    return;
    #undef segred_group_sizze_40651
}
// Segmented sum-of-products reduction kernel.
//
// For every segment (gtid_40722, gtid_40723) the kernel accumulates, with
// float addition starting from 0.0F, the products
//     defunc_4_map_res_mem_44916[gtid_40722 * i32_res_29181 + k] *
//     mem_44397[gtid_40723 * i32_res_29181 + k]
// over the element indices k visited by the work-items of the segment, and
// stores the total in mem_45134[gtid_40722 * N_29165 + gtid_40723].
// NOTE(review): whether every k in [0, i32_res_29181) is visited depends on
// the host-supplied elements_per_thread / threads_per_segment values, which
// are not visible here.
//
// Work is organised in virtual groups: each physical work-group loops over
// the virtual group ids phys_group_id + i * num_groups_40780.  A segment is
// handled by groups_per_segment_46194 virtual groups.  When that number is 1
// the group writes the result directly; otherwise every group publishes a
// partial sum to group_res_arr_mem_46199 and the group that observes the
// final counter value combines all partial sums of the segment.
//
// Parameters:
//   global_failure        read once on entry; the kernel returns at once
//                         when the value is >= 0.
//   sync_arr_mem_46210_backing_aligned_0
//                         local memory; element 0 is used as a bool flag
//                         telling the whole group whether it is the last
//                         group of its segment.
//   red_arr_mem_46208_backing_aligned_1
//                         local memory used as a float array, one slot per
//                         local id, for the in-group tree reduction.
//   N_29165               divisor that splits the flat segment id into the
//                         two segment coordinates; also the row stride of
//                         the output.
//   i32_res_29181         bound on element indices and row stride of both
//                         input arrays.
//   num_groups_40780      stride between the virtual groups served by one
//                         physical group.
//   groups_per_segment_46194   virtual groups cooperating on one segment.
//   elements_per_thread_46195  upper bound on elements one work-item reads.
//   virt_num_groups_46196      total number of virtual groups.
//   threads_per_segment_46198  stride between successive elements read by
//                              one work-item.
//   mem_44397, defunc_4_map_res_mem_44916   inputs, accessed as float.
//   mem_45134             output, accessed as float.
//   group_res_arr_mem_46199    global scratch for per-group partial sums.
//   mainzicounter_mem_46201    global int counters updated atomically,
//                              indexed by srem32(flat_segment_id, 10240).
__kernel void mainzisegred_large_40733(__global int *global_failure,
                                       __local volatile
                                       int64_t *sync_arr_mem_46210_backing_aligned_0,
                                       __local volatile
                                       int64_t *red_arr_mem_46208_backing_aligned_1,
                                       int64_t N_29165, int64_t i32_res_29181,
                                       int64_t num_groups_40780,
                                       int64_t groups_per_segment_46194,
                                       int64_t elements_per_thread_46195,
                                       int64_t virt_num_groups_46196,
                                       int64_t threads_per_segment_46198,
                                       __global unsigned char *mem_44397,
                                       __global
                                       unsigned char *defunc_4_map_res_mem_44916,
                                       __global unsigned char *mem_45134,
                                       __global
                                       unsigned char *group_res_arr_mem_46199,
                                       __global
                                       unsigned char *mainzicounter_mem_46201)
{
    // Group size used in all index arithmetic below; the macro it expands to
    // is defined outside this kernel.
    #define segred_group_sizze_40779 (mainzisegred_group_sizze_40727)
    
    // block_dim0..2 are not referenced anywhere in this kernel.
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    // Byte-pointer views of the two local-memory arguments.
    __local volatile char *restrict sync_arr_mem_46210_backing_1 =
                          (__local volatile
                           char *) sync_arr_mem_46210_backing_aligned_0;
    __local volatile char *restrict red_arr_mem_46208_backing_0 =
                          (__local volatile
                           char *) red_arr_mem_46208_backing_aligned_1;
    
    // Bail out before doing any work when a failure code is already set
    // (presumably by an earlier kernel -- confirm against the host code).
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_46203;
    int32_t local_tid_46204;
    int64_t group_sizze_46207;
    int32_t wave_sizze_46206;
    int32_t group_tid_46205;
    
    global_tid_46203 = get_global_id(0);
    local_tid_46204 = get_local_id(0);
    // group_sizze_46207 and group_tid_46205 are assigned but never read in
    // this kernel.
    group_sizze_46207 = get_local_size(0);
    // LOCKSTEP_WIDTH is defined outside this kernel; it is used below as the
    // width of the barrier-free first reduction phase.
    wave_sizze_46206 = LOCKSTEP_WIDTH;
    group_tid_46205 = get_group_id(0);
    
    int32_t phys_tid_40733;
    
    // phys_tid_40733 is assigned but never read in this kernel.
    phys_tid_40733 = global_tid_46203;
    
    // Non-volatile aliases of the local buffers; individual accesses below
    // cast them to float / bool pointers as needed.
    __local char *red_arr_mem_46208;
    
    red_arr_mem_46208 = (__local char *) red_arr_mem_46208_backing_0;
    
    __local char *sync_arr_mem_46210;
    
    sync_arr_mem_46210 = (__local char *) sync_arr_mem_46210_backing_1;
    
    int32_t phys_group_id_46212;
    
    phys_group_id_46212 = get_group_id(0);
    // Virtualisation loop: this physical group processes virtual groups
    // phys_group_id, phys_group_id + num_groups, ... while they remain below
    // virt_num_groups (sdiv_up32 is presumably a rounding-up division).
    for (int32_t i_46213 = 0; i_46213 <
         sdiv_up32(sext_i64_i32(virt_num_groups_46196) - phys_group_id_46212,
                   sext_i64_i32(num_groups_40780)); i_46213++) {
        int32_t virt_group_id_46214 = phys_group_id_46212 + i_46213 *
                sext_i64_i32(num_groups_40780);
        // Segment this virtual group belongs to.
        int32_t flat_segment_id_46215 = squot32(virt_group_id_46214,
                                                sext_i64_i32(groups_per_segment_46194));
        // Index of this work-item among all work-items of the segment
        // (note: shadows nothing, but is distinct from global_tid_46203).
        int64_t global_tid_46216 = srem64(sext_i32_i64(virt_group_id_46214) *
                                          segred_group_sizze_40779 +
                                          sext_i32_i64(local_tid_46204),
                                          segred_group_sizze_40779 *
                                          groups_per_segment_46194);
        // Split the flat segment id into quotient and remainder by N_29165.
        int64_t gtid_40722 = squot64(sext_i32_i64(flat_segment_id_46215),
                                     N_29165);
        int64_t gtid_40723 = sext_i32_i64(flat_segment_id_46215) -
                squot64(sext_i32_i64(flat_segment_id_46215), N_29165) * N_29165;
        int64_t gtid_40732;
        float x_acc_46217;
        int64_t chunk_sizze_46218;
        
        // Number of elements this work-item reads: capped by
        // elements_per_thread and by how many stride-threads_per_segment
        // steps fit between its start index and i32_res_29181.
        chunk_sizze_46218 = smin64(elements_per_thread_46195,
                                   sdiv_up64(i32_res_29181 -
                                             sext_i32_i64(sext_i64_i32(global_tid_46216)),
                                             threads_per_segment_46198));
        
        float x_40783;
        float x_40784;
        
        // neutral-initialise the accumulators
        {
            x_acc_46217 = 0.0F;
        }
        // Sequential per-work-item phase: strided walk over the segment,
        // accumulating the product of the two inputs into x_acc_46217.
        for (int64_t i_46222 = 0; i_46222 < chunk_sizze_46218; i_46222++) {
            gtid_40732 = sext_i32_i64(sext_i64_i32(global_tid_46216)) +
                threads_per_segment_46198 * i_46222;
            // apply map function
            {
                float x_40788 = ((__global
                                  float *) defunc_4_map_res_mem_44916)[gtid_40722 *
                                                                       i32_res_29181 +
                                                                       gtid_40732];
                float x_40789 = ((__global float *) mem_44397)[gtid_40723 *
                                                               i32_res_29181 +
                                                               gtid_40732];
                float defunc_1_f_res_40790 = x_40788 * x_40789;
                
                // save map-out results
                { }
                // load accumulator
                {
                    x_40783 = x_acc_46217;
                }
                // load new values
                {
                    x_40784 = defunc_1_f_res_40790;
                }
                // apply reduction operator
                {
                    float defunc_1_op_res_40785 = x_40783 + x_40784;
                    
                    // store in accumulator
                    {
                        x_acc_46217 = defunc_1_op_res_40785;
                    }
                }
            }
        }
        // to reduce current chunk, first store our result in memory
        {
            x_40783 = x_acc_46217;
            ((__local
              float *) red_arr_mem_46208)[sext_i32_i64(local_tid_46204)] =
                x_40783;
        }
        // Make every work-item's partial sum visible in local memory before
        // the tree reduction starts.
        barrier(CLK_LOCAL_MEM_FENCE);
        
        int32_t offset_46223;
        int32_t skip_waves_46224;
        
        skip_waves_46224 = 1;
        
        float x_46219;
        float x_46220;
        
        offset_46223 = 0;
        // participating threads read initial accumulator
        {
            if (slt32(local_tid_46204,
                      sext_i64_i32(segred_group_sizze_40779))) {
                x_46219 = ((__local
                            float *) red_arr_mem_46208)[sext_i32_i64(local_tid_46204 +
                                                        offset_46223)];
            }
        }
        offset_46223 = 1;
        // Phase 1: reduction within each run of wave_sizze consecutive local
        // ids.  A work-item takes part at a given offset when its position
        // inside the run has the low bits (2 * offset - 1) clear and its
        // partner slot is inside the group.  No barrier is issued in this
        // loop; the accesses go through volatile pointers instead
        // (presumably relying on lockstep execution of LOCKSTEP_WIDTH
        // work-items -- confirm for the target device).
        while (slt32(offset_46223, wave_sizze_46206)) {
            if (slt32(local_tid_46204 + offset_46223,
                      sext_i64_i32(segred_group_sizze_40779)) &&
                ((local_tid_46204 - squot32(local_tid_46204, wave_sizze_46206) *
                  wave_sizze_46206) & (2 * offset_46223 - 1)) == 0) {
                // read array element
                {
                    x_46220 = ((volatile __local
                                float *) red_arr_mem_46208)[sext_i32_i64(local_tid_46204 +
                                                            offset_46223)];
                }
                // apply reduction operation
                {
                    float defunc_1_op_res_46221 = x_46219 + x_46220;
                    
                    x_46219 = defunc_1_op_res_46221;
                }
                // write result of operation
                {
                    ((volatile __local
                      float *) red_arr_mem_46208)[sext_i32_i64(local_tid_46204)] =
                        x_46219;
                }
            }
            offset_46223 *= 2;
        }
        // Phase 2: combine the per-wave results.  Only the first work-item of
        // a wave whose wave index has the low bits (2 * skip_waves - 1) clear
        // takes part; it adds the slot skip_waves * wave_sizze further on.
        // A barrier precedes every step, and the loop condition is uniform
        // across the group.
        while (slt32(skip_waves_46224,
                     squot32(sext_i64_i32(segred_group_sizze_40779) +
                             wave_sizze_46206 - 1, wave_sizze_46206))) {
            barrier(CLK_LOCAL_MEM_FENCE);
            offset_46223 = skip_waves_46224 * wave_sizze_46206;
            if (slt32(local_tid_46204 + offset_46223,
                      sext_i64_i32(segred_group_sizze_40779)) &&
                ((local_tid_46204 - squot32(local_tid_46204, wave_sizze_46206) *
                  wave_sizze_46206) == 0 && (squot32(local_tid_46204,
                                                     wave_sizze_46206) & (2 *
                                                                          skip_waves_46224 -
                                                                          1)) ==
                 0)) {
                // read array element
                {
                    x_46220 = ((__local
                                float *) red_arr_mem_46208)[sext_i32_i64(local_tid_46204 +
                                                            offset_46223)];
                }
                // apply reduction operation
                {
                    float defunc_1_op_res_46221 = x_46219 + x_46220;
                    
                    x_46219 = defunc_1_op_res_46221;
                }
                // write result of operation
                {
                    ((__local
                      float *) red_arr_mem_46208)[sext_i32_i64(local_tid_46204)] =
                        x_46219;
                }
            }
            skip_waves_46224 *= 2;
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        // first thread saves the result in accumulator
        {
            if (sext_i32_i64(local_tid_46204) == (int64_t) 0) {
                x_acc_46217 = x_46219;
            }
        }
        if (groups_per_segment_46194 == (int64_t) 1) {
            // first thread in group saves final result to memory
            {
                if (local_tid_46204 == 0) {
                    ((__global float *) mem_45134)[gtid_40722 * N_29165 +
                                                   gtid_40723] = x_acc_46217;
                }
            }
        } else {
            // Several groups share this segment: publish the partial sum,
            // then let the group that completes the counter finish the job.
            int32_t old_counter_46225;
            
            // first thread in group saves group result to global memory
            {
                if (local_tid_46204 == 0) {
                    // Partial sums are stored with a stride of the group
                    // size, one slot per virtual group.
                    ((__global
                      float *) group_res_arr_mem_46199)[sext_i32_i64(virt_group_id_46214) *
                                                        segred_group_sizze_40779] =
                        x_acc_46217;
                    // Fence placed between the partial-sum store and the
                    // counter update (helper defined outside this kernel).
                    mem_fence_global();
                    // Count this group as finished.  The comparison below
                    // assumes atomic_add_i32_global returns the value held
                    // before the addition -- confirm against its definition.
                    old_counter_46225 =
                        atomic_add_i32_global(&((volatile __global
                                                 int *) mainzicounter_mem_46201)[sext_i32_i64(srem32(flat_segment_id_46215,
                                                                                                     10240))],
                                              (int) 1);
                    // Share the verdict with the rest of the group through
                    // local memory.
                    ((__local bool *) sync_arr_mem_46210)[(int64_t) 0] =
                        old_counter_46225 == groups_per_segment_46194 -
                        (int64_t) 1;
                }
            }
            barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
            
            bool is_last_group_46226;
            
            is_last_group_46226 = ((__local
                                    bool *) sync_arr_mem_46210)[(int64_t) 0];
            // The flag is read from local memory, so all work-items of the
            // group take the same branch and reach the barriers inside it
            // together.
            if (is_last_group_46226) {
                if (local_tid_46204 == 0) {
                    // Subtract groups_per_segment from the counter, undoing
                    // the increments made for this segment.
                    old_counter_46225 =
                        atomic_add_i32_global(&((volatile __global
                                                 int *) mainzicounter_mem_46201)[sext_i32_i64(srem32(flat_segment_id_46215,
                                                                                                     10240))],
                                              (int) ((int64_t) 0 -
                                                     groups_per_segment_46194));
                }
                // read in the per-group-results
                {
                    // Each work-item sums a contiguous run of
                    // read_per_thread partial results, skipping ids at or
                    // beyond groups_per_segment.
                    int64_t read_per_thread_46227 =
                            sdiv_up64(groups_per_segment_46194,
                                      segred_group_sizze_40779);
                    
                    x_40783 = 0.0F;
                    for (int64_t i_46228 = 0; i_46228 < read_per_thread_46227;
                         i_46228++) {
                        int64_t group_res_id_46229 =
                                sext_i32_i64(local_tid_46204) *
                                read_per_thread_46227 + i_46228;
                        int64_t index_of_group_res_46230 =
                                sext_i32_i64(flat_segment_id_46215) *
                                groups_per_segment_46194 + group_res_id_46229;
                        
                        if (slt64(group_res_id_46229,
                                  groups_per_segment_46194)) {
                            x_40784 = ((__global
                                        float *) group_res_arr_mem_46199)[index_of_group_res_46230 *
                                                                          segred_group_sizze_40779];
                            
                            float defunc_1_op_res_40785;
                            
                            defunc_1_op_res_40785 = x_40783 + x_40784;
                            x_40783 = defunc_1_op_res_40785;
                        }
                    }
                }
                ((__local
                  float *) red_arr_mem_46208)[sext_i32_i64(local_tid_46204)] =
                    x_40783;
                barrier(CLK_LOCAL_MEM_FENCE);
                // reduce the per-group results
                {
                    // Same two-phase tree reduction as above, applied to the
                    // per-group partial sums.  x_46219 and x_46220 declared
                    // here shadow the outer variables of the same names.
                    int32_t offset_46231;
                    int32_t skip_waves_46232;
                    
                    skip_waves_46232 = 1;
                    
                    float x_46219;
                    float x_46220;
                    
                    offset_46231 = 0;
                    // participating threads read initial accumulator
                    {
                        if (slt32(local_tid_46204,
                                  sext_i64_i32(segred_group_sizze_40779))) {
                            x_46219 = ((__local
                                        float *) red_arr_mem_46208)[sext_i32_i64(local_tid_46204 +
                                                                    offset_46231)];
                        }
                    }
                    offset_46231 = 1;
                    // Phase 1: barrier-free reduction inside each wave via
                    // volatile local-memory accesses.
                    while (slt32(offset_46231, wave_sizze_46206)) {
                        if (slt32(local_tid_46204 + offset_46231,
                                  sext_i64_i32(segred_group_sizze_40779)) &&
                            ((local_tid_46204 - squot32(local_tid_46204,
                                                        wave_sizze_46206) *
                              wave_sizze_46206) & (2 * offset_46231 - 1)) ==
                            0) {
                            // read array element
                            {
                                x_46220 = ((volatile __local
                                            float *) red_arr_mem_46208)[sext_i32_i64(local_tid_46204 +
                                                                        offset_46231)];
                            }
                            // apply reduction operation
                            {
                                float defunc_1_op_res_46221 = x_46219 + x_46220;
                                
                                x_46219 = defunc_1_op_res_46221;
                            }
                            // write result of operation
                            {
                                ((volatile __local
                                  float *) red_arr_mem_46208)[sext_i32_i64(local_tid_46204)] =
                                    x_46219;
                            }
                        }
                        offset_46231 *= 2;
                    }
                    // Phase 2: combine wave results, with a barrier before
                    // every step.
                    while (slt32(skip_waves_46232,
                                 squot32(sext_i64_i32(segred_group_sizze_40779) +
                                         wave_sizze_46206 - 1,
                                         wave_sizze_46206))) {
                        barrier(CLK_LOCAL_MEM_FENCE);
                        offset_46231 = skip_waves_46232 * wave_sizze_46206;
                        if (slt32(local_tid_46204 + offset_46231,
                                  sext_i64_i32(segred_group_sizze_40779)) &&
                            ((local_tid_46204 - squot32(local_tid_46204,
                                                        wave_sizze_46206) *
                              wave_sizze_46206) == 0 &&
                             (squot32(local_tid_46204, wave_sizze_46206) & (2 *
                                                                            skip_waves_46232 -
                                                                            1)) ==
                             0)) {
                            // read array element
                            {
                                x_46220 = ((__local
                                            float *) red_arr_mem_46208)[sext_i32_i64(local_tid_46204 +
                                                                        offset_46231)];
                            }
                            // apply reduction operation
                            {
                                float defunc_1_op_res_46221 = x_46219 + x_46220;
                                
                                x_46219 = defunc_1_op_res_46221;
                            }
                            // write result of operation
                            {
                                ((__local
                                  float *) red_arr_mem_46208)[sext_i32_i64(local_tid_46204)] =
                                    x_46219;
                            }
                        }
                        skip_waves_46232 *= 2;
                    }
                    // and back to memory with the final result
                    {
                        if (local_tid_46204 == 0) {
                            ((__global float *) mem_45134)[gtid_40722 *
                                                           N_29165 +
                                                           gtid_40723] =
                                x_46219;
                        }
                    }
                }
            }
        }
        // Separate consecutive virtual-group iterations so the local buffers
        // are not overwritten while other work-items may still read them.
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }
    
    // No goto in this kernel targets error_1; control reaches it only by
    // falling out of the loop above.
  error_1:
    return;
    #undef segred_group_sizze_40779
}
// Kernel mainzisegred_large_41311: per-segment float sum of squares.
//
// For each segment index gtid_41302 the kernel accumulates v * v over the
// element indices it visits, where v is
//   defunc_4_map_res_mem_45178[gtid_41302 * N_29165 + i]
// when (int32) i < mem_45232[gtid_41302] and 0.0F otherwise, and writes the
// total to mem_45235[gtid_41302].
//
// Parameters, as used in the body:
//   global_failure         - int error slot; a value >= 0 on entry makes the
//                            kernel return at once (only checked when
//                            failure_is_an_option is non-zero). On a failed
//                            bounds check the slot is claimed with code 19.
//   failure_is_an_option   - enables the entry check described above.
//   global_failure_args    - receives the offending index and N_29165 when
//                            this kernel claims the failure slot.
//   sync_arr_mem_46462_backing_aligned_0
//                          - local memory; element 0 is used as a bool flag
//                            telling the group whether it finished last.
//   red_arr_mem_46460_backing_aligned_1
//                          - local memory scratch holding one float per
//                            work-item, indexed by the local id.
//   N_29165                - row stride of the input and exclusive upper
//                            bound of the index check.
//   i32_res_29175          - bounds the per-thread chunk size; appears to be
//                            the number of elements per segment (confirm
//                            against the host code).
//   num_groups_41362       - stride of the virtual-group loop; presumably the
//                            number of launched work-groups (confirm).
//   groups_per_segment_46446
//                          - number of virtual groups sharing one segment.
//   elements_per_thread_46447
//                          - cap on the iterations of the per-thread loop.
//   virt_num_groups_46448  - total number of virtual groups to process.
//   threads_per_segment_46450
//                          - distance between consecutive elements handled
//                            by the same work-item.
//   defunc_4_map_res_mem_45178 - float input, read only.
//   mem_45232              - one int32 per segment: exclusive index limit.
//   mem_45235              - one float per segment: the result.
//   group_res_arr_mem_46451 - float partial results, one per virtual group,
//                            stored at virt_group_id * segred_group_sizze_41361.
//   mainzicounter_mem_46453 - int counters, indexed by the segment id reduced
//                            with srem32(..., 10240).
//
// NOTE(review): slt32, sle64, slt64, squot32, srem32, srem64, smin64,
// sdiv_up32, sdiv_up64, sext_i32_i64, sext_i64_i32, mem_fence_global,
// atomic_add_i32_global, atomic_cmpxchg_i32_global and LOCKSTEP_WIDTH are
// defined outside this chunk. The comments below assume they are signed
// comparisons, truncating division, remainder, minimum, rounding-up division,
// width conversions and the usual fence and atomic wrappers - confirm.
__kernel void mainzisegred_large_41311(__global int *global_failure,
                                       int failure_is_an_option, __global
                                       int64_t *global_failure_args,
                                       __local volatile
                                       int64_t *sync_arr_mem_46462_backing_aligned_0,
                                       __local volatile
                                       int64_t *red_arr_mem_46460_backing_aligned_1,
                                       int64_t N_29165, int64_t i32_res_29175,
                                       int64_t num_groups_41362,
                                       int64_t groups_per_segment_46446,
                                       int64_t elements_per_thread_46447,
                                       int64_t virt_num_groups_46448,
                                       int64_t threads_per_segment_46450,
                                       __global
                                       unsigned char *defunc_4_map_res_mem_45178,
                                       __global unsigned char *mem_45232,
                                       __global unsigned char *mem_45235,
                                       __global
                                       unsigned char *group_res_arr_mem_46451,
                                       __global
                                       unsigned char *mainzicounter_mem_46453)
{
    // Group size used throughout; the macro it expands to is defined outside
    // this chunk (presumably supplied when the program is built - confirm).
    #define segred_group_sizze_41361 (mainzisegred_group_sizze_41305)
    
    // The three block_dim constants are not referenced in this kernel.
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    // Byte-typed views of the two local memory arguments.
    __local volatile char *restrict sync_arr_mem_46462_backing_1 =
                          (__local volatile
                           char *) sync_arr_mem_46462_backing_aligned_0;
    __local volatile char *restrict red_arr_mem_46460_backing_0 =
                          (__local volatile
                           char *) red_arr_mem_46460_backing_aligned_1;
    // Group-wide flag raised by any work-item whose bounds check fails.
    volatile __local bool local_failure;
    
    // Skip all work when a failure has already been recorded.
    if (failure_is_an_option) {
        int failed = *global_failure >= 0;
        
        if (failed)
            return;
    }
    local_failure = false;
    barrier(CLK_LOCAL_MEM_FENCE);
    
    int32_t global_tid_46455;
    int32_t local_tid_46456;
    int64_t group_sizze_46459;
    int32_t wave_sizze_46458;
    int32_t group_tid_46457;
    
    global_tid_46455 = get_global_id(0);
    local_tid_46456 = get_local_id(0);
    group_sizze_46459 = get_local_size(0);
    wave_sizze_46458 = LOCKSTEP_WIDTH;
    group_tid_46457 = get_group_id(0);
    
    int32_t phys_tid_41311;
    
    phys_tid_41311 = global_tid_46455;
    
    // Non-volatile aliases of the local buffers used by the code below.
    __local char *red_arr_mem_46460;
    
    red_arr_mem_46460 = (__local char *) red_arr_mem_46460_backing_0;
    
    __local char *sync_arr_mem_46462;
    
    sync_arr_mem_46462 = (__local char *) sync_arr_mem_46462_backing_1;
    
    int32_t phys_group_id_46464;
    
    phys_group_id_46464 = get_group_id(0);
    // Each work-group handles the virtual groups phys_group_id,
    // phys_group_id + num_groups, phys_group_id + 2 * num_groups, ... up to
    // virt_num_groups.
    for (int32_t i_46465 = 0; i_46465 <
         sdiv_up32(sext_i64_i32(virt_num_groups_46448) - phys_group_id_46464,
                   sext_i64_i32(num_groups_41362)); i_46465++) {
        int32_t virt_group_id_46466 = phys_group_id_46464 + i_46465 *
                sext_i64_i32(num_groups_41362);
        // Segment this virtual group contributes to.
        int32_t flat_segment_id_46467 = squot32(virt_group_id_46466,
                                                sext_i64_i32(groups_per_segment_46446));
        // Position of this work-item among all work-items of the segment.
        int64_t global_tid_46468 = srem64(sext_i32_i64(virt_group_id_46466) *
                                          segred_group_sizze_41361 +
                                          sext_i32_i64(local_tid_46456),
                                          segred_group_sizze_41361 *
                                          groups_per_segment_46446);
        int64_t gtid_41302 = sext_i32_i64(flat_segment_id_46467);
        int64_t gtid_41310;
        float x_acc_46469;
        int64_t chunk_sizze_46470;
        
        // Number of elements this work-item visits: capped by
        // elements_per_thread and by what is left of i32_res_29175 when
        // stepping by threads_per_segment from global_tid_46468.
        chunk_sizze_46470 = smin64(elements_per_thread_46447,
                                   sdiv_up64(i32_res_29175 -
                                             sext_i32_i64(sext_i64_i32(global_tid_46468)),
                                             threads_per_segment_46450));
        
        float x_41365;
        float x_41366;
        
        // neutral-initialise the accumulators
        {
            x_acc_46469 = 0.0F;
        }
        // Sequential per-thread stage: visit elements global_tid,
        // global_tid + threads_per_segment, ... and add their squares.
        for (int64_t i_46474 = 0; i_46474 < chunk_sizze_46470; i_46474++) {
            gtid_41310 = sext_i32_i64(sext_i64_i32(global_tid_46468)) +
                threads_per_segment_46450 * i_46474;
            // apply map function
            {
                // Per-segment limit: only indices below it read the input,
                // all others contribute 0.0F.
                int32_t defunc_0_f_res_41369 = ((__global
                                                 int32_t *) mem_45232)[gtid_41302];
                int32_t index_primexp_42385 = sext_i64_i32(gtid_41310);
                bool cond_41371 = slt32(index_primexp_42385,
                                        defunc_0_f_res_41369);
                float defunc_0_f_res_41372;
                
                if (cond_41371) {
                    // Bounds check 0 <= i < N before indexing the input row.
                    int64_t i_41373 = sext_i32_i64(index_primexp_42385);
                    bool x_41374 = sle64((int64_t) 0, i_41373);
                    bool y_41375 = slt64(i_41373, N_29165);
                    bool bounds_check_41376 = x_41374 && y_41375;
                    bool index_certs_41377;
                    
                    if (!bounds_check_41376) {
                        {
                            // Only the work-item that swaps -1 for code 19
                            // records the failure arguments.
                            if (atomic_cmpxchg_i32_global(global_failure, -1,
                                                          19) == -1) {
                                global_failure_args[0] = i_41373;
                                global_failure_args[1] = N_29165;
                                ;
                            }
                            // Tell the rest of the group and jump to the
                            // barrier at error_0, skipping the local store.
                            local_failure = true;
                            goto error_0;
                        }
                    }
                    
                    float defunc_0_f_res_t_res_41378 = ((__global
                                                         float *) defunc_4_map_res_mem_45178)[gtid_41302 *
                                                                                              N_29165 +
                                                                                              i_41373];
                    
                    defunc_0_f_res_41372 = defunc_0_f_res_t_res_41378;
                } else {
                    defunc_0_f_res_41372 = 0.0F;
                }
                
                // Square of the selected value is what gets summed.
                float defunc_0_f_res_41379 = defunc_0_f_res_41372 *
                      defunc_0_f_res_41372;
                
                // save map-out results
                { }
                // load accumulator
                {
                    x_41365 = x_acc_46469;
                }
                // load new values
                {
                    x_41366 = defunc_0_f_res_41379;
                }
                // apply reduction operator
                {
                    float defunc_1_op_res_41367 = x_41365 + x_41366;
                    
                    // store in accumulator
                    {
                        x_acc_46469 = defunc_1_op_res_41367;
                    }
                }
            }
        }
        // to reduce current chunk, first store our result in memory
        {
            x_41365 = x_acc_46469;
            ((__local
              float *) red_arr_mem_46460)[sext_i32_i64(local_tid_46456)] =
                x_41365;
        }
        
        // Every work-item of the group reaches this barrier, whether it
        // finished its chunk or jumped here on a failed bounds check. After
        // it, local_failure is read by all of them, so the whole group
        // returns together when any work-item failed.
      error_0:
        barrier(CLK_LOCAL_MEM_FENCE);
        if (local_failure)
            return;
        barrier(CLK_LOCAL_MEM_FENCE);
        
        // Group-level reduction of the per-thread sums held in
        // red_arr_mem_46460.
        int32_t offset_46475;
        int32_t skip_waves_46476;
        
        skip_waves_46476 = 1;
        
        float x_46471;
        float x_46472;
        
        offset_46475 = 0;
        // participating threads read initial accumulator
        {
            if (slt32(local_tid_46456,
                      sext_i64_i32(segred_group_sizze_41361))) {
                x_46471 = ((__local
                            float *) red_arr_mem_46460)[sext_i32_i64(local_tid_46456 +
                                                        offset_46475)];
            }
        }
        offset_46475 = 1;
        // Stage 1: combine within one wave of wave_sizze_46458 work-items,
        // doubling the offset each round. No barrier is issued in this loop
        // and the accesses go through volatile pointers; this presumably
        // relies on the work-items of a wave running in lockstep - confirm.
        while (slt32(offset_46475, wave_sizze_46458)) {
            if (slt32(local_tid_46456 + offset_46475,
                      sext_i64_i32(segred_group_sizze_41361)) &&
                ((local_tid_46456 - squot32(local_tid_46456, wave_sizze_46458) *
                  wave_sizze_46458) & (2 * offset_46475 - 1)) == 0) {
                // read array element
                {
                    x_46472 = ((volatile __local
                                float *) red_arr_mem_46460)[sext_i32_i64(local_tid_46456 +
                                                            offset_46475)];
                }
                // apply reduction operation
                {
                    float defunc_1_op_res_46473 = x_46471 + x_46472;
                    
                    x_46471 = defunc_1_op_res_46473;
                }
                // write result of operation
                {
                    ((volatile __local
                      float *) red_arr_mem_46460)[sext_i32_i64(local_tid_46456)] =
                        x_46471;
                }
            }
            offset_46475 *= 2;
        }
        // Stage 2: combine the per-wave results. Only the first work-item of
        // a wave takes part, the distance doubles each round, and a barrier
        // separates the rounds.
        while (slt32(skip_waves_46476,
                     squot32(sext_i64_i32(segred_group_sizze_41361) +
                             wave_sizze_46458 - 1, wave_sizze_46458))) {
            barrier(CLK_LOCAL_MEM_FENCE);
            offset_46475 = skip_waves_46476 * wave_sizze_46458;
            if (slt32(local_tid_46456 + offset_46475,
                      sext_i64_i32(segred_group_sizze_41361)) &&
                ((local_tid_46456 - squot32(local_tid_46456, wave_sizze_46458) *
                  wave_sizze_46458) == 0 && (squot32(local_tid_46456,
                                                     wave_sizze_46458) & (2 *
                                                                          skip_waves_46476 -
                                                                          1)) ==
                 0)) {
                // read array element
                {
                    x_46472 = ((__local
                                float *) red_arr_mem_46460)[sext_i32_i64(local_tid_46456 +
                                                            offset_46475)];
                }
                // apply reduction operation
                {
                    float defunc_1_op_res_46473 = x_46471 + x_46472;
                    
                    x_46471 = defunc_1_op_res_46473;
                }
                // write result of operation
                {
                    ((__local
                      float *) red_arr_mem_46460)[sext_i32_i64(local_tid_46456)] =
                        x_46471;
                }
            }
            skip_waves_46476 *= 2;
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        // first thread saves the result in accumulator
        {
            if (sext_i32_i64(local_tid_46456) == (int64_t) 0) {
                x_acc_46469 = x_46471;
            }
        }
        // With a single group per segment the group total is already the
        // segment result; otherwise the groups of a segment must be combined.
        if (groups_per_segment_46446 == (int64_t) 1) {
            // first thread in group saves final result to memory
            {
                if (local_tid_46456 == 0) {
                    ((__global float *) mem_45235)[gtid_41302] = x_acc_46469;
                }
            }
        } else {
            int32_t old_counter_46477;
            
            // first thread in group saves group result to global memory
            {
                if (local_tid_46456 == 0) {
                    ((__global
                      float *) group_res_arr_mem_46451)[sext_i32_i64(virt_group_id_46466) *
                                                        segred_group_sizze_41361] =
                        x_acc_46469;
                    // Fence between publishing the partial result and
                    // bumping the per-segment counter.
                    mem_fence_global();
                    old_counter_46477 =
                        atomic_add_i32_global(&((volatile __global
                                                 int *) mainzicounter_mem_46453)[sext_i32_i64(srem32(flat_segment_id_46467,
                                                                                                     10240))],
                                              (int) 1);
                    // The group that sees groups_per_segment - 1 as the old
                    // counter value is the last one to finish this segment.
                    ((__local bool *) sync_arr_mem_46462)[(int64_t) 0] =
                        old_counter_46477 == groups_per_segment_46446 -
                        (int64_t) 1;
                }
            }
            barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
            
            // Every work-item reads the flag written by work-item 0.
            bool is_last_group_46478;
            
            is_last_group_46478 = ((__local
                                    bool *) sync_arr_mem_46462)[(int64_t) 0];
            if (is_last_group_46478) {
                if (local_tid_46456 == 0) {
                    // Subtract groups_per_segment again, undoing the
                    // increments made for this segment.
                    old_counter_46477 =
                        atomic_add_i32_global(&((volatile __global
                                                 int *) mainzicounter_mem_46453)[sext_i32_i64(srem32(flat_segment_id_46467,
                                                                                                     10240))],
                                              (int) ((int64_t) 0 -
                                                     groups_per_segment_46446));
                }
                // read in the per-group-results
                {
                    // Each work-item sums a contiguous run of
                    // read_per_thread partial results of this segment.
                    int64_t read_per_thread_46479 =
                            sdiv_up64(groups_per_segment_46446,
                                      segred_group_sizze_41361);
                    
                    x_41365 = 0.0F;
                    for (int64_t i_46480 = 0; i_46480 < read_per_thread_46479;
                         i_46480++) {
                        int64_t group_res_id_46481 =
                                sext_i32_i64(local_tid_46456) *
                                read_per_thread_46479 + i_46480;
                        int64_t index_of_group_res_46482 =
                                sext_i32_i64(flat_segment_id_46467) *
                                groups_per_segment_46446 + group_res_id_46481;
                        
                        // Ignore slots past the last group of the segment.
                        if (slt64(group_res_id_46481,
                                  groups_per_segment_46446)) {
                            x_41366 = ((__global
                                        float *) group_res_arr_mem_46451)[index_of_group_res_46482 *
                                                                          segred_group_sizze_41361];
                            
                            float defunc_1_op_res_41367;
                            
                            defunc_1_op_res_41367 = x_41365 + x_41366;
                            x_41365 = defunc_1_op_res_41367;
                        }
                    }
                }
                ((__local
                  float *) red_arr_mem_46460)[sext_i32_i64(local_tid_46456)] =
                    x_41365;
                barrier(CLK_LOCAL_MEM_FENCE);
                // reduce the per-group results
                {
                    // Same two-stage group reduction as above. x_46471 and
                    // x_46472 declared here shadow the outer variables of
                    // the same name.
                    int32_t offset_46483;
                    int32_t skip_waves_46484;
                    
                    skip_waves_46484 = 1;
                    
                    float x_46471;
                    float x_46472;
                    
                    offset_46483 = 0;
                    // participating threads read initial accumulator
                    {
                        if (slt32(local_tid_46456,
                                  sext_i64_i32(segred_group_sizze_41361))) {
                            x_46471 = ((__local
                                        float *) red_arr_mem_46460)[sext_i32_i64(local_tid_46456 +
                                                                    offset_46483)];
                        }
                    }
                    offset_46483 = 1;
                    // Within-wave stage, no barrier between rounds.
                    while (slt32(offset_46483, wave_sizze_46458)) {
                        if (slt32(local_tid_46456 + offset_46483,
                                  sext_i64_i32(segred_group_sizze_41361)) &&
                            ((local_tid_46456 - squot32(local_tid_46456,
                                                        wave_sizze_46458) *
                              wave_sizze_46458) & (2 * offset_46483 - 1)) ==
                            0) {
                            // read array element
                            {
                                x_46472 = ((volatile __local
                                            float *) red_arr_mem_46460)[sext_i32_i64(local_tid_46456 +
                                                                        offset_46483)];
                            }
                            // apply reduction operation
                            {
                                float defunc_1_op_res_46473 = x_46471 + x_46472;
                                
                                x_46471 = defunc_1_op_res_46473;
                            }
                            // write result of operation
                            {
                                ((volatile __local
                                  float *) red_arr_mem_46460)[sext_i32_i64(local_tid_46456)] =
                                    x_46471;
                            }
                        }
                        offset_46483 *= 2;
                    }
                    // Cross-wave stage, one barrier per round.
                    while (slt32(skip_waves_46484,
                                 squot32(sext_i64_i32(segred_group_sizze_41361) +
                                         wave_sizze_46458 - 1,
                                         wave_sizze_46458))) {
                        barrier(CLK_LOCAL_MEM_FENCE);
                        offset_46483 = skip_waves_46484 * wave_sizze_46458;
                        if (slt32(local_tid_46456 + offset_46483,
                                  sext_i64_i32(segred_group_sizze_41361)) &&
                            ((local_tid_46456 - squot32(local_tid_46456,
                                                        wave_sizze_46458) *
                              wave_sizze_46458) == 0 &&
                             (squot32(local_tid_46456, wave_sizze_46458) & (2 *
                                                                            skip_waves_46484 -
                                                                            1)) ==
                             0)) {
                            // read array element
                            {
                                x_46472 = ((__local
                                            float *) red_arr_mem_46460)[sext_i32_i64(local_tid_46456 +
                                                                        offset_46483)];
                            }
                            // apply reduction operation
                            {
                                float defunc_1_op_res_46473 = x_46471 + x_46472;
                                
                                x_46471 = defunc_1_op_res_46473;
                            }
                            // write result of operation
                            {
                                ((__local
                                  float *) red_arr_mem_46460)[sext_i32_i64(local_tid_46456)] =
                                    x_46471;
                            }
                        }
                        skip_waves_46484 *= 2;
                    }
                    // and back to memory with the final result
                    {
                        if (local_tid_46456 == 0) {
                            ((__global float *) mem_45235)[gtid_41302] =
                                x_46471;
                        }
                    }
                }
            }
        }
        // Synchronise the group before it starts on its next virtual group,
        // which reuses the same local buffers.
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }
    
    // No goto in this kernel targets error_1.
  error_1:
    return;
    #undef segred_group_sizze_41361
}
__kernel void mainzisegred_large_41336(__global int *global_failure,
                                       __local volatile
                                       int64_t *sync_arr_mem_46402_backing_aligned_0,
                                       __local volatile
                                       int64_t *red_arr_mem_46400_backing_aligned_1,
                                       int64_t N_29165, int64_t i32_res_29175,
                                       int64_t num_groups_41348,
                                       int64_t groups_per_segment_46386,
                                       int64_t elements_per_thread_46387,
                                       int64_t virt_num_groups_46388,
                                       int64_t threads_per_segment_46390,
                                       __global unsigned char *images_mem_44381,
                                       __global unsigned char *mem_45232,
                                       __global
                                       unsigned char *group_res_arr_mem_46391,
                                       __global
                                       unsigned char *mainzicounter_mem_46393)
{
    #define segred_group_sizze_41347 (mainzisegred_group_sizze_41330)
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict sync_arr_mem_46402_backing_1 =
                          (__local volatile
                           char *) sync_arr_mem_46402_backing_aligned_0;
    __local volatile char *restrict red_arr_mem_46400_backing_0 =
                          (__local volatile
                           char *) red_arr_mem_46400_backing_aligned_1;
    
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_46395;
    int32_t local_tid_46396;
    int64_t group_sizze_46399;
    int32_t wave_sizze_46398;
    int32_t group_tid_46397;
    
    global_tid_46395 = get_global_id(0);
    local_tid_46396 = get_local_id(0);
    group_sizze_46399 = get_local_size(0);
    wave_sizze_46398 = LOCKSTEP_WIDTH;
    group_tid_46397 = get_group_id(0);
    
    int32_t phys_tid_41336;
    
    phys_tid_41336 = global_tid_46395;
    
    __local char *red_arr_mem_46400;
    
    red_arr_mem_46400 = (__local char *) red_arr_mem_46400_backing_0;
    
    __local char *sync_arr_mem_46402;
    
    sync_arr_mem_46402 = (__local char *) sync_arr_mem_46402_backing_1;
    
    int32_t phys_group_id_46404;
    
    phys_group_id_46404 = get_group_id(0);
    for (int32_t i_46405 = 0; i_46405 <
         sdiv_up32(sext_i64_i32(virt_num_groups_46388) - phys_group_id_46404,
                   sext_i64_i32(num_groups_41348)); i_46405++) {
        int32_t virt_group_id_46406 = phys_group_id_46404 + i_46405 *
                sext_i64_i32(num_groups_41348);
        int32_t flat_segment_id_46407 = squot32(virt_group_id_46406,
                                                sext_i64_i32(groups_per_segment_46386));
        int64_t global_tid_46408 = srem64(sext_i32_i64(virt_group_id_46406) *
                                          segred_group_sizze_41347 +
                                          sext_i32_i64(local_tid_46396),
                                          segred_group_sizze_41347 *
                                          groups_per_segment_46386);
        int64_t gtid_41327 = sext_i32_i64(flat_segment_id_46407);
        int64_t gtid_41335;
        int32_t x_acc_46409;
        int64_t chunk_sizze_46410;
        
        chunk_sizze_46410 = smin64(elements_per_thread_46387,
                                   sdiv_up64(i32_res_29175 -
                                             sext_i32_i64(sext_i64_i32(global_tid_46408)),
                                             threads_per_segment_46390));
        
        int32_t x_41351;
        int32_t x_41352;
        
        // neutral-initialise the accumulators
        {
            x_acc_46409 = 0;
        }
        for (int64_t i_46414 = 0; i_46414 < chunk_sizze_46410; i_46414++) {
            gtid_41335 = sext_i32_i64(sext_i64_i32(global_tid_46408)) +
                threads_per_segment_46390 * i_46414;
            // apply map function
            {
                float x_41355 = ((__global
                                  float *) images_mem_44381)[gtid_41327 *
                                                             N_29165 +
                                                             gtid_41335];
                bool isnan_res_41356;
                
                isnan_res_41356 = futrts_isnan32(x_41355);
                
                bool cond_41357 = !isnan_res_41356;
                int32_t defunc_0_f_res_41358 = btoi_bool_i32(cond_41357);
                
                // save map-out results
                { }
                // load accumulator
                {
                    x_41351 = x_acc_46409;
                }
                // load new values
                {
                    x_41352 = defunc_0_f_res_41358;
                }
                // apply reduction operator
                {
                    int32_t defunc_1_op_res_41353 = add32(x_41351, x_41352);
                    
                    // store in accumulator
                    {
                        x_acc_46409 = defunc_1_op_res_41353;
                    }
                }
            }
        }
        // to reduce current chunk, first store our result in memory
        {
            x_41351 = x_acc_46409;
            ((__local
              int32_t *) red_arr_mem_46400)[sext_i32_i64(local_tid_46396)] =
                x_41351;
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        
        int32_t offset_46415;
        int32_t skip_waves_46416;
        
        skip_waves_46416 = 1;
        
        int32_t x_46411;
        int32_t x_46412;
        
        offset_46415 = 0;
        // participating threads read initial accumulator
        {
            if (slt32(local_tid_46396,
                      sext_i64_i32(segred_group_sizze_41347))) {
                x_46411 = ((__local
                            int32_t *) red_arr_mem_46400)[sext_i32_i64(local_tid_46396 +
                                                          offset_46415)];
            }
        }
        offset_46415 = 1;
        while (slt32(offset_46415, wave_sizze_46398)) {
            if (slt32(local_tid_46396 + offset_46415,
                      sext_i64_i32(segred_group_sizze_41347)) &&
                ((local_tid_46396 - squot32(local_tid_46396, wave_sizze_46398) *
                  wave_sizze_46398) & (2 * offset_46415 - 1)) == 0) {
                // read array element
                {
                    x_46412 = ((volatile __local
                                int32_t *) red_arr_mem_46400)[sext_i32_i64(local_tid_46396 +
                                                              offset_46415)];
                }
                // apply reduction operation
                {
                    int32_t defunc_1_op_res_46413 = add32(x_46411, x_46412);
                    
                    x_46411 = defunc_1_op_res_46413;
                }
                // write result of operation
                {
                    ((volatile __local
                      int32_t *) red_arr_mem_46400)[sext_i32_i64(local_tid_46396)] =
                        x_46411;
                }
            }
            offset_46415 *= 2;
        }
        while (slt32(skip_waves_46416,
                     squot32(sext_i64_i32(segred_group_sizze_41347) +
                             wave_sizze_46398 - 1, wave_sizze_46398))) {
            barrier(CLK_LOCAL_MEM_FENCE);
            offset_46415 = skip_waves_46416 * wave_sizze_46398;
            if (slt32(local_tid_46396 + offset_46415,
                      sext_i64_i32(segred_group_sizze_41347)) &&
                ((local_tid_46396 - squot32(local_tid_46396, wave_sizze_46398) *
                  wave_sizze_46398) == 0 && (squot32(local_tid_46396,
                                                     wave_sizze_46398) & (2 *
                                                                          skip_waves_46416 -
                                                                          1)) ==
                 0)) {
                // read array element
                {
                    x_46412 = ((__local
                                int32_t *) red_arr_mem_46400)[sext_i32_i64(local_tid_46396 +
                                                              offset_46415)];
                }
                // apply reduction operation
                {
                    int32_t defunc_1_op_res_46413 = add32(x_46411, x_46412);
                    
                    x_46411 = defunc_1_op_res_46413;
                }
                // write result of operation
                {
                    ((__local
                      int32_t *) red_arr_mem_46400)[sext_i32_i64(local_tid_46396)] =
                        x_46411;
                }
            }
            skip_waves_46416 *= 2;
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        // first thread saves the result in accumulator
        {
            if (sext_i32_i64(local_tid_46396) == (int64_t) 0) {
                x_acc_46409 = x_46411;
            }
        }
        if (groups_per_segment_46386 == (int64_t) 1) {
            // first thread in group saves final result to memory
            {
                if (local_tid_46396 == 0) {
                    ((__global int32_t *) mem_45232)[gtid_41327] = x_acc_46409;
                }
            }
        } else {
            int32_t old_counter_46417;
            
            // first thread in group saves group result to global memory
            {
                if (local_tid_46396 == 0) {
                    ((__global
                      int32_t *) group_res_arr_mem_46391)[sext_i32_i64(virt_group_id_46406) *
                                                          segred_group_sizze_41347] =
                        x_acc_46409;
                    mem_fence_global();
                    old_counter_46417 =
                        atomic_add_i32_global(&((volatile __global
                                                 int *) mainzicounter_mem_46393)[sext_i32_i64(srem32(flat_segment_id_46407,
                                                                                                     10240))],
                                              (int) 1);
                    ((__local bool *) sync_arr_mem_46402)[(int64_t) 0] =
                        old_counter_46417 == groups_per_segment_46386 -
                        (int64_t) 1;
                }
            }
            barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
            
            bool is_last_group_46418;
            
            is_last_group_46418 = ((__local
                                    bool *) sync_arr_mem_46402)[(int64_t) 0];
            if (is_last_group_46418) {
                if (local_tid_46396 == 0) {
                    old_counter_46417 =
                        atomic_add_i32_global(&((volatile __global
                                                 int *) mainzicounter_mem_46393)[sext_i32_i64(srem32(flat_segment_id_46407,
                                                                                                     10240))],
                                              (int) ((int64_t) 0 -
                                                     groups_per_segment_46386));
                }
                // read in the per-group-results
                {
                    int64_t read_per_thread_46419 =
                            sdiv_up64(groups_per_segment_46386,
                                      segred_group_sizze_41347);
                    
                    x_41351 = 0;
                    for (int64_t i_46420 = 0; i_46420 < read_per_thread_46419;
                         i_46420++) {
                        int64_t group_res_id_46421 =
                                sext_i32_i64(local_tid_46396) *
                                read_per_thread_46419 + i_46420;
                        int64_t index_of_group_res_46422 =
                                sext_i32_i64(flat_segment_id_46407) *
                                groups_per_segment_46386 + group_res_id_46421;
                        
                        if (slt64(group_res_id_46421,
                                  groups_per_segment_46386)) {
                            x_41352 = ((__global
                                        int32_t *) group_res_arr_mem_46391)[index_of_group_res_46422 *
                                                                            segred_group_sizze_41347];
                            
                            int32_t defunc_1_op_res_41353;
                            
                            defunc_1_op_res_41353 = add32(x_41351, x_41352);
                            x_41351 = defunc_1_op_res_41353;
                        }
                    }
                }
                ((__local
                  int32_t *) red_arr_mem_46400)[sext_i32_i64(local_tid_46396)] =
                    x_41351;
                barrier(CLK_LOCAL_MEM_FENCE);
                // reduce the per-group results
                {
                    int32_t offset_46423;
                    int32_t skip_waves_46424;
                    
                    skip_waves_46424 = 1;
                    
                    int32_t x_46411;
                    int32_t x_46412;
                    
                    offset_46423 = 0;
                    // participating threads read initial accumulator
                    {
                        if (slt32(local_tid_46396,
                                  sext_i64_i32(segred_group_sizze_41347))) {
                            x_46411 = ((__local
                                        int32_t *) red_arr_mem_46400)[sext_i32_i64(local_tid_46396 +
                                                                      offset_46423)];
                        }
                    }
                    offset_46423 = 1;
                    while (slt32(offset_46423, wave_sizze_46398)) {
                        if (slt32(local_tid_46396 + offset_46423,
                                  sext_i64_i32(segred_group_sizze_41347)) &&
                            ((local_tid_46396 - squot32(local_tid_46396,
                                                        wave_sizze_46398) *
                              wave_sizze_46398) & (2 * offset_46423 - 1)) ==
                            0) {
                            // read array element
                            {
                                x_46412 = ((volatile __local
                                            int32_t *) red_arr_mem_46400)[sext_i32_i64(local_tid_46396 +
                                                                          offset_46423)];
                            }
                            // apply reduction operation
                            {
                                int32_t defunc_1_op_res_46413 = add32(x_46411,
                                                                      x_46412);
                                
                                x_46411 = defunc_1_op_res_46413;
                            }
                            // write result of operation
                            {
                                ((volatile __local
                                  int32_t *) red_arr_mem_46400)[sext_i32_i64(local_tid_46396)] =
                                    x_46411;
                            }
                        }
                        offset_46423 *= 2;
                    }
                    while (slt32(skip_waves_46424,
                                 squot32(sext_i64_i32(segred_group_sizze_41347) +
                                         wave_sizze_46398 - 1,
                                         wave_sizze_46398))) {
                        barrier(CLK_LOCAL_MEM_FENCE);
                        offset_46423 = skip_waves_46424 * wave_sizze_46398;
                        if (slt32(local_tid_46396 + offset_46423,
                                  sext_i64_i32(segred_group_sizze_41347)) &&
                            ((local_tid_46396 - squot32(local_tid_46396,
                                                        wave_sizze_46398) *
                              wave_sizze_46398) == 0 &&
                             (squot32(local_tid_46396, wave_sizze_46398) & (2 *
                                                                            skip_waves_46424 -
                                                                            1)) ==
                             0)) {
                            // read array element
                            {
                                x_46412 = ((__local
                                            int32_t *) red_arr_mem_46400)[sext_i32_i64(local_tid_46396 +
                                                                          offset_46423)];
                            }
                            // apply reduction operation
                            {
                                int32_t defunc_1_op_res_46413 = add32(x_46411,
                                                                      x_46412);
                                
                                x_46411 = defunc_1_op_res_46413;
                            }
                            // write result of operation
                            {
                                ((__local
                                  int32_t *) red_arr_mem_46400)[sext_i32_i64(local_tid_46396)] =
                                    x_46411;
                            }
                        }
                        skip_waves_46424 *= 2;
                    }
                    // and back to memory with the final result
                    {
                        if (local_tid_46396 == 0) {
                            ((__global int32_t *) mem_45232)[gtid_41327] =
                                x_46411;
                        }
                    }
                }
            }
        }
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }
    
  error_1:
    return;
    #undef segred_group_sizze_41347
}
// Segmented float-sum reduction kernel ("large" variant: one segment may be
// shared by several work-groups, see groups_per_segment_46555).
//
// For every segment gtid_41490 the kernel adds up the values produced by the
// map body below (a bounds-checked float read from
// defunc_4_map_res_mem_45178, or 0.0F) and stores a single float in
// mem_45278[gtid_41490].
//
// Parameters, as they are used inside this kernel:
//   global_failure, global_failure_args
//       Error-code slot and its argument array; written (code 23) when the
//       bounds check in the map body fails.
//   failure_is_an_option
//       When non-zero, the kernel returns at once if *global_failure is
//       already non-negative.
//   sync_arr_mem_46571_backing_aligned_0
//       Local scratch; element 0 is used as a bool flag telling the group
//       whether it was the last one to finish its segment.
//   red_arr_mem_46569_backing_aligned_1
//       Local scratch indexed by local id, one float per work-item, used by
//       the in-group reduction.
//   N_29165
//       Row stride of defunc_4_map_res_mem_45178 and exclusive upper bound
//       of the checked index.
//   i32_res_29568
//       Exclusive upper bound on the inner index gtid_41498 (assuming
//       sdiv_up64 is a round-up division; it is defined outside this chunk).
//   num_groups_41521
//       Stride between the virtual group ids handled by one physical group.
//   groups_per_segment_46555
//       Number of virtual groups that cooperate on one segment.
//   elements_per_thread_46556
//       Cap on the number of map-loop iterations per work-item.
//   virt_num_groups_46557
//       Total number of virtual groups to process.
//   threads_per_segment_46559
//       Stride between consecutive inner indices of one work-item.
//   defunc_3_map_res_mem_45244, defunc_3_map_res_mem_45245
//       Per-segment int32 values read by the map body.
//   group_res_arr_mem_46560
//       Global scratch for per-group partial sums (multi-group case only).
//   mainzicounter_mem_46562
//       Global int counters, indexed by segment id modulo 10240, counting
//       how many groups of a segment have published their partial sum.
__kernel void mainzisegred_large_41499(__global int *global_failure,
                                       int failure_is_an_option, __global
                                       int64_t *global_failure_args,
                                       __local volatile
                                       int64_t *sync_arr_mem_46571_backing_aligned_0,
                                       __local volatile
                                       int64_t *red_arr_mem_46569_backing_aligned_1,
                                       int64_t N_29165, int64_t i32_res_29568,
                                       int64_t num_groups_41521,
                                       int64_t groups_per_segment_46555,
                                       int64_t elements_per_thread_46556,
                                       int64_t virt_num_groups_46557,
                                       int64_t threads_per_segment_46559,
                                       __global
                                       unsigned char *defunc_4_map_res_mem_45178,
                                       __global
                                       unsigned char *defunc_3_map_res_mem_45244,
                                       __global
                                       unsigned char *defunc_3_map_res_mem_45245,
                                       __global unsigned char *mem_45278,
                                       __global
                                       unsigned char *group_res_arr_mem_46560,
                                       __global
                                       unsigned char *mainzicounter_mem_46562)
{
    // Group size used in all index arithmetic below. The macro it expands to
    // is defined outside this kernel (presumably a build-time constant).
    #define segred_group_sizze_41520 (mainzisegred_group_sizze_41493)
    
    // These three constants are not referenced again within this kernel.
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    // Byte-typed views of the two local scratch buffers passed in above.
    __local volatile char *restrict sync_arr_mem_46571_backing_1 =
                          (__local volatile
                           char *) sync_arr_mem_46571_backing_aligned_0;
    __local volatile char *restrict red_arr_mem_46569_backing_0 =
                          (__local volatile
                           char *) red_arr_mem_46569_backing_aligned_1;
    // Group-wide flag: set by any work-item whose bounds check fails and
    // inspected by the whole group after the map loop (label error_0).
    volatile __local bool local_failure;
    
    // Skip all work if a failure code has already been recorded.
    if (failure_is_an_option) {
        int failed = *global_failure >= 0;
        
        if (failed)
            return;
    }
    local_failure = false;
    // Synchronise so the cleared flag is in place before any work-item can
    // set it in the map loop.
    barrier(CLK_LOCAL_MEM_FENCE);
    
    int32_t global_tid_46564;
    int32_t local_tid_46565;
    int64_t group_sizze_46568;
    int32_t wave_sizze_46567;
    int32_t group_tid_46566;
    
    global_tid_46564 = get_global_id(0);
    local_tid_46565 = get_local_id(0);
    group_sizze_46568 = get_local_size(0);
    // LOCKSTEP_WIDTH is defined outside this kernel; it bounds the
    // barrier-free first phase of the reductions below.
    wave_sizze_46567 = LOCKSTEP_WIDTH;
    group_tid_46566 = get_group_id(0);
    
    int32_t phys_tid_41499;
    
    phys_tid_41499 = global_tid_46564;
    
    // Non-volatile aliases of the local scratch buffers; the code below casts
    // them to float / bool pointers at each use.
    __local char *red_arr_mem_46569;
    
    red_arr_mem_46569 = (__local char *) red_arr_mem_46569_backing_0;
    
    __local char *sync_arr_mem_46571;
    
    sync_arr_mem_46571 = (__local char *) sync_arr_mem_46571_backing_1;
    
    int32_t phys_group_id_46573;
    
    phys_group_id_46573 = get_group_id(0);
    // Each physical group processes the virtual groups
    // phys_group_id, phys_group_id + num_groups, ... below virt_num_groups
    // (assuming sdiv_up32 is a round-up division).
    for (int32_t i_46574 = 0; i_46574 <
         sdiv_up32(sext_i64_i32(virt_num_groups_46557) - phys_group_id_46573,
                   sext_i64_i32(num_groups_41521)); i_46574++) {
        int32_t virt_group_id_46575 = phys_group_id_46573 + i_46574 *
                sext_i64_i32(num_groups_41521);
        // Segment this virtual group belongs to.
        int32_t flat_segment_id_46576 = squot32(virt_group_id_46575,
                                                sext_i64_i32(groups_per_segment_46555));
        // Work-item index within the segment: flat id of the work-item
        // reduced modulo (group size * groups per segment). Note that this
        // shadows nothing; global_tid_46564 above is the raw global id.
        int64_t global_tid_46577 = srem64(sext_i32_i64(virt_group_id_46575) *
                                          segred_group_sizze_41520 +
                                          sext_i32_i64(local_tid_46565),
                                          segred_group_sizze_41520 *
                                          groups_per_segment_46555);
        int64_t gtid_41490 = sext_i32_i64(flat_segment_id_46576);
        int64_t gtid_41498;
        float x_acc_46578;
        int64_t chunk_sizze_46579;
        
        // Number of elements this work-item visits: capped by
        // elements_per_thread and by how many strided indices starting at
        // global_tid still fall below i32_res_29568.
        chunk_sizze_46579 = smin64(elements_per_thread_46556,
                                   sdiv_up64(i32_res_29568 -
                                             sext_i32_i64(sext_i64_i32(global_tid_46577)),
                                             threads_per_segment_46559));
        
        float x_41524;
        float x_41525;
        
        // neutral-initialise the accumulators
        {
            x_acc_46578 = 0.0F;
        }
        // Sequential per-work-item phase: visit indices global_tid,
        // global_tid + threads_per_segment, ... and sum the mapped values.
        for (int64_t i_46583 = 0; i_46583 < chunk_sizze_46579; i_46583++) {
            gtid_41498 = sext_i32_i64(sext_i64_i32(global_tid_46577)) +
                threads_per_segment_46559 * i_46583;
            // apply map function
            {
                int32_t x_41529 = ((__global
                                    int32_t *) defunc_3_map_res_mem_45244)[gtid_41490];
                int32_t index_primexp_42390 = sext_i64_i32(gtid_41498);
                // Only inner indices below the per-segment value x_41529
                // contribute; all others map to 0.0F.
                bool cond_41531 = slt32(index_primexp_42390, x_41529);
                float defunc_0_f_res_41532;
                
                if (cond_41531) {
                    int32_t x_41528 = ((__global
                                        int32_t *) defunc_3_map_res_mem_45245)[gtid_41490];
                    // Column to read: x_41528 + index - x_41529 + 1.
                    int32_t x_41533 = add32(x_41528, index_primexp_42390);
                    int32_t x_41534 = sub32(x_41533, x_41529);
                    int32_t i_41535 = add32(1, x_41534);
                    int64_t i_41536 = sext_i32_i64(i_41535);
                    // Require 0 <= i_41536 < N_29165 before indexing.
                    bool x_41537 = sle64((int64_t) 0, i_41536);
                    bool y_41538 = slt64(i_41536, N_29165);
                    bool bounds_check_41539 = x_41537 && y_41538;
                    bool index_certs_41540;
                    
                    if (!bounds_check_41539) {
                        {
                            // Record failure code 23 only if no code is set
                            // yet (slot still -1); the work-item that wins
                            // also stores the offending index and the bound.
                            if (atomic_cmpxchg_i32_global(global_failure, -1,
                                                          23) == -1) {
                                global_failure_args[0] = i_41536;
                                global_failure_args[1] = N_29165;
                                ;
                            }
                            // Tell the rest of the group, then jump to the
                            // barrier at error_0 so that every work-item
                            // still reaches it.
                            local_failure = true;
                            goto error_0;
                        }
                    }
                    
                    // Row gtid_41490, column i_41536 of a float array with
                    // row stride N_29165.
                    float defunc_0_f_res_t_res_41541 = ((__global
                                                         float *) defunc_4_map_res_mem_45178)[gtid_41490 *
                                                                                              N_29165 +
                                                                                              i_41536];
                    
                    defunc_0_f_res_41532 = defunc_0_f_res_t_res_41541;
                } else {
                    defunc_0_f_res_41532 = 0.0F;
                }
                // save map-out results
                { }
                // load accumulator
                {
                    x_41524 = x_acc_46578;
                }
                // load new values
                {
                    x_41525 = defunc_0_f_res_41532;
                }
                // apply reduction operator
                {
                    float defunc_1_op_res_41526 = x_41524 + x_41525;
                    
                    // store in accumulator
                    {
                        x_acc_46578 = defunc_1_op_res_41526;
                    }
                }
            }
        }
        // to reduce current chunk, first store our result in memory
        {
            x_41524 = x_acc_46578;
            ((__local
              float *) red_arr_mem_46569)[sext_i32_i64(local_tid_46565)] =
                x_41524;
        }
        
        // Both the normal path and the failure path arrive here. After the
        // barrier the whole group sees the same local_failure value, so
        // either every work-item returns or none does.
      error_0:
        barrier(CLK_LOCAL_MEM_FENCE);
        if (local_failure)
            return;
        barrier(CLK_LOCAL_MEM_FENCE);
        
        // In-group reduction of the per-work-item sums held in
        // red_arr_mem_46569. Phase 1 combines within a wave without
        // barriers; phase 2 combines the wave results with barriers.
        int32_t offset_46584;
        int32_t skip_waves_46585;
        
        skip_waves_46585 = 1;
        
        float x_46580;
        float x_46581;
        
        offset_46584 = 0;
        // participating threads read initial accumulator
        {
            if (slt32(local_tid_46565,
                      sext_i64_i32(segred_group_sizze_41520))) {
                x_46580 = ((__local
                            float *) red_arr_mem_46569)[sext_i32_i64(local_tid_46565 +
                                                        offset_46584)];
            }
        }
        offset_46584 = 1;
        // Phase 1: offsets 1, 2, 4, ... below the wave size. A work-item
        // whose lane index (local id minus its wave's base) is a multiple of
        // 2 * offset adds in the element offset slots above it. No barrier
        // is issued here and accesses go through volatile pointers;
        // presumably this relies on lockstep execution within a wave.
        while (slt32(offset_46584, wave_sizze_46567)) {
            if (slt32(local_tid_46565 + offset_46584,
                      sext_i64_i32(segred_group_sizze_41520)) &&
                ((local_tid_46565 - squot32(local_tid_46565, wave_sizze_46567) *
                  wave_sizze_46567) & (2 * offset_46584 - 1)) == 0) {
                // read array element
                {
                    x_46581 = ((volatile __local
                                float *) red_arr_mem_46569)[sext_i32_i64(local_tid_46565 +
                                                            offset_46584)];
                }
                // apply reduction operation
                {
                    float defunc_1_op_res_46582 = x_46580 + x_46581;
                    
                    x_46580 = defunc_1_op_res_46582;
                }
                // write result of operation
                {
                    ((volatile __local
                      float *) red_arr_mem_46569)[sext_i32_i64(local_tid_46565)] =
                        x_46580;
                }
            }
            offset_46584 *= 2;
        }
        // Phase 2: combine across waves. skip_waves doubles each round up to
        // the number of waves in the group (group size divided by wave size,
        // rounded up). Only the first lane of a wave whose wave index is a
        // multiple of 2 * skip_waves takes part; a barrier separates rounds.
        while (slt32(skip_waves_46585,
                     squot32(sext_i64_i32(segred_group_sizze_41520) +
                             wave_sizze_46567 - 1, wave_sizze_46567))) {
            barrier(CLK_LOCAL_MEM_FENCE);
            offset_46584 = skip_waves_46585 * wave_sizze_46567;
            if (slt32(local_tid_46565 + offset_46584,
                      sext_i64_i32(segred_group_sizze_41520)) &&
                ((local_tid_46565 - squot32(local_tid_46565, wave_sizze_46567) *
                  wave_sizze_46567) == 0 && (squot32(local_tid_46565,
                                                     wave_sizze_46567) & (2 *
                                                                          skip_waves_46585 -
                                                                          1)) ==
                 0)) {
                // read array element
                {
                    x_46581 = ((__local
                                float *) red_arr_mem_46569)[sext_i32_i64(local_tid_46565 +
                                                            offset_46584)];
                }
                // apply reduction operation
                {
                    float defunc_1_op_res_46582 = x_46580 + x_46581;
                    
                    x_46580 = defunc_1_op_res_46582;
                }
                // write result of operation
                {
                    ((__local
                      float *) red_arr_mem_46569)[sext_i32_i64(local_tid_46565)] =
                        x_46580;
                }
            }
            skip_waves_46585 *= 2;
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        // first thread saves the result in accumulator
        {
            if (sext_i32_i64(local_tid_46565) == (int64_t) 0) {
                x_acc_46578 = x_46580;
            }
        }
        // Single-group segments are finished here; otherwise the groups of a
        // segment combine their partial sums through global memory.
        if (groups_per_segment_46555 == (int64_t) 1) {
            // first thread in group saves final result to memory
            {
                if (local_tid_46565 == 0) {
                    ((__global float *) mem_45278)[gtid_41490] = x_acc_46578;
                }
            }
        } else {
            int32_t old_counter_46586;
            
            // first thread in group saves group result to global memory
            {
                if (local_tid_46565 == 0) {
                    // Partial sums are stored group-size elements apart,
                    // at slot virt_group_id * group size.
                    ((__global
                      float *) group_res_arr_mem_46560)[sext_i32_i64(virt_group_id_46575) *
                                                        segred_group_sizze_41520] =
                        x_acc_46578;
                    // Presumably orders the store above before the counter
                    // update below (mem_fence_global is defined outside
                    // this chunk).
                    mem_fence_global();
                    // Count this group as done for its segment. The counter
                    // slot is the segment id modulo 10240.
                    old_counter_46586 =
                        atomic_add_i32_global(&((volatile __global
                                                 int *) mainzicounter_mem_46562)[sext_i32_i64(srem32(flat_segment_id_46576,
                                                                                                     10240))],
                                              (int) 1);
                    // The group that observes groups_per_segment - 1 is
                    // flagged as the last one (assuming the atomic returns
                    // the value held before the add).
                    ((__local bool *) sync_arr_mem_46571)[(int64_t) 0] =
                        old_counter_46586 == groups_per_segment_46555 -
                        (int64_t) 1;
                }
            }
            // Publish the flag written by work-item 0 to the whole group.
            barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
            
            bool is_last_group_46587;
            
            is_last_group_46587 = ((__local
                                    bool *) sync_arr_mem_46571)[(int64_t) 0];
            if (is_last_group_46587) {
                // Subtract groups_per_segment from the counter slot again,
                // undoing the increments made for this segment.
                if (local_tid_46565 == 0) {
                    old_counter_46586 =
                        atomic_add_i32_global(&((volatile __global
                                                 int *) mainzicounter_mem_46562)[sext_i32_i64(srem32(flat_segment_id_46576,
                                                                                                     10240))],
                                              (int) ((int64_t) 0 -
                                                     groups_per_segment_46555));
                }
                // read in the per-group-results
                {
                    // Each work-item sums a contiguous run of
                    // read_per_thread partial results of this segment.
                    int64_t read_per_thread_46588 =
                            sdiv_up64(groups_per_segment_46555,
                                      segred_group_sizze_41520);
                    
                    x_41524 = 0.0F;
                    for (int64_t i_46589 = 0; i_46589 < read_per_thread_46588;
                         i_46589++) {
                        int64_t group_res_id_46590 =
                                sext_i32_i64(local_tid_46565) *
                                read_per_thread_46588 + i_46589;
                        int64_t index_of_group_res_46591 =
                                sext_i32_i64(flat_segment_id_46576) *
                                groups_per_segment_46555 + group_res_id_46590;
                        
                        // Skip ids past the last group of the segment.
                        if (slt64(group_res_id_46590,
                                  groups_per_segment_46555)) {
                            x_41525 = ((__global
                                        float *) group_res_arr_mem_46560)[index_of_group_res_46591 *
                                                                          segred_group_sizze_41520];
                            
                            float defunc_1_op_res_41526;
                            
                            defunc_1_op_res_41526 = x_41524 + x_41525;
                            x_41524 = defunc_1_op_res_41526;
                        }
                    }
                }
                ((__local
                  float *) red_arr_mem_46569)[sext_i32_i64(local_tid_46565)] =
                    x_41524;
                barrier(CLK_LOCAL_MEM_FENCE);
                // reduce the per-group results
                {
                    // Same two-phase in-group reduction as above, now over
                    // the per-work-item sums of the partial results. The
                    // x_46580 / x_46581 declared here shadow the outer ones.
                    int32_t offset_46592;
                    int32_t skip_waves_46593;
                    
                    skip_waves_46593 = 1;
                    
                    float x_46580;
                    float x_46581;
                    
                    offset_46592 = 0;
                    // participating threads read initial accumulator
                    {
                        if (slt32(local_tid_46565,
                                  sext_i64_i32(segred_group_sizze_41520))) {
                            x_46580 = ((__local
                                        float *) red_arr_mem_46569)[sext_i32_i64(local_tid_46565 +
                                                                    offset_46592)];
                        }
                    }
                    offset_46592 = 1;
                    // Phase 1: barrier-free combine within a wave.
                    while (slt32(offset_46592, wave_sizze_46567)) {
                        if (slt32(local_tid_46565 + offset_46592,
                                  sext_i64_i32(segred_group_sizze_41520)) &&
                            ((local_tid_46565 - squot32(local_tid_46565,
                                                        wave_sizze_46567) *
                              wave_sizze_46567) & (2 * offset_46592 - 1)) ==
                            0) {
                            // read array element
                            {
                                x_46581 = ((volatile __local
                                            float *) red_arr_mem_46569)[sext_i32_i64(local_tid_46565 +
                                                                        offset_46592)];
                            }
                            // apply reduction operation
                            {
                                float defunc_1_op_res_46582 = x_46580 + x_46581;
                                
                                x_46580 = defunc_1_op_res_46582;
                            }
                            // write result of operation
                            {
                                ((volatile __local
                                  float *) red_arr_mem_46569)[sext_i32_i64(local_tid_46565)] =
                                    x_46580;
                            }
                        }
                        offset_46592 *= 2;
                    }
                    // Phase 2: combine the wave results, one barrier per
                    // round.
                    while (slt32(skip_waves_46593,
                                 squot32(sext_i64_i32(segred_group_sizze_41520) +
                                         wave_sizze_46567 - 1,
                                         wave_sizze_46567))) {
                        barrier(CLK_LOCAL_MEM_FENCE);
                        offset_46592 = skip_waves_46593 * wave_sizze_46567;
                        if (slt32(local_tid_46565 + offset_46592,
                                  sext_i64_i32(segred_group_sizze_41520)) &&
                            ((local_tid_46565 - squot32(local_tid_46565,
                                                        wave_sizze_46567) *
                              wave_sizze_46567) == 0 &&
                             (squot32(local_tid_46565, wave_sizze_46567) & (2 *
                                                                            skip_waves_46593 -
                                                                            1)) ==
                             0)) {
                            // read array element
                            {
                                x_46581 = ((__local
                                            float *) red_arr_mem_46569)[sext_i32_i64(local_tid_46565 +
                                                                        offset_46592)];
                            }
                            // apply reduction operation
                            {
                                float defunc_1_op_res_46582 = x_46580 + x_46581;
                                
                                x_46580 = defunc_1_op_res_46582;
                            }
                            // write result of operation
                            {
                                ((__local
                                  float *) red_arr_mem_46569)[sext_i32_i64(local_tid_46565)] =
                                    x_46580;
                            }
                        }
                        skip_waves_46593 *= 2;
                    }
                    // and back to memory with the final result
                    {
                        if (local_tid_46565 == 0) {
                            ((__global float *) mem_45278)[gtid_41490] =
                                x_46580;
                        }
                    }
                }
            }
        }
        // Keep the group together before the next virtual group reuses the
        // local scratch buffers.
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }
    
    // No goto in this kernel targets error_1; the label only marks the exit.
  error_1:
    return;
    #undef segred_group_sizze_41520
}
__kernel void mainzisegred_large_42060(__global int *global_failure,
                                       __local volatile
                                       int64_t *sync_arr_mem_46752_backing_aligned_0,
                                       __local volatile
                                       int64_t *red_arr_mem_46750_backing_aligned_1,
                                       __local volatile
                                       int64_t *red_arr_mem_46748_backing_aligned_2,
                                       __local volatile
                                       int64_t *red_arr_mem_46746_backing_aligned_3,
                                       int64_t iota32_arg_29597,
                                       int64_t num_groups_42247,
                                       int64_t groups_per_segment_46728,
                                       int64_t elements_per_thread_46729,
                                       int64_t virt_num_groups_46730, __global
                                       unsigned char *mem_45284, __global
                                       unsigned char *mem_45296, __global
                                       unsigned char *mem_45298, __global
                                       unsigned char *mem_45302, __global
                                       unsigned char *mem_45305, __global
                                       unsigned char *mem_45307, __global
                                       unsigned char *mem_45309, __global
                                       unsigned char *group_res_arr_mem_46733,
                                       __global
                                       unsigned char *group_res_arr_mem_46735,
                                       __global
                                       unsigned char *group_res_arr_mem_46737,
                                       __global
                                       unsigned char *mainzicounter_mem_46739)
{
    #define segred_group_sizze_42246 (mainzisegred_group_sizze_42054)
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict sync_arr_mem_46752_backing_3 =
                          (__local volatile
                           char *) sync_arr_mem_46752_backing_aligned_0;
    __local volatile char *restrict red_arr_mem_46750_backing_2 =
                          (__local volatile
                           char *) red_arr_mem_46750_backing_aligned_1;
    __local volatile char *restrict red_arr_mem_46748_backing_1 =
                          (__local volatile
                           char *) red_arr_mem_46748_backing_aligned_2;
    __local volatile char *restrict red_arr_mem_46746_backing_0 =
                          (__local volatile
                           char *) red_arr_mem_46746_backing_aligned_3;
    
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_46741;
    int32_t local_tid_46742;
    int64_t group_sizze_46745;
    int32_t wave_sizze_46744;
    int32_t group_tid_46743;
    
    global_tid_46741 = get_global_id(0);
    local_tid_46742 = get_local_id(0);
    group_sizze_46745 = get_local_size(0);
    wave_sizze_46744 = LOCKSTEP_WIDTH;
    group_tid_46743 = get_group_id(0);
    
    int32_t phys_tid_42060;
    
    phys_tid_42060 = global_tid_46741;
    
    __local char *red_arr_mem_46746;
    
    red_arr_mem_46746 = (__local char *) red_arr_mem_46746_backing_0;
    
    __local char *red_arr_mem_46748;
    
    red_arr_mem_46748 = (__local char *) red_arr_mem_46748_backing_1;
    
    __local char *red_arr_mem_46750;
    
    red_arr_mem_46750 = (__local char *) red_arr_mem_46750_backing_2;
    
    __local char *sync_arr_mem_46752;
    
    sync_arr_mem_46752 = (__local char *) sync_arr_mem_46752_backing_3;
    
    int32_t phys_group_id_46754;
    
    phys_group_id_46754 = get_group_id(0);
    for (int32_t i_46755 = 0; i_46755 <
         sdiv_up32(sext_i64_i32(virt_num_groups_46730) - phys_group_id_46754,
                   sext_i64_i32(num_groups_42247)); i_46755++) {
        int32_t virt_group_id_46756 = phys_group_id_46754 + i_46755 *
                sext_i64_i32(num_groups_42247);
        int32_t flat_segment_id_46757 = squot32(virt_group_id_46756,
                                                sext_i64_i32(groups_per_segment_46728));
        int64_t global_tid_46758 = srem64(sext_i32_i64(virt_group_id_46756) *
                                          segred_group_sizze_42246 +
                                          sext_i32_i64(local_tid_46742),
                                          segred_group_sizze_42246 *
                                          groups_per_segment_46728);
        int64_t gtid_42051 = sext_i32_i64(flat_segment_id_46757);
        int64_t gtid_42059;
        bool x_acc_46759;
        int32_t x_acc_46760;
        float x_acc_46761;
        int64_t chunk_sizze_46762;
        int64_t starting_point_46763;
        
        starting_point_46763 = sext_i32_i64(sext_i64_i32(global_tid_46758)) *
            elements_per_thread_46729;
        
        int64_t remaining_elements_46764;
        
        remaining_elements_46764 = iota32_arg_29597 - starting_point_46763;
        if (sle64(remaining_elements_46764, (int64_t) 0) ||
            sle64(iota32_arg_29597, starting_point_46763)) {
            chunk_sizze_46762 = (int64_t) 0;
        } else {
            if (slt64(iota32_arg_29597,
                      (sext_i32_i64(sext_i64_i32(global_tid_46758)) +
                       (int64_t) 1) * elements_per_thread_46729)) {
                chunk_sizze_46762 = iota32_arg_29597 -
                    sext_i32_i64(sext_i64_i32(global_tid_46758)) *
                    elements_per_thread_46729;
            } else {
                chunk_sizze_46762 = elements_per_thread_46729;
            }
        }
        
        bool x_42252;
        int32_t x_42253;
        float x_42254;
        bool x_42255;
        int32_t x_42256;
        float x_42257;
        
        // neutral-initialise the accumulators
        {
            x_acc_46759 = 0;
            x_acc_46760 = -1;
            x_acc_46761 = 0.0F;
        }
        for (int64_t i_46779 = 0; i_46779 < elements_per_thread_46729;
             i_46779++) {
            gtid_42059 = sext_i32_i64(local_tid_46742) +
                (sext_i32_i64(squot32(sext_i64_i32(global_tid_46758),
                                      sext_i64_i32(segred_group_sizze_42246))) *
                 elements_per_thread_46729 + i_46779) *
                segred_group_sizze_42246;
            if (slt64(gtid_42059, iota32_arg_29597)) {
                // apply map function
                {
                    int32_t y_42266 = ((__global
                                        int32_t *) mem_45298)[gtid_42051];
                    float y_42267 = ((__global float *) mem_45296)[gtid_42051];
                    float x_42271 = ((__global float *) mem_45302)[gtid_42051 *
                                                                   iota32_arg_29597 +
                                                                   gtid_42059];
                    float x_42272 = ((__global float *) mem_45284)[gtid_42059];
                    int32_t index_primexp_42404 = sext_i64_i32(gtid_42059);
                    float defunc_0_f_res_42275 = x_42271 / y_42267;
                    bool cond_42276 = slt32(index_primexp_42404, y_42266);
                    bool isnan_res_42277;
                    
                    isnan_res_42277 = futrts_isnan32(defunc_0_f_res_42275);
                    
                    bool cond_t_res_42278 = !isnan_res_42277;
                    bool x_42279 = cond_42276 && cond_t_res_42278;
                    float abs_res_42280 = (float) fabs(defunc_0_f_res_42275);
                    bool defunc_2_f_res_t_res_42281 = x_42272 < abs_res_42280;
                    bool x_42282 = x_42279 && defunc_2_f_res_t_res_42281;
                    float defunc_1_f_res_42283;
                    
                    if (cond_42276) {
                        defunc_1_f_res_42283 = defunc_0_f_res_42275;
                    } else {
                        defunc_1_f_res_42283 = 0.0F;
                    }
                    // save map-out results
                    { }
                    // load accumulator
                    {
                        x_42252 = x_acc_46759;
                        x_42253 = x_acc_46760;
                        x_42254 = x_acc_46761;
                    }
                    // load new values
                    {
                        x_42255 = x_42282;
                        x_42256 = index_primexp_42404;
                        x_42257 = defunc_1_f_res_42283;
                    }
                    // apply reduction operator
                    {
                        bool defunc_1_op_res_42258;
                        int32_t defunc_1_op_res_42259;
                        
                        if (x_42252) {
                            defunc_1_op_res_42258 = x_42252;
                            defunc_1_op_res_42259 = x_42253;
                        } else {
                            bool x_42260 = x_42255 && x_42255;
                            bool x_42261 = !x_42255;
                            bool y_42262 = x_42252 && x_42261;
                            bool defunc_1_op_res_f_res_42263 = x_42260 ||
                                 y_42262;
                            int32_t defunc_1_op_res_f_res_42264;
                            
                            if (x_42255) {
                                defunc_1_op_res_f_res_42264 = x_42256;
                            } else {
                                defunc_1_op_res_f_res_42264 = x_42253;
                            }
                            defunc_1_op_res_42258 = defunc_1_op_res_f_res_42263;
                            defunc_1_op_res_42259 = defunc_1_op_res_f_res_42264;
                        }
                        
                        float defunc_1_op_res_42265 = x_42254 + x_42257;
                        
                        // store in accumulator
                        {
                            x_acc_46759 = defunc_1_op_res_42258;
                            x_acc_46760 = defunc_1_op_res_42259;
                            x_acc_46761 = defunc_1_op_res_42265;
                        }
                    }
                }
            }
            // to reduce current chunk, first store our result in memory
            {
                x_42252 = x_acc_46759;
                x_42253 = x_acc_46760;
                x_42254 = x_acc_46761;
                ((__local
                  bool *) red_arr_mem_46746)[sext_i32_i64(local_tid_46742)] =
                    x_42252;
                ((__local
                  int32_t *) red_arr_mem_46748)[sext_i32_i64(local_tid_46742)] =
                    x_42253;
                ((__local
                  float *) red_arr_mem_46750)[sext_i32_i64(local_tid_46742)] =
                    x_42254;
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            
            int32_t offset_46780;
            int32_t skip_waves_46781;
            
            skip_waves_46781 = 1;
            
            bool x_46765;
            int32_t x_46766;
            float x_46767;
            bool x_46768;
            int32_t x_46769;
            float x_46770;
            
            offset_46780 = 0;
            // participating threads read initial accumulator
            {
                if (slt32(local_tid_46742,
                          sext_i64_i32(segred_group_sizze_42246))) {
                    x_46765 = ((__local
                                bool *) red_arr_mem_46746)[sext_i32_i64(local_tid_46742 +
                                                           offset_46780)];
                    x_46766 = ((__local
                                int32_t *) red_arr_mem_46748)[sext_i32_i64(local_tid_46742 +
                                                              offset_46780)];
                    x_46767 = ((__local
                                float *) red_arr_mem_46750)[sext_i32_i64(local_tid_46742 +
                                                            offset_46780)];
                }
            }
            offset_46780 = 1;
            while (slt32(offset_46780, wave_sizze_46744)) {
                if (slt32(local_tid_46742 + offset_46780,
                          sext_i64_i32(segred_group_sizze_42246)) &&
                    ((local_tid_46742 - squot32(local_tid_46742,
                                                wave_sizze_46744) *
                      wave_sizze_46744) & (2 * offset_46780 - 1)) == 0) {
                    // read array element
                    {
                        x_46768 = ((volatile __local
                                    bool *) red_arr_mem_46746)[sext_i32_i64(local_tid_46742 +
                                                               offset_46780)];
                        x_46769 = ((volatile __local
                                    int32_t *) red_arr_mem_46748)[sext_i32_i64(local_tid_46742 +
                                                                  offset_46780)];
                        x_46770 = ((volatile __local
                                    float *) red_arr_mem_46750)[sext_i32_i64(local_tid_46742 +
                                                                offset_46780)];
                    }
                    // apply reduction operation
                    {
                        bool defunc_1_op_res_46771;
                        int32_t defunc_1_op_res_46772;
                        
                        if (x_46765) {
                            defunc_1_op_res_46771 = x_46765;
                            defunc_1_op_res_46772 = x_46766;
                        } else {
                            bool x_46773 = x_46768 && x_46768;
                            bool x_46774 = !x_46768;
                            bool y_46775 = x_46765 && x_46774;
                            bool defunc_1_op_res_f_res_46776 = x_46773 ||
                                 y_46775;
                            int32_t defunc_1_op_res_f_res_46777;
                            
                            if (x_46768) {
                                defunc_1_op_res_f_res_46777 = x_46769;
                            } else {
                                defunc_1_op_res_f_res_46777 = x_46766;
                            }
                            defunc_1_op_res_46771 = defunc_1_op_res_f_res_46776;
                            defunc_1_op_res_46772 = defunc_1_op_res_f_res_46777;
                        }
                        
                        float defunc_1_op_res_46778 = x_46767 + x_46770;
                        
                        x_46765 = defunc_1_op_res_46771;
                        x_46766 = defunc_1_op_res_46772;
                        x_46767 = defunc_1_op_res_46778;
                    }
                    // write result of operation
                    {
                        ((volatile __local
                          bool *) red_arr_mem_46746)[sext_i32_i64(local_tid_46742)] =
                            x_46765;
                        ((volatile __local
                          int32_t *) red_arr_mem_46748)[sext_i32_i64(local_tid_46742)] =
                            x_46766;
                        ((volatile __local
                          float *) red_arr_mem_46750)[sext_i32_i64(local_tid_46742)] =
                            x_46767;
                    }
                }
                offset_46780 *= 2;
            }
            while (slt32(skip_waves_46781,
                         squot32(sext_i64_i32(segred_group_sizze_42246) +
                                 wave_sizze_46744 - 1, wave_sizze_46744))) {
                barrier(CLK_LOCAL_MEM_FENCE);
                offset_46780 = skip_waves_46781 * wave_sizze_46744;
                if (slt32(local_tid_46742 + offset_46780,
                          sext_i64_i32(segred_group_sizze_42246)) &&
                    ((local_tid_46742 - squot32(local_tid_46742,
                                                wave_sizze_46744) *
                      wave_sizze_46744) == 0 && (squot32(local_tid_46742,
                                                         wave_sizze_46744) &
                                                 (2 * skip_waves_46781 - 1)) ==
                     0)) {
                    // read array element
                    {
                        x_46768 = ((__local
                                    bool *) red_arr_mem_46746)[sext_i32_i64(local_tid_46742 +
                                                               offset_46780)];
                        x_46769 = ((__local
                                    int32_t *) red_arr_mem_46748)[sext_i32_i64(local_tid_46742 +
                                                                  offset_46780)];
                        x_46770 = ((__local
                                    float *) red_arr_mem_46750)[sext_i32_i64(local_tid_46742 +
                                                                offset_46780)];
                    }
                    // apply reduction operation
                    {
                        bool defunc_1_op_res_46771;
                        int32_t defunc_1_op_res_46772;
                        
                        if (x_46765) {
                            defunc_1_op_res_46771 = x_46765;
                            defunc_1_op_res_46772 = x_46766;
                        } else {
                            bool x_46773 = x_46768 && x_46768;
                            bool x_46774 = !x_46768;
                            bool y_46775 = x_46765 && x_46774;
                            bool defunc_1_op_res_f_res_46776 = x_46773 ||
                                 y_46775;
                            int32_t defunc_1_op_res_f_res_46777;
                            
                            if (x_46768) {
                                defunc_1_op_res_f_res_46777 = x_46769;
                            } else {
                                defunc_1_op_res_f_res_46777 = x_46766;
                            }
                            defunc_1_op_res_46771 = defunc_1_op_res_f_res_46776;
                            defunc_1_op_res_46772 = defunc_1_op_res_f_res_46777;
                        }
                        
                        float defunc_1_op_res_46778 = x_46767 + x_46770;
                        
                        x_46765 = defunc_1_op_res_46771;
                        x_46766 = defunc_1_op_res_46772;
                        x_46767 = defunc_1_op_res_46778;
                    }
                    // write result of operation
                    {
                        ((__local
                          bool *) red_arr_mem_46746)[sext_i32_i64(local_tid_46742)] =
                            x_46765;
                        ((__local
                          int32_t *) red_arr_mem_46748)[sext_i32_i64(local_tid_46742)] =
                            x_46766;
                        ((__local
                          float *) red_arr_mem_46750)[sext_i32_i64(local_tid_46742)] =
                            x_46767;
                    }
                }
                skip_waves_46781 *= 2;
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            // first thread saves the result in accumulator
            {
                if (sext_i32_i64(local_tid_46742) == (int64_t) 0) {
                    x_acc_46759 = x_46765;
                    x_acc_46760 = x_46766;
                    x_acc_46761 = x_46767;
                }
            }
            // first thread keeps accumulator; others reset to neutral element
            {
                if (!(sext_i32_i64(local_tid_46742) == (int64_t) 0)) {
                    x_acc_46759 = 0;
                    x_acc_46760 = -1;
                    x_acc_46761 = 0.0F;
                }
            }
        }
        x_42252 = x_acc_46759;
        x_42253 = x_acc_46760;
        x_42254 = x_acc_46761;
        if (groups_per_segment_46728 == (int64_t) 1) {
            // first thread in group saves final result to memory
            {
                if (local_tid_46742 == 0) {
                    ((__global bool *) mem_45305)[gtid_42051] = x_acc_46759;
                    ((__global int32_t *) mem_45307)[gtid_42051] = x_acc_46760;
                    ((__global float *) mem_45309)[gtid_42051] = x_acc_46761;
                }
            }
        } else {
            int32_t old_counter_46782;
            
            // first thread in group saves group result to global memory
            {
                if (local_tid_46742 == 0) {
                    ((__global
                      bool *) group_res_arr_mem_46733)[sext_i32_i64(virt_group_id_46756) *
                                                       segred_group_sizze_42246] =
                        x_acc_46759;
                    ((__global
                      int32_t *) group_res_arr_mem_46735)[sext_i32_i64(virt_group_id_46756) *
                                                          segred_group_sizze_42246] =
                        x_acc_46760;
                    ((__global
                      float *) group_res_arr_mem_46737)[sext_i32_i64(virt_group_id_46756) *
                                                        segred_group_sizze_42246] =
                        x_acc_46761;
                    mem_fence_global();
                    old_counter_46782 =
                        atomic_add_i32_global(&((volatile __global
                                                 int *) mainzicounter_mem_46739)[sext_i32_i64(srem32(flat_segment_id_46757,
                                                                                                     10240))],
                                              (int) 1);
                    ((__local bool *) sync_arr_mem_46752)[(int64_t) 0] =
                        old_counter_46782 == groups_per_segment_46728 -
                        (int64_t) 1;
                }
            }
            barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
            
            bool is_last_group_46783;
            
            is_last_group_46783 = ((__local
                                    bool *) sync_arr_mem_46752)[(int64_t) 0];
            if (is_last_group_46783) {
                if (local_tid_46742 == 0) {
                    old_counter_46782 =
                        atomic_add_i32_global(&((volatile __global
                                                 int *) mainzicounter_mem_46739)[sext_i32_i64(srem32(flat_segment_id_46757,
                                                                                                     10240))],
                                              (int) ((int64_t) 0 -
                                                     groups_per_segment_46728));
                }
                // read in the per-group-results
                {
                    int64_t read_per_thread_46784 =
                            sdiv_up64(groups_per_segment_46728,
                                      segred_group_sizze_42246);
                    
                    x_42252 = 0;
                    x_42253 = -1;
                    x_42254 = 0.0F;
                    for (int64_t i_46785 = 0; i_46785 < read_per_thread_46784;
                         i_46785++) {
                        int64_t group_res_id_46786 =
                                sext_i32_i64(local_tid_46742) *
                                read_per_thread_46784 + i_46785;
                        int64_t index_of_group_res_46787 =
                                sext_i32_i64(flat_segment_id_46757) *
                                groups_per_segment_46728 + group_res_id_46786;
                        
                        if (slt64(group_res_id_46786,
                                  groups_per_segment_46728)) {
                            x_42255 = ((__global
                                        bool *) group_res_arr_mem_46733)[index_of_group_res_46787 *
                                                                         segred_group_sizze_42246];
                            x_42256 = ((__global
                                        int32_t *) group_res_arr_mem_46735)[index_of_group_res_46787 *
                                                                            segred_group_sizze_42246];
                            x_42257 = ((__global
                                        float *) group_res_arr_mem_46737)[index_of_group_res_46787 *
                                                                          segred_group_sizze_42246];
                            
                            bool defunc_1_op_res_42258;
                            int32_t defunc_1_op_res_42259;
                            
                            if (x_42252) {
                                defunc_1_op_res_42258 = x_42252;
                                defunc_1_op_res_42259 = x_42253;
                            } else {
                                bool x_42260 = x_42255 && x_42255;
                                bool x_42261 = !x_42255;
                                bool y_42262 = x_42252 && x_42261;
                                bool defunc_1_op_res_f_res_42263 = x_42260 ||
                                     y_42262;
                                int32_t defunc_1_op_res_f_res_42264;
                                
                                if (x_42255) {
                                    defunc_1_op_res_f_res_42264 = x_42256;
                                } else {
                                    defunc_1_op_res_f_res_42264 = x_42253;
                                }
                                defunc_1_op_res_42258 =
                                    defunc_1_op_res_f_res_42263;
                                defunc_1_op_res_42259 =
                                    defunc_1_op_res_f_res_42264;
                            }
                            
                            float defunc_1_op_res_42265 = x_42254 + x_42257;
                            
                            x_42252 = defunc_1_op_res_42258;
                            x_42253 = defunc_1_op_res_42259;
                            x_42254 = defunc_1_op_res_42265;
                        }
                    }
                }
                ((__local
                  bool *) red_arr_mem_46746)[sext_i32_i64(local_tid_46742)] =
                    x_42252;
                ((__local
                  int32_t *) red_arr_mem_46748)[sext_i32_i64(local_tid_46742)] =
                    x_42253;
                ((__local
                  float *) red_arr_mem_46750)[sext_i32_i64(local_tid_46742)] =
                    x_42254;
                barrier(CLK_LOCAL_MEM_FENCE);
                // reduce the per-group results
                {
                    int32_t offset_46788;
                    int32_t skip_waves_46789;
                    
                    skip_waves_46789 = 1;
                    
                    bool x_46765;
                    int32_t x_46766;
                    float x_46767;
                    bool x_46768;
                    int32_t x_46769;
                    float x_46770;
                    
                    offset_46788 = 0;
                    // participating threads read initial accumulator
                    {
                        if (slt32(local_tid_46742,
                                  sext_i64_i32(segred_group_sizze_42246))) {
                            x_46765 = ((__local
                                        bool *) red_arr_mem_46746)[sext_i32_i64(local_tid_46742 +
                                                                   offset_46788)];
                            x_46766 = ((__local
                                        int32_t *) red_arr_mem_46748)[sext_i32_i64(local_tid_46742 +
                                                                      offset_46788)];
                            x_46767 = ((__local
                                        float *) red_arr_mem_46750)[sext_i32_i64(local_tid_46742 +
                                                                    offset_46788)];
                        }
                    }
                    offset_46788 = 1;
                    while (slt32(offset_46788, wave_sizze_46744)) {
                        if (slt32(local_tid_46742 + offset_46788,
                                  sext_i64_i32(segred_group_sizze_42246)) &&
                            ((local_tid_46742 - squot32(local_tid_46742,
                                                        wave_sizze_46744) *
                              wave_sizze_46744) & (2 * offset_46788 - 1)) ==
                            0) {
                            // read array element
                            {
                                x_46768 = ((volatile __local
                                            bool *) red_arr_mem_46746)[sext_i32_i64(local_tid_46742 +
                                                                       offset_46788)];
                                x_46769 = ((volatile __local
                                            int32_t *) red_arr_mem_46748)[sext_i32_i64(local_tid_46742 +
                                                                          offset_46788)];
                                x_46770 = ((volatile __local
                                            float *) red_arr_mem_46750)[sext_i32_i64(local_tid_46742 +
                                                                        offset_46788)];
                            }
                            // apply reduction operation
                            {
                                bool defunc_1_op_res_46771;
                                int32_t defunc_1_op_res_46772;
                                
                                if (x_46765) {
                                    defunc_1_op_res_46771 = x_46765;
                                    defunc_1_op_res_46772 = x_46766;
                                } else {
                                    bool x_46773 = x_46768 && x_46768;
                                    bool x_46774 = !x_46768;
                                    bool y_46775 = x_46765 && x_46774;
                                    bool defunc_1_op_res_f_res_46776 =
                                         x_46773 || y_46775;
                                    int32_t defunc_1_op_res_f_res_46777;
                                    
                                    if (x_46768) {
                                        defunc_1_op_res_f_res_46777 = x_46769;
                                    } else {
                                        defunc_1_op_res_f_res_46777 = x_46766;
                                    }
                                    defunc_1_op_res_46771 =
                                        defunc_1_op_res_f_res_46776;
                                    defunc_1_op_res_46772 =
                                        defunc_1_op_res_f_res_46777;
                                }
                                
                                float defunc_1_op_res_46778 = x_46767 + x_46770;
                                
                                x_46765 = defunc_1_op_res_46771;
                                x_46766 = defunc_1_op_res_46772;
                                x_46767 = defunc_1_op_res_46778;
                            }
                            // write result of operation
                            {
                                ((volatile __local
                                  bool *) red_arr_mem_46746)[sext_i32_i64(local_tid_46742)] =
                                    x_46765;
                                ((volatile __local
                                  int32_t *) red_arr_mem_46748)[sext_i32_i64(local_tid_46742)] =
                                    x_46766;
                                ((volatile __local
                                  float *) red_arr_mem_46750)[sext_i32_i64(local_tid_46742)] =
                                    x_46767;
                            }
                        }
                        offset_46788 *= 2;
                    }
                    while (slt32(skip_waves_46789,
                                 squot32(sext_i64_i32(segred_group_sizze_42246) +
                                         wave_sizze_46744 - 1,
                                         wave_sizze_46744))) {
                        barrier(CLK_LOCAL_MEM_FENCE);
                        offset_46788 = skip_waves_46789 * wave_sizze_46744;
                        if (slt32(local_tid_46742 + offset_46788,
                                  sext_i64_i32(segred_group_sizze_42246)) &&
                            ((local_tid_46742 - squot32(local_tid_46742,
                                                        wave_sizze_46744) *
                              wave_sizze_46744) == 0 &&
                             (squot32(local_tid_46742, wave_sizze_46744) & (2 *
                                                                            skip_waves_46789 -
                                                                            1)) ==
                             0)) {
                            // read array element
                            {
                                x_46768 = ((__local
                                            bool *) red_arr_mem_46746)[sext_i32_i64(local_tid_46742 +
                                                                       offset_46788)];
                                x_46769 = ((__local
                                            int32_t *) red_arr_mem_46748)[sext_i32_i64(local_tid_46742 +
                                                                          offset_46788)];
                                x_46770 = ((__local
                                            float *) red_arr_mem_46750)[sext_i32_i64(local_tid_46742 +
                                                                        offset_46788)];
                            }
                            // apply reduction operation
                            {
                                bool defunc_1_op_res_46771;
                                int32_t defunc_1_op_res_46772;
                                
                                if (x_46765) {
                                    defunc_1_op_res_46771 = x_46765;
                                    defunc_1_op_res_46772 = x_46766;
                                } else {
                                    bool x_46773 = x_46768 && x_46768;
                                    bool x_46774 = !x_46768;
                                    bool y_46775 = x_46765 && x_46774;
                                    bool defunc_1_op_res_f_res_46776 =
                                         x_46773 || y_46775;
                                    int32_t defunc_1_op_res_f_res_46777;
                                    
                                    if (x_46768) {
                                        defunc_1_op_res_f_res_46777 = x_46769;
                                    } else {
                                        defunc_1_op_res_f_res_46777 = x_46766;
                                    }
                                    defunc_1_op_res_46771 =
                                        defunc_1_op_res_f_res_46776;
                                    defunc_1_op_res_46772 =
                                        defunc_1_op_res_f_res_46777;
                                }
                                
                                float defunc_1_op_res_46778 = x_46767 + x_46770;
                                
                                x_46765 = defunc_1_op_res_46771;
                                x_46766 = defunc_1_op_res_46772;
                                x_46767 = defunc_1_op_res_46778;
                            }
                            // write result of operation
                            {
                                ((__local
                                  bool *) red_arr_mem_46746)[sext_i32_i64(local_tid_46742)] =
                                    x_46765;
                                ((__local
                                  int32_t *) red_arr_mem_46748)[sext_i32_i64(local_tid_46742)] =
                                    x_46766;
                                ((__local
                                  float *) red_arr_mem_46750)[sext_i32_i64(local_tid_46742)] =
                                    x_46767;
                            }
                        }
                        skip_waves_46789 *= 2;
                    }
                    // and back to memory with the final result
                    {
                        if (local_tid_46742 == 0) {
                            ((__global bool *) mem_45305)[gtid_42051] = x_46765;
                            ((__global int32_t *) mem_45307)[gtid_42051] =
                                x_46766;
                            ((__global float *) mem_45309)[gtid_42051] =
                                x_46767;
                        }
                    }
                }
            }
        }
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }
    
  error_1:
    return;
    #undef segred_group_sizze_42246
}
// Kernel mainzisegred_nonseg_41409: non-segmented reduction using smax32.
//
// Folds the int32 values of defunc_3_map_res_mem_45244 at indices below
// m_29166 into one int32 and stores it in mem_45249[0].  The accumulator is
// initialised to 0 and combined with smax32 throughout.
// NOTE(review): assuming smax32 is a signed 32-bit maximum, the stored result
// can never be below 0 because of that initial value - confirm this matches
// the neutral element intended by the generating program.
//
// Steps, in the order they appear in the body:
//   1. Every thread folds its own strided elements (stride
//      num_threads_46497, starting at its global id) into x_acc_46507.
//   2. The threads of a group combine their partial values through the local
//      array red_arr_mem_46505.
//   3. Local thread 0 stores the group value in group_res_arr_mem_46495 and
//      adds 1 to the counter at mainzicounter_mem_46493[0].
//   4. The group whose increment yielded num_groups_41404 - 1 subtracts
//      num_groups_41404 from the counter again, folds all per-group values
//      and writes the final value.
//
// Parameters:
//   global_failure: the kernel returns at once when *global_failure >= 0.
//   red_arr_mem_46505_backing_aligned_0: local scratch, used as an int32
//     array indexed by local thread id.
//   sync_arr_mem_46503_backing_aligned_1: local scratch; element 0 is used
//     as a bool flag telling the group whether it is the last one.
//   m_29166: exclusive upper bound of the input indices that are read.
//   num_groups_41404: number of per-group results folded in step 4, and
//     (minus one) the counter value that identifies the last group.
//   num_threads_46497: stride between the elements handled by one thread.
//   defunc_3_map_res_mem_45244: input, read as __global int32_t.
//   mem_45249: output, element 0 is written as __global int32_t.
//   mainzicounter_mem_46493: element 0 is used as an int counter, updated
//     through atomic_add_i32_global.
//     NOTE(review): presumably it must be 0 when the kernel is launched -
//     confirm against the host code.
//   group_res_arr_mem_46495: per-group results as int32, one slot every
//     segred_group_sizze_41402 elements.
__kernel void mainzisegred_nonseg_41409(__global int *global_failure,
                                        __local volatile
                                        int64_t *red_arr_mem_46505_backing_aligned_0,
                                        __local volatile
                                        int64_t *sync_arr_mem_46503_backing_aligned_1,
                                        int64_t m_29166,
                                        int64_t num_groups_41404,
                                        int64_t num_threads_46497, __global
                                        unsigned char *defunc_3_map_res_mem_45244,
                                        __global unsigned char *mem_45249,
                                        __global
                                        unsigned char *mainzicounter_mem_46493,
                                        __global
                                        unsigned char *group_res_arr_mem_46495)
{
    // Alias for the group size; mainzisegred_group_sizze_41401 is not defined
    // inside this kernel.
    #define segred_group_sizze_41402 (mainzisegred_group_sizze_41401)
    
    // The three block_dim constants are not referenced again in this kernel.
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    // View the int64_t-typed local backing buffers as byte pointers.
    __local volatile char *restrict red_arr_mem_46505_backing_1 =
                          (__local volatile
                           char *) red_arr_mem_46505_backing_aligned_0;
    __local volatile char *restrict sync_arr_mem_46503_backing_0 =
                          (__local volatile
                           char *) sync_arr_mem_46503_backing_aligned_1;
    
    // Do nothing when *global_failure is non-negative.
    // NOTE(review): presumably a non-negative value marks an earlier failure.
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_46498;
    int32_t local_tid_46499;
    int64_t group_sizze_46502;
    int32_t wave_sizze_46501;
    int32_t group_tid_46500;
    
    // Thread and group coordinates along dimension 0.  group_sizze_46502 is
    // assigned here but not read again in this kernel.
    global_tid_46498 = get_global_id(0);
    local_tid_46499 = get_local_id(0);
    group_sizze_46502 = get_local_size(0);
    wave_sizze_46501 = LOCKSTEP_WIDTH;
    group_tid_46500 = get_group_id(0);
    
    int32_t phys_tid_41409;
    
    phys_tid_41409 = global_tid_46498;
    
    // Aliases of the local buffers without the volatile qualifier; some
    // accesses below re-add volatile through a cast.
    __local char *sync_arr_mem_46503;
    
    sync_arr_mem_46503 = (__local char *) sync_arr_mem_46503_backing_0;
    
    __local char *red_arr_mem_46505;
    
    red_arr_mem_46505 = (__local char *) red_arr_mem_46505_backing_1;
    
    // dummy_41407 is assigned but not read again in this kernel.
    int64_t dummy_41407;
    
    dummy_41407 = (int64_t) 0;
    
    // Index of the input element currently being processed by this thread.
    int64_t gtid_41408;
    
    gtid_41408 = (int64_t) 0;
    
    int32_t x_acc_46507;
    int64_t chunk_sizze_46508;
    
    // Number of elements this thread folds: the smaller of
    //   sdiv_up64(m, group size * num_groups)   and
    //   sdiv_up64(m - phys_tid, num_threads).
    // Assuming sdiv_up64 is a rounding-up division, the second bound keeps
    // gtid_41408 below m_29166 in the loop that follows.  The product
    // group size * num_groups is passed through sext_i64_i32 first, i.e. it
    // is narrowed to 32 bits before use.
    chunk_sizze_46508 = smin64(sdiv_up64(m_29166,
                                         sext_i32_i64(sext_i64_i32(segred_group_sizze_41402 *
                                         num_groups_41404))),
                               sdiv_up64(m_29166 - sext_i32_i64(phys_tid_41409),
                                         num_threads_46497));
    
    // Operands of the reduction operator: x_29564 is the running value,
    // x_29565 the newly loaded one.
    int32_t x_29564;
    int32_t x_29565;
    
    // neutral-initialise the accumulators
    {
        x_acc_46507 = 0;
    }
    // Step 1: per-thread fold over elements phys_tid, phys_tid + num_threads,
    // phys_tid + 2 * num_threads, ...
    for (int64_t i_46512 = 0; i_46512 < chunk_sizze_46508; i_46512++) {
        gtid_41408 = sext_i32_i64(phys_tid_41409) + num_threads_46497 * i_46512;
        // apply map function
        {
            // The map part only loads the input element.
            int32_t x_29567 = ((__global
                                int32_t *) defunc_3_map_res_mem_45244)[gtid_41408];
            
            // save map-out results
            { }
            // load accumulator
            {
                x_29564 = x_acc_46507;
            }
            // load new values
            {
                x_29565 = x_29567;
            }
            // apply reduction operator
            {
                int32_t defunc_1_op_res_29566 = smax32(x_29564, x_29565);
                
                // store in accumulator
                {
                    x_acc_46507 = defunc_1_op_res_29566;
                }
            }
        }
    }
    // to reduce current chunk, first store our result in memory
    {
        x_29564 = x_acc_46507;
        ((__local int32_t *) red_arr_mem_46505)[sext_i32_i64(local_tid_46499)] =
            x_29564;
    }
    // Every thread of the group has stored its partial value past this point.
    barrier(CLK_LOCAL_MEM_FENCE);
    
    // Step 2: combine the partial values of the group in local memory.
    int32_t offset_46513;
    int32_t skip_waves_46514;
    
    skip_waves_46514 = 1;
    
    // x_46509 is this thread's running value, x_46510 the value read from
    // another thread's slot.
    int32_t x_46509;
    int32_t x_46510;
    
    offset_46513 = 0;
    // participating threads read initial accumulator
    {
        if (slt32(local_tid_46499, sext_i64_i32(segred_group_sizze_41402))) {
            x_46509 = ((__local
                        int32_t *) red_arr_mem_46505)[sext_i32_i64(local_tid_46499 +
                                                      offset_46513)];
        }
    }
    offset_46513 = 1;
    // Combine within one wave, doubling the offset each round.  The
    // expression local_tid - squot32(local_tid, wave) * wave is the position
    // of the thread inside its wave (assuming squot32 is a truncating signed
    // division); a thread takes part when that position has its low bits
    // below 2 * offset all zero and the partner slot is inside the group.
    // There is no barrier in this loop and the accesses are cast to volatile.
    // NOTE(review): presumably this relies on the LOCKSTEP_WIDTH threads of a
    // wave executing together - confirm for the target device.
    while (slt32(offset_46513, wave_sizze_46501)) {
        if (slt32(local_tid_46499 + offset_46513,
                  sext_i64_i32(segred_group_sizze_41402)) && ((local_tid_46499 -
                                                               squot32(local_tid_46499,
                                                                       wave_sizze_46501) *
                                                               wave_sizze_46501) &
                                                              (2 *
                                                               offset_46513 -
                                                               1)) == 0) {
            // read array element
            {
                x_46510 = ((volatile __local
                            int32_t *) red_arr_mem_46505)[sext_i32_i64(local_tid_46499 +
                                                          offset_46513)];
            }
            // apply reduction operation
            {
                int32_t defunc_1_op_res_46511 = smax32(x_46509, x_46510);
                
                x_46509 = defunc_1_op_res_46511;
            }
            // write result of operation
            {
                ((volatile __local
                  int32_t *) red_arr_mem_46505)[sext_i32_i64(local_tid_46499)] =
                    x_46509;
            }
        }
        offset_46513 *= 2;
    }
    // Combine across waves.  The loop runs while skip_waves is below the
    // number of waves in the group (group size divided by wave size, rounded
    // up).  Only the first thread of a wave takes part, and only when its
    // wave index has the low bits below 2 * skip_waves all zero.  Each round
    // starts with a barrier.
    while (slt32(skip_waves_46514,
                 squot32(sext_i64_i32(segred_group_sizze_41402) +
                         wave_sizze_46501 - 1, wave_sizze_46501))) {
        barrier(CLK_LOCAL_MEM_FENCE);
        offset_46513 = skip_waves_46514 * wave_sizze_46501;
        if (slt32(local_tid_46499 + offset_46513,
                  sext_i64_i32(segred_group_sizze_41402)) && ((local_tid_46499 -
                                                               squot32(local_tid_46499,
                                                                       wave_sizze_46501) *
                                                               wave_sizze_46501) ==
                                                              0 &&
                                                              (squot32(local_tid_46499,
                                                                       wave_sizze_46501) &
                                                               (2 *
                                                                skip_waves_46514 -
                                                                1)) == 0)) {
            // read array element
            {
                x_46510 = ((__local
                            int32_t *) red_arr_mem_46505)[sext_i32_i64(local_tid_46499 +
                                                          offset_46513)];
            }
            // apply reduction operation
            {
                int32_t defunc_1_op_res_46511 = smax32(x_46509, x_46510);
                
                x_46509 = defunc_1_op_res_46511;
            }
            // write result of operation
            {
                ((__local
                  int32_t *) red_arr_mem_46505)[sext_i32_i64(local_tid_46499)] =
                    x_46509;
            }
        }
        skip_waves_46514 *= 2;
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // first thread saves the result in accumulator
    {
        if (sext_i32_i64(local_tid_46499) == (int64_t) 0) {
            x_acc_46507 = x_46509;
        }
    }
    
    // Step 3: publish the group value and bump the global counter.
    // old_counter_46515 is only assigned by local thread 0.
    int32_t old_counter_46515;
    
    // first thread in group saves group result to global memory
    {
        if (local_tid_46499 == 0) {
            // Slot of this group: group id times the group size.
            ((__global
              int32_t *) group_res_arr_mem_46495)[sext_i32_i64(group_tid_46500) *
                                                  segred_group_sizze_41402] =
                x_acc_46507;
            // NOTE(review): presumably orders the store above before the
            // counter update - mem_fence_global is defined outside this view.
            mem_fence_global();
            // NOTE(review): assumes atomic_add_i32_global returns the value
            // held before the addition.
            old_counter_46515 = atomic_add_i32_global(&((volatile __global
                                                         int *) mainzicounter_mem_46493)[(int64_t) 0],
                                                      (int) 1);
            // Flag is true when this group saw num_groups - 1 in the counter.
            ((__local bool *) sync_arr_mem_46503)[(int64_t) 0] =
                old_counter_46515 == num_groups_41404 - (int64_t) 1;
        }
    }
    // After this barrier all threads of the group read the flag that local
    // thread 0 wrote.
    barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    
    bool is_last_group_46516;
    
    is_last_group_46516 = ((__local bool *) sync_arr_mem_46503)[(int64_t) 0];
    // Step 4: only the group flagged as last continues.  The flag is the same
    // for every thread of the group, so the barriers inside this branch are
    // reached by all of them or by none.
    if (is_last_group_46516) {
        if (local_tid_46499 == 0) {
            // Subtract num_groups from the counter, undoing the increments
            // made in step 3.
            old_counter_46515 = atomic_add_i32_global(&((volatile __global
                                                         int *) mainzicounter_mem_46493)[(int64_t) 0],
                                                      (int) ((int64_t) 0 -
                                                             num_groups_41404));
        }
        // read in the per-group-results
        {
            // Each thread folds a consecutive run of read_per_thread group
            // results, starting at local_tid * read_per_thread.
            int64_t read_per_thread_46517 = sdiv_up64(num_groups_41404,
                                                      segred_group_sizze_41402);
            
            x_29564 = 0;
            for (int64_t i_46518 = 0; i_46518 < read_per_thread_46517;
                 i_46518++) {
                int64_t group_res_id_46519 = sext_i32_i64(local_tid_46499) *
                        read_per_thread_46517 + i_46518;
                int64_t index_of_group_res_46520 = group_res_id_46519;
                
                // Skip ids at or beyond num_groups.
                if (slt64(group_res_id_46519, num_groups_41404)) {
                    // Same slot layout as the store in step 3.
                    x_29565 = ((__global
                                int32_t *) group_res_arr_mem_46495)[index_of_group_res_46520 *
                                                                    segred_group_sizze_41402];
                    
                    int32_t defunc_1_op_res_29566;
                    
                    defunc_1_op_res_29566 = smax32(x_29564, x_29565);
                    x_29564 = defunc_1_op_res_29566;
                }
            }
        }
        ((__local int32_t *) red_arr_mem_46505)[sext_i32_i64(local_tid_46499)] =
            x_29564;
        barrier(CLK_LOCAL_MEM_FENCE);
        // reduce the per-group results
        {
            // Same two-loop scheme as step 2 (first within a wave, then
            // across waves), applied to the values just stored.
            int32_t offset_46521;
            int32_t skip_waves_46522;
            
            skip_waves_46522 = 1;
            
            // These declarations shadow the x_46509 and x_46510 of the
            // enclosing function scope.
            int32_t x_46509;
            int32_t x_46510;
            
            offset_46521 = 0;
            // participating threads read initial accumulator
            {
                if (slt32(local_tid_46499,
                          sext_i64_i32(segred_group_sizze_41402))) {
                    x_46509 = ((__local
                                int32_t *) red_arr_mem_46505)[sext_i32_i64(local_tid_46499 +
                                                              offset_46521)];
                }
            }
            offset_46521 = 1;
            // Within one wave: no barrier, volatile accesses.
            while (slt32(offset_46521, wave_sizze_46501)) {
                if (slt32(local_tid_46499 + offset_46521,
                          sext_i64_i32(segred_group_sizze_41402)) &&
                    ((local_tid_46499 - squot32(local_tid_46499,
                                                wave_sizze_46501) *
                      wave_sizze_46501) & (2 * offset_46521 - 1)) == 0) {
                    // read array element
                    {
                        x_46510 = ((volatile __local
                                    int32_t *) red_arr_mem_46505)[sext_i32_i64(local_tid_46499 +
                                                                  offset_46521)];
                    }
                    // apply reduction operation
                    {
                        int32_t defunc_1_op_res_46511 = smax32(x_46509,
                                                               x_46510);
                        
                        x_46509 = defunc_1_op_res_46511;
                    }
                    // write result of operation
                    {
                        ((volatile __local
                          int32_t *) red_arr_mem_46505)[sext_i32_i64(local_tid_46499)] =
                            x_46509;
                    }
                }
                offset_46521 *= 2;
            }
            // Across waves: a barrier opens every round.
            while (slt32(skip_waves_46522,
                         squot32(sext_i64_i32(segred_group_sizze_41402) +
                                 wave_sizze_46501 - 1, wave_sizze_46501))) {
                barrier(CLK_LOCAL_MEM_FENCE);
                offset_46521 = skip_waves_46522 * wave_sizze_46501;
                if (slt32(local_tid_46499 + offset_46521,
                          sext_i64_i32(segred_group_sizze_41402)) &&
                    ((local_tid_46499 - squot32(local_tid_46499,
                                                wave_sizze_46501) *
                      wave_sizze_46501) == 0 && (squot32(local_tid_46499,
                                                         wave_sizze_46501) &
                                                 (2 * skip_waves_46522 - 1)) ==
                     0)) {
                    // read array element
                    {
                        x_46510 = ((__local
                                    int32_t *) red_arr_mem_46505)[sext_i32_i64(local_tid_46499 +
                                                                  offset_46521)];
                    }
                    // apply reduction operation
                    {
                        int32_t defunc_1_op_res_46511 = smax32(x_46509,
                                                               x_46510);
                        
                        x_46509 = defunc_1_op_res_46511;
                    }
                    // write result of operation
                    {
                        ((__local
                          int32_t *) red_arr_mem_46505)[sext_i32_i64(local_tid_46499)] =
                            x_46509;
                    }
                }
                skip_waves_46522 *= 2;
            }
            // and back to memory with the final result
            {
                if (local_tid_46499 == 0) {
                    ((__global int32_t *) mem_45249)[(int64_t) 0] = x_46509;
                }
            }
        }
    }
    
    // No goto in this kernel targets the label below.
  error_1:
    return;
    #undef segred_group_sizze_41402
}
__kernel void mainzisegred_small_39115(__global int *global_failure,
                                       __local volatile
                                       int64_t *red_arr_mem_45763_backing_aligned_0,
                                       int64_t N_29165, int64_t m_29166,
                                       int64_t i32_res_29175,
                                       int64_t i32_res_29181,
                                       int64_t num_groups_39254,
                                       int64_t segment_sizze_nonzzero_45756,
                                       __global unsigned char *images_mem_44381,
                                       __global
                                       unsigned char *binop_p_mem_44390,
                                       __global unsigned char *mem_44531,
                                       __global unsigned char *mem_44536)
{
    #define segred_group_sizze_39253 (mainzisegred_group_sizze_39109)
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict red_arr_mem_45763_backing_0 =
                          (__local volatile
                           char *) red_arr_mem_45763_backing_aligned_0;
    
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_45758;
    int32_t local_tid_45759;
    int64_t group_sizze_45762;
    int32_t wave_sizze_45761;
    int32_t group_tid_45760;
    
    global_tid_45758 = get_global_id(0);
    local_tid_45759 = get_local_id(0);
    group_sizze_45762 = get_local_size(0);
    wave_sizze_45761 = LOCKSTEP_WIDTH;
    group_tid_45760 = get_group_id(0);
    
    int32_t phys_tid_39115;
    
    phys_tid_39115 = global_tid_45758;
    
    __local char *red_arr_mem_45763;
    
    red_arr_mem_45763 = (__local char *) red_arr_mem_45763_backing_0;
    
    int32_t phys_group_id_45765;
    
    phys_group_id_45765 = get_group_id(0);
    for (int32_t i_45766 = 0; i_45766 <
         sdiv_up32(sext_i64_i32(sdiv_up64(m_29166 * i32_res_29181 *
                                          i32_res_29181,
                                          squot64(segred_group_sizze_39253,
                                                  segment_sizze_nonzzero_45756))) -
                   phys_group_id_45765, sext_i64_i32(num_groups_39254));
         i_45766++) {
        int32_t virt_group_id_45767 = phys_group_id_45765 + i_45766 *
                sext_i64_i32(num_groups_39254);
        int64_t gtid_39102 = squot64(squot64(sext_i32_i64(local_tid_45759),
                                             segment_sizze_nonzzero_45756) +
                                     sext_i32_i64(virt_group_id_45767) *
                                     squot64(segred_group_sizze_39253,
                                             segment_sizze_nonzzero_45756),
                                     i32_res_29181 * i32_res_29181);
        int64_t gtid_39103 = squot64(squot64(sext_i32_i64(local_tid_45759),
                                             segment_sizze_nonzzero_45756) +
                                     sext_i32_i64(virt_group_id_45767) *
                                     squot64(segred_group_sizze_39253,
                                             segment_sizze_nonzzero_45756) -
                                     squot64(squot64(sext_i32_i64(local_tid_45759),
                                                     segment_sizze_nonzzero_45756) +
                                             sext_i32_i64(virt_group_id_45767) *
                                             squot64(segred_group_sizze_39253,
                                                     segment_sizze_nonzzero_45756),
                                             i32_res_29181 * i32_res_29181) *
                                     (i32_res_29181 * i32_res_29181),
                                     i32_res_29181);
        int64_t gtid_39104 = squot64(sext_i32_i64(local_tid_45759),
                                     segment_sizze_nonzzero_45756) +
                sext_i32_i64(virt_group_id_45767) *
                squot64(segred_group_sizze_39253,
                        segment_sizze_nonzzero_45756) -
                squot64(squot64(sext_i32_i64(local_tid_45759),
                                segment_sizze_nonzzero_45756) +
                        sext_i32_i64(virt_group_id_45767) *
                        squot64(segred_group_sizze_39253,
                                segment_sizze_nonzzero_45756), i32_res_29181 *
                        i32_res_29181) * (i32_res_29181 * i32_res_29181) -
                squot64(squot64(sext_i32_i64(local_tid_45759),
                                segment_sizze_nonzzero_45756) +
                        sext_i32_i64(virt_group_id_45767) *
                        squot64(segred_group_sizze_39253,
                                segment_sizze_nonzzero_45756) -
                        squot64(squot64(sext_i32_i64(local_tid_45759),
                                        segment_sizze_nonzzero_45756) +
                                sext_i32_i64(virt_group_id_45767) *
                                squot64(segred_group_sizze_39253,
                                        segment_sizze_nonzzero_45756),
                                i32_res_29181 * i32_res_29181) *
                        (i32_res_29181 * i32_res_29181), i32_res_29181) *
                i32_res_29181;
        int64_t gtid_39114 = srem64(sext_i32_i64(local_tid_45759),
                                    i32_res_29175);
        
        // apply map function if in bounds
        {
            if (slt64((int64_t) 0, i32_res_29175) && (((slt64(gtid_39102,
                                                              m_29166) &&
                                                        slt64(gtid_39103,
                                                              i32_res_29181)) &&
                                                       slt64(gtid_39104,
                                                             i32_res_29181)) &&
                                                      slt64(sext_i32_i64(local_tid_45759),
                                                            i32_res_29175 *
                                                            squot64(segred_group_sizze_39253,
                                                                    segment_sizze_nonzzero_45756)))) {
                float x_39263 = ((__global
                                  float *) images_mem_44381)[gtid_39102 *
                                                             N_29165 +
                                                             gtid_39114];
                float x_39264 = ((__global
                                  float *) binop_p_mem_44390)[gtid_39103 *
                                                              N_29165 +
                                                              gtid_39114];
                float x_39265 = ((__global float *) mem_44531)[gtid_39104 *
                                                               N_29165 +
                                                               gtid_39114];
                float x_39266 = x_39264 * x_39265;
                bool isnan_res_39267;
                
                isnan_res_39267 = futrts_isnan32(x_39263);
                
                float y_39268;
                
                if (isnan_res_39267) {
                    y_39268 = 0.0F;
                } else {
                    y_39268 = 1.0F;
                }
                
                float defunc_2_f_res_39269 = x_39266 * y_39268;
                
                // save map-out results
                { }
                // save results to be reduced
                {
                    ((__local
                      float *) red_arr_mem_45763)[sext_i32_i64(local_tid_45759)] =
                        defunc_2_f_res_39269;
                }
            } else {
                ((__local
                  float *) red_arr_mem_45763)[sext_i32_i64(local_tid_45759)] =
                    0.0F;
            }
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        if (slt64((int64_t) 0, i32_res_29175)) {
            // perform segmented scan to imitate reduction
            {
                float x_39257;
                float x_39258;
                float x_45768;
                float x_45769;
                bool ltid_in_bounds_45771;
                
                ltid_in_bounds_45771 = slt64(sext_i32_i64(local_tid_45759),
                                             i32_res_29175 *
                                             squot64(segred_group_sizze_39253,
                                                     segment_sizze_nonzzero_45756));
                
                int32_t skip_threads_45772;
                
                // read input for in-block scan
                {
                    if (ltid_in_bounds_45771) {
                        x_39258 = ((volatile __local
                                    float *) red_arr_mem_45763)[sext_i32_i64(local_tid_45759)];
                        if ((local_tid_45759 - squot32(local_tid_45759, 32) *
                             32) == 0) {
                            x_39257 = x_39258;
                        }
                    }
                }
                // in-block scan (hopefully no barriers needed)
                {
                    skip_threads_45772 = 1;
                    while (slt32(skip_threads_45772, 32)) {
                        if (sle32(skip_threads_45772, local_tid_45759 -
                                  squot32(local_tid_45759, 32) * 32) &&
                            ltid_in_bounds_45771) {
                            // read operands
                            {
                                x_39257 = ((volatile __local
                                            float *) red_arr_mem_45763)[sext_i32_i64(local_tid_45759) -
                                                                        sext_i32_i64(skip_threads_45772)];
                            }
                            // perform operation
                            {
                                bool inactive_45773 =
                                     slt64(srem64(sext_i32_i64(local_tid_45759),
                                                  i32_res_29175),
                                           sext_i32_i64(local_tid_45759) -
                                           sext_i32_i64(local_tid_45759 -
                                           skip_threads_45772));
                                
                                if (inactive_45773) {
                                    x_39257 = x_39258;
                                }
                                if (!inactive_45773) {
                                    float defunc_1_op_res_39259 = x_39257 +
                                          x_39258;
                                    
                                    x_39257 = defunc_1_op_res_39259;
                                }
                            }
                        }
                        if (sle32(wave_sizze_45761, skip_threads_45772)) {
                            barrier(CLK_LOCAL_MEM_FENCE);
                        }
                        if (sle32(skip_threads_45772, local_tid_45759 -
                                  squot32(local_tid_45759, 32) * 32) &&
                            ltid_in_bounds_45771) {
                            // write result
                            {
                                ((volatile __local
                                  float *) red_arr_mem_45763)[sext_i32_i64(local_tid_45759)] =
                                    x_39257;
                                x_39258 = x_39257;
                            }
                        }
                        if (sle32(wave_sizze_45761, skip_threads_45772)) {
                            barrier(CLK_LOCAL_MEM_FENCE);
                        }
                        skip_threads_45772 *= 2;
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // last thread of block 'i' writes its result to offset 'i'
                {
                    if ((local_tid_45759 - squot32(local_tid_45759, 32) * 32) ==
                        31 && ltid_in_bounds_45771) {
                        ((volatile __local
                          float *) red_arr_mem_45763)[sext_i32_i64(squot32(local_tid_45759,
                                                                           32))] =
                            x_39257;
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // scan the first block, after which offset 'i' contains carry-in for block 'i+1'
                {
                    int32_t skip_threads_45774;
                    
                    // read input for in-block scan
                    {
                        if (squot32(local_tid_45759, 32) == 0 &&
                            ltid_in_bounds_45771) {
                            x_45769 = ((volatile __local
                                        float *) red_arr_mem_45763)[sext_i32_i64(local_tid_45759)];
                            if ((local_tid_45759 - squot32(local_tid_45759,
                                                           32) * 32) == 0) {
                                x_45768 = x_45769;
                            }
                        }
                    }
                    // in-block scan (hopefully no barriers needed)
                    {
                        skip_threads_45774 = 1;
                        while (slt32(skip_threads_45774, 32)) {
                            if (sle32(skip_threads_45774, local_tid_45759 -
                                      squot32(local_tid_45759, 32) * 32) &&
                                (squot32(local_tid_45759, 32) == 0 &&
                                 ltid_in_bounds_45771)) {
                                // read operands
                                {
                                    x_45768 = ((volatile __local
                                                float *) red_arr_mem_45763)[sext_i32_i64(local_tid_45759) -
                                                                            sext_i32_i64(skip_threads_45774)];
                                }
                                // perform operation
                                {
                                    bool inactive_45775 =
                                         slt64(srem64(sext_i32_i64(local_tid_45759 *
                                                      32 + 32 - 1),
                                                      i32_res_29175),
                                               sext_i32_i64(local_tid_45759 *
                                               32 + 32 - 1) -
                                               sext_i32_i64((local_tid_45759 -
                                                             skip_threads_45774) *
                                               32 + 32 - 1));
                                    
                                    if (inactive_45775) {
                                        x_45768 = x_45769;
                                    }
                                    if (!inactive_45775) {
                                        float defunc_1_op_res_45770 = x_45768 +
                                              x_45769;
                                        
                                        x_45768 = defunc_1_op_res_45770;
                                    }
                                }
                            }
                            if (sle32(wave_sizze_45761, skip_threads_45774)) {
                                barrier(CLK_LOCAL_MEM_FENCE);
                            }
                            if (sle32(skip_threads_45774, local_tid_45759 -
                                      squot32(local_tid_45759, 32) * 32) &&
                                (squot32(local_tid_45759, 32) == 0 &&
                                 ltid_in_bounds_45771)) {
                                // write result
                                {
                                    ((volatile __local
                                      float *) red_arr_mem_45763)[sext_i32_i64(local_tid_45759)] =
                                        x_45768;
                                    x_45769 = x_45768;
                                }
                            }
                            if (sle32(wave_sizze_45761, skip_threads_45774)) {
                                barrier(CLK_LOCAL_MEM_FENCE);
                            }
                            skip_threads_45774 *= 2;
                        }
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // carry-in for every block except the first
                {
                    if (!(squot32(local_tid_45759, 32) == 0 ||
                          !ltid_in_bounds_45771)) {
                        // read operands
                        {
                            x_39258 = x_39257;
                            x_39257 = ((__local
                                        float *) red_arr_mem_45763)[sext_i32_i64(squot32(local_tid_45759,
                                                                                         32)) -
                                                                    (int64_t) 1];
                        }
                        // perform operation
                        {
                            bool inactive_45776 =
                                 slt64(srem64(sext_i32_i64(local_tid_45759),
                                              i32_res_29175),
                                       sext_i32_i64(local_tid_45759) -
                                       sext_i32_i64(squot32(local_tid_45759,
                                                            32) * 32 - 1));
                            
                            if (inactive_45776) {
                                x_39257 = x_39258;
                            }
                            if (!inactive_45776) {
                                float defunc_1_op_res_39259 = x_39257 + x_39258;
                                
                                x_39257 = defunc_1_op_res_39259;
                            }
                        }
                        // write final result
                        {
                            ((__local
                              float *) red_arr_mem_45763)[sext_i32_i64(local_tid_45759)] =
                                x_39257;
                        }
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // restore correct values for first block
                {
                    if (squot32(local_tid_45759, 32) == 0) {
                        ((__local
                          float *) red_arr_mem_45763)[sext_i32_i64(local_tid_45759)] =
                            x_39258;
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
            }
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        // save final values of segments
        {
            if (slt64(sext_i32_i64(virt_group_id_45767) *
                      squot64(segred_group_sizze_39253,
                              segment_sizze_nonzzero_45756) +
                      sext_i32_i64(local_tid_45759), m_29166 * i32_res_29181 *
                      i32_res_29181) && slt64(sext_i32_i64(local_tid_45759),
                                              squot64(segred_group_sizze_39253,
                                                      segment_sizze_nonzzero_45756))) {
                ((__global
                  float *) mem_44536)[squot64(sext_i32_i64(virt_group_id_45767) *
                                              squot64(segred_group_sizze_39253,
                                                      segment_sizze_nonzzero_45756) +
                                              sext_i32_i64(local_tid_45759),
                                              i32_res_29181 * i32_res_29181) *
                                      (i32_res_29181 * i32_res_29181) +
                                      squot64(sext_i32_i64(virt_group_id_45767) *
                                              squot64(segred_group_sizze_39253,
                                                      segment_sizze_nonzzero_45756) +
                                              sext_i32_i64(local_tid_45759) -
                                              squot64(sext_i32_i64(virt_group_id_45767) *
                                                      squot64(segred_group_sizze_39253,
                                                              segment_sizze_nonzzero_45756) +
                                                      sext_i32_i64(local_tid_45759),
                                                      i32_res_29181 *
                                                      i32_res_29181) *
                                              (i32_res_29181 * i32_res_29181),
                                              i32_res_29181) * i32_res_29181 +
                                      (sext_i32_i64(virt_group_id_45767) *
                                       squot64(segred_group_sizze_39253,
                                               segment_sizze_nonzzero_45756) +
                                       sext_i32_i64(local_tid_45759) -
                                       squot64(sext_i32_i64(virt_group_id_45767) *
                                               squot64(segred_group_sizze_39253,
                                                       segment_sizze_nonzzero_45756) +
                                               sext_i32_i64(local_tid_45759),
                                               i32_res_29181 * i32_res_29181) *
                                       (i32_res_29181 * i32_res_29181) -
                                       squot64(sext_i32_i64(virt_group_id_45767) *
                                               squot64(segred_group_sizze_39253,
                                                       segment_sizze_nonzzero_45756) +
                                               sext_i32_i64(local_tid_45759) -
                                               squot64(sext_i32_i64(virt_group_id_45767) *
                                                       squot64(segred_group_sizze_39253,
                                                               segment_sizze_nonzzero_45756) +
                                                       sext_i32_i64(local_tid_45759),
                                                       i32_res_29181 *
                                                       i32_res_29181) *
                                               (i32_res_29181 * i32_res_29181),
                                               i32_res_29181) *
                                       i32_res_29181)] = ((__local
                                                           float *) red_arr_mem_45763)[(sext_i32_i64(local_tid_45759) +
                                                                                        (int64_t) 1) *
                                                                                       segment_sizze_nonzzero_45756 -
                                                                                       (int64_t) 1];
            }
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }
    
  error_1:
    return;
    #undef segred_group_sizze_39253
}
// mainzisegred_small_40466: segmented sum for short segments, with several
// whole segments packed into a single work-group.
//
// For every pair (gtid_40455, gtid_40456) with gtid_40455 < m_29166 and
// gtid_40456 < i32_res_29181 the kernel adds up, over gtid_40465 in
// [0, i32_res_29175), the product
//     binop_p[gtid_40456 * N_29165 + gtid_40465] *
//     images[gtid_40455 * N_29165 + gtid_40465]
// except that an element contributes 0.0F when futrts_isnan32 reports the
// images value as NaN.  The per-segment sum is written as a float to
// mem_44844 at the flat segment index (gtid_40455 * i32_res_29181 +
// gtid_40456).
//
// Arguments:
//   global_failure   - the kernel returns immediately when the pointed-to
//                      value is >= 0.
//   red_arr_mem_45960_backing_aligned_0
//                    - local-memory scratch, accessed below as an array of
//                      float indexed by the local thread id.
//   N_29165          - row stride used when indexing images and binop_p.
//   m_29166          - exclusive upper bound for gtid_40455.
//   i32_res_29175    - divisor that yields the position inside a segment
//                      (gtid_40465) and the segment-boundary tests.
//   i32_res_29181    - exclusive upper bound for gtid_40456.
//   num_groups_40519 - stride between the virtual groups that one physical
//                      work-group processes.
//   segment_sizze_nonzzero_45953
//                    - divisor used to derive how many segments fit in one
//                      group and which segment a thread belongs to.
//                      NOTE(review): the name suggests it is i32_res_29175
//                      clamped to at least 1 by the host - confirm there.
//   images_mem_44381, binop_p_mem_44390
//                    - global inputs, read as float.
//   mem_44844        - global output, written as float.
//
// The slt/sle/squot/srem/sext/sdiv_up helpers, futrts_isnan32,
// LOCKSTEP_WIDTH and mainzisegred_group_sizze_40460 are defined outside this
// chunk.  The comments below read the helpers as signed less-than,
// less-or-equal, quotient, remainder, sign extension and round-up division,
// as their names suggest - confirm against their definitions.
__kernel void mainzisegred_small_40466(__global int *global_failure,
                                       __local volatile
                                       int64_t *red_arr_mem_45960_backing_aligned_0,
                                       int64_t N_29165, int64_t m_29166,
                                       int64_t i32_res_29175,
                                       int64_t i32_res_29181,
                                       int64_t num_groups_40519,
                                       int64_t segment_sizze_nonzzero_45953,
                                       __global unsigned char *images_mem_44381,
                                       __global
                                       unsigned char *binop_p_mem_44390,
                                       __global unsigned char *mem_44844)
{
    // Group size alias; undefined again at the end of the kernel.
    #define segred_group_sizze_40518 (mainzisegred_group_sizze_40460)
    
    // The three block_dim constants are not referenced in this kernel.
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict red_arr_mem_45960_backing_0 =
                          (__local volatile
                           char *) red_arr_mem_45960_backing_aligned_0;
    
    // Bail out before doing any work if a failure code is already recorded.
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_45955;
    int32_t local_tid_45956;
    int64_t group_sizze_45959;
    int32_t wave_sizze_45958;
    int32_t group_tid_45957;
    
    global_tid_45955 = get_global_id(0);
    local_tid_45956 = get_local_id(0);
    // group_sizze_45959 and group_tid_45957 are assigned but not read again
    // in this kernel.
    group_sizze_45959 = get_local_size(0);
    wave_sizze_45958 = LOCKSTEP_WIDTH;
    group_tid_45957 = get_group_id(0);
    
    // phys_tid_40466 is assigned but not read again in this kernel.
    int32_t phys_tid_40466;
    
    phys_tid_40466 = global_tid_45955;
    
    __local char *red_arr_mem_45960;
    
    red_arr_mem_45960 = (__local char *) red_arr_mem_45960_backing_0;
    
    int32_t phys_group_id_45962;
    
    phys_group_id_45962 = get_group_id(0);
    // Loop over the virtual groups assigned to this physical group.  The
    // number of virtual groups is (m_29166 * i32_res_29181) segments divided
    // by the segments-per-group count squot64(group size, segment size); this
    // physical group takes every num_groups_40519-th one starting at its own
    // group id.
    for (int32_t i_45963 = 0; i_45963 <
         sdiv_up32(sext_i64_i32(sdiv_up64(m_29166 * i32_res_29181,
                                          squot64(segred_group_sizze_40518,
                                                  segment_sizze_nonzzero_45953))) -
                   phys_group_id_45962, sext_i64_i32(num_groups_40519));
         i_45963++) {
        int32_t virt_group_id_45964 = phys_group_id_45962 + i_45963 *
                sext_i64_i32(num_groups_40519);
        // Flat segment index of this thread is
        //   local_tid / segment_size + virt_group_id * segments_per_group.
        // gtid_40455 is that index divided by i32_res_29181 ...
        int64_t gtid_40455 = squot64(squot64(sext_i32_i64(local_tid_45956),
                                             segment_sizze_nonzzero_45953) +
                                     sext_i32_i64(virt_group_id_45964) *
                                     squot64(segred_group_sizze_40518,
                                             segment_sizze_nonzzero_45953),
                                     i32_res_29181);
        // ... and gtid_40456 is the flat segment index minus
        // gtid_40455 * i32_res_29181, i.e. the remainder of that division.
        int64_t gtid_40456 = squot64(sext_i32_i64(local_tid_45956),
                                     segment_sizze_nonzzero_45953) +
                sext_i32_i64(virt_group_id_45964) *
                squot64(segred_group_sizze_40518,
                        segment_sizze_nonzzero_45953) -
                squot64(squot64(sext_i32_i64(local_tid_45956),
                                segment_sizze_nonzzero_45953) +
                        sext_i32_i64(virt_group_id_45964) *
                        squot64(segred_group_sizze_40518,
                                segment_sizze_nonzzero_45953), i32_res_29181) *
                i32_res_29181;
        // Position of this thread inside its segment.
        int64_t gtid_40465 = srem64(sext_i32_i64(local_tid_45956),
                                    i32_res_29175);
        
        // apply map function if in bounds
        {
            // In bounds means: the segment size is positive, both outer
            // indices are below their limits, and the thread falls inside the
            // part of the group that is covered by whole segments.
            if (slt64((int64_t) 0, i32_res_29175) && ((slt64(gtid_40455,
                                                             m_29166) &&
                                                       slt64(gtid_40456,
                                                             i32_res_29181)) &&
                                                      slt64(sext_i32_i64(local_tid_45956),
                                                            i32_res_29175 *
                                                            squot64(segred_group_sizze_40518,
                                                                    segment_sizze_nonzzero_45953)))) {
                float x_40528 = ((__global
                                  float *) images_mem_44381)[gtid_40455 *
                                                             N_29165 +
                                                             gtid_40465];
                bool isnan_res_40529;
                
                isnan_res_40529 = futrts_isnan32(x_40528);
                
                float defunc_1_f_res_40530;
                
                // A NaN images value contributes 0.0F; binop_p is only read
                // on the non-NaN path.
                if (isnan_res_40529) {
                    defunc_1_f_res_40530 = 0.0F;
                } else {
                    float x_40527 = ((__global
                                      float *) binop_p_mem_44390)[gtid_40456 *
                                                                  N_29165 +
                                                                  gtid_40465];
                    float defunc_1_f_res_f_res_40531 = x_40527 * x_40528;
                    
                    defunc_1_f_res_40530 = defunc_1_f_res_f_res_40531;
                }
                // save map-out results
                { }
                // save results to be reduced
                {
                    ((__local
                      float *) red_arr_mem_45960)[sext_i32_i64(local_tid_45956)] =
                        defunc_1_f_res_40530;
                }
            } else {
                // Out-of-bounds threads store 0.0F, the value that leaves a
                // float sum unchanged.
                ((__local
                  float *) red_arr_mem_45960)[sext_i32_i64(local_tid_45956)] =
                    0.0F;
            }
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        if (slt64((int64_t) 0, i32_res_29175)) {
            // perform segmented scan to imitate reduction
            {
                // x_40522 / x_40523: accumulator and own value for the scan
                // over all threads; x_45965 / x_45966: the same pair for the
                // second-level scan over per-block totals.
                float x_40522;
                float x_40523;
                float x_45965;
                float x_45966;
                bool ltid_in_bounds_45968;
                
                ltid_in_bounds_45968 = slt64(sext_i32_i64(local_tid_45956),
                                             i32_res_29175 *
                                             squot64(segred_group_sizze_40518,
                                                     segment_sizze_nonzzero_45953));
                
                int32_t skip_threads_45969;
                
                // read input for in-block scan
                {
                    if (ltid_in_bounds_45968) {
                        x_40523 = ((volatile __local
                                    float *) red_arr_mem_45960)[sext_i32_i64(local_tid_45956)];
                        // A "block" is a run of 32 consecutive local ids;
                        // lane 0 of each block seeds its accumulator with
                        // its own value.
                        if ((local_tid_45956 - squot32(local_tid_45956, 32) *
                             32) == 0) {
                            x_40522 = x_40523;
                        }
                    }
                }
                // in-block scan (hopefully no barriers needed)
                {
                    // The stride doubles each round: 1, 2, 4, 8, 16.
                    skip_threads_45969 = 1;
                    while (slt32(skip_threads_45969, 32)) {
                        // Only lanes at least skip_threads into their block
                        // take part in this round.
                        if (sle32(skip_threads_45969, local_tid_45956 -
                                  squot32(local_tid_45956, 32) * 32) &&
                            ltid_in_bounds_45968) {
                            // read operands
                            {
                                x_40522 = ((volatile __local
                                            float *) red_arr_mem_45960)[sext_i32_i64(local_tid_45956) -
                                                                        sext_i32_i64(skip_threads_45969)];
                            }
                            // perform operation
                            {
                                // The right-hand side reduces to the stride,
                                // so the flag is set when this thread sits
                                // fewer than skip_threads elements into its
                                // segment, i.e. the operand just read lies in
                                // an earlier segment and must be ignored.
                                bool inactive_45970 =
                                     slt64(srem64(sext_i32_i64(local_tid_45956),
                                                  i32_res_29175),
                                           sext_i32_i64(local_tid_45956) -
                                           sext_i32_i64(local_tid_45956 -
                                           skip_threads_45969));
                                
                                if (inactive_45970) {
                                    x_40522 = x_40523;
                                }
                                if (!inactive_45970) {
                                    float defunc_1_op_res_40524 = x_40522 +
                                          x_40523;
                                    
                                    x_40522 = defunc_1_op_res_40524;
                                }
                            }
                        }
                        // A barrier separates the reads from the writes only
                        // once the stride reaches LOCKSTEP_WIDTH.  Every
                        // thread evaluates the same condition, so the barrier
                        // is reached uniformly.
                        if (sle32(wave_sizze_45958, skip_threads_45969)) {
                            barrier(CLK_LOCAL_MEM_FENCE);
                        }
                        if (sle32(skip_threads_45969, local_tid_45956 -
                                  squot32(local_tid_45956, 32) * 32) &&
                            ltid_in_bounds_45968) {
                            // write result
                            {
                                ((volatile __local
                                  float *) red_arr_mem_45960)[sext_i32_i64(local_tid_45956)] =
                                    x_40522;
                                x_40523 = x_40522;
                            }
                        }
                        if (sle32(wave_sizze_45958, skip_threads_45969)) {
                            barrier(CLK_LOCAL_MEM_FENCE);
                        }
                        skip_threads_45969 *= 2;
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // last thread of block 'i' writes its result to offset 'i'
                {
                    // This overwrites the leading slots of the scratch array;
                    // the first block puts its own values back further down.
                    if ((local_tid_45956 - squot32(local_tid_45956, 32) * 32) ==
                        31 && ltid_in_bounds_45968) {
                        ((volatile __local
                          float *) red_arr_mem_45960)[sext_i32_i64(squot32(local_tid_45956,
                                                                           32))] =
                            x_40522;
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // scan the first block, after which offset 'i' contains carry-in for block 'i+1'
                {
                    int32_t skip_threads_45971;
                    
                    // read input for in-block scan
                    {
                        if (squot32(local_tid_45956, 32) == 0 &&
                            ltid_in_bounds_45968) {
                            x_45966 = ((volatile __local
                                        float *) red_arr_mem_45960)[sext_i32_i64(local_tid_45956)];
                            if ((local_tid_45956 - squot32(local_tid_45956,
                                                           32) * 32) == 0) {
                                x_45965 = x_45966;
                            }
                        }
                    }
                    // in-block scan (hopefully no barriers needed)
                    {
                        skip_threads_45971 = 1;
                        while (slt32(skip_threads_45971, 32)) {
                            // Same scheme as the scan above, restricted to
                            // the threads of block 0.
                            if (sle32(skip_threads_45971, local_tid_45956 -
                                      squot32(local_tid_45956, 32) * 32) &&
                                (squot32(local_tid_45956, 32) == 0 &&
                                 ltid_in_bounds_45968)) {
                                // read operands
                                {
                                    x_45965 = ((volatile __local
                                                float *) red_arr_mem_45960)[sext_i32_i64(local_tid_45956) -
                                                                            sext_i32_i64(skip_threads_45971)];
                                }
                                // perform operation
                                {
                                    // Slot t now stands for element
                                    // t * 32 + 31 (the last lane of block t),
                                    // so the segment-boundary test is done on
                                    // those element indices rather than on
                                    // the local id itself.
                                    bool inactive_45972 =
                                         slt64(srem64(sext_i32_i64(local_tid_45956 *
                                                      32 + 32 - 1),
                                                      i32_res_29175),
                                               sext_i32_i64(local_tid_45956 *
                                               32 + 32 - 1) -
                                               sext_i32_i64((local_tid_45956 -
                                                             skip_threads_45971) *
                                               32 + 32 - 1));
                                    
                                    if (inactive_45972) {
                                        x_45965 = x_45966;
                                    }
                                    if (!inactive_45972) {
                                        float defunc_1_op_res_45967 = x_45965 +
                                              x_45966;
                                        
                                        x_45965 = defunc_1_op_res_45967;
                                    }
                                }
                            }
                            if (sle32(wave_sizze_45958, skip_threads_45971)) {
                                barrier(CLK_LOCAL_MEM_FENCE);
                            }
                            if (sle32(skip_threads_45971, local_tid_45956 -
                                      squot32(local_tid_45956, 32) * 32) &&
                                (squot32(local_tid_45956, 32) == 0 &&
                                 ltid_in_bounds_45968)) {
                                // write result
                                {
                                    ((volatile __local
                                      float *) red_arr_mem_45960)[sext_i32_i64(local_tid_45956)] =
                                        x_45965;
                                    x_45966 = x_45965;
                                }
                            }
                            if (sle32(wave_sizze_45958, skip_threads_45971)) {
                                barrier(CLK_LOCAL_MEM_FENCE);
                            }
                            skip_threads_45971 *= 2;
                        }
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // carry-in for every block except the first
                {
                    if (!(squot32(local_tid_45956, 32) == 0 ||
                          !ltid_in_bounds_45968)) {
                        // read operands
                        {
                            // Keep the in-block result as the own value and
                            // load the scanned total of the preceding block.
                            x_40523 = x_40522;
                            x_40522 = ((__local
                                        float *) red_arr_mem_45960)[sext_i32_i64(squot32(local_tid_45956,
                                                                                         32)) -
                                                                    (int64_t) 1];
                        }
                        // perform operation
                        {
                            // The carry is dropped when this thread's segment
                            // starts after the last element of the preceding
                            // block (index block * 32 - 1).
                            bool inactive_45973 =
                                 slt64(srem64(sext_i32_i64(local_tid_45956),
                                              i32_res_29175),
                                       sext_i32_i64(local_tid_45956) -
                                       sext_i32_i64(squot32(local_tid_45956,
                                                            32) * 32 - 1));
                            
                            if (inactive_45973) {
                                x_40522 = x_40523;
                            }
                            if (!inactive_45973) {
                                float defunc_1_op_res_40524 = x_40522 + x_40523;
                                
                                x_40522 = defunc_1_op_res_40524;
                            }
                        }
                        // write final result
                        {
                            ((__local
                              float *) red_arr_mem_45960)[sext_i32_i64(local_tid_45956)] =
                                x_40522;
                        }
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // restore correct values for first block
                {
                    // NOTE(review): this store is not guarded by
                    // ltid_in_bounds_45968, while x_40523 is only assigned
                    // for in-bounds threads.  An out-of-bounds thread of
                    // block 0 therefore stores a never-assigned value; the
                    // final read below appears to touch only in-bounds
                    // slots, so this looks harmless - confirm.
                    if (squot32(local_tid_45956, 32) == 0) {
                        ((__local
                          float *) red_arr_mem_45960)[sext_i32_i64(local_tid_45956)] =
                            x_40523;
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
            }
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        // save final values of segments
        {
            // Thread t, for t below the segments-per-group count and with a
            // flat segment index below m_29166 * i32_res_29181, publishes
            // the result of the t-th segment of this virtual group.
            if (slt64(sext_i32_i64(virt_group_id_45964) *
                      squot64(segred_group_sizze_40518,
                              segment_sizze_nonzzero_45953) +
                      sext_i32_i64(local_tid_45956), m_29166 * i32_res_29181) &&
                slt64(sext_i32_i64(local_tid_45956),
                      squot64(segred_group_sizze_40518,
                              segment_sizze_nonzzero_45953))) {
                // The destination index is written as quotient times
                // i32_res_29181 plus the matching remainder, which is just
                // the flat segment index.  The source is the last scan slot
                // of the segment, (t + 1) * segment_size - 1.
                ((__global
                  float *) mem_44844)[squot64(sext_i32_i64(virt_group_id_45964) *
                                              squot64(segred_group_sizze_40518,
                                                      segment_sizze_nonzzero_45953) +
                                              sext_i32_i64(local_tid_45956),
                                              i32_res_29181) * i32_res_29181 +
                                      (sext_i32_i64(virt_group_id_45964) *
                                       squot64(segred_group_sizze_40518,
                                               segment_sizze_nonzzero_45953) +
                                       sext_i32_i64(local_tid_45956) -
                                       squot64(sext_i32_i64(virt_group_id_45964) *
                                               squot64(segred_group_sizze_40518,
                                                       segment_sizze_nonzzero_45953) +
                                               sext_i32_i64(local_tid_45956),
                                               i32_res_29181) *
                                       i32_res_29181)] = ((__local
                                                           float *) red_arr_mem_45960)[(sext_i32_i64(local_tid_45956) +
                                                                                        (int64_t) 1) *
                                                                                       segment_sizze_nonzzero_45953 -
                                                                                       (int64_t) 1];
            }
        }
        // Synchronise before the next virtual group reuses the scratch
        // array.
        barrier(CLK_LOCAL_MEM_FENCE);
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }
    
    // No goto in this kernel targets the label below.
  error_1:
    return;
    #undef segred_group_sizze_40518
}
__kernel void mainzisegred_small_40603(__global int *global_failure,
                                       __local volatile
                                       int64_t *red_arr_mem_46048_backing_aligned_0,
                                       int64_t m_29166, int64_t i32_res_29181,
                                       int64_t num_groups_40652,
                                       int64_t segment_sizze_nonzzero_46041,
                                       __global
                                       unsigned char *defunc_3_map_res_mem_44629,
                                       __global
                                       unsigned char *defunc_3_map_res_mem_44850,
                                       __global unsigned char *mem_44910)
{
    #define segred_group_sizze_40651 (mainzisegred_group_sizze_40597)
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict red_arr_mem_46048_backing_0 =
                          (__local volatile
                           char *) red_arr_mem_46048_backing_aligned_0;
    
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_46043;
    int32_t local_tid_46044;
    int64_t group_sizze_46047;
    int32_t wave_sizze_46046;
    int32_t group_tid_46045;
    
    global_tid_46043 = get_global_id(0);
    local_tid_46044 = get_local_id(0);
    group_sizze_46047 = get_local_size(0);
    wave_sizze_46046 = LOCKSTEP_WIDTH;
    group_tid_46045 = get_group_id(0);
    
    int32_t phys_tid_40603;
    
    phys_tid_40603 = global_tid_46043;
    
    __local char *red_arr_mem_46048;
    
    red_arr_mem_46048 = (__local char *) red_arr_mem_46048_backing_0;
    
    int32_t phys_group_id_46050;
    
    phys_group_id_46050 = get_group_id(0);
    for (int32_t i_46051 = 0; i_46051 <
         sdiv_up32(sext_i64_i32(sdiv_up64(m_29166 * i32_res_29181,
                                          squot64(segred_group_sizze_40651,
                                                  segment_sizze_nonzzero_46041))) -
                   phys_group_id_46050, sext_i64_i32(num_groups_40652));
         i_46051++) {
        int32_t virt_group_id_46052 = phys_group_id_46050 + i_46051 *
                sext_i64_i32(num_groups_40652);
        int64_t gtid_40592 = squot64(squot64(sext_i32_i64(local_tid_46044),
                                             segment_sizze_nonzzero_46041) +
                                     sext_i32_i64(virt_group_id_46052) *
                                     squot64(segred_group_sizze_40651,
                                             segment_sizze_nonzzero_46041),
                                     i32_res_29181);
        int64_t gtid_40593 = squot64(sext_i32_i64(local_tid_46044),
                                     segment_sizze_nonzzero_46041) +
                sext_i32_i64(virt_group_id_46052) *
                squot64(segred_group_sizze_40651,
                        segment_sizze_nonzzero_46041) -
                squot64(squot64(sext_i32_i64(local_tid_46044),
                                segment_sizze_nonzzero_46041) +
                        sext_i32_i64(virt_group_id_46052) *
                        squot64(segred_group_sizze_40651,
                                segment_sizze_nonzzero_46041), i32_res_29181) *
                i32_res_29181;
        int64_t gtid_40602 = srem64(sext_i32_i64(local_tid_46044),
                                    i32_res_29181);
        
        // apply map function if in bounds
        {
            if (slt64((int64_t) 0, i32_res_29181) && ((slt64(gtid_40592,
                                                             m_29166) &&
                                                       slt64(gtid_40593,
                                                             i32_res_29181)) &&
                                                      slt64(sext_i32_i64(local_tid_46044),
                                                            i32_res_29181 *
                                                            squot64(segred_group_sizze_40651,
                                                                    segment_sizze_nonzzero_46041)))) {
                float x_40661 = ((__global
                                  float *) defunc_3_map_res_mem_44850)[gtid_40592 *
                                                                       i32_res_29181 +
                                                                       gtid_40602];
                float x_40662 = ((__global
                                  float *) defunc_3_map_res_mem_44629)[gtid_40592 *
                                                                       (i32_res_29181 *
                                                                        i32_res_29181) +
                                                                       gtid_40593 *
                                                                       i32_res_29181 +
                                                                       gtid_40602];
                float defunc_1_f_res_40663 = x_40661 * x_40662;
                
                // save map-out results
                { }
                // save results to be reduced
                {
                    ((__local
                      float *) red_arr_mem_46048)[sext_i32_i64(local_tid_46044)] =
                        defunc_1_f_res_40663;
                }
            } else {
                ((__local
                  float *) red_arr_mem_46048)[sext_i32_i64(local_tid_46044)] =
                    0.0F;
            }
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        if (slt64((int64_t) 0, i32_res_29181)) {
            // perform segmented scan to imitate reduction
            {
                float x_40655;
                float x_40656;
                float x_46053;
                float x_46054;
                bool ltid_in_bounds_46056;
                
                ltid_in_bounds_46056 = slt64(sext_i32_i64(local_tid_46044),
                                             i32_res_29181 *
                                             squot64(segred_group_sizze_40651,
                                                     segment_sizze_nonzzero_46041));
                
                int32_t skip_threads_46057;
                
                // read input for in-block scan
                {
                    if (ltid_in_bounds_46056) {
                        x_40656 = ((volatile __local
                                    float *) red_arr_mem_46048)[sext_i32_i64(local_tid_46044)];
                        if ((local_tid_46044 - squot32(local_tid_46044, 32) *
                             32) == 0) {
                            x_40655 = x_40656;
                        }
                    }
                }
                // in-block scan (hopefully no barriers needed)
                {
                    skip_threads_46057 = 1;
                    while (slt32(skip_threads_46057, 32)) {
                        if (sle32(skip_threads_46057, local_tid_46044 -
                                  squot32(local_tid_46044, 32) * 32) &&
                            ltid_in_bounds_46056) {
                            // read operands
                            {
                                x_40655 = ((volatile __local
                                            float *) red_arr_mem_46048)[sext_i32_i64(local_tid_46044) -
                                                                        sext_i32_i64(skip_threads_46057)];
                            }
                            // perform operation
                            {
                                bool inactive_46058 =
                                     slt64(srem64(sext_i32_i64(local_tid_46044),
                                                  i32_res_29181),
                                           sext_i32_i64(local_tid_46044) -
                                           sext_i32_i64(local_tid_46044 -
                                           skip_threads_46057));
                                
                                if (inactive_46058) {
                                    x_40655 = x_40656;
                                }
                                if (!inactive_46058) {
                                    float defunc_1_op_res_40657 = x_40655 +
                                          x_40656;
                                    
                                    x_40655 = defunc_1_op_res_40657;
                                }
                            }
                        }
                        if (sle32(wave_sizze_46046, skip_threads_46057)) {
                            barrier(CLK_LOCAL_MEM_FENCE);
                        }
                        if (sle32(skip_threads_46057, local_tid_46044 -
                                  squot32(local_tid_46044, 32) * 32) &&
                            ltid_in_bounds_46056) {
                            // write result
                            {
                                ((volatile __local
                                  float *) red_arr_mem_46048)[sext_i32_i64(local_tid_46044)] =
                                    x_40655;
                                x_40656 = x_40655;
                            }
                        }
                        if (sle32(wave_sizze_46046, skip_threads_46057)) {
                            barrier(CLK_LOCAL_MEM_FENCE);
                        }
                        skip_threads_46057 *= 2;
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // last thread of block 'i' writes its result to offset 'i'
                {
                    if ((local_tid_46044 - squot32(local_tid_46044, 32) * 32) ==
                        31 && ltid_in_bounds_46056) {
                        ((volatile __local
                          float *) red_arr_mem_46048)[sext_i32_i64(squot32(local_tid_46044,
                                                                           32))] =
                            x_40655;
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // scan the first block, after which offset 'i' contains carry-in for block 'i+1'
                {
                    int32_t skip_threads_46059;
                    
                    // read input for in-block scan
                    {
                        if (squot32(local_tid_46044, 32) == 0 &&
                            ltid_in_bounds_46056) {
                            x_46054 = ((volatile __local
                                        float *) red_arr_mem_46048)[sext_i32_i64(local_tid_46044)];
                            if ((local_tid_46044 - squot32(local_tid_46044,
                                                           32) * 32) == 0) {
                                x_46053 = x_46054;
                            }
                        }
                    }
                    // in-block scan (hopefully no barriers needed)
                    {
                        skip_threads_46059 = 1;
                        while (slt32(skip_threads_46059, 32)) {
                            if (sle32(skip_threads_46059, local_tid_46044 -
                                      squot32(local_tid_46044, 32) * 32) &&
                                (squot32(local_tid_46044, 32) == 0 &&
                                 ltid_in_bounds_46056)) {
                                // read operands
                                {
                                    x_46053 = ((volatile __local
                                                float *) red_arr_mem_46048)[sext_i32_i64(local_tid_46044) -
                                                                            sext_i32_i64(skip_threads_46059)];
                                }
                                // perform operation
                                {
                                    bool inactive_46060 =
                                         slt64(srem64(sext_i32_i64(local_tid_46044 *
                                                      32 + 32 - 1),
                                                      i32_res_29181),
                                               sext_i32_i64(local_tid_46044 *
                                               32 + 32 - 1) -
                                               sext_i32_i64((local_tid_46044 -
                                                             skip_threads_46059) *
                                               32 + 32 - 1));
                                    
                                    if (inactive_46060) {
                                        x_46053 = x_46054;
                                    }
                                    if (!inactive_46060) {
                                        float defunc_1_op_res_46055 = x_46053 +
                                              x_46054;
                                        
                                        x_46053 = defunc_1_op_res_46055;
                                    }
                                }
                            }
                            if (sle32(wave_sizze_46046, skip_threads_46059)) {
                                barrier(CLK_LOCAL_MEM_FENCE);
                            }
                            if (sle32(skip_threads_46059, local_tid_46044 -
                                      squot32(local_tid_46044, 32) * 32) &&
                                (squot32(local_tid_46044, 32) == 0 &&
                                 ltid_in_bounds_46056)) {
                                // write result
                                {
                                    ((volatile __local
                                      float *) red_arr_mem_46048)[sext_i32_i64(local_tid_46044)] =
                                        x_46053;
                                    x_46054 = x_46053;
                                }
                            }
                            if (sle32(wave_sizze_46046, skip_threads_46059)) {
                                barrier(CLK_LOCAL_MEM_FENCE);
                            }
                            skip_threads_46059 *= 2;
                        }
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // carry-in for every block except the first
                {
                    if (!(squot32(local_tid_46044, 32) == 0 ||
                          !ltid_in_bounds_46056)) {
                        // read operands
                        {
                            x_40656 = x_40655;
                            x_40655 = ((__local
                                        float *) red_arr_mem_46048)[sext_i32_i64(squot32(local_tid_46044,
                                                                                         32)) -
                                                                    (int64_t) 1];
                        }
                        // perform operation
                        {
                            bool inactive_46061 =
                                 slt64(srem64(sext_i32_i64(local_tid_46044),
                                              i32_res_29181),
                                       sext_i32_i64(local_tid_46044) -
                                       sext_i32_i64(squot32(local_tid_46044,
                                                            32) * 32 - 1));
                            
                            if (inactive_46061) {
                                x_40655 = x_40656;
                            }
                            if (!inactive_46061) {
                                float defunc_1_op_res_40657 = x_40655 + x_40656;
                                
                                x_40655 = defunc_1_op_res_40657;
                            }
                        }
                        // write final result
                        {
                            ((__local
                              float *) red_arr_mem_46048)[sext_i32_i64(local_tid_46044)] =
                                x_40655;
                        }
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // restore correct values for first block
                {
                    if (squot32(local_tid_46044, 32) == 0) {
                        ((__local
                          float *) red_arr_mem_46048)[sext_i32_i64(local_tid_46044)] =
                            x_40656;
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
            }
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        // save final values of segments
        {
            if (slt64(sext_i32_i64(virt_group_id_46052) *
                      squot64(segred_group_sizze_40651,
                              segment_sizze_nonzzero_46041) +
                      sext_i32_i64(local_tid_46044), m_29166 * i32_res_29181) &&
                slt64(sext_i32_i64(local_tid_46044),
                      squot64(segred_group_sizze_40651,
                              segment_sizze_nonzzero_46041))) {
                ((__global
                  float *) mem_44910)[squot64(sext_i32_i64(virt_group_id_46052) *
                                              squot64(segred_group_sizze_40651,
                                                      segment_sizze_nonzzero_46041) +
                                              sext_i32_i64(local_tid_46044),
                                              i32_res_29181) * i32_res_29181 +
                                      (sext_i32_i64(virt_group_id_46052) *
                                       squot64(segred_group_sizze_40651,
                                               segment_sizze_nonzzero_46041) +
                                       sext_i32_i64(local_tid_46044) -
                                       squot64(sext_i32_i64(virt_group_id_46052) *
                                               squot64(segred_group_sizze_40651,
                                                       segment_sizze_nonzzero_46041) +
                                               sext_i32_i64(local_tid_46044),
                                               i32_res_29181) *
                                       i32_res_29181)] = ((__local
                                                           float *) red_arr_mem_46048)[(sext_i32_i64(local_tid_46044) +
                                                                                        (int64_t) 1) *
                                                                                       segment_sizze_nonzzero_46041 -
                                                                                       (int64_t) 1];
            }
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }
    
  error_1:
    return;
    #undef segred_group_sizze_40651
}
// Segmented sum-of-products reduction in which several segments share one
// work-group.
//
// For every pair (gtid_40722 < m_29166, gtid_40723 < N_29165) the kernel
// multiplies, for k in [0, i32_res_29181),
//   defunc_4_map_res_mem_44916[gtid_40722 * i32_res_29181 + k]  by
//   mem_44397[gtid_40723 * i32_res_29181 + k]
// (both buffers are read as float), adds the products up with a segmented
// scan in local memory, and stores the per-segment total as a float at
// mem_45134[gtid_40722 * N_29165 + gtid_40723].
//
// Parameters:
//   global_failure   - read once on entry; the kernel returns immediately
//                      when the stored value is >= 0.
//   red_arr_mem_46180_backing_aligned_0
//                    - local scratch buffer, accessed below as an array of
//                      float indexed by the local thread id.
//   N_29165, m_29166 - extents of the two segment dimensions / output.
//   i32_res_29181    - number of elements reduced per segment.
//   num_groups_40780 - stride between the virtual groups that one physical
//                      group processes.
//   segment_sizze_nonzzero_46173
//                    - divisor used to pack segments into a group and to
//                      locate the last element of each segment.
//                      NOTE(review): presumably max(1, i32_res_29181),
//                      computed by the host - confirm against the launcher.
//   mem_44397, defunc_4_map_res_mem_44916 - input buffers.
//   mem_45134        - output buffer.
__kernel void mainzisegred_small_40733(__global int *global_failure,
                                       __local volatile
                                       int64_t *red_arr_mem_46180_backing_aligned_0,
                                       int64_t N_29165, int64_t m_29166,
                                       int64_t i32_res_29181,
                                       int64_t num_groups_40780,
                                       int64_t segment_sizze_nonzzero_46173,
                                       __global unsigned char *mem_44397,
                                       __global
                                       unsigned char *defunc_4_map_res_mem_44916,
                                       __global unsigned char *mem_45134)
{
    // Local alias for the group size. mainzisegred_group_sizze_40727 is not
    // defined inside this kernel (presumably supplied when the program is
    // built - confirm).
    #define segred_group_sizze_40779 (mainzisegred_group_sizze_40727)
    
    // block_dim0..2 are not referenced anywhere else in this kernel.
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict red_arr_mem_46180_backing_0 =
                          (__local volatile
                           char *) red_arr_mem_46180_backing_aligned_0;
    
    // Do no work at all when the failure flag is already non-negative
    // (presumably set by an earlier failing kernel - confirm).
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_46175;
    int32_t local_tid_46176;
    int64_t group_sizze_46179;
    int32_t wave_sizze_46178;
    int32_t group_tid_46177;
    
    // Of these, only local_tid_46176 and wave_sizze_46178 are used by the
    // reduction below; group_sizze_46179 and group_tid_46177 are assigned
    // but never read again in this kernel.
    global_tid_46175 = get_global_id(0);
    local_tid_46176 = get_local_id(0);
    group_sizze_46179 = get_local_size(0);
    wave_sizze_46178 = LOCKSTEP_WIDTH;
    group_tid_46177 = get_group_id(0);
    
    int32_t phys_tid_40733;
    
    phys_tid_40733 = global_tid_46175;
    
    __local char *red_arr_mem_46180;
    
    red_arr_mem_46180 = (__local char *) red_arr_mem_46180_backing_0;
    
    int32_t phys_group_id_46182;
    
    phys_group_id_46182 = get_group_id(0);
    // Virtualisation loop: the total number of virtual groups is
    // sdiv_up64(m * N, segments per group), where segments per group is
    // squot64(group size, segment_sizze_nonzzero). This physical group
    // handles virtual groups phys_group_id, phys_group_id + num_groups, ...
    for (int32_t i_46183 = 0; i_46183 <
         sdiv_up32(sext_i64_i32(sdiv_up64(m_29166 * N_29165,
                                          squot64(segred_group_sizze_40779,
                                                  segment_sizze_nonzzero_46173))) -
                   phys_group_id_46182, sext_i64_i32(num_groups_40780));
         i_46183++) {
        int32_t virt_group_id_46184 = phys_group_id_46182 + i_46183 *
                sext_i64_i32(num_groups_40780);
        // Flat segment index of this thread is
        //   local_tid / segment_sizze_nonzzero + virt_group_id * segments per group.
        // gtid_40722 is that index divided by N_29165 ...
        int64_t gtid_40722 = squot64(squot64(sext_i32_i64(local_tid_46176),
                                             segment_sizze_nonzzero_46173) +
                                     sext_i32_i64(virt_group_id_46184) *
                                     squot64(segred_group_sizze_40779,
                                             segment_sizze_nonzzero_46173),
                                     N_29165);
        // ... and gtid_40723 is the remainder (flat - quotient * N_29165).
        int64_t gtid_40723 = squot64(sext_i32_i64(local_tid_46176),
                                     segment_sizze_nonzzero_46173) +
                sext_i32_i64(virt_group_id_46184) *
                squot64(segred_group_sizze_40779,
                        segment_sizze_nonzzero_46173) -
                squot64(squot64(sext_i32_i64(local_tid_46176),
                                segment_sizze_nonzzero_46173) +
                        sext_i32_i64(virt_group_id_46184) *
                        squot64(segred_group_sizze_40779,
                                segment_sizze_nonzzero_46173), N_29165) *
                N_29165;
        // Position of this thread inside its segment.
        int64_t gtid_40732 = srem64(sext_i32_i64(local_tid_46176),
                                    i32_res_29181);
        
        // apply map function if in bounds
        // A thread is in bounds when the segment length is positive, both
        // segment indices are inside the m x N space, and local_tid is below
        // i32_res * segments per group. Every other thread stores 0.0F, so
        // each thread writes its own slot of the local array before the
        // barrier.
        {
            if (slt64((int64_t) 0, i32_res_29181) && ((slt64(gtid_40722,
                                                             m_29166) &&
                                                       slt64(gtid_40723,
                                                             N_29165)) &&
                                                      slt64(sext_i32_i64(local_tid_46176),
                                                            i32_res_29181 *
                                                            squot64(segred_group_sizze_40779,
                                                                    segment_sizze_nonzzero_46173)))) {
                float x_40788 = ((__global
                                  float *) defunc_4_map_res_mem_44916)[gtid_40722 *
                                                                       i32_res_29181 +
                                                                       gtid_40732];
                float x_40789 = ((__global float *) mem_44397)[gtid_40723 *
                                                               i32_res_29181 +
                                                               gtid_40732];
                float defunc_1_f_res_40790 = x_40788 * x_40789;
                
                // save map-out results
                { }
                // save results to be reduced
                {
                    ((__local
                      float *) red_arr_mem_46180)[sext_i32_i64(local_tid_46176)] =
                        defunc_1_f_res_40790;
                }
            } else {
                ((__local
                  float *) red_arr_mem_46180)[sext_i32_i64(local_tid_46176)] =
                    0.0F;
            }
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        // The scan is skipped entirely for non-positive segment lengths; the
        // local array then still holds the 0.0F values written above.
        if (slt64((int64_t) 0, i32_res_29181)) {
            // perform segmented scan to imitate reduction
            // Three stages over blocks of 32 threads (the block width is
            // hard-coded): (1) inclusive scan inside every block, (2) scan of
            // the per-block totals by the first block, (3) add the carry-in
            // of the preceding block to every later block.
            {
                float x_40783;
                float x_40784;
                float x_46185;
                float x_46186;
                bool ltid_in_bounds_46188;
                
                ltid_in_bounds_46188 = slt64(sext_i32_i64(local_tid_46176),
                                             i32_res_29181 *
                                             squot64(segred_group_sizze_40779,
                                                     segment_sizze_nonzzero_46173));
                
                int32_t skip_threads_46189;
                
                // read input for in-block scan
                {
                    if (ltid_in_bounds_46188) {
                        x_40784 = ((volatile __local
                                    float *) red_arr_mem_46180)[sext_i32_i64(local_tid_46176)];
                        // local_tid - (local_tid / 32) * 32 is the lane index
                        // inside the 32-wide block; lane 0 has no left
                        // neighbour, so its running value is its own input.
                        if ((local_tid_46176 - squot32(local_tid_46176, 32) *
                             32) == 0) {
                            x_40783 = x_40784;
                        }
                    }
                }
                // in-block scan (hopefully no barriers needed)
                // skip_threads doubles each round (1, 2, 4, 8, 16). Barriers
                // are only issued in rounds where wave_sizze <= skip_threads.
                {
                    skip_threads_46189 = 1;
                    while (slt32(skip_threads_46189, 32)) {
                        if (sle32(skip_threads_46189, local_tid_46176 -
                                  squot32(local_tid_46176, 32) * 32) &&
                            ltid_in_bounds_46188) {
                            // read operands
                            {
                                x_40783 = ((volatile __local
                                            float *) red_arr_mem_46180)[sext_i32_i64(local_tid_46176) -
                                                                        sext_i32_i64(skip_threads_46189)];
                            }
                            // perform operation
                            // inactive: the offset of this thread inside its
                            // segment is smaller than the distance to the
                            // operand just read, i.e. that operand belongs to
                            // a different segment and must not be added.
                            {
                                bool inactive_46190 =
                                     slt64(srem64(sext_i32_i64(local_tid_46176),
                                                  i32_res_29181),
                                           sext_i32_i64(local_tid_46176) -
                                           sext_i32_i64(local_tid_46176 -
                                           skip_threads_46189));
                                
                                if (inactive_46190) {
                                    x_40783 = x_40784;
                                }
                                if (!inactive_46190) {
                                    float defunc_1_op_res_40785 = x_40783 +
                                          x_40784;
                                    
                                    x_40783 = defunc_1_op_res_40785;
                                }
                            }
                        }
                        if (sle32(wave_sizze_46178, skip_threads_46189)) {
                            barrier(CLK_LOCAL_MEM_FENCE);
                        }
                        if (sle32(skip_threads_46189, local_tid_46176 -
                                  squot32(local_tid_46176, 32) * 32) &&
                            ltid_in_bounds_46188) {
                            // write result
                            {
                                ((volatile __local
                                  float *) red_arr_mem_46180)[sext_i32_i64(local_tid_46176)] =
                                    x_40783;
                                x_40784 = x_40783;
                            }
                        }
                        if (sle32(wave_sizze_46178, skip_threads_46189)) {
                            barrier(CLK_LOCAL_MEM_FENCE);
                        }
                        skip_threads_46189 *= 2;
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // last thread of block 'i' writes its result to offset 'i'
                // This overwrites slots 0.. of the first block; the values
                // that were there are kept in x_40784 and written back in
                // the restore step further down.
                {
                    if ((local_tid_46176 - squot32(local_tid_46176, 32) * 32) ==
                        31 && ltid_in_bounds_46188) {
                        ((volatile __local
                          float *) red_arr_mem_46180)[sext_i32_i64(squot32(local_tid_46176,
                                                                           32))] =
                            x_40783;
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // scan the first block, after which offset 'i' contains carry-in for block 'i+1'
                // Only threads of block 0 take part, using the separate
                // registers x_46185 / x_46186.
                {
                    int32_t skip_threads_46191;
                    
                    // read input for in-block scan
                    {
                        if (squot32(local_tid_46176, 32) == 0 &&
                            ltid_in_bounds_46188) {
                            x_46186 = ((volatile __local
                                        float *) red_arr_mem_46180)[sext_i32_i64(local_tid_46176)];
                            if ((local_tid_46176 - squot32(local_tid_46176,
                                                           32) * 32) == 0) {
                                x_46185 = x_46186;
                            }
                        }
                    }
                    // in-block scan (hopefully no barriers needed)
                    {
                        skip_threads_46191 = 1;
                        while (slt32(skip_threads_46191, 32)) {
                            if (sle32(skip_threads_46191, local_tid_46176 -
                                      squot32(local_tid_46176, 32) * 32) &&
                                (squot32(local_tid_46176, 32) == 0 &&
                                 ltid_in_bounds_46188)) {
                                // read operands
                                {
                                    x_46185 = ((volatile __local
                                                float *) red_arr_mem_46180)[sext_i32_i64(local_tid_46176) -
                                                                            sext_i32_i64(skip_threads_46191)];
                                }
                                // perform operation
                                // Slot t now stands for the last element of
                                // block t, at position t * 32 + 31, so the
                                // segment-crossing test is done on those
                                // positions rather than on local_tid itself.
                                {
                                    bool inactive_46192 =
                                         slt64(srem64(sext_i32_i64(local_tid_46176 *
                                                      32 + 32 - 1),
                                                      i32_res_29181),
                                               sext_i32_i64(local_tid_46176 *
                                               32 + 32 - 1) -
                                               sext_i32_i64((local_tid_46176 -
                                                             skip_threads_46191) *
                                               32 + 32 - 1));
                                    
                                    if (inactive_46192) {
                                        x_46185 = x_46186;
                                    }
                                    if (!inactive_46192) {
                                        float defunc_1_op_res_46187 = x_46185 +
                                              x_46186;
                                        
                                        x_46185 = defunc_1_op_res_46187;
                                    }
                                }
                            }
                            if (sle32(wave_sizze_46178, skip_threads_46191)) {
                                barrier(CLK_LOCAL_MEM_FENCE);
                            }
                            if (sle32(skip_threads_46191, local_tid_46176 -
                                      squot32(local_tid_46176, 32) * 32) &&
                                (squot32(local_tid_46176, 32) == 0 &&
                                 ltid_in_bounds_46188)) {
                                // write result
                                {
                                    ((volatile __local
                                      float *) red_arr_mem_46180)[sext_i32_i64(local_tid_46176)] =
                                        x_46185;
                                    x_46186 = x_46185;
                                }
                            }
                            if (sle32(wave_sizze_46178, skip_threads_46191)) {
                                barrier(CLK_LOCAL_MEM_FENCE);
                            }
                            skip_threads_46191 *= 2;
                        }
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // carry-in for every block except the first
                {
                    if (!(squot32(local_tid_46176, 32) == 0 ||
                          !ltid_in_bounds_46188)) {
                        // read operands
                        // The in-block result moves to x_40784; x_40783
                        // receives the scanned total of the preceding block
                        // (slot block index - 1).
                        {
                            x_40784 = x_40783;
                            x_40783 = ((__local
                                        float *) red_arr_mem_46180)[sext_i32_i64(squot32(local_tid_46176,
                                                                                         32)) -
                                                                    (int64_t) 1];
                        }
                        // perform operation
                        // The distance here is measured from local_tid to the
                        // last element of the preceding block
                        // ((local_tid / 32) * 32 - 1); the carry is dropped
                        // when that element lies in a different segment.
                        {
                            bool inactive_46193 =
                                 slt64(srem64(sext_i32_i64(local_tid_46176),
                                              i32_res_29181),
                                       sext_i32_i64(local_tid_46176) -
                                       sext_i32_i64(squot32(local_tid_46176,
                                                            32) * 32 - 1));
                            
                            if (inactive_46193) {
                                x_40783 = x_40784;
                            }
                            if (!inactive_46193) {
                                float defunc_1_op_res_40785 = x_40783 + x_40784;
                                
                                x_40783 = defunc_1_op_res_40785;
                            }
                        }
                        // write final result
                        {
                            ((__local
                              float *) red_arr_mem_46180)[sext_i32_i64(local_tid_46176)] =
                                x_40783;
                        }
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // restore correct values for first block
                // NOTE(review): this store is not guarded by
                // ltid_in_bounds_46188, so a first-block thread that is out
                // of bounds writes an x_40784 that was never assigned. Those
                // slots look unread by the final save as long as
                // segment_sizze_nonzzero equals i32_res - confirm.
                {
                    if (squot32(local_tid_46176, 32) == 0) {
                        ((__local
                          float *) red_arr_mem_46180)[sext_i32_i64(local_tid_46176)] =
                            x_40784;
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
            }
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        // save final values of segments
        // Thread t (for t < segments per group, and while the flat segment
        // index is below m * N) copies the last scanned element of local
        // segment t, at slot (t + 1) * segment_sizze_nonzzero - 1, to the
        // output. The output index is quotient * N + remainder of the flat
        // segment index, which is the flat index itself.
        {
            if (slt64(sext_i32_i64(virt_group_id_46184) *
                      squot64(segred_group_sizze_40779,
                              segment_sizze_nonzzero_46173) +
                      sext_i32_i64(local_tid_46176), m_29166 * N_29165) &&
                slt64(sext_i32_i64(local_tid_46176),
                      squot64(segred_group_sizze_40779,
                              segment_sizze_nonzzero_46173))) {
                ((__global
                  float *) mem_45134)[squot64(sext_i32_i64(virt_group_id_46184) *
                                              squot64(segred_group_sizze_40779,
                                                      segment_sizze_nonzzero_46173) +
                                              sext_i32_i64(local_tid_46176),
                                              N_29165) * N_29165 +
                                      (sext_i32_i64(virt_group_id_46184) *
                                       squot64(segred_group_sizze_40779,
                                               segment_sizze_nonzzero_46173) +
                                       sext_i32_i64(local_tid_46176) -
                                       squot64(sext_i32_i64(virt_group_id_46184) *
                                               squot64(segred_group_sizze_40779,
                                                       segment_sizze_nonzzero_46173) +
                                               sext_i32_i64(local_tid_46176),
                                               N_29165) * N_29165)] = ((__local
                                                                        float *) red_arr_mem_46180)[(sext_i32_i64(local_tid_46176) +
                                                                                                     (int64_t) 1) *
                                                                                                    segment_sizze_nonzzero_46173 -
                                                                                                    (int64_t) 1];
            }
        }
        // Two consecutive barriers close the iteration, before the local
        // array is overwritten by the next virtual group; the second one
        // additionally fences global memory.
        barrier(CLK_LOCAL_MEM_FENCE);
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }
    
    // No goto in this kernel targets error_1; the label only marks the
    // common exit.
  error_1:
    return;
    #undef segred_group_sizze_40779
}
// Kernel mainzisegred_small_41311: segmented sum of squares, producing one
// float per segment (row).
//
// For row gtid_41302 and column gtid_41310 the mapped value is
//   v = (column < count[row]) ? input[row * N_29165 + column] : 0.0F
// and the value that is reduced with + over the segment is v * v.
// A work-group processes squot64(group size, segment_sizze_nonzzero_46425)
// segments at once; the reduction is done as a segmented inclusive scan over
// the __local scratch array, after which the last slot of every segment
// holds that segment's total.
//
// Parameters:
//   global_failure       - global error slot. If failure_is_an_option is
//                          nonzero and this is already >= 0 the kernel returns
//                          immediately. A failed bounds check stores code 18
//                          here (only if the slot still holds -1).
//   failure_is_an_option - enables the early-exit check described above.
//   global_failure_args  - on a failed bounds check receives the offending
//                          index in [0] and N_29165 in [1].
//   red_arr_mem_46432_backing_aligned_0
//                        - __local scratch, accessed as a float array indexed
//                          by local thread id.
//   N_29165              - row stride (in floats) of the input array, and the
//                          exclusive upper bound of the column bounds check.
//   m_29166              - number of segments (rows).
//   i32_res_29175        - number of elements reduced per segment.
//   num_groups_41362     - stride between the virtual group ids handled by one
//                          physical group (presumably the number of launched
//                          work-groups; TODO confirm on the host side).
//   segment_sizze_nonzzero_46425
//                        - divisor used to compute segments per group
//                          (presumably i32_res_29175 clamped to at least 1 so
//                          the division is safe; TODO confirm on the host side).
//   defunc_4_map_res_mem_45178
//                        - input, read as float at [row * N_29165 + column].
//   mem_45232            - per-row int32 count of leading columns that
//                          contribute; read at [row].
//   mem_45235            - output, written as float at [row].
__kernel void mainzisegred_small_41311(__global int *global_failure,
                                       int failure_is_an_option, __global
                                       int64_t *global_failure_args,
                                       __local volatile
                                       int64_t *red_arr_mem_46432_backing_aligned_0,
                                       int64_t N_29165, int64_t m_29166,
                                       int64_t i32_res_29175,
                                       int64_t num_groups_41362,
                                       int64_t segment_sizze_nonzzero_46425,
                                       __global
                                       unsigned char *defunc_4_map_res_mem_45178,
                                       __global unsigned char *mem_45232,
                                       __global unsigned char *mem_45235)
{
    // Work-group size; the macro on the right-hand side is defined outside
    // this kernel.
    #define segred_group_sizze_41361 (mainzisegred_group_sizze_41305)
    
    // block_dim0..2 are not referenced anywhere in this kernel.
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict red_arr_mem_46432_backing_0 =
                          (__local volatile
                           char *) red_arr_mem_46432_backing_aligned_0;
    // Group-wide failure flag: set by any work-item whose bounds check fails,
    // inspected by all work-items after the barrier at error_0.
    volatile __local bool local_failure;
    
    // Do nothing if an earlier failure has already been recorded.
    if (failure_is_an_option) {
        int failed = *global_failure >= 0;
        
        if (failed)
            return;
    }
    local_failure = false;
    barrier(CLK_LOCAL_MEM_FENCE);
    
    int32_t global_tid_46427;
    int32_t local_tid_46428;
    int64_t group_sizze_46431;
    int32_t wave_sizze_46430;
    int32_t group_tid_46429;
    
    global_tid_46427 = get_global_id(0);
    local_tid_46428 = get_local_id(0);
    group_sizze_46431 = get_local_size(0);
    // LOCKSTEP_WIDTH is defined outside this kernel; it decides below
    // whether the in-block scan steps need explicit barriers.
    wave_sizze_46430 = LOCKSTEP_WIDTH;
    group_tid_46429 = get_group_id(0);
    
    int32_t phys_tid_41311;
    
    phys_tid_41311 = global_tid_46427;
    
    __local char *red_arr_mem_46432;
    
    red_arr_mem_46432 = (__local char *) red_arr_mem_46432_backing_0;
    
    int32_t phys_group_id_46434;
    
    phys_group_id_46434 = get_group_id(0);
    // Virtualisation loop: this physical group handles the virtual groups
    // phys_group_id, phys_group_id + num_groups, phys_group_id + 2 * num_groups,
    // ... The total number of virtual groups is
    // sdiv_up64(m_29166, segments per group); sdiv_up32/sdiv_up64 are helpers
    // defined elsewhere (presumably rounding-up division; TODO confirm).
    for (int32_t i_46435 = 0; i_46435 <
         sdiv_up32(sext_i64_i32(sdiv_up64(m_29166,
                                          squot64(segred_group_sizze_41361,
                                                  segment_sizze_nonzzero_46425))) -
                   phys_group_id_46434, sext_i64_i32(num_groups_41362));
         i_46435++) {
        int32_t virt_group_id_46436 = phys_group_id_46434 + i_46435 *
                sext_i64_i32(num_groups_41362);
        // gtid_41302: segment (row) index handled by this work-item.
        int64_t gtid_41302 = squot64(sext_i32_i64(local_tid_46428),
                                     segment_sizze_nonzzero_46425) +
                sext_i32_i64(virt_group_id_46436) *
                squot64(segred_group_sizze_41361, segment_sizze_nonzzero_46425);
        // gtid_41310: position of this work-item inside its segment (column).
        int64_t gtid_41310 = srem64(sext_i32_i64(local_tid_46428),
                                    i32_res_29175);
        
        // apply map function if in bounds
        // In bounds means: segments are non-empty, the row exists, and the
        // local thread id falls inside the part of the group that is covered
        // by whole segments. All other work-items store the neutral 0.0F.
        {
            if (slt64((int64_t) 0, i32_res_29175) && (slt64(gtid_41302,
                                                            m_29166) &&
                                                      slt64(sext_i32_i64(local_tid_46428),
                                                            i32_res_29175 *
                                                            squot64(segred_group_sizze_41361,
                                                                    segment_sizze_nonzzero_46425)))) {
                // Per-row count of contributing columns.
                int32_t defunc_0_f_res_41369 = ((__global
                                                 int32_t *) mem_45232)[gtid_41302];
                int32_t index_primexp_42385 = sext_i64_i32(gtid_41310);
                bool cond_41371 = slt32(index_primexp_42385,
                                        defunc_0_f_res_41369);
                float defunc_0_f_res_41372;
                
                if (cond_41371) {
                    int64_t i_41373 = sext_i32_i64(index_primexp_42385);
                    bool x_41374 = sle64((int64_t) 0, i_41373);
                    bool y_41375 = slt64(i_41373, N_29165);
                    bool bounds_check_41376 = x_41374 && y_41375;
                    bool index_certs_41377;
                    
                    // Column index outside [0, N_29165): record failure 18
                    // (first failure wins via the compare-and-swap), flag the
                    // whole group, and jump to the common barrier at error_0.
                    if (!bounds_check_41376) {
                        {
                            if (atomic_cmpxchg_i32_global(global_failure, -1,
                                                          18) == -1) {
                                global_failure_args[0] = i_41373;
                                global_failure_args[1] = N_29165;
                                ;
                            }
                            local_failure = true;
                            goto error_0;
                        }
                    }
                    
                    float defunc_0_f_res_t_res_41378 = ((__global
                                                         float *) defunc_4_map_res_mem_45178)[gtid_41302 *
                                                                                              N_29165 +
                                                                                              i_41373];
                    
                    defunc_0_f_res_41372 = defunc_0_f_res_t_res_41378;
                } else {
                    defunc_0_f_res_41372 = 0.0F;
                }
                
                // Square of the selected value; this is what gets summed.
                float defunc_0_f_res_41379 = defunc_0_f_res_41372 *
                      defunc_0_f_res_41372;
                
                // save map-out results
                { }
                // save results to be reduced
                {
                    ((__local
                      float *) red_arr_mem_46432)[sext_i32_i64(local_tid_46428)] =
                        defunc_0_f_res_41379;
                }
            } else {
                ((__local
                  float *) red_arr_mem_46432)[sext_i32_i64(local_tid_46428)] =
                    0.0F;
            }
        }
        
        // Every work-item of the group reaches this barrier, including those
        // that jumped here after a failed bounds check. local_failure lives in
        // __local memory, so after the barrier the whole group reads the same
        // flag value and returns together.
      error_0:
        barrier(CLK_LOCAL_MEM_FENCE);
        if (local_failure)
            return;
        barrier(CLK_LOCAL_MEM_FENCE);
        if (slt64((int64_t) 0, i32_res_29175)) {
            // perform segmented scan to imitate reduction
            // The scan works on blocks of 32 consecutive work-items (the
            // constant 32 is hard-coded throughout this section):
            //   1. inclusive scan inside every block,
            //   2. the last work-item of block i publishes its total in slot i,
            //   3. block 0 scans those totals,
            //   4. every other block adds the carry-in from slot (block - 1),
            //   5. block 0 restores its own results, which steps 2-3 overwrote.
            {
                // x_41365 / x_41366: accumulator and own value for steps 1 and 4.
                // x_46437 / x_46438: the same roles for step 3.
                float x_41365;
                float x_41366;
                float x_46437;
                float x_46438;
                bool ltid_in_bounds_46440;
                
                ltid_in_bounds_46440 = slt64(sext_i32_i64(local_tid_46428),
                                             i32_res_29175 *
                                             squot64(segred_group_sizze_41361,
                                                     segment_sizze_nonzzero_46425));
                
                int32_t skip_threads_46441;
                
                // read input for in-block scan
                {
                    if (ltid_in_bounds_46440) {
                        x_41366 = ((volatile __local
                                    float *) red_arr_mem_46432)[sext_i32_i64(local_tid_46428)];
                        // The first work-item of a block never enters the
                        // scan loop body, so seed its accumulator here.
                        if ((local_tid_46428 - squot32(local_tid_46428, 32) *
                             32) == 0) {
                            x_41365 = x_41366;
                        }
                    }
                }
                // in-block scan (hopefully no barriers needed)
                // skip_threads doubles each round (1, 2, 4, 8, 16). Barriers
                // are only issued for rounds where wave_sizze <= skip_threads.
                {
                    skip_threads_46441 = 1;
                    while (slt32(skip_threads_46441, 32)) {
                        if (sle32(skip_threads_46441, local_tid_46428 -
                                  squot32(local_tid_46428, 32) * 32) &&
                            ltid_in_bounds_46440) {
                            // read operands
                            {
                                x_41365 = ((volatile __local
                                            float *) red_arr_mem_46432)[sext_i32_i64(local_tid_46428) -
                                                                        sext_i32_i64(skip_threads_46441)];
                            }
                            // perform operation
                            // inactive: the position inside the segment is
                            // smaller than skip_threads, i.e. the operand just
                            // read belongs to an earlier segment and must not
                            // be combined; keep the own value instead.
                            {
                                bool inactive_46442 =
                                     slt64(srem64(sext_i32_i64(local_tid_46428),
                                                  i32_res_29175),
                                           sext_i32_i64(local_tid_46428) -
                                           sext_i32_i64(local_tid_46428 -
                                           skip_threads_46441));
                                
                                if (inactive_46442) {
                                    x_41365 = x_41366;
                                }
                                if (!inactive_46442) {
                                    float defunc_1_op_res_41367 = x_41365 +
                                          x_41366;
                                    
                                    x_41365 = defunc_1_op_res_41367;
                                }
                            }
                        }
                        if (sle32(wave_sizze_46430, skip_threads_46441)) {
                            barrier(CLK_LOCAL_MEM_FENCE);
                        }
                        if (sle32(skip_threads_46441, local_tid_46428 -
                                  squot32(local_tid_46428, 32) * 32) &&
                            ltid_in_bounds_46440) {
                            // write result
                            {
                                ((volatile __local
                                  float *) red_arr_mem_46432)[sext_i32_i64(local_tid_46428)] =
                                    x_41365;
                                x_41366 = x_41365;
                            }
                        }
                        if (sle32(wave_sizze_46430, skip_threads_46441)) {
                            barrier(CLK_LOCAL_MEM_FENCE);
                        }
                        skip_threads_46441 *= 2;
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // last thread of block 'i' writes its result to offset 'i'
                // This overwrites the low slots of the scratch array, which
                // is why block 0 restores its values at the end.
                {
                    if ((local_tid_46428 - squot32(local_tid_46428, 32) * 32) ==
                        31 && ltid_in_bounds_46440) {
                        ((volatile __local
                          float *) red_arr_mem_46432)[sext_i32_i64(squot32(local_tid_46428,
                                                                           32))] =
                            x_41365;
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // scan the first block, after which offset 'i' contains carry-in for block 'i+1'
                // Only work-items of block 0 take part; work-item j stands in
                // for the last element of block j, i.e. element j * 32 + 31.
                {
                    int32_t skip_threads_46443;
                    
                    // read input for in-block scan
                    {
                        if (squot32(local_tid_46428, 32) == 0 &&
                            ltid_in_bounds_46440) {
                            x_46438 = ((volatile __local
                                        float *) red_arr_mem_46432)[sext_i32_i64(local_tid_46428)];
                            if ((local_tid_46428 - squot32(local_tid_46428,
                                                           32) * 32) == 0) {
                                x_46437 = x_46438;
                            }
                        }
                    }
                    // in-block scan (hopefully no barriers needed)
                    {
                        skip_threads_46443 = 1;
                        while (slt32(skip_threads_46443, 32)) {
                            if (sle32(skip_threads_46443, local_tid_46428 -
                                      squot32(local_tid_46428, 32) * 32) &&
                                (squot32(local_tid_46428, 32) == 0 &&
                                 ltid_in_bounds_46440)) {
                                // read operands
                                {
                                    x_46437 = ((volatile __local
                                                float *) red_arr_mem_46432)[sext_i32_i64(local_tid_46428) -
                                                                            sext_i32_i64(skip_threads_46443)];
                                }
                                // perform operation
                                // Same segment-boundary test as above, but
                                // evaluated on the element indices the block
                                // totals represent (tid * 32 + 31).
                                {
                                    bool inactive_46444 =
                                         slt64(srem64(sext_i32_i64(local_tid_46428 *
                                                      32 + 32 - 1),
                                                      i32_res_29175),
                                               sext_i32_i64(local_tid_46428 *
                                               32 + 32 - 1) -
                                               sext_i32_i64((local_tid_46428 -
                                                             skip_threads_46443) *
                                               32 + 32 - 1));
                                    
                                    if (inactive_46444) {
                                        x_46437 = x_46438;
                                    }
                                    if (!inactive_46444) {
                                        float defunc_1_op_res_46439 = x_46437 +
                                              x_46438;
                                        
                                        x_46437 = defunc_1_op_res_46439;
                                    }
                                }
                            }
                            if (sle32(wave_sizze_46430, skip_threads_46443)) {
                                barrier(CLK_LOCAL_MEM_FENCE);
                            }
                            if (sle32(skip_threads_46443, local_tid_46428 -
                                      squot32(local_tid_46428, 32) * 32) &&
                                (squot32(local_tid_46428, 32) == 0 &&
                                 ltid_in_bounds_46440)) {
                                // write result
                                {
                                    ((volatile __local
                                      float *) red_arr_mem_46432)[sext_i32_i64(local_tid_46428)] =
                                        x_46437;
                                    x_46438 = x_46437;
                                }
                            }
                            if (sle32(wave_sizze_46430, skip_threads_46443)) {
                                barrier(CLK_LOCAL_MEM_FENCE);
                            }
                            skip_threads_46443 *= 2;
                        }
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // carry-in for every block except the first
                {
                    if (!(squot32(local_tid_46428, 32) == 0 ||
                          !ltid_in_bounds_46440)) {
                        // read operands
                        // Own in-block result moves to x_41366; the carry-in
                        // is the scanned total stored in slot (block - 1).
                        {
                            x_41366 = x_41365;
                            x_41365 = ((__local
                                        float *) red_arr_mem_46432)[sext_i32_i64(squot32(local_tid_46428,
                                                                                         32)) -
                                                                    (int64_t) 1];
                        }
                        // perform operation
                        // inactive: the position inside the segment is smaller
                        // than the distance back to the last element of the
                        // previous block, so the carry-in comes from an
                        // earlier segment and is discarded.
                        {
                            bool inactive_46445 =
                                 slt64(srem64(sext_i32_i64(local_tid_46428),
                                              i32_res_29175),
                                       sext_i32_i64(local_tid_46428) -
                                       sext_i32_i64(squot32(local_tid_46428,
                                                            32) * 32 - 1));
                            
                            if (inactive_46445) {
                                x_41365 = x_41366;
                            }
                            if (!inactive_46445) {
                                float defunc_1_op_res_41367 = x_41365 + x_41366;
                                
                                x_41365 = defunc_1_op_res_41367;
                            }
                        }
                        // write final result
                        {
                            ((__local
                              float *) red_arr_mem_46432)[sext_i32_i64(local_tid_46428)] =
                                x_41365;
                        }
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // restore correct values for first block
                // NOTE(review): there is no ltid_in_bounds check here, so an
                // out-of-bounds work-item in block 0 stores an x_41366 that was
                // never assigned into its own slot. This looks harmless as long
                // as the final read below only touches in-bounds slots; confirm.
                {
                    if (squot32(local_tid_46428, 32) == 0) {
                        ((__local
                          float *) red_arr_mem_46432)[sext_i32_i64(local_tid_46428)] =
                            x_41366;
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
            }
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        // save final values of segments
        // Work-item t (for t below the number of segments per group, and only
        // if its row index is below m_29166) copies the last scan slot of
        // local segment t, index (t + 1) * segment_sizze_nonzzero - 1, to the
        // output entry of that row.
        {
            if (slt64(sext_i32_i64(virt_group_id_46436) *
                      squot64(segred_group_sizze_41361,
                              segment_sizze_nonzzero_46425) +
                      sext_i32_i64(local_tid_46428), m_29166) &&
                slt64(sext_i32_i64(local_tid_46428),
                      squot64(segred_group_sizze_41361,
                              segment_sizze_nonzzero_46425))) {
                ((__global
                  float *) mem_45235)[sext_i32_i64(virt_group_id_46436) *
                                      squot64(segred_group_sizze_41361,
                                              segment_sizze_nonzzero_46425) +
                                      sext_i32_i64(local_tid_46428)] = ((__local
                                                                         float *) red_arr_mem_46432)[(sext_i32_i64(local_tid_46428) +
                                                                                                      (int64_t) 1) *
                                                                                                     segment_sizze_nonzzero_46425 -
                                                                                                     (int64_t) 1];
            }
        }
        // Synchronise before the next virtual group reuses the scratch array.
        barrier(CLK_LOCAL_MEM_FENCE);
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }
    
    // No goto in this kernel targets error_1.
  error_1:
    return;
    #undef segred_group_sizze_41361
}
__kernel void mainzisegred_small_41336(__global int *global_failure,
                                       __local volatile
                                       int64_t *red_arr_mem_46372_backing_aligned_0,
                                       int64_t N_29165, int64_t m_29166,
                                       int64_t i32_res_29175,
                                       int64_t num_groups_41348,
                                       int64_t segment_sizze_nonzzero_46365,
                                       __global unsigned char *images_mem_44381,
                                       __global unsigned char *mem_45232)
{
    #define segred_group_sizze_41347 (mainzisegred_group_sizze_41330)
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict red_arr_mem_46372_backing_0 =
                          (__local volatile
                           char *) red_arr_mem_46372_backing_aligned_0;
    
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_46367;
    int32_t local_tid_46368;
    int64_t group_sizze_46371;
    int32_t wave_sizze_46370;
    int32_t group_tid_46369;
    
    global_tid_46367 = get_global_id(0);
    local_tid_46368 = get_local_id(0);
    group_sizze_46371 = get_local_size(0);
    wave_sizze_46370 = LOCKSTEP_WIDTH;
    group_tid_46369 = get_group_id(0);
    
    int32_t phys_tid_41336;
    
    phys_tid_41336 = global_tid_46367;
    
    __local char *red_arr_mem_46372;
    
    red_arr_mem_46372 = (__local char *) red_arr_mem_46372_backing_0;
    
    int32_t phys_group_id_46374;
    
    phys_group_id_46374 = get_group_id(0);
    for (int32_t i_46375 = 0; i_46375 <
         sdiv_up32(sext_i64_i32(sdiv_up64(m_29166,
                                          squot64(segred_group_sizze_41347,
                                                  segment_sizze_nonzzero_46365))) -
                   phys_group_id_46374, sext_i64_i32(num_groups_41348));
         i_46375++) {
        int32_t virt_group_id_46376 = phys_group_id_46374 + i_46375 *
                sext_i64_i32(num_groups_41348);
        int64_t gtid_41327 = squot64(sext_i32_i64(local_tid_46368),
                                     segment_sizze_nonzzero_46365) +
                sext_i32_i64(virt_group_id_46376) *
                squot64(segred_group_sizze_41347, segment_sizze_nonzzero_46365);
        int64_t gtid_41335 = srem64(sext_i32_i64(local_tid_46368),
                                    i32_res_29175);
        
        // apply map function if in bounds
        {
            if (slt64((int64_t) 0, i32_res_29175) && (slt64(gtid_41327,
                                                            m_29166) &&
                                                      slt64(sext_i32_i64(local_tid_46368),
                                                            i32_res_29175 *
                                                            squot64(segred_group_sizze_41347,
                                                                    segment_sizze_nonzzero_46365)))) {
                float x_41355 = ((__global
                                  float *) images_mem_44381)[gtid_41327 *
                                                             N_29165 +
                                                             gtid_41335];
                bool isnan_res_41356;
                
                isnan_res_41356 = futrts_isnan32(x_41355);
                
                bool cond_41357 = !isnan_res_41356;
                int32_t defunc_0_f_res_41358 = btoi_bool_i32(cond_41357);
                
                // save map-out results
                { }
                // save results to be reduced
                {
                    ((__local
                      int32_t *) red_arr_mem_46372)[sext_i32_i64(local_tid_46368)] =
                        defunc_0_f_res_41358;
                }
            } else {
                ((__local
                  int32_t *) red_arr_mem_46372)[sext_i32_i64(local_tid_46368)] =
                    0;
            }
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        if (slt64((int64_t) 0, i32_res_29175)) {
            // perform segmented scan to imitate reduction
            {
                int32_t x_41351;
                int32_t x_41352;
                int32_t x_46377;
                int32_t x_46378;
                bool ltid_in_bounds_46380;
                
                ltid_in_bounds_46380 = slt64(sext_i32_i64(local_tid_46368),
                                             i32_res_29175 *
                                             squot64(segred_group_sizze_41347,
                                                     segment_sizze_nonzzero_46365));
                
                int32_t skip_threads_46381;
                
                // read input for in-block scan
                {
                    if (ltid_in_bounds_46380) {
                        x_41352 = ((volatile __local
                                    int32_t *) red_arr_mem_46372)[sext_i32_i64(local_tid_46368)];
                        if ((local_tid_46368 - squot32(local_tid_46368, 32) *
                             32) == 0) {
                            x_41351 = x_41352;
                        }
                    }
                }
                // in-block scan (hopefully no barriers needed)
                {
                    skip_threads_46381 = 1;
                    while (slt32(skip_threads_46381, 32)) {
                        if (sle32(skip_threads_46381, local_tid_46368 -
                                  squot32(local_tid_46368, 32) * 32) &&
                            ltid_in_bounds_46380) {
                            // read operands
                            {
                                x_41351 = ((volatile __local
                                            int32_t *) red_arr_mem_46372)[sext_i32_i64(local_tid_46368) -
                                                                          sext_i32_i64(skip_threads_46381)];
                            }
                            // perform operation
                            {
                                bool inactive_46382 =
                                     slt64(srem64(sext_i32_i64(local_tid_46368),
                                                  i32_res_29175),
                                           sext_i32_i64(local_tid_46368) -
                                           sext_i32_i64(local_tid_46368 -
                                           skip_threads_46381));
                                
                                if (inactive_46382) {
                                    x_41351 = x_41352;
                                }
                                if (!inactive_46382) {
                                    int32_t defunc_1_op_res_41353 =
                                            add32(x_41351, x_41352);
                                    
                                    x_41351 = defunc_1_op_res_41353;
                                }
                            }
                        }
                        if (sle32(wave_sizze_46370, skip_threads_46381)) {
                            barrier(CLK_LOCAL_MEM_FENCE);
                        }
                        if (sle32(skip_threads_46381, local_tid_46368 -
                                  squot32(local_tid_46368, 32) * 32) &&
                            ltid_in_bounds_46380) {
                            // write result
                            {
                                ((volatile __local
                                  int32_t *) red_arr_mem_46372)[sext_i32_i64(local_tid_46368)] =
                                    x_41351;
                                x_41352 = x_41351;
                            }
                        }
                        if (sle32(wave_sizze_46370, skip_threads_46381)) {
                            barrier(CLK_LOCAL_MEM_FENCE);
                        }
                        skip_threads_46381 *= 2;
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // last thread of block 'i' writes its result to offset 'i'
                {
                    if ((local_tid_46368 - squot32(local_tid_46368, 32) * 32) ==
                        31 && ltid_in_bounds_46380) {
                        ((volatile __local
                          int32_t *) red_arr_mem_46372)[sext_i32_i64(squot32(local_tid_46368,
                                                                             32))] =
                            x_41351;
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // scan the first block, after which offset 'i' contains carry-in for block 'i+1'
                {
                    int32_t skip_threads_46383;
                    
                    // read input for in-block scan
                    {
                        if (squot32(local_tid_46368, 32) == 0 &&
                            ltid_in_bounds_46380) {
                            x_46378 = ((volatile __local
                                        int32_t *) red_arr_mem_46372)[sext_i32_i64(local_tid_46368)];
                            if ((local_tid_46368 - squot32(local_tid_46368,
                                                           32) * 32) == 0) {
                                x_46377 = x_46378;
                            }
                        }
                    }
                    // in-block scan (hopefully no barriers needed)
                    {
                        skip_threads_46383 = 1;
                        while (slt32(skip_threads_46383, 32)) {
                            if (sle32(skip_threads_46383, local_tid_46368 -
                                      squot32(local_tid_46368, 32) * 32) &&
                                (squot32(local_tid_46368, 32) == 0 &&
                                 ltid_in_bounds_46380)) {
                                // read operands
                                {
                                    x_46377 = ((volatile __local
                                                int32_t *) red_arr_mem_46372)[sext_i32_i64(local_tid_46368) -
                                                                              sext_i32_i64(skip_threads_46383)];
                                }
                                // perform operation
                                {
                                    bool inactive_46384 =
                                         slt64(srem64(sext_i32_i64(local_tid_46368 *
                                                      32 + 32 - 1),
                                                      i32_res_29175),
                                               sext_i32_i64(local_tid_46368 *
                                               32 + 32 - 1) -
                                               sext_i32_i64((local_tid_46368 -
                                                             skip_threads_46383) *
                                               32 + 32 - 1));
                                    
                                    if (inactive_46384) {
                                        x_46377 = x_46378;
                                    }
                                    if (!inactive_46384) {
                                        int32_t defunc_1_op_res_46379 =
                                                add32(x_46377, x_46378);
                                        
                                        x_46377 = defunc_1_op_res_46379;
                                    }
                                }
                            }
                            if (sle32(wave_sizze_46370, skip_threads_46383)) {
                                barrier(CLK_LOCAL_MEM_FENCE);
                            }
                            if (sle32(skip_threads_46383, local_tid_46368 -
                                      squot32(local_tid_46368, 32) * 32) &&
                                (squot32(local_tid_46368, 32) == 0 &&
                                 ltid_in_bounds_46380)) {
                                // write result
                                {
                                    ((volatile __local
                                      int32_t *) red_arr_mem_46372)[sext_i32_i64(local_tid_46368)] =
                                        x_46377;
                                    x_46378 = x_46377;
                                }
                            }
                            if (sle32(wave_sizze_46370, skip_threads_46383)) {
                                barrier(CLK_LOCAL_MEM_FENCE);
                            }
                            skip_threads_46383 *= 2;
                        }
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // carry-in for every block except the first
                {
                    if (!(squot32(local_tid_46368, 32) == 0 ||
                          !ltid_in_bounds_46380)) {
                        // read operands
                        {
                            x_41352 = x_41351;
                            x_41351 = ((__local
                                        int32_t *) red_arr_mem_46372)[sext_i32_i64(squot32(local_tid_46368,
                                                                                           32)) -
                                                                      (int64_t) 1];
                        }
                        // perform operation
                        {
                            bool inactive_46385 =
                                 slt64(srem64(sext_i32_i64(local_tid_46368),
                                              i32_res_29175),
                                       sext_i32_i64(local_tid_46368) -
                                       sext_i32_i64(squot32(local_tid_46368,
                                                            32) * 32 - 1));
                            
                            if (inactive_46385) {
                                x_41351 = x_41352;
                            }
                            if (!inactive_46385) {
                                int32_t defunc_1_op_res_41353 = add32(x_41351,
                                                                      x_41352);
                                
                                x_41351 = defunc_1_op_res_41353;
                            }
                        }
                        // write final result
                        {
                            ((__local
                              int32_t *) red_arr_mem_46372)[sext_i32_i64(local_tid_46368)] =
                                x_41351;
                        }
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // restore correct values for first block
                {
                    if (squot32(local_tid_46368, 32) == 0) {
                        ((__local
                          int32_t *) red_arr_mem_46372)[sext_i32_i64(local_tid_46368)] =
                            x_41352;
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
            }
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        // save final values of segments
        {
            if (slt64(sext_i32_i64(virt_group_id_46376) *
                      squot64(segred_group_sizze_41347,
                              segment_sizze_nonzzero_46365) +
                      sext_i32_i64(local_tid_46368), m_29166) &&
                slt64(sext_i32_i64(local_tid_46368),
                      squot64(segred_group_sizze_41347,
                              segment_sizze_nonzzero_46365))) {
                ((__global
                  int32_t *) mem_45232)[sext_i32_i64(virt_group_id_46376) *
                                        squot64(segred_group_sizze_41347,
                                                segment_sizze_nonzzero_46365) +
                                        sext_i32_i64(local_tid_46368)] =
                    ((__local
                      int32_t *) red_arr_mem_46372)[(sext_i32_i64(local_tid_46368) +
                                                     (int64_t) 1) *
                                                    segment_sizze_nonzzero_46365 -
                                                    (int64_t) 1];
            }
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }
    
  error_1:
    return;
    #undef segred_group_sizze_41347
}
// Kernel mainzisegred_small_41499: segmented float sum for segments that
// are small enough for several of them to be handled by one workgroup.
//
// Per pass, a workgroup handles
//   squot64(segred_group_sizze_41520, segment_sizze_nonzzero_46534)
// segments.  The outer loop walks "virtual" group ids so that the
// num_groups_41521 physical groups together cover all segments.  One pass:
//   1. every in-bounds thread computes one element (the map step) and
//      stores it in the local array red_arr_mem_46541; all other threads
//      store 0.0F;
//   2. a segmented scan using float addition is run over the local array;
//   3. the last element of each segment is written to mem_45278.
//
// Arguments, as they are used in this kernel:
//   global_failure, global_failure_args - failure code slot and its
//       arguments; code 22 is recorded when the index check below fails.
//   failure_is_an_option - when nonzero, the kernel returns at once if
//       *global_failure is already >= 0.
//   red_arr_mem_46541_backing_aligned_0 - local scratch memory, accessed
//       as one float per thread.
//   N_29165 - row stride of defunc_4_map_res_mem_45178 and the upper
//       bound of the index check.
//   m_29166 - upper bound on the segment index (number of segments).
//   i32_res_29568 - segment length, used for the in-segment position and
//       for the thread bounds test.
//   segment_sizze_nonzzero_46534 - divisor used where the segment length
//       must not be zero (presumably the length clamped to at least 1 by
//       the host - TODO confirm).
//   defunc_3_map_res_mem_45244, defunc_3_map_res_mem_45245 - int32 inputs,
//       one value per segment.
//   defunc_4_map_res_mem_45178 - float input, read at [segment * N + i].
//   mem_45278 - float output, one value per segment.
__kernel void mainzisegred_small_41499(__global int *global_failure,
                                       int failure_is_an_option, __global
                                       int64_t *global_failure_args,
                                       __local volatile
                                       int64_t *red_arr_mem_46541_backing_aligned_0,
                                       int64_t N_29165, int64_t m_29166,
                                       int64_t i32_res_29568,
                                       int64_t num_groups_41521,
                                       int64_t segment_sizze_nonzzero_46534,
                                       __global
                                       unsigned char *defunc_4_map_res_mem_45178,
                                       __global
                                       unsigned char *defunc_3_map_res_mem_45244,
                                       __global
                                       unsigned char *defunc_3_map_res_mem_45245,
                                       __global unsigned char *mem_45278)
{
    // Workgroup size; mainzisegred_group_sizze_41493 is defined outside
    // this kernel.
    #define segred_group_sizze_41520 (mainzisegred_group_sizze_41493)
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict red_arr_mem_46541_backing_0 =
                          (__local volatile
                           char *) red_arr_mem_46541_backing_aligned_0;
    // Set by any thread whose index check fails; read by the whole group
    // after the barrier at error_0.
    volatile __local bool local_failure;
    
    // Do no work if an earlier failure has already been recorded.
    if (failure_is_an_option) {
        int failed = *global_failure >= 0;
        
        if (failed)
            return;
    }
    local_failure = false;
    barrier(CLK_LOCAL_MEM_FENCE);
    
    int32_t global_tid_46536;
    int32_t local_tid_46537;
    int64_t group_sizze_46540;
    int32_t wave_sizze_46539;
    int32_t group_tid_46538;
    
    global_tid_46536 = get_global_id(0);
    local_tid_46537 = get_local_id(0);
    group_sizze_46540 = get_local_size(0);
    wave_sizze_46539 = LOCKSTEP_WIDTH;
    group_tid_46538 = get_group_id(0);
    
    int32_t phys_tid_41499;
    
    phys_tid_41499 = global_tid_46536;
    
    __local char *red_arr_mem_46541;
    
    red_arr_mem_46541 = (__local char *) red_arr_mem_46541_backing_0;
    
    int32_t phys_group_id_46543;
    
    phys_group_id_46543 = get_group_id(0);
    // Virtual group loop: this physical group handles virtual groups
    // phys_group_id, phys_group_id + num_groups, ...  The trip count is
    // built from sdiv_up32/sdiv_up64 (presumably rounding-up division;
    // the helpers are defined outside this kernel).
    for (int32_t i_46544 = 0; i_46544 <
         sdiv_up32(sext_i64_i32(sdiv_up64(m_29166,
                                          squot64(segred_group_sizze_41520,
                                                  segment_sizze_nonzzero_46534))) -
                   phys_group_id_46543, sext_i64_i32(num_groups_41521));
         i_46544++) {
        int32_t virt_group_id_46545 = phys_group_id_46543 + i_46544 *
                sext_i64_i32(num_groups_41521);
        // gtid_41490: index of the segment this thread belongs to.
        int64_t gtid_41490 = squot64(sext_i32_i64(local_tid_46537),
                                     segment_sizze_nonzzero_46534) +
                sext_i32_i64(virt_group_id_46545) *
                squot64(segred_group_sizze_41520, segment_sizze_nonzzero_46534);
        // gtid_41498: position of this thread inside its segment.
        int64_t gtid_41498 = srem64(sext_i32_i64(local_tid_46537),
                                    i32_res_29568);
        
        // apply map function if in bounds
        {
            // In bounds means: segments are non-empty, the segment index is
            // below m_29166, and the thread falls inside the part of the
            // group that is covered by whole segments.
            if (slt64((int64_t) 0, i32_res_29568) && (slt64(gtid_41490,
                                                            m_29166) &&
                                                      slt64(sext_i32_i64(local_tid_46537),
                                                            i32_res_29568 *
                                                            squot64(segred_group_sizze_41520,
                                                                    segment_sizze_nonzzero_46534)))) {
                int32_t x_41529 = ((__global
                                    int32_t *) defunc_3_map_res_mem_45244)[gtid_41490];
                int32_t index_primexp_42390 = sext_i64_i32(gtid_41498);
                // Only positions below the per-segment value x_41529
                // contribute a loaded value; the rest contribute 0.0F.
                bool cond_41531 = slt32(index_primexp_42390, x_41529);
                float defunc_0_f_res_41532;
                
                if (cond_41531) {
                    int32_t x_41528 = ((__global
                                        int32_t *) defunc_3_map_res_mem_45245)[gtid_41490];
                    // Column index: i = 1 + x_41528 + position - x_41529.
                    int32_t x_41533 = add32(x_41528, index_primexp_42390);
                    int32_t x_41534 = sub32(x_41533, x_41529);
                    int32_t i_41535 = add32(1, x_41534);
                    int64_t i_41536 = sext_i32_i64(i_41535);
                    // Require 0 <= i < N before reading the input row.
                    bool x_41537 = sle64((int64_t) 0, i_41536);
                    bool y_41538 = slt64(i_41536, N_29165);
                    bool bounds_check_41539 = x_41537 && y_41538;
                    bool index_certs_41540;
                    
                    if (!bounds_check_41539) {
                        {
                            // Only the thread that swaps the failure slot
                            // from -1 to 22 records the arguments.
                            if (atomic_cmpxchg_i32_global(global_failure, -1,
                                                          22) == -1) {
                                global_failure_args[0] = i_41536;
                                global_failure_args[1] = N_29165;
                                ;
                            }
                            local_failure = true;
                            goto error_0;
                        }
                    }
                    
                    float defunc_0_f_res_t_res_41541 = ((__global
                                                         float *) defunc_4_map_res_mem_45178)[gtid_41490 *
                                                                                              N_29165 +
                                                                                              i_41536];
                    
                    defunc_0_f_res_41532 = defunc_0_f_res_t_res_41541;
                } else {
                    defunc_0_f_res_41532 = 0.0F;
                }
                // save map-out results
                { }
                // save results to be reduced
                {
                    ((__local
                      float *) red_arr_mem_46541)[sext_i32_i64(local_tid_46537)] =
                        defunc_0_f_res_41532;
                }
            } else {
                // Out-of-bounds threads store 0.0F in their slot.
                ((__local
                  float *) red_arr_mem_46541)[sext_i32_i64(local_tid_46537)] =
                    0.0F;
            }
        }
        
        // Both the failing threads (via goto) and all other threads arrive
        // here, so the whole group takes the same barrier and then agrees
        // on whether to exit.
      error_0:
        barrier(CLK_LOCAL_MEM_FENCE);
        if (local_failure)
            return;
        barrier(CLK_LOCAL_MEM_FENCE);
        if (slt64((int64_t) 0, i32_res_29568)) {
            // perform segmented scan to imitate reduction
            {
                // x_41524 / x_41525: operands of the main scan.
                // x_46546 / x_46547: operands of the scan over block totals.
                float x_41524;
                float x_41525;
                float x_46546;
                float x_46547;
                bool ltid_in_bounds_46549;
                
                ltid_in_bounds_46549 = slt64(sext_i32_i64(local_tid_46537),
                                             i32_res_29568 *
                                             squot64(segred_group_sizze_41520,
                                                     segment_sizze_nonzzero_46534));
                
                int32_t skip_threads_46550;
                
                // read input for in-block scan
                {
                    if (ltid_in_bounds_46549) {
                        x_41525 = ((volatile __local
                                    float *) red_arr_mem_46541)[sext_i32_i64(local_tid_46537)];
                        // The first thread of each 32-thread block never
                        // enters the loop body below, so seed its result.
                        if ((local_tid_46537 - squot32(local_tid_46537, 32) *
                             32) == 0) {
                            x_41524 = x_41525;
                        }
                    }
                }
                // in-block scan (hopefully no barriers needed)
                {
                    // The stride doubles each round (1, 2, 4, 8, 16) within
                    // blocks of 32 threads.
                    skip_threads_46550 = 1;
                    while (slt32(skip_threads_46550, 32)) {
                        if (sle32(skip_threads_46550, local_tid_46537 -
                                  squot32(local_tid_46537, 32) * 32) &&
                            ltid_in_bounds_46549) {
                            // read operands
                            {
                                x_41524 = ((volatile __local
                                            float *) red_arr_mem_46541)[sext_i32_i64(local_tid_46537) -
                                                                        sext_i32_i64(skip_threads_46550)];
                            }
                            // perform operation
                            {
                                // inactive: the position inside the segment
                                // is smaller than the stride, so the element
                                // just read lies in an earlier segment and
                                // must not be added.
                                bool inactive_46551 =
                                     slt64(srem64(sext_i32_i64(local_tid_46537),
                                                  i32_res_29568),
                                           sext_i32_i64(local_tid_46537) -
                                           sext_i32_i64(local_tid_46537 -
                                           skip_threads_46550));
                                
                                if (inactive_46551) {
                                    x_41524 = x_41525;
                                }
                                if (!inactive_46551) {
                                    float defunc_1_op_res_41526 = x_41524 +
                                          x_41525;
                                    
                                    x_41524 = defunc_1_op_res_41526;
                                }
                            }
                        }
                        // A barrier is issued only when wave_sizze is not
                        // larger than the stride (sle32 is presumably a
                        // signed <= comparison; defined outside this kernel).
                        if (sle32(wave_sizze_46539, skip_threads_46550)) {
                            barrier(CLK_LOCAL_MEM_FENCE);
                        }
                        if (sle32(skip_threads_46550, local_tid_46537 -
                                  squot32(local_tid_46537, 32) * 32) &&
                            ltid_in_bounds_46549) {
                            // write result
                            {
                                ((volatile __local
                                  float *) red_arr_mem_46541)[sext_i32_i64(local_tid_46537)] =
                                    x_41524;
                                x_41525 = x_41524;
                            }
                        }
                        if (sle32(wave_sizze_46539, skip_threads_46550)) {
                            barrier(CLK_LOCAL_MEM_FENCE);
                        }
                        skip_threads_46550 *= 2;
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // last thread of block 'i' writes its result to offset 'i'
                {
                    if ((local_tid_46537 - squot32(local_tid_46537, 32) * 32) ==
                        31 && ltid_in_bounds_46549) {
                        ((volatile __local
                          float *) red_arr_mem_46541)[sext_i32_i64(squot32(local_tid_46537,
                                                                           32))] =
                            x_41524;
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // scan the first block, after which offset 'i' contains carry-in for block 'i+1'
                {
                    int32_t skip_threads_46552;
                    
                    // read input for in-block scan
                    {
                        if (squot32(local_tid_46537, 32) == 0 &&
                            ltid_in_bounds_46549) {
                            x_46547 = ((volatile __local
                                        float *) red_arr_mem_46541)[sext_i32_i64(local_tid_46537)];
                            if ((local_tid_46537 - squot32(local_tid_46537,
                                                           32) * 32) == 0) {
                                x_46546 = x_46547;
                            }
                        }
                    }
                    // in-block scan (hopefully no barriers needed)
                    {
                        skip_threads_46552 = 1;
                        while (slt32(skip_threads_46552, 32)) {
                            if (sle32(skip_threads_46552, local_tid_46537 -
                                      squot32(local_tid_46537, 32) * 32) &&
                                (squot32(local_tid_46537, 32) == 0 &&
                                 ltid_in_bounds_46549)) {
                                // read operands
                                {
                                    x_46546 = ((volatile __local
                                                float *) red_arr_mem_46541)[sext_i32_i64(local_tid_46537) -
                                                                            sext_i32_i64(skip_threads_46552)];
                                }
                                // perform operation
                                {
                                    // Slot t holds the total of block t, whose
                                    // last element has original index
                                    // t * 32 + 32 - 1; the segment test is
                                    // applied to those original indices.
                                    bool inactive_46553 =
                                         slt64(srem64(sext_i32_i64(local_tid_46537 *
                                                      32 + 32 - 1),
                                                      i32_res_29568),
                                               sext_i32_i64(local_tid_46537 *
                                               32 + 32 - 1) -
                                               sext_i32_i64((local_tid_46537 -
                                                             skip_threads_46552) *
                                               32 + 32 - 1));
                                    
                                    if (inactive_46553) {
                                        x_46546 = x_46547;
                                    }
                                    if (!inactive_46553) {
                                        float defunc_1_op_res_46548 = x_46546 +
                                              x_46547;
                                        
                                        x_46546 = defunc_1_op_res_46548;
                                    }
                                }
                            }
                            if (sle32(wave_sizze_46539, skip_threads_46552)) {
                                barrier(CLK_LOCAL_MEM_FENCE);
                            }
                            if (sle32(skip_threads_46552, local_tid_46537 -
                                      squot32(local_tid_46537, 32) * 32) &&
                                (squot32(local_tid_46537, 32) == 0 &&
                                 ltid_in_bounds_46549)) {
                                // write result
                                {
                                    ((volatile __local
                                      float *) red_arr_mem_46541)[sext_i32_i64(local_tid_46537)] =
                                        x_46546;
                                    x_46547 = x_46546;
                                }
                            }
                            if (sle32(wave_sizze_46539, skip_threads_46552)) {
                                barrier(CLK_LOCAL_MEM_FENCE);
                            }
                            skip_threads_46552 *= 2;
                        }
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // carry-in for every block except the first
                {
                    if (!(squot32(local_tid_46537, 32) == 0 ||
                          !ltid_in_bounds_46549)) {
                        // read operands
                        {
                            // Keep this thread's in-block result in x_41525
                            // and load the carry of the preceding block.
                            x_41525 = x_41524;
                            x_41524 = ((__local
                                        float *) red_arr_mem_46541)[sext_i32_i64(squot32(local_tid_46537,
                                                                                         32)) -
                                                                    (int64_t) 1];
                        }
                        // perform operation
                        {
                            // inactive: this thread's segment starts after
                            // the last element of the preceding block, so the
                            // carry belongs to another segment.
                            bool inactive_46554 =
                                 slt64(srem64(sext_i32_i64(local_tid_46537),
                                              i32_res_29568),
                                       sext_i32_i64(local_tid_46537) -
                                       sext_i32_i64(squot32(local_tid_46537,
                                                            32) * 32 - 1));
                            
                            if (inactive_46554) {
                                x_41524 = x_41525;
                            }
                            if (!inactive_46554) {
                                float defunc_1_op_res_41526 = x_41524 + x_41525;
                                
                                x_41524 = defunc_1_op_res_41526;
                            }
                        }
                        // write final result
                        {
                            ((__local
                              float *) red_arr_mem_46541)[sext_i32_i64(local_tid_46537)] =
                                x_41524;
                        }
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // restore correct values for first block
                {
                    // The first 32 slots were reused for block totals and
                    // carries above; x_41525 still holds each first-block
                    // thread's own in-block scan result.
                    if (squot32(local_tid_46537, 32) == 0) {
                        ((__local
                          float *) red_arr_mem_46541)[sext_i32_i64(local_tid_46537)] =
                            x_41525;
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
            }
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        // save final values of segments
        {
            // Thread t (for t below the number of segments per group, and
            // with a global segment index below m_29166) copies the last
            // scanned element of segment t to the output.
            if (slt64(sext_i32_i64(virt_group_id_46545) *
                      squot64(segred_group_sizze_41520,
                              segment_sizze_nonzzero_46534) +
                      sext_i32_i64(local_tid_46537), m_29166) &&
                slt64(sext_i32_i64(local_tid_46537),
                      squot64(segred_group_sizze_41520,
                              segment_sizze_nonzzero_46534))) {
                ((__global
                  float *) mem_45278)[sext_i32_i64(virt_group_id_46545) *
                                      squot64(segred_group_sizze_41520,
                                              segment_sizze_nonzzero_46534) +
                                      sext_i32_i64(local_tid_46537)] = ((__local
                                                                         float *) red_arr_mem_46541)[(sext_i32_i64(local_tid_46537) +
                                                                                                      (int64_t) 1) *
                                                                                                     segment_sizze_nonzzero_46534 -
                                                                                                     (int64_t) 1];
            }
        }
        // Synchronise before the next virtual group reuses the local array.
        barrier(CLK_LOCAL_MEM_FENCE);
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }
    
  error_1:
    return;
    #undef segred_group_sizze_41520
}
__kernel void mainzisegred_small_42060(__global int *global_failure,
                                       __local volatile
                                       int64_t *red_arr_mem_46703_backing_aligned_0,
                                       __local volatile
                                       int64_t *red_arr_mem_46701_backing_aligned_1,
                                       __local volatile
                                       int64_t *red_arr_mem_46699_backing_aligned_2,
                                       int64_t m_29166,
                                       int64_t iota32_arg_29597,
                                       int64_t num_groups_42247,
                                       int64_t segment_sizze_nonzzero_46692,
                                       __global unsigned char *mem_45284,
                                       __global unsigned char *mem_45296,
                                       __global unsigned char *mem_45298,
                                       __global unsigned char *mem_45302,
                                       __global unsigned char *mem_45305,
                                       __global unsigned char *mem_45307,
                                       __global unsigned char *mem_45309)
{
    #define segred_group_sizze_42246 (mainzisegred_group_sizze_42054)
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict red_arr_mem_46703_backing_2 =
                          (__local volatile
                           char *) red_arr_mem_46703_backing_aligned_0;
    __local volatile char *restrict red_arr_mem_46701_backing_1 =
                          (__local volatile
                           char *) red_arr_mem_46701_backing_aligned_1;
    __local volatile char *restrict red_arr_mem_46699_backing_0 =
                          (__local volatile
                           char *) red_arr_mem_46699_backing_aligned_2;
    
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_46694;
    int32_t local_tid_46695;
    int64_t group_sizze_46698;
    int32_t wave_sizze_46697;
    int32_t group_tid_46696;
    
    global_tid_46694 = get_global_id(0);
    local_tid_46695 = get_local_id(0);
    group_sizze_46698 = get_local_size(0);
    wave_sizze_46697 = LOCKSTEP_WIDTH;
    group_tid_46696 = get_group_id(0);
    
    int32_t phys_tid_42060;
    
    phys_tid_42060 = global_tid_46694;
    
    __local char *red_arr_mem_46699;
    
    red_arr_mem_46699 = (__local char *) red_arr_mem_46699_backing_0;
    
    __local char *red_arr_mem_46701;
    
    red_arr_mem_46701 = (__local char *) red_arr_mem_46701_backing_1;
    
    __local char *red_arr_mem_46703;
    
    red_arr_mem_46703 = (__local char *) red_arr_mem_46703_backing_2;
    
    int32_t phys_group_id_46705;
    
    phys_group_id_46705 = get_group_id(0);
    for (int32_t i_46706 = 0; i_46706 <
         sdiv_up32(sext_i64_i32(sdiv_up64(m_29166,
                                          squot64(segred_group_sizze_42246,
                                                  segment_sizze_nonzzero_46692))) -
                   phys_group_id_46705, sext_i64_i32(num_groups_42247));
         i_46706++) {
        int32_t virt_group_id_46707 = phys_group_id_46705 + i_46706 *
                sext_i64_i32(num_groups_42247);
        int64_t gtid_42051 = squot64(sext_i32_i64(local_tid_46695),
                                     segment_sizze_nonzzero_46692) +
                sext_i32_i64(virt_group_id_46707) *
                squot64(segred_group_sizze_42246, segment_sizze_nonzzero_46692);
        int64_t gtid_42059 = srem64(sext_i32_i64(local_tid_46695),
                                    iota32_arg_29597);
        
        // apply map function if in bounds
        {
            if (slt64((int64_t) 0, iota32_arg_29597) && (slt64(gtid_42051,
                                                               m_29166) &&
                                                         slt64(sext_i32_i64(local_tid_46695),
                                                               iota32_arg_29597 *
                                                               squot64(segred_group_sizze_42246,
                                                                       segment_sizze_nonzzero_46692)))) {
                int32_t y_42266 = ((__global int32_t *) mem_45298)[gtid_42051];
                float y_42267 = ((__global float *) mem_45296)[gtid_42051];
                float x_42271 = ((__global float *) mem_45302)[gtid_42051 *
                                                               iota32_arg_29597 +
                                                               gtid_42059];
                float x_42272 = ((__global float *) mem_45284)[gtid_42059];
                int32_t index_primexp_42404 = sext_i64_i32(gtid_42059);
                float defunc_0_f_res_42275 = x_42271 / y_42267;
                bool cond_42276 = slt32(index_primexp_42404, y_42266);
                bool isnan_res_42277;
                
                isnan_res_42277 = futrts_isnan32(defunc_0_f_res_42275);
                
                bool cond_t_res_42278 = !isnan_res_42277;
                bool x_42279 = cond_42276 && cond_t_res_42278;
                float abs_res_42280 = (float) fabs(defunc_0_f_res_42275);
                bool defunc_2_f_res_t_res_42281 = x_42272 < abs_res_42280;
                bool x_42282 = x_42279 && defunc_2_f_res_t_res_42281;
                float defunc_1_f_res_42283;
                
                if (cond_42276) {
                    defunc_1_f_res_42283 = defunc_0_f_res_42275;
                } else {
                    defunc_1_f_res_42283 = 0.0F;
                }
                // save map-out results
                { }
                // save results to be reduced
                {
                    ((__local
                      bool *) red_arr_mem_46699)[sext_i32_i64(local_tid_46695)] =
                        x_42282;
                    ((__local
                      int32_t *) red_arr_mem_46701)[sext_i32_i64(local_tid_46695)] =
                        index_primexp_42404;
                    ((__local
                      float *) red_arr_mem_46703)[sext_i32_i64(local_tid_46695)] =
                        defunc_1_f_res_42283;
                }
            } else {
                ((__local
                  bool *) red_arr_mem_46699)[sext_i32_i64(local_tid_46695)] = 0;
                ((__local
                  int32_t *) red_arr_mem_46701)[sext_i32_i64(local_tid_46695)] =
                    -1;
                ((__local
                  float *) red_arr_mem_46703)[sext_i32_i64(local_tid_46695)] =
                    0.0F;
            }
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        if (slt64((int64_t) 0, iota32_arg_29597)) {
            // perform segmented scan to imitate reduction
            {
                bool x_42252;
                int32_t x_42253;
                float x_42254;
                bool x_42255;
                int32_t x_42256;
                float x_42257;
                bool x_46708;
                int32_t x_46709;
                float x_46710;
                bool x_46711;
                int32_t x_46712;
                float x_46713;
                bool ltid_in_bounds_46722;
                
                ltid_in_bounds_46722 = slt64(sext_i32_i64(local_tid_46695),
                                             iota32_arg_29597 *
                                             squot64(segred_group_sizze_42246,
                                                     segment_sizze_nonzzero_46692));
                
                int32_t skip_threads_46723;
                
                // read input for in-block scan
                {
                    if (ltid_in_bounds_46722) {
                        x_42255 = ((volatile __local
                                    bool *) red_arr_mem_46699)[sext_i32_i64(local_tid_46695)];
                        x_42256 = ((volatile __local
                                    int32_t *) red_arr_mem_46701)[sext_i32_i64(local_tid_46695)];
                        x_42257 = ((volatile __local
                                    float *) red_arr_mem_46703)[sext_i32_i64(local_tid_46695)];
                        if ((local_tid_46695 - squot32(local_tid_46695, 32) *
                             32) == 0) {
                            x_42252 = x_42255;
                            x_42253 = x_42256;
                            x_42254 = x_42257;
                        }
                    }
                }
                // in-block scan (hopefully no barriers needed)
                {
                    skip_threads_46723 = 1;
                    while (slt32(skip_threads_46723, 32)) {
                        if (sle32(skip_threads_46723, local_tid_46695 -
                                  squot32(local_tid_46695, 32) * 32) &&
                            ltid_in_bounds_46722) {
                            // read operands
                            {
                                x_42252 = ((volatile __local
                                            bool *) red_arr_mem_46699)[sext_i32_i64(local_tid_46695) -
                                                                       sext_i32_i64(skip_threads_46723)];
                                x_42253 = ((volatile __local
                                            int32_t *) red_arr_mem_46701)[sext_i32_i64(local_tid_46695) -
                                                                          sext_i32_i64(skip_threads_46723)];
                                x_42254 = ((volatile __local
                                            float *) red_arr_mem_46703)[sext_i32_i64(local_tid_46695) -
                                                                        sext_i32_i64(skip_threads_46723)];
                            }
                            // perform operation
                            {
                                bool inactive_46724 =
                                     slt64(srem64(sext_i32_i64(local_tid_46695),
                                                  iota32_arg_29597),
                                           sext_i32_i64(local_tid_46695) -
                                           sext_i32_i64(local_tid_46695 -
                                           skip_threads_46723));
                                
                                if (inactive_46724) {
                                    x_42252 = x_42255;
                                    x_42253 = x_42256;
                                    x_42254 = x_42257;
                                }
                                if (!inactive_46724) {
                                    bool defunc_1_op_res_42258;
                                    int32_t defunc_1_op_res_42259;
                                    
                                    if (x_42252) {
                                        defunc_1_op_res_42258 = x_42252;
                                        defunc_1_op_res_42259 = x_42253;
                                    } else {
                                        bool x_42260 = x_42255 && x_42255;
                                        bool x_42261 = !x_42255;
                                        bool y_42262 = x_42252 && x_42261;
                                        bool defunc_1_op_res_f_res_42263 =
                                             x_42260 || y_42262;
                                        int32_t defunc_1_op_res_f_res_42264;
                                        
                                        if (x_42255) {
                                            defunc_1_op_res_f_res_42264 =
                                                x_42256;
                                        } else {
                                            defunc_1_op_res_f_res_42264 =
                                                x_42253;
                                        }
                                        defunc_1_op_res_42258 =
                                            defunc_1_op_res_f_res_42263;
                                        defunc_1_op_res_42259 =
                                            defunc_1_op_res_f_res_42264;
                                    }
                                    
                                    float defunc_1_op_res_42265 = x_42254 +
                                          x_42257;
                                    
                                    x_42252 = defunc_1_op_res_42258;
                                    x_42253 = defunc_1_op_res_42259;
                                    x_42254 = defunc_1_op_res_42265;
                                }
                            }
                        }
                        if (sle32(wave_sizze_46697, skip_threads_46723)) {
                            barrier(CLK_LOCAL_MEM_FENCE);
                        }
                        if (sle32(skip_threads_46723, local_tid_46695 -
                                  squot32(local_tid_46695, 32) * 32) &&
                            ltid_in_bounds_46722) {
                            // write result
                            {
                                ((volatile __local
                                  bool *) red_arr_mem_46699)[sext_i32_i64(local_tid_46695)] =
                                    x_42252;
                                x_42255 = x_42252;
                                ((volatile __local
                                  int32_t *) red_arr_mem_46701)[sext_i32_i64(local_tid_46695)] =
                                    x_42253;
                                x_42256 = x_42253;
                                ((volatile __local
                                  float *) red_arr_mem_46703)[sext_i32_i64(local_tid_46695)] =
                                    x_42254;
                                x_42257 = x_42254;
                            }
                        }
                        if (sle32(wave_sizze_46697, skip_threads_46723)) {
                            barrier(CLK_LOCAL_MEM_FENCE);
                        }
                        skip_threads_46723 *= 2;
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // last thread of block 'i' writes its result to offset 'i'
                {
                    if ((local_tid_46695 - squot32(local_tid_46695, 32) * 32) ==
                        31 && ltid_in_bounds_46722) {
                        ((volatile __local
                          bool *) red_arr_mem_46699)[sext_i32_i64(squot32(local_tid_46695,
                                                                          32))] =
                            x_42252;
                        ((volatile __local
                          int32_t *) red_arr_mem_46701)[sext_i32_i64(squot32(local_tid_46695,
                                                                             32))] =
                            x_42253;
                        ((volatile __local
                          float *) red_arr_mem_46703)[sext_i32_i64(squot32(local_tid_46695,
                                                                           32))] =
                            x_42254;
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // scan the first block, after which offset 'i' contains carry-in for block 'i+1'
                {
                    int32_t skip_threads_46725;
                    
                    // read input for in-block scan
                    {
                        if (squot32(local_tid_46695, 32) == 0 &&
                            ltid_in_bounds_46722) {
                            x_46711 = ((volatile __local
                                        bool *) red_arr_mem_46699)[sext_i32_i64(local_tid_46695)];
                            x_46712 = ((volatile __local
                                        int32_t *) red_arr_mem_46701)[sext_i32_i64(local_tid_46695)];
                            x_46713 = ((volatile __local
                                        float *) red_arr_mem_46703)[sext_i32_i64(local_tid_46695)];
                            if ((local_tid_46695 - squot32(local_tid_46695,
                                                           32) * 32) == 0) {
                                x_46708 = x_46711;
                                x_46709 = x_46712;
                                x_46710 = x_46713;
                            }
                        }
                    }
                    // in-block scan (hopefully no barriers needed)
                    {
                        skip_threads_46725 = 1;
                        while (slt32(skip_threads_46725, 32)) {
                            if (sle32(skip_threads_46725, local_tid_46695 -
                                      squot32(local_tid_46695, 32) * 32) &&
                                (squot32(local_tid_46695, 32) == 0 &&
                                 ltid_in_bounds_46722)) {
                                // read operands
                                {
                                    x_46708 = ((volatile __local
                                                bool *) red_arr_mem_46699)[sext_i32_i64(local_tid_46695) -
                                                                           sext_i32_i64(skip_threads_46725)];
                                    x_46709 = ((volatile __local
                                                int32_t *) red_arr_mem_46701)[sext_i32_i64(local_tid_46695) -
                                                                              sext_i32_i64(skip_threads_46725)];
                                    x_46710 = ((volatile __local
                                                float *) red_arr_mem_46703)[sext_i32_i64(local_tid_46695) -
                                                                            sext_i32_i64(skip_threads_46725)];
                                }
                                // perform operation
                                {
                                    bool inactive_46726 =
                                         slt64(srem64(sext_i32_i64(local_tid_46695 *
                                                      32 + 32 - 1),
                                                      iota32_arg_29597),
                                               sext_i32_i64(local_tid_46695 *
                                               32 + 32 - 1) -
                                               sext_i32_i64((local_tid_46695 -
                                                             skip_threads_46725) *
                                               32 + 32 - 1));
                                    
                                    if (inactive_46726) {
                                        x_46708 = x_46711;
                                        x_46709 = x_46712;
                                        x_46710 = x_46713;
                                    }
                                    if (!inactive_46726) {
                                        bool defunc_1_op_res_46714;
                                        int32_t defunc_1_op_res_46715;
                                        
                                        if (x_46708) {
                                            defunc_1_op_res_46714 = x_46708;
                                            defunc_1_op_res_46715 = x_46709;
                                        } else {
                                            bool x_46716 = x_46711 && x_46711;
                                            bool x_46717 = !x_46711;
                                            bool y_46718 = x_46708 && x_46717;
                                            bool defunc_1_op_res_f_res_46719 =
                                                 x_46716 || y_46718;
                                            int32_t defunc_1_op_res_f_res_46720;
                                            
                                            if (x_46711) {
                                                defunc_1_op_res_f_res_46720 =
                                                    x_46712;
                                            } else {
                                                defunc_1_op_res_f_res_46720 =
                                                    x_46709;
                                            }
                                            defunc_1_op_res_46714 =
                                                defunc_1_op_res_f_res_46719;
                                            defunc_1_op_res_46715 =
                                                defunc_1_op_res_f_res_46720;
                                        }
                                        
                                        float defunc_1_op_res_46721 = x_46710 +
                                              x_46713;
                                        
                                        x_46708 = defunc_1_op_res_46714;
                                        x_46709 = defunc_1_op_res_46715;
                                        x_46710 = defunc_1_op_res_46721;
                                    }
                                }
                            }
                            if (sle32(wave_sizze_46697, skip_threads_46725)) {
                                barrier(CLK_LOCAL_MEM_FENCE);
                            }
                            if (sle32(skip_threads_46725, local_tid_46695 -
                                      squot32(local_tid_46695, 32) * 32) &&
                                (squot32(local_tid_46695, 32) == 0 &&
                                 ltid_in_bounds_46722)) {
                                // write result
                                {
                                    ((volatile __local
                                      bool *) red_arr_mem_46699)[sext_i32_i64(local_tid_46695)] =
                                        x_46708;
                                    x_46711 = x_46708;
                                    ((volatile __local
                                      int32_t *) red_arr_mem_46701)[sext_i32_i64(local_tid_46695)] =
                                        x_46709;
                                    x_46712 = x_46709;
                                    ((volatile __local
                                      float *) red_arr_mem_46703)[sext_i32_i64(local_tid_46695)] =
                                        x_46710;
                                    x_46713 = x_46710;
                                }
                            }
                            if (sle32(wave_sizze_46697, skip_threads_46725)) {
                                barrier(CLK_LOCAL_MEM_FENCE);
                            }
                            skip_threads_46725 *= 2;
                        }
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // carry-in for every block except the first
                {
                    if (!(squot32(local_tid_46695, 32) == 0 ||
                          !ltid_in_bounds_46722)) {
                        // read operands
                        {
                            x_42255 = x_42252;
                            x_42256 = x_42253;
                            x_42257 = x_42254;
                            x_42252 = ((__local
                                        bool *) red_arr_mem_46699)[sext_i32_i64(squot32(local_tid_46695,
                                                                                        32)) -
                                                                   (int64_t) 1];
                            x_42253 = ((__local
                                        int32_t *) red_arr_mem_46701)[sext_i32_i64(squot32(local_tid_46695,
                                                                                           32)) -
                                                                      (int64_t) 1];
                            x_42254 = ((__local
                                        float *) red_arr_mem_46703)[sext_i32_i64(squot32(local_tid_46695,
                                                                                         32)) -
                                                                    (int64_t) 1];
                        }
                        // perform operation
                        {
                            bool inactive_46727 =
                                 slt64(srem64(sext_i32_i64(local_tid_46695),
                                              iota32_arg_29597),
                                       sext_i32_i64(local_tid_46695) -
                                       sext_i32_i64(squot32(local_tid_46695,
                                                            32) * 32 - 1));
                            
                            if (inactive_46727) {
                                x_42252 = x_42255;
                                x_42253 = x_42256;
                                x_42254 = x_42257;
                            }
                            if (!inactive_46727) {
                                bool defunc_1_op_res_42258;
                                int32_t defunc_1_op_res_42259;
                                
                                if (x_42252) {
                                    defunc_1_op_res_42258 = x_42252;
                                    defunc_1_op_res_42259 = x_42253;
                                } else {
                                    bool x_42260 = x_42255 && x_42255;
                                    bool x_42261 = !x_42255;
                                    bool y_42262 = x_42252 && x_42261;
                                    bool defunc_1_op_res_f_res_42263 =
                                         x_42260 || y_42262;
                                    int32_t defunc_1_op_res_f_res_42264;
                                    
                                    if (x_42255) {
                                        defunc_1_op_res_f_res_42264 = x_42256;
                                    } else {
                                        defunc_1_op_res_f_res_42264 = x_42253;
                                    }
                                    defunc_1_op_res_42258 =
                                        defunc_1_op_res_f_res_42263;
                                    defunc_1_op_res_42259 =
                                        defunc_1_op_res_f_res_42264;
                                }
                                
                                float defunc_1_op_res_42265 = x_42254 + x_42257;
                                
                                x_42252 = defunc_1_op_res_42258;
                                x_42253 = defunc_1_op_res_42259;
                                x_42254 = defunc_1_op_res_42265;
                            }
                        }
                        // write final result
                        {
                            ((__local
                              bool *) red_arr_mem_46699)[sext_i32_i64(local_tid_46695)] =
                                x_42252;
                            ((__local
                              int32_t *) red_arr_mem_46701)[sext_i32_i64(local_tid_46695)] =
                                x_42253;
                            ((__local
                              float *) red_arr_mem_46703)[sext_i32_i64(local_tid_46695)] =
                                x_42254;
                        }
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
                // restore correct values for first block
                {
                    if (squot32(local_tid_46695, 32) == 0) {
                        ((__local
                          bool *) red_arr_mem_46699)[sext_i32_i64(local_tid_46695)] =
                            x_42255;
                        ((__local
                          int32_t *) red_arr_mem_46701)[sext_i32_i64(local_tid_46695)] =
                            x_42256;
                        ((__local
                          float *) red_arr_mem_46703)[sext_i32_i64(local_tid_46695)] =
                            x_42257;
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
            }
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        // save final values of segments
        {
            if (slt64(sext_i32_i64(virt_group_id_46707) *
                      squot64(segred_group_sizze_42246,
                              segment_sizze_nonzzero_46692) +
                      sext_i32_i64(local_tid_46695), m_29166) &&
                slt64(sext_i32_i64(local_tid_46695),
                      squot64(segred_group_sizze_42246,
                              segment_sizze_nonzzero_46692))) {
                ((__global
                  bool *) mem_45305)[sext_i32_i64(virt_group_id_46707) *
                                     squot64(segred_group_sizze_42246,
                                             segment_sizze_nonzzero_46692) +
                                     sext_i32_i64(local_tid_46695)] = ((__local
                                                                        bool *) red_arr_mem_46699)[(sext_i32_i64(local_tid_46695) +
                                                                                                    (int64_t) 1) *
                                                                                                   segment_sizze_nonzzero_46692 -
                                                                                                   (int64_t) 1];
                ((__global
                  int32_t *) mem_45307)[sext_i32_i64(virt_group_id_46707) *
                                        squot64(segred_group_sizze_42246,
                                                segment_sizze_nonzzero_46692) +
                                        sext_i32_i64(local_tid_46695)] =
                    ((__local
                      int32_t *) red_arr_mem_46701)[(sext_i32_i64(local_tid_46695) +
                                                     (int64_t) 1) *
                                                    segment_sizze_nonzzero_46692 -
                                                    (int64_t) 1];
                ((__global
                  float *) mem_45309)[sext_i32_i64(virt_group_id_46707) *
                                      squot64(segred_group_sizze_42246,
                                              segment_sizze_nonzzero_46692) +
                                      sext_i32_i64(local_tid_46695)] = ((__local
                                                                         float *) red_arr_mem_46703)[(sext_i32_i64(local_tid_46695) +
                                                                                                      (int64_t) 1) *
                                                                                                     segment_sizze_nonzzero_46692 -
                                                                                                     (int64_t) 1];
            }
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }
    
  error_1:
    return;
    #undef segred_group_sizze_42246
}
// Strided element-wise copy of 32-bit floats from mem_param_44585 into
// mem_44590, one element per work-item.
//
// The work-item's global id along dimension 0 is treated as a flat index over
// m_27772 * nm_27920 elements and split into
//     outer = flat / nm_27920        (truncating signed quotient)
//     inner = flat - outer * nm_27920
// The element is read from
//     ctx_param_ext_44580 + outer * ctx_param_ext_44581
//                         + inner * ctx_param_ext_44583
// and written to
//     inner * m_27772 + outer
// so the two indices trade places between source and destination.
// Work-items whose flat index is not below m_27772 * nm_27920 do nothing.
__kernel void mainDetailedzicopy_45861(int64_t m_27772, int64_t nm_27920,
                                       int64_t ctx_param_ext_44580,
                                       int64_t ctx_param_ext_44581,
                                       int64_t ctx_param_ext_44583, __global
                                       unsigned char *mem_param_44585, __global
                                       unsigned char *mem_44590)
{
    // The global id is narrowed to 32 bits first and then sign-extended,
    // exactly as the flat index has always been formed for this kernel.
    const int32_t copy_gtid_45861 = get_global_id(0);
    const int64_t flat_idx = sext_i32_i64(copy_gtid_45861);
    
    // Guard clause: surplus work-items past the end of the data exit early.
    if (!slt64(flat_idx, m_27772 * nm_27920)) {
        return;
    }
    
    // Split the flat index into its two components.
    const int64_t outer_idx = squot64(flat_idx, nm_27920);
    const int64_t inner_idx = flat_idx - outer_idx * nm_27920;
    
    // Source: base offset plus one stride per component.
    const int64_t src_offset = ctx_param_ext_44580 +
                               (outer_idx * ctx_param_ext_44581 +
                                inner_idx * ctx_param_ext_44583);
    // Destination: the inner component selects a run of m_27772 elements.
    const int64_t dst_offset = inner_idx * m_27772 + outer_idx;
    
    ((__global float *) mem_44590)[dst_offset] =
        ((__global float *) mem_param_44585)[src_offset];
}
__kernel void mainDetailedziscan_stage1_32356(__global int *global_failure,
                                              __local volatile
                                              int64_t *scan_arr_mem_46273_backing_aligned_0,
                                              int64_t N_27771, int64_t m_27772,
                                              int32_t num_threads_46267,
                                              __global
                                              unsigned char *images_mem_44381,
                                              __global
                                              unsigned char *defunc_3_map_res_mem_45140,
                                              __global unsigned char *mem_45163,
                                              __global unsigned char *mem_45166)
{
    #define segscan_group_sizze_32373 (mainDetailedzisegscan_group_sizze_32350)
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict scan_arr_mem_46273_backing_0 =
                          (__local volatile
                           char *) scan_arr_mem_46273_backing_aligned_0;
    
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_46268;
    int32_t local_tid_46269;
    int64_t group_sizze_46272;
    int32_t wave_sizze_46271;
    int32_t group_tid_46270;
    
    global_tid_46268 = get_global_id(0);
    local_tid_46269 = get_local_id(0);
    group_sizze_46272 = get_local_size(0);
    wave_sizze_46271 = LOCKSTEP_WIDTH;
    group_tid_46270 = get_group_id(0);
    
    int32_t phys_tid_32356;
    
    phys_tid_32356 = global_tid_46268;
    
    __local char *scan_arr_mem_46273;
    
    scan_arr_mem_46273 = (__local char *) scan_arr_mem_46273_backing_0;
    
    int64_t x_32378;
    int64_t x_32379;
    
    x_32378 = (int64_t) 0;
    for (int64_t j_46275 = 0; j_46275 < sdiv_up64(m_27772 * N_27771,
                                                  sext_i32_i64(num_threads_46267));
         j_46275++) {
        int64_t chunk_offset_46276 = segscan_group_sizze_32373 * j_46275 +
                sext_i32_i64(group_tid_46270) * (segscan_group_sizze_32373 *
                                                 sdiv_up64(m_27772 * N_27771,
                                                           sext_i32_i64(num_threads_46267)));
        int64_t flat_idx_46277 = chunk_offset_46276 +
                sext_i32_i64(local_tid_46269);
        int64_t gtid_32347 = squot64(flat_idx_46277, N_27771);
        int64_t gtid_32355 = flat_idx_46277 - squot64(flat_idx_46277, N_27771) *
                N_27771;
        
        // threads in bounds read input
        {
            if (slt64(gtid_32347, m_27772) && slt64(gtid_32355, N_27771)) {
                float x_32383 = ((__global
                                  float *) images_mem_44381)[gtid_32347 *
                                                             N_27771 +
                                                             gtid_32355];
                bool isnan_res_32385;
                
                isnan_res_32385 = futrts_isnan32(x_32383);
                
                bool cond_32386 = !isnan_res_32385;
                float defunc_1_f_res_32387;
                
                if (cond_32386) {
                    float x_32384 = ((__global
                                      float *) defunc_3_map_res_mem_45140)[gtid_32347 *
                                                                           N_27771 +
                                                                           gtid_32355];
                    float defunc_1_f_res_t_res_32388 = x_32383 - x_32384;
                    
                    defunc_1_f_res_32387 = defunc_1_f_res_t_res_32388;
                } else {
                    defunc_1_f_res_32387 = NAN;
                }
                
                bool isnan_res_32389;
                
                isnan_res_32389 = futrts_isnan32(defunc_1_f_res_32387);
                
                bool defunc_0_p_res_32390 = !isnan_res_32389;
                int64_t defunc_0_f_res_32391 =
                        btoi_bool_i64(defunc_0_p_res_32390);
                
                // write to-scan values to parameters
                {
                    x_32379 = defunc_0_f_res_32391;
                }
                // write mapped values results to global memory
                {
                    ((__global float *) mem_45166)[gtid_32347 * N_27771 +
                                                   gtid_32355] =
                        defunc_1_f_res_32387;
                }
            }
        }
        // do one intra-group scan operation
        {
            // maybe restore some to-scan values to parameters, or read neutral
            {
                if (!(slt64(gtid_32347, m_27772) && slt64(gtid_32355,
                                                          N_27771))) {
                    x_32379 = (int64_t) 0;
                }
            }
            // combine with carry and write to local memory
            {
                int64_t defunc_1_op_res_32380 = add64(x_32378, x_32379);
                
                ((__local
                  int64_t *) scan_arr_mem_46273)[sext_i32_i64(local_tid_46269)] =
                    defunc_1_op_res_32380;
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            
            int64_t x_46278;
            int64_t x_46279;
            int64_t x_46281;
            int64_t x_46282;
            bool ltid_in_bounds_46284;
            
            ltid_in_bounds_46284 = slt64(sext_i32_i64(local_tid_46269),
                                         segscan_group_sizze_32373);
            
            int32_t skip_threads_46285;
            
            // read input for in-block scan
            {
                if (ltid_in_bounds_46284) {
                    x_46279 = ((volatile __local
                                int64_t *) scan_arr_mem_46273)[sext_i32_i64(local_tid_46269)];
                    if ((local_tid_46269 - squot32(local_tid_46269, 32) * 32) ==
                        0) {
                        x_46278 = x_46279;
                    }
                }
            }
            // in-block scan (hopefully no barriers needed)
            {
                skip_threads_46285 = 1;
                while (slt32(skip_threads_46285, 32)) {
                    if (sle32(skip_threads_46285, local_tid_46269 -
                              squot32(local_tid_46269, 32) * 32) &&
                        ltid_in_bounds_46284) {
                        // read operands
                        {
                            x_46278 = ((volatile __local
                                        int64_t *) scan_arr_mem_46273)[sext_i32_i64(local_tid_46269) -
                                                                       sext_i32_i64(skip_threads_46285)];
                        }
                        // perform operation
                        {
                            bool inactive_46286 =
                                 slt64(srem64(sext_i32_i64(local_tid_46269) +
                                              chunk_offset_46276, N_27771),
                                       sext_i32_i64(local_tid_46269) +
                                       chunk_offset_46276 -
                                       (sext_i32_i64(local_tid_46269 -
                                        skip_threads_46285) +
                                        chunk_offset_46276));
                            
                            if (inactive_46286) {
                                x_46278 = x_46279;
                            }
                            if (!inactive_46286) {
                                int64_t defunc_1_op_res_46280 = add64(x_46278,
                                                                      x_46279);
                                
                                x_46278 = defunc_1_op_res_46280;
                            }
                        }
                    }
                    if (sle32(wave_sizze_46271, skip_threads_46285)) {
                        barrier(CLK_LOCAL_MEM_FENCE);
                    }
                    if (sle32(skip_threads_46285, local_tid_46269 -
                              squot32(local_tid_46269, 32) * 32) &&
                        ltid_in_bounds_46284) {
                        // write result
                        {
                            ((volatile __local
                              int64_t *) scan_arr_mem_46273)[sext_i32_i64(local_tid_46269)] =
                                x_46278;
                            x_46279 = x_46278;
                        }
                    }
                    if (sle32(wave_sizze_46271, skip_threads_46285)) {
                        barrier(CLK_LOCAL_MEM_FENCE);
                    }
                    skip_threads_46285 *= 2;
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            // last thread of block 'i' writes its result to offset 'i'
            {
                if ((local_tid_46269 - squot32(local_tid_46269, 32) * 32) ==
                    31 && ltid_in_bounds_46284) {
                    ((volatile __local
                      int64_t *) scan_arr_mem_46273)[sext_i32_i64(squot32(local_tid_46269,
                                                                          32))] =
                        x_46278;
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            // scan the first block, after which offset 'i' contains carry-in for block 'i+1'
            {
                int32_t skip_threads_46287;
                
                // read input for in-block scan
                {
                    if (squot32(local_tid_46269, 32) == 0 &&
                        ltid_in_bounds_46284) {
                        x_46282 = ((volatile __local
                                    int64_t *) scan_arr_mem_46273)[sext_i32_i64(local_tid_46269)];
                        if ((local_tid_46269 - squot32(local_tid_46269, 32) *
                             32) == 0) {
                            x_46281 = x_46282;
                        }
                    }
                }
                // in-block scan (hopefully no barriers needed)
                {
                    skip_threads_46287 = 1;
                    while (slt32(skip_threads_46287, 32)) {
                        if (sle32(skip_threads_46287, local_tid_46269 -
                                  squot32(local_tid_46269, 32) * 32) &&
                            (squot32(local_tid_46269, 32) == 0 &&
                             ltid_in_bounds_46284)) {
                            // read operands
                            {
                                x_46281 = ((volatile __local
                                            int64_t *) scan_arr_mem_46273)[sext_i32_i64(local_tid_46269) -
                                                                           sext_i32_i64(skip_threads_46287)];
                            }
                            // perform operation
                            {
                                bool inactive_46288 =
                                     slt64(srem64(sext_i32_i64(local_tid_46269 *
                                                  32 + 32 - 1) +
                                                  chunk_offset_46276, N_27771),
                                           sext_i32_i64(local_tid_46269 * 32 +
                                           32 - 1) + chunk_offset_46276 -
                                           (sext_i32_i64((local_tid_46269 -
                                                          skip_threads_46287) *
                                            32 + 32 - 1) + chunk_offset_46276));
                                
                                if (inactive_46288) {
                                    x_46281 = x_46282;
                                }
                                if (!inactive_46288) {
                                    int64_t defunc_1_op_res_46283 =
                                            add64(x_46281, x_46282);
                                    
                                    x_46281 = defunc_1_op_res_46283;
                                }
                            }
                        }
                        if (sle32(wave_sizze_46271, skip_threads_46287)) {
                            barrier(CLK_LOCAL_MEM_FENCE);
                        }
                        if (sle32(skip_threads_46287, local_tid_46269 -
                                  squot32(local_tid_46269, 32) * 32) &&
                            (squot32(local_tid_46269, 32) == 0 &&
                             ltid_in_bounds_46284)) {
                            // write result
                            {
                                ((volatile __local
                                  int64_t *) scan_arr_mem_46273)[sext_i32_i64(local_tid_46269)] =
                                    x_46281;
                                x_46282 = x_46281;
                            }
                        }
                        if (sle32(wave_sizze_46271, skip_threads_46287)) {
                            barrier(CLK_LOCAL_MEM_FENCE);
                        }
                        skip_threads_46287 *= 2;
                    }
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            // carry-in for every block except the first
            {
                if (!(squot32(local_tid_46269, 32) == 0 ||
                      !ltid_in_bounds_46284)) {
                    // read operands
                    {
                        x_46279 = x_46278;
                        x_46278 = ((__local
                                    int64_t *) scan_arr_mem_46273)[sext_i32_i64(squot32(local_tid_46269,
                                                                                        32)) -
                                                                   (int64_t) 1];
                    }
                    // perform operation
                    {
                        bool inactive_46289 =
                             slt64(srem64(sext_i32_i64(local_tid_46269) +
                                          chunk_offset_46276, N_27771),
                                   sext_i32_i64(local_tid_46269) +
                                   chunk_offset_46276 -
                                   (sext_i32_i64(squot32(local_tid_46269, 32) *
                                    32 - 1) + chunk_offset_46276));
                        
                        if (inactive_46289) {
                            x_46278 = x_46279;
                        }
                        if (!inactive_46289) {
                            int64_t defunc_1_op_res_46280 = add64(x_46278,
                                                                  x_46279);
                            
                            x_46278 = defunc_1_op_res_46280;
                        }
                    }
                    // write final result
                    {
                        ((__local
                          int64_t *) scan_arr_mem_46273)[sext_i32_i64(local_tid_46269)] =
                            x_46278;
                    }
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            // restore correct values for first block
            {
                if (squot32(local_tid_46269, 32) == 0) {
                    ((__local
                      int64_t *) scan_arr_mem_46273)[sext_i32_i64(local_tid_46269)] =
                        x_46279;
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            // threads in bounds write partial scan result
            {
                if (slt64(gtid_32347, m_27772) && slt64(gtid_32355, N_27771)) {
                    ((__global int64_t *) mem_45163)[gtid_32347 * N_27771 +
                                                     gtid_32355] = ((__local
                                                                     int64_t *) scan_arr_mem_46273)[sext_i32_i64(local_tid_46269)];
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            // first thread reads last element as carry-in for next iteration
            {
                bool crosses_segment_46290 = slt64(srem64(chunk_offset_46276 +
                                                          segscan_group_sizze_32373,
                                                          N_27771),
                                                   chunk_offset_46276 +
                                                   segscan_group_sizze_32373 -
                                                   (chunk_offset_46276 +
                                                    segscan_group_sizze_32373 -
                                                    (int64_t) 1));
                bool should_load_carry_46291 = local_tid_46269 == 0 &&
                     !crosses_segment_46290;
                
                if (should_load_carry_46291) {
                    x_32378 = ((__local
                                int64_t *) scan_arr_mem_46273)[segscan_group_sizze_32373 -
                                                               (int64_t) 1];
                }
                if (!should_load_carry_46291) {
                    x_32378 = (int64_t) 0;
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
        }
    }
    
  error_1:
    return;
    #undef segscan_group_sizze_32373
}
// Kernel mainDetailedziscan_stage1_34045
//
// Computes, per work-group, a segmented inclusive prefix sum of floats
// (operator +, neutral element 0.0F). The flat index space of
// m_27772 * iota32_arg_28233 elements is split into rows (segments) of
// length iota32_arg_28233; the running sum is not carried across a row
// boundary. Every work-group starts with a carry of 0.0F, so the values
// written here are only scanned within the range of elements handled by
// one group.
// NOTE(review): going by the kernel name this is the first stage of a
// multi-stage scan; the later stages are not part of this block.
//
// The helpers slt64, sle32, sle64, slt32, squot32, squot64, srem64,
// sdiv_up64, add32, sub32, sext_i32_i64, sext_i64_i32 and
// atomic_cmpxchg_i32_global are defined outside this kernel. The comments
// below assume they behave as their names suggest (signed comparisons,
// truncating quotient, remainder, rounding-up division, width conversion,
// compare-and-exchange) -- TODO confirm against their definitions.
//
// Parameters:
//   global_failure        Failure code cell. When failure_is_an_option is
//                         nonzero and the cell already holds a value >= 0,
//                         the thread returns immediately. A failed bounds
//                         check tries to swap -1 for code 75 or 76.
//   failure_is_an_option  Enables the early-exit test on global_failure.
//   global_failure_args   Receives the offending index in slot 0 and
//                         N_27771 in slot 1 from the thread whose
//                         compare-exchange returned -1.
//   scan_arr_mem_46697_backing_aligned_0
//                         Local scratch memory, accessed as a float array
//                         indexed by local thread id (assumes room for at
//                         least segscan_group_sizze_34093 floats -- TODO
//                         confirm at the launch site).
//   N_27771               Row stride of defunc_4_map_res_mem_45178 and the
//                         exclusive upper bound used by the bounds checks.
//   m_27772               Number of rows (segments).
//   iota32_arg_28233      Length of each row (segment).
//   num_threads_46691     Divisor used to derive the number of chunks each
//                         group iterates over.
//   defunc_4_map_res_mem_45178  float input, read at [row * N_27771 + i].
//   defunc_3_map_res_mem_45244  int32 input, one value per row.
//   defunc_3_map_res_mem_45245  int32 input, one value per row.
//   defunc_0_f_res_mem_45279    float input, one value per row.
//   mem_45399             int32 input, one value per row; columns at or
//                         beyond this value contribute 0.0F to the scan.
//   mem_45403             float output, written at
//                         [row * iota32_arg_28233 + column].
__kernel void mainDetailedziscan_stage1_34045(__global int *global_failure,
                                              int failure_is_an_option, __global
                                              int64_t *global_failure_args,
                                              __local volatile
                                              int64_t *scan_arr_mem_46697_backing_aligned_0,
                                              int64_t N_27771, int64_t m_27772,
                                              int64_t iota32_arg_28233,
                                              int32_t num_threads_46691,
                                              __global
                                              unsigned char *defunc_4_map_res_mem_45178,
                                              __global
                                              unsigned char *defunc_3_map_res_mem_45244,
                                              __global
                                              unsigned char *defunc_3_map_res_mem_45245,
                                              __global
                                              unsigned char *defunc_0_f_res_mem_45279,
                                              __global unsigned char *mem_45399,
                                              __global unsigned char *mem_45403)
{
    // Work-group size used by this scan; the macro it expands to is defined
    // outside this kernel.
    #define segscan_group_sizze_34093 (mainDetailedzisegscan_group_sizze_34039)
    
    // block_dim0..2 are not referenced anywhere else in this kernel.
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict scan_arr_mem_46697_backing_0 =
                          (__local volatile
                           char *) scan_arr_mem_46697_backing_aligned_0;
    // Work-group-wide flag: set by any thread that fails a bounds check so
    // that the whole group can leave together at error_0.
    volatile __local bool local_failure;
    
    // Skip all work if a failure code has already been recorded.
    if (failure_is_an_option) {
        int failed = *global_failure >= 0;
        
        if (failed)
            return;
    }
    // Every thread clears the flag; the barrier orders the clear before any
    // later write of true.
    local_failure = false;
    barrier(CLK_LOCAL_MEM_FENCE);
    
    int32_t global_tid_46692;
    int32_t local_tid_46693;
    int64_t group_sizze_46696;
    int32_t wave_sizze_46695;
    int32_t group_tid_46694;
    
    global_tid_46692 = get_global_id(0);
    local_tid_46693 = get_local_id(0);
    // group_sizze_46696 is assigned but never read in this kernel.
    group_sizze_46696 = get_local_size(0);
    // LOCKSTEP_WIDTH is defined outside this kernel; it decides below when
    // the in-block scan loops need an explicit barrier.
    wave_sizze_46695 = LOCKSTEP_WIDTH;
    group_tid_46694 = get_group_id(0);
    
    // phys_tid_34045 is assigned but never read in this kernel.
    int32_t phys_tid_34045;
    
    phys_tid_34045 = global_tid_46692;
    
    __local char *scan_arr_mem_46697;
    
    scan_arr_mem_46697 = (__local char *) scan_arr_mem_46697_backing_0;
    
    // x_34097: carry from the previous chunk of this group (starts at the
    // neutral element). x_34098: this thread's input element for the chunk.
    float x_34097;
    float x_34098;
    
    x_34097 = 0.0F;
    // One iteration per chunk of segscan_group_sizze_34093 elements; the
    // iteration count is ceil(m * iota32_arg / num_threads).
    for (int64_t j_46699 = 0; j_46699 < sdiv_up64(m_27772 * iota32_arg_28233,
                                                  sext_i32_i64(num_threads_46691));
         j_46699++) {
        // Each group owns a contiguous run of chunks: its run starts at
        // group_tid * (group size * iteration count) and chunk j lies
        // group size * j elements into that run.
        int64_t chunk_offset_46700 = segscan_group_sizze_34093 * j_46699 +
                sext_i32_i64(group_tid_46694) * (segscan_group_sizze_34093 *
                                                 sdiv_up64(m_27772 *
                                                           iota32_arg_28233,
                                                           sext_i32_i64(num_threads_46691)));
        int64_t flat_idx_46701 = chunk_offset_46700 +
                sext_i32_i64(local_tid_46693);
        // Split the flat index into row (gtid_34036) and column within the
        // row (gtid_34044).
        int64_t gtid_34036 = squot64(flat_idx_46701, iota32_arg_28233);
        int64_t gtid_34044 = flat_idx_46701 - squot64(flat_idx_46701,
                                                      iota32_arg_28233) *
                iota32_arg_28233;
        
        // threads in bounds read input
        {
            if (slt64(gtid_34036, m_27772) && slt64(gtid_34044,
                                                    iota32_arg_28233)) {
                int32_t y_34104 = ((__global int32_t *) mem_45399)[gtid_34036];
                int32_t index_primexp_42411 = sext_i64_i32(gtid_34044);
                // Columns at or beyond the per-row limit contribute 0.0F.
                bool cond_34106 = sle32(y_34104, index_primexp_42411);
                float defunc_0_f_res_34107;
                
                if (cond_34106) {
                    defunc_0_f_res_34107 = 0.0F;
                } else {
                    int32_t x_34100 = ((__global
                                        int32_t *) defunc_3_map_res_mem_45245)[gtid_34036];
                    int32_t x_34101 = ((__global
                                        int32_t *) defunc_3_map_res_mem_45244)[gtid_34036];
                    float x_34102 = ((__global
                                      float *) defunc_0_f_res_mem_45279)[gtid_34036];
                    // Column 0 takes the per-row seed value; every other
                    // column takes a difference of two elements of the row.
                    bool cond_34108 = index_primexp_42411 == 0;
                    float defunc_0_f_res_f_res_34109;
                    
                    if (cond_34108) {
                        defunc_0_f_res_f_res_34109 = x_34102;
                    } else {
                        // First operand index: x_34100 + column, which must
                        // lie in [0, N_27771).
                        int32_t i_34110 = add32(x_34100, index_primexp_42411);
                        int64_t i_34111 = sext_i32_i64(i_34110);
                        bool x_34112 = sle64((int64_t) 0, i_34111);
                        bool y_34113 = slt64(i_34111, N_27771);
                        bool bounds_check_34114 = x_34112 && y_34113;
                        // index_certs_34115 is declared but never used.
                        bool index_certs_34115;
                        
                        if (!bounds_check_34114) {
                            {
                                // Failure code 75. Only the thread whose
                                // compare-exchange returned -1 records the
                                // failing index and the bound.
                                if (atomic_cmpxchg_i32_global(global_failure,
                                                              -1, 75) == -1) {
                                    global_failure_args[0] = i_34111;
                                    global_failure_args[1] = N_27771;
                                    ;
                                }
                                local_failure = true;
                                goto error_0;
                            }
                        }
                        
                        float x_34116 = ((__global
                                          float *) defunc_4_map_res_mem_45178)[gtid_34036 *
                                                                               N_27771 +
                                                                               i_34111];
                        // Second operand index: (x_34100 - x_34101) + column,
                        // checked against the same bounds.
                        int32_t x_34117 = sub32(x_34100, x_34101);
                        int32_t i_34118 = add32(x_34117, index_primexp_42411);
                        int64_t i_34119 = sext_i32_i64(i_34118);
                        bool x_34120 = sle64((int64_t) 0, i_34119);
                        bool y_34121 = slt64(i_34119, N_27771);
                        bool bounds_check_34122 = x_34120 && y_34121;
                        // index_certs_34123 is declared but never used.
                        bool index_certs_34123;
                        
                        if (!bounds_check_34122) {
                            {
                                // Failure code 76, reported the same way as
                                // code 75 above.
                                if (atomic_cmpxchg_i32_global(global_failure,
                                                              -1, 76) == -1) {
                                    global_failure_args[0] = i_34119;
                                    global_failure_args[1] = N_27771;
                                    ;
                                }
                                local_failure = true;
                                goto error_0;
                            }
                        }
                        
                        float y_34124 = ((__global
                                          float *) defunc_4_map_res_mem_45178)[gtid_34036 *
                                                                               N_27771 +
                                                                               i_34119];
                        float defunc_0_f_res_f_res_f_res_34125 = x_34116 -
                              y_34124;
                        
                        defunc_0_f_res_f_res_34109 =
                            defunc_0_f_res_f_res_f_res_34125;
                    }
                    defunc_0_f_res_34107 = defunc_0_f_res_f_res_34109;
                }
                // write to-scan values to parameters
                {
                    x_34098 = defunc_0_f_res_34107;
                }
                // write mapped values results to global memory
                { }
            }
        }
        // do one intra-group scan operation
        {
            // maybe restore some to-scan values to parameters, or read neutral
            {
                // Out-of-range threads take part with the neutral element.
                if (!(slt64(gtid_34036, m_27772) && slt64(gtid_34044,
                                                          iota32_arg_28233))) {
                    x_34098 = 0.0F;
                }
            }
            // combine with carry and write to local memory
            {
                // The carry x_34097 is nonzero only in thread 0 (see the end
                // of the loop body), so it is folded into the first element.
                float defunc_1_op_res_34099 = x_34097 + x_34098;
                
                ((__local
                  float *) scan_arr_mem_46697)[sext_i32_i64(local_tid_46693)] =
                    defunc_1_op_res_34099;
            }
            
          // Reached both on the normal path and by the goto from a failed
          // bounds check. After the first barrier every thread of the group
          // sees the flag and, if it is set, the whole group returns.
          error_0:
            barrier(CLK_LOCAL_MEM_FENCE);
            if (local_failure)
                return;
            barrier(CLK_LOCAL_MEM_FENCE);
            
            // x_46702 / x_46703: accumulator and own value for the per-block
            // scan. x_46705 / x_46706: the same pair for the scan over block
            // totals.
            float x_46702;
            float x_46703;
            float x_46705;
            float x_46706;
            bool ltid_in_bounds_46708;
            
            ltid_in_bounds_46708 = slt64(sext_i32_i64(local_tid_46693),
                                         segscan_group_sizze_34093);
            
            int32_t skip_threads_46709;
            
            // read input for in-block scan
            {
                if (ltid_in_bounds_46708) {
                    x_46703 = ((volatile __local
                                float *) scan_arr_mem_46697)[sext_i32_i64(local_tid_46693)];
                    // local_tid - (local_tid / 32) * 32 is the thread's lane
                    // within its block of 32 threads. Lane 0 never enters the
                    // combine step below, so its result is its own input.
                    if ((local_tid_46693 - squot32(local_tid_46693, 32) * 32) ==
                        0) {
                        x_46702 = x_46703;
                    }
                }
            }
            // in-block scan (hopefully no barriers needed)
            {
                // Strides 1, 2, 4, 8, 16: a thread whose lane is at least the
                // stride combines with the element that many slots earlier.
                skip_threads_46709 = 1;
                while (slt32(skip_threads_46709, 32)) {
                    if (sle32(skip_threads_46709, local_tid_46693 -
                              squot32(local_tid_46693, 32) * 32) &&
                        ltid_in_bounds_46708) {
                        // read operands
                        {
                            x_46702 = ((volatile __local
                                        float *) scan_arr_mem_46697)[sext_i32_i64(local_tid_46693) -
                                                                     sext_i32_i64(skip_threads_46709)];
                        }
                        // perform operation
                        {
                            // The subtraction reduces to the distance between
                            // this element and its operand. If the column of
                            // this element is smaller than that distance, the
                            // operand belongs to an earlier row and must not
                            // be added.
                            bool inactive_46710 =
                                 slt64(srem64(sext_i32_i64(local_tid_46693) +
                                              chunk_offset_46700,
                                              iota32_arg_28233),
                                       sext_i32_i64(local_tid_46693) +
                                       chunk_offset_46700 -
                                       (sext_i32_i64(local_tid_46693 -
                                        skip_threads_46709) +
                                        chunk_offset_46700));
                            
                            if (inactive_46710) {
                                x_46702 = x_46703;
                            }
                            if (!inactive_46710) {
                                float defunc_1_op_res_46704 = x_46702 + x_46703;
                                
                                x_46702 = defunc_1_op_res_46704;
                            }
                        }
                    }
                    // A barrier separates the reads from the writes only once
                    // the stride has reached wave_sizze_46695.
                    if (sle32(wave_sizze_46695, skip_threads_46709)) {
                        barrier(CLK_LOCAL_MEM_FENCE);
                    }
                    if (sle32(skip_threads_46709, local_tid_46693 -
                              squot32(local_tid_46693, 32) * 32) &&
                        ltid_in_bounds_46708) {
                        // write result
                        {
                            ((volatile __local
                              float *) scan_arr_mem_46697)[sext_i32_i64(local_tid_46693)] =
                                x_46702;
                            x_46703 = x_46702;
                        }
                    }
                    if (sle32(wave_sizze_46695, skip_threads_46709)) {
                        barrier(CLK_LOCAL_MEM_FENCE);
                    }
                    skip_threads_46709 *= 2;
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            // last thread of block 'i' writes its result to offset 'i'
            {
                // This overwrites the low slots of the scratch array; block 0
                // gets its own results back in the restore step further down.
                if ((local_tid_46693 - squot32(local_tid_46693, 32) * 32) ==
                    31 && ltid_in_bounds_46708) {
                    ((volatile __local
                      float *) scan_arr_mem_46697)[sext_i32_i64(squot32(local_tid_46693,
                                                                        32))] =
                        x_46702;
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            // scan the first block, after which offset 'i' contains carry-in for block 'i+1'
            {
                int32_t skip_threads_46711;
                
                // read input for in-block scan
                {
                    // Only threads of block 0 take part; thread t handles the
                    // total of block t.
                    if (squot32(local_tid_46693, 32) == 0 &&
                        ltid_in_bounds_46708) {
                        x_46706 = ((volatile __local
                                    float *) scan_arr_mem_46697)[sext_i32_i64(local_tid_46693)];
                        if ((local_tid_46693 - squot32(local_tid_46693, 32) *
                             32) == 0) {
                            x_46705 = x_46706;
                        }
                    }
                }
                // in-block scan (hopefully no barriers needed)
                {
                    skip_threads_46711 = 1;
                    while (slt32(skip_threads_46711, 32)) {
                        if (sle32(skip_threads_46711, local_tid_46693 -
                                  squot32(local_tid_46693, 32) * 32) &&
                            (squot32(local_tid_46693, 32) == 0 &&
                             ltid_in_bounds_46708)) {
                            // read operands
                            {
                                x_46705 = ((volatile __local
                                            float *) scan_arr_mem_46697)[sext_i32_i64(local_tid_46693) -
                                                                         sext_i32_i64(skip_threads_46711)];
                            }
                            // perform operation
                            {
                                // Same row-boundary test as in the per-block
                                // scan, but evaluated at the last element of
                                // block local_tid (local_tid * 32 + 31)
                                // against the last element of the block
                                // skip_threads blocks earlier.
                                bool inactive_46712 =
                                     slt64(srem64(sext_i32_i64(local_tid_46693 *
                                                  32 + 32 - 1) +
                                                  chunk_offset_46700,
                                                  iota32_arg_28233),
                                           sext_i32_i64(local_tid_46693 * 32 +
                                           32 - 1) + chunk_offset_46700 -
                                           (sext_i32_i64((local_tid_46693 -
                                                          skip_threads_46711) *
                                            32 + 32 - 1) + chunk_offset_46700));
                                
                                if (inactive_46712) {
                                    x_46705 = x_46706;
                                }
                                if (!inactive_46712) {
                                    float defunc_1_op_res_46707 = x_46705 +
                                          x_46706;
                                    
                                    x_46705 = defunc_1_op_res_46707;
                                }
                            }
                        }
                        if (sle32(wave_sizze_46695, skip_threads_46711)) {
                            barrier(CLK_LOCAL_MEM_FENCE);
                        }
                        if (sle32(skip_threads_46711, local_tid_46693 -
                                  squot32(local_tid_46693, 32) * 32) &&
                            (squot32(local_tid_46693, 32) == 0 &&
                             ltid_in_bounds_46708)) {
                            // write result
                            {
                                ((volatile __local
                                  float *) scan_arr_mem_46697)[sext_i32_i64(local_tid_46693)] =
                                    x_46705;
                                x_46706 = x_46705;
                            }
                        }
                        if (sle32(wave_sizze_46695, skip_threads_46711)) {
                            barrier(CLK_LOCAL_MEM_FENCE);
                        }
                        skip_threads_46711 *= 2;
                    }
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            // carry-in for every block except the first
            {
                if (!(squot32(local_tid_46693, 32) == 0 ||
                      !ltid_in_bounds_46708)) {
                    // read operands
                    {
                        // Keep the thread's own in-block result in x_46703
                        // and fetch the scanned total of the preceding block.
                        x_46703 = x_46702;
                        x_46702 = ((__local
                                    float *) scan_arr_mem_46697)[sext_i32_i64(squot32(local_tid_46693,
                                                                                      32)) -
                                                                 (int64_t) 1];
                    }
                    // perform operation
                    {
                        // The carry is dropped when a row boundary lies
                        // between this element and the last element of the
                        // preceding block.
                        bool inactive_46713 =
                             slt64(srem64(sext_i32_i64(local_tid_46693) +
                                          chunk_offset_46700, iota32_arg_28233),
                                   sext_i32_i64(local_tid_46693) +
                                   chunk_offset_46700 -
                                   (sext_i32_i64(squot32(local_tid_46693, 32) *
                                    32 - 1) + chunk_offset_46700));
                        
                        if (inactive_46713) {
                            x_46702 = x_46703;
                        }
                        if (!inactive_46713) {
                            float defunc_1_op_res_46704 = x_46702 + x_46703;
                            
                            x_46702 = defunc_1_op_res_46704;
                        }
                    }
                    // write final result
                    {
                        ((__local
                          float *) scan_arr_mem_46697)[sext_i32_i64(local_tid_46693)] =
                            x_46702;
                    }
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            // restore correct values for first block
            {
                // Block 0 writes back its in-block results, which were
                // overwritten by the block totals above.
                if (squot32(local_tid_46693, 32) == 0) {
                    ((__local
                      float *) scan_arr_mem_46697)[sext_i32_i64(local_tid_46693)] =
                        x_46703;
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            // threads in bounds write partial scan result
            {
                if (slt64(gtid_34036, m_27772) && slt64(gtid_34044,
                                                        iota32_arg_28233)) {
                    ((__global float *) mem_45403)[gtid_34036 *
                                                   iota32_arg_28233 +
                                                   gtid_34044] = ((__local
                                                                   float *) scan_arr_mem_46697)[sext_i32_i64(local_tid_46693)];
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            // first thread reads last element as carry-in for next iteration
            {
                // The right-hand side reduces to 1, so this is true exactly
                // when the next chunk starts at column 0 of a row; in that
                // case no carry is taken over.
                bool crosses_segment_46714 = slt64(srem64(chunk_offset_46700 +
                                                          segscan_group_sizze_34093,
                                                          iota32_arg_28233),
                                                   chunk_offset_46700 +
                                                   segscan_group_sizze_34093 -
                                                   (chunk_offset_46700 +
                                                    segscan_group_sizze_34093 -
                                                    (int64_t) 1));
                bool should_load_carry_46715 = local_tid_46693 == 0 &&
                     !crosses_segment_46714;
                
                if (should_load_carry_46715) {
                    x_34097 = ((__local
                                float *) scan_arr_mem_46697)[segscan_group_sizze_34093 -
                                                             (int64_t) 1];
                }
                // All other threads (and thread 0 at a row boundary) carry
                // the neutral element into the next chunk.
                if (!should_load_carry_46715) {
                    x_34097 = 0.0F;
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);
        }
    }
    
  // No goto in this kernel targets error_1.
  error_1:
    return;
    #undef segscan_group_sizze_34093
}
// Stage 2 of a multi-stage segmented scan over int64_t values, using add64 as
// the combining operator.  Each local id picks one element of mem_45163 (the
// last element of a fixed-size stride, see flat_idx_46299), the picked
// elements are scanned in local memory, and the results are written back to
// the same positions.  Indexing uses only the local id, so this is presumably
// launched as a single work-group -- confirm against the host code.
//
// Parameters:
//   global_failure   - the kernel returns immediately if *global_failure >= 0.
//   scan_arr_mem_46297_backing_aligned_0
//                    - local scratch memory, indexed by local id as an
//                      int64_t array.
//   N_27771, m_27772 - extents; elements are addressed as row * N_27771 + col
//                      with row < m_27772 and col < N_27771.  N_27771 is also
//                      the modulus used by the segment-boundary tests.
//   stage1_num_groups_46266
//                    - only local ids below this value take part in the scan
//                      (presumably one per stage-1 group -- TODO confirm).
//   num_threads_46267
//                    - divisor in sdiv_up64(m * N, num_threads), the factor
//                      that scales the stride between picked elements.
//   mem_45163        - global buffer, read and updated in place as int64_t.
__kernel void mainDetailedziscan_stage2_32356(__global int *global_failure,
                                              __local volatile
                                              int64_t *scan_arr_mem_46297_backing_aligned_0,
                                              int64_t N_27771, int64_t m_27772,
                                              int64_t stage1_num_groups_46266,
                                              int32_t num_threads_46267,
                                              __global unsigned char *mem_45163)
{
    #define segscan_group_sizze_32373 (mainDetailedzisegscan_group_sizze_32350)
    
    // NOTE: block_dim0..2 are not referenced anywhere else in this kernel.
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict scan_arr_mem_46297_backing_0 =
                          (__local volatile
                           char *) scan_arr_mem_46297_backing_aligned_0;
    
    // Bail out if a failure has already been recorded.
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_46292;
    int32_t local_tid_46293;
    int64_t group_sizze_46296;
    int32_t wave_sizze_46295;
    int32_t group_tid_46294;
    
    // Of these, only local_tid_46293 and wave_sizze_46295 are used by the
    // scan below; global_tid_46292 only feeds phys_tid_32356.
    global_tid_46292 = get_global_id(0);
    local_tid_46293 = get_local_id(0);
    group_sizze_46296 = get_local_size(0);
    wave_sizze_46295 = LOCKSTEP_WIDTH;
    group_tid_46294 = get_group_id(0);
    
    int32_t phys_tid_32356;
    
    phys_tid_32356 = global_tid_46292;
    
    __local char *scan_arr_mem_46297;
    
    scan_arr_mem_46297 = (__local char *) scan_arr_mem_46297_backing_0;
    
    int64_t flat_idx_46299;
    
    // Flat index of the element handled by this local id: the last element
    // of stride number local_tid, where one stride spans
    // segscan_group_sizze_32373 * sdiv_up64(m * N, num_threads) elements.
    flat_idx_46299 = (sext_i32_i64(local_tid_46293) + (int64_t) 1) *
        (segscan_group_sizze_32373 * sdiv_up64(m_27772 * N_27771,
                                               sext_i32_i64(num_threads_46267))) -
        (int64_t) 1;
    
    // Split the flat index into (row, column) with rows of length N_27771.
    int64_t gtid_32347;
    
    gtid_32347 = squot64(flat_idx_46299, N_27771);
    
    int64_t gtid_32355;
    
    gtid_32355 = flat_idx_46299 - squot64(flat_idx_46299, N_27771) * N_27771;
    // threads in bound read carries; others get neutral element
    {
        if (slt64(gtid_32347, m_27772) && slt64(gtid_32355, N_27771)) {
            ((__local
              int64_t *) scan_arr_mem_46297)[sext_i32_i64(local_tid_46293)] =
                ((__global int64_t *) mem_45163)[gtid_32347 * N_27771 +
                                                 gtid_32355];
        } else {
            ((__local
              int64_t *) scan_arr_mem_46297)[sext_i32_i64(local_tid_46293)] =
                (int64_t) 0;
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    
    // x_32378 / x_32379: accumulator and own value for the per-block scan.
    // x_46300 / x_46301: the same pair for the scan of per-block totals.
    int64_t x_32378;
    int64_t x_32379;
    int64_t x_46300;
    int64_t x_46301;
    bool ltid_in_bounds_46303;
    
    ltid_in_bounds_46303 = slt64(sext_i32_i64(local_tid_46293),
                                 stage1_num_groups_46266);
    
    int32_t skip_threads_46304;
    
    // read input for in-block scan
    // (a block is a run of 32 consecutive local ids; the expression
    // local_tid - squot32(local_tid, 32) * 32 is the lane within the block)
    {
        if (ltid_in_bounds_46303) {
            x_32379 = ((volatile __local
                        int64_t *) scan_arr_mem_46297)[sext_i32_i64(local_tid_46293)];
            if ((local_tid_46293 - squot32(local_tid_46293, 32) * 32) == 0) {
                x_32378 = x_32379;
            }
        }
    }
    // in-block scan (hopefully no barriers needed)
    // The stride skip_threads doubles through 1, 2, 4, 8, 16.  A barrier is
    // only issued when the stride is at least LOCKSTEP_WIDTH; for smaller
    // strides the code relies on the lanes running in lockstep.
    {
        skip_threads_46304 = 1;
        while (slt32(skip_threads_46304, 32)) {
            if (sle32(skip_threads_46304, local_tid_46293 -
                      squot32(local_tid_46293, 32) * 32) &&
                ltid_in_bounds_46303) {
                // read operands
                {
                    x_32378 = ((volatile __local
                                int64_t *) scan_arr_mem_46297)[sext_i32_i64(local_tid_46293) -
                                                               sext_i32_i64(skip_threads_46304)];
                }
                // perform operation
                {
                    // inactive <=> srem64(own flat index, N) < own flat index
                    // - partner flat index, where the partner is local id
                    // (local_tid - skip_threads).  In that case the partner
                    // element lies before the start of this element's
                    // length-N segment and must not be combined.
                    bool inactive_46305 =
                         slt64(srem64((sext_i32_i64(local_tid_46293) +
                                       (int64_t) 1) *
                                      (segscan_group_sizze_32373 *
                                       sdiv_up64(m_27772 * N_27771,
                                                 sext_i32_i64(num_threads_46267))) -
                                      (int64_t) 1, N_27771),
                               (sext_i32_i64(local_tid_46293) + (int64_t) 1) *
                               (segscan_group_sizze_32373 * sdiv_up64(m_27772 *
                                                                      N_27771,
                                                                      sext_i32_i64(num_threads_46267))) -
                               (int64_t) 1 - ((sext_i32_i64(local_tid_46293 -
                                               skip_threads_46304) +
                                               (int64_t) 1) *
                                              (segscan_group_sizze_32373 *
                                               sdiv_up64(m_27772 * N_27771,
                                                         sext_i32_i64(num_threads_46267))) -
                                              (int64_t) 1));
                    
                    if (inactive_46305) {
                        x_32378 = x_32379;
                    }
                    if (!inactive_46305) {
                        int64_t defunc_1_op_res_32380 = add64(x_32378, x_32379);
                        
                        x_32378 = defunc_1_op_res_32380;
                    }
                }
            }
            if (sle32(wave_sizze_46295, skip_threads_46304)) {
                barrier(CLK_LOCAL_MEM_FENCE);
            }
            if (sle32(skip_threads_46304, local_tid_46293 -
                      squot32(local_tid_46293, 32) * 32) &&
                ltid_in_bounds_46303) {
                // write result
                {
                    ((volatile __local
                      int64_t *) scan_arr_mem_46297)[sext_i32_i64(local_tid_46293)] =
                        x_32378;
                    x_32379 = x_32378;
                }
            }
            if (sle32(wave_sizze_46295, skip_threads_46304)) {
                barrier(CLK_LOCAL_MEM_FENCE);
            }
            skip_threads_46304 *= 2;
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // last thread of block 'i' writes its result to offset 'i'
    {
        if ((local_tid_46293 - squot32(local_tid_46293, 32) * 32) == 31 &&
            ltid_in_bounds_46303) {
            ((volatile __local
              int64_t *) scan_arr_mem_46297)[sext_i32_i64(squot32(local_tid_46293,
                                                                  32))] =
                x_32378;
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // scan the first block, after which offset 'i' contains carry-in for block 'i+1'
    // Same doubling loop as above, restricted to block 0.  Slot j now stands
    // for local id j * 32 + 31, so the segment test uses that id's flat index.
    {
        int32_t skip_threads_46306;
        
        // read input for in-block scan
        {
            if (squot32(local_tid_46293, 32) == 0 && ltid_in_bounds_46303) {
                x_46301 = ((volatile __local
                            int64_t *) scan_arr_mem_46297)[sext_i32_i64(local_tid_46293)];
                if ((local_tid_46293 - squot32(local_tid_46293, 32) * 32) ==
                    0) {
                    x_46300 = x_46301;
                }
            }
        }
        // in-block scan (hopefully no barriers needed)
        {
            skip_threads_46306 = 1;
            while (slt32(skip_threads_46306, 32)) {
                if (sle32(skip_threads_46306, local_tid_46293 -
                          squot32(local_tid_46293, 32) * 32) &&
                    (squot32(local_tid_46293, 32) == 0 &&
                     ltid_in_bounds_46303)) {
                    // read operands
                    {
                        x_46300 = ((volatile __local
                                    int64_t *) scan_arr_mem_46297)[sext_i32_i64(local_tid_46293) -
                                                                   sext_i32_i64(skip_threads_46306)];
                    }
                    // perform operation
                    {
                        // Segment test between the flat indices belonging to
                        // local ids (local_tid * 32 + 31) and
                        // ((local_tid - skip_threads) * 32 + 31).
                        bool inactive_46307 =
                             slt64(srem64((sext_i32_i64(local_tid_46293 * 32 +
                                           32 - 1) + (int64_t) 1) *
                                          (segscan_group_sizze_32373 *
                                           sdiv_up64(m_27772 * N_27771,
                                                     sext_i32_i64(num_threads_46267))) -
                                          (int64_t) 1, N_27771),
                                   (sext_i32_i64(local_tid_46293 * 32 + 32 -
                                    1) + (int64_t) 1) *
                                   (segscan_group_sizze_32373 *
                                    sdiv_up64(m_27772 * N_27771,
                                              sext_i32_i64(num_threads_46267))) -
                                   (int64_t) 1 -
                                   ((sext_i32_i64((local_tid_46293 -
                                                   skip_threads_46306) * 32 +
                                     32 - 1) + (int64_t) 1) *
                                    (segscan_group_sizze_32373 *
                                     sdiv_up64(m_27772 * N_27771,
                                               sext_i32_i64(num_threads_46267))) -
                                    (int64_t) 1));
                        
                        if (inactive_46307) {
                            x_46300 = x_46301;
                        }
                        if (!inactive_46307) {
                            int64_t defunc_1_op_res_46302 = add64(x_46300,
                                                                  x_46301);
                            
                            x_46300 = defunc_1_op_res_46302;
                        }
                    }
                }
                if (sle32(wave_sizze_46295, skip_threads_46306)) {
                    barrier(CLK_LOCAL_MEM_FENCE);
                }
                if (sle32(skip_threads_46306, local_tid_46293 -
                          squot32(local_tid_46293, 32) * 32) &&
                    (squot32(local_tid_46293, 32) == 0 &&
                     ltid_in_bounds_46303)) {
                    // write result
                    {
                        ((volatile __local
                          int64_t *) scan_arr_mem_46297)[sext_i32_i64(local_tid_46293)] =
                            x_46300;
                        x_46301 = x_46300;
                    }
                }
                if (sle32(wave_sizze_46295, skip_threads_46306)) {
                    barrier(CLK_LOCAL_MEM_FENCE);
                }
                skip_threads_46306 *= 2;
            }
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // carry-in for every block except the first
    {
        if (!(squot32(local_tid_46293, 32) == 0 || !ltid_in_bounds_46303)) {
            // read operands
            // (x_32379 keeps this thread's in-block result; x_32378 becomes
            // the scanned total stored at slot block - 1)
            {
                x_32379 = x_32378;
                x_32378 = ((__local
                            int64_t *) scan_arr_mem_46297)[sext_i32_i64(squot32(local_tid_46293,
                                                                                32)) -
                                                           (int64_t) 1];
            }
            // perform operation
            {
                // Segment test against the flat index of the last local id of
                // the preceding block, squot32(local_tid, 32) * 32 - 1.
                bool inactive_46308 =
                     slt64(srem64((sext_i32_i64(local_tid_46293) +
                                   (int64_t) 1) * (segscan_group_sizze_32373 *
                                                   sdiv_up64(m_27772 * N_27771,
                                                             sext_i32_i64(num_threads_46267))) -
                                  (int64_t) 1, N_27771),
                           (sext_i32_i64(local_tid_46293) + (int64_t) 1) *
                           (segscan_group_sizze_32373 * sdiv_up64(m_27772 *
                                                                  N_27771,
                                                                  sext_i32_i64(num_threads_46267))) -
                           (int64_t) 1 - ((sext_i32_i64(squot32(local_tid_46293,
                                                                32) * 32 - 1) +
                                           (int64_t) 1) *
                                          (segscan_group_sizze_32373 *
                                           sdiv_up64(m_27772 * N_27771,
                                                     sext_i32_i64(num_threads_46267))) -
                                          (int64_t) 1));
                
                if (inactive_46308) {
                    x_32378 = x_32379;
                }
                if (!inactive_46308) {
                    int64_t defunc_1_op_res_32380 = add64(x_32378, x_32379);
                    
                    x_32378 = defunc_1_op_res_32380;
                }
            }
            // write final result
            {
                ((__local
                  int64_t *) scan_arr_mem_46297)[sext_i32_i64(local_tid_46293)] =
                    x_32378;
            }
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // restore correct values for first block
    // (the first slots were overwritten by the scan of per-block totals;
    // x_32379 still holds each thread's own in-block result)
    // NOTE(review): this store is not guarded by ltid_in_bounds_46303, and
    // for local ids >= stage1_num_groups_46266 x_32379 was never assigned.
    // Such a slot only reaches global memory if the gtid bounds test below
    // passes for that thread -- confirm that cannot happen.
    {
        if (squot32(local_tid_46293, 32) == 0) {
            ((__local
              int64_t *) scan_arr_mem_46297)[sext_i32_i64(local_tid_46293)] =
                x_32379;
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // threads in bounds write scanned carries
    {
        if (slt64(gtid_32347, m_27772) && slt64(gtid_32355, N_27771)) {
            ((__global int64_t *) mem_45163)[gtid_32347 * N_27771 +
                                             gtid_32355] = ((__local
                                                             int64_t *) scan_arr_mem_46297)[sext_i32_i64(local_tid_46293)];
        }
    }
    
  error_0:
    return;
    #undef segscan_group_sizze_32373
}
// Stage 2 of a multi-stage segmented scan over float values, using floating
// point addition as the combining operator.  Each local id picks one element
// of mem_45403 (the last element of a fixed-size stride, see flat_idx_46723),
// the picked elements are scanned in local memory, and the results are
// written back to the same positions.  Indexing uses only the local id, so
// this is presumably launched as a single work-group -- confirm against the
// host code.
//
// Parameters:
//   global_failure   - the kernel returns immediately if *global_failure >= 0.
//   scan_arr_mem_46721_backing_aligned_0
//                    - local scratch memory.  It is declared as an int64_t
//                      pointer but every access below casts it to float and
//                      indexes it by local id.
//   m_27772, iota32_arg_28233
//                    - extents; elements are addressed as
//                      row * iota32_arg_28233 + col with row < m_27772 and
//                      col < iota32_arg_28233.  iota32_arg_28233 is also the
//                      modulus used by the segment-boundary tests.
//   stage1_num_groups_46690
//                    - only local ids below this value take part in the scan
//                      (presumably one per stage-1 group -- TODO confirm).
//   num_threads_46691
//                    - divisor in sdiv_up64(m * iota32_arg, num_threads), the
//                      factor that scales the stride between picked elements.
//   mem_45403        - global buffer, read and updated in place as float.
__kernel void mainDetailedziscan_stage2_34045(__global int *global_failure,
                                              __local volatile
                                              int64_t *scan_arr_mem_46721_backing_aligned_0,
                                              int64_t m_27772,
                                              int64_t iota32_arg_28233,
                                              int64_t stage1_num_groups_46690,
                                              int32_t num_threads_46691,
                                              __global unsigned char *mem_45403)
{
    #define segscan_group_sizze_34093 (mainDetailedzisegscan_group_sizze_34039)
    
    // NOTE: block_dim0..2 are not referenced anywhere else in this kernel.
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict scan_arr_mem_46721_backing_0 =
                          (__local volatile
                           char *) scan_arr_mem_46721_backing_aligned_0;
    
    // Bail out if a failure has already been recorded.
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_46716;
    int32_t local_tid_46717;
    int64_t group_sizze_46720;
    int32_t wave_sizze_46719;
    int32_t group_tid_46718;
    
    // Of these, only local_tid_46717 and wave_sizze_46719 are used by the
    // scan below; global_tid_46716 only feeds phys_tid_34045.
    global_tid_46716 = get_global_id(0);
    local_tid_46717 = get_local_id(0);
    group_sizze_46720 = get_local_size(0);
    wave_sizze_46719 = LOCKSTEP_WIDTH;
    group_tid_46718 = get_group_id(0);
    
    int32_t phys_tid_34045;
    
    phys_tid_34045 = global_tid_46716;
    
    __local char *scan_arr_mem_46721;
    
    scan_arr_mem_46721 = (__local char *) scan_arr_mem_46721_backing_0;
    
    int64_t flat_idx_46723;
    
    // Flat index of the element handled by this local id: the last element
    // of stride number local_tid, where one stride spans
    // segscan_group_sizze_34093 * sdiv_up64(m * iota32_arg, num_threads)
    // elements.
    flat_idx_46723 = (sext_i32_i64(local_tid_46717) + (int64_t) 1) *
        (segscan_group_sizze_34093 * sdiv_up64(m_27772 * iota32_arg_28233,
                                               sext_i32_i64(num_threads_46691))) -
        (int64_t) 1;
    
    // Split the flat index into (row, column) with rows of length
    // iota32_arg_28233.
    int64_t gtid_34036;
    
    gtid_34036 = squot64(flat_idx_46723, iota32_arg_28233);
    
    int64_t gtid_34044;
    
    gtid_34044 = flat_idx_46723 - squot64(flat_idx_46723, iota32_arg_28233) *
        iota32_arg_28233;
    // threads in bound read carries; others get neutral element
    {
        if (slt64(gtid_34036, m_27772) && slt64(gtid_34044, iota32_arg_28233)) {
            ((__local
              float *) scan_arr_mem_46721)[sext_i32_i64(local_tid_46717)] =
                ((__global float *) mem_45403)[gtid_34036 * iota32_arg_28233 +
                                               gtid_34044];
        } else {
            ((__local
              float *) scan_arr_mem_46721)[sext_i32_i64(local_tid_46717)] =
                0.0F;
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    
    // x_34097 / x_34098: accumulator and own value for the per-block scan.
    // x_46724 / x_46725: the same pair for the scan of per-block totals.
    float x_34097;
    float x_34098;
    float x_46724;
    float x_46725;
    bool ltid_in_bounds_46727;
    
    ltid_in_bounds_46727 = slt64(sext_i32_i64(local_tid_46717),
                                 stage1_num_groups_46690);
    
    int32_t skip_threads_46728;
    
    // read input for in-block scan
    // (a block is a run of 32 consecutive local ids; the expression
    // local_tid - squot32(local_tid, 32) * 32 is the lane within the block)
    {
        if (ltid_in_bounds_46727) {
            x_34098 = ((volatile __local
                        float *) scan_arr_mem_46721)[sext_i32_i64(local_tid_46717)];
            if ((local_tid_46717 - squot32(local_tid_46717, 32) * 32) == 0) {
                x_34097 = x_34098;
            }
        }
    }
    // in-block scan (hopefully no barriers needed)
    // The stride skip_threads doubles through 1, 2, 4, 8, 16.  A barrier is
    // only issued when the stride is at least LOCKSTEP_WIDTH; for smaller
    // strides the code relies on the lanes running in lockstep.
    {
        skip_threads_46728 = 1;
        while (slt32(skip_threads_46728, 32)) {
            if (sle32(skip_threads_46728, local_tid_46717 -
                      squot32(local_tid_46717, 32) * 32) &&
                ltid_in_bounds_46727) {
                // read operands
                {
                    x_34097 = ((volatile __local
                                float *) scan_arr_mem_46721)[sext_i32_i64(local_tid_46717) -
                                                             sext_i32_i64(skip_threads_46728)];
                }
                // perform operation
                {
                    // inactive <=> srem64(own flat index, iota32_arg) < own
                    // flat index - partner flat index, where the partner is
                    // local id (local_tid - skip_threads).  In that case the
                    // partner element lies before the start of this element's
                    // segment and must not be combined.
                    bool inactive_46729 =
                         slt64(srem64((sext_i32_i64(local_tid_46717) +
                                       (int64_t) 1) *
                                      (segscan_group_sizze_34093 *
                                       sdiv_up64(m_27772 * iota32_arg_28233,
                                                 sext_i32_i64(num_threads_46691))) -
                                      (int64_t) 1, iota32_arg_28233),
                               (sext_i32_i64(local_tid_46717) + (int64_t) 1) *
                               (segscan_group_sizze_34093 * sdiv_up64(m_27772 *
                                                                      iota32_arg_28233,
                                                                      sext_i32_i64(num_threads_46691))) -
                               (int64_t) 1 - ((sext_i32_i64(local_tid_46717 -
                                               skip_threads_46728) +
                                               (int64_t) 1) *
                                              (segscan_group_sizze_34093 *
                                               sdiv_up64(m_27772 *
                                                         iota32_arg_28233,
                                                         sext_i32_i64(num_threads_46691))) -
                                              (int64_t) 1));
                    
                    if (inactive_46729) {
                        x_34097 = x_34098;
                    }
                    if (!inactive_46729) {
                        float defunc_1_op_res_34099 = x_34097 + x_34098;
                        
                        x_34097 = defunc_1_op_res_34099;
                    }
                }
            }
            if (sle32(wave_sizze_46719, skip_threads_46728)) {
                barrier(CLK_LOCAL_MEM_FENCE);
            }
            if (sle32(skip_threads_46728, local_tid_46717 -
                      squot32(local_tid_46717, 32) * 32) &&
                ltid_in_bounds_46727) {
                // write result
                {
                    ((volatile __local
                      float *) scan_arr_mem_46721)[sext_i32_i64(local_tid_46717)] =
                        x_34097;
                    x_34098 = x_34097;
                }
            }
            if (sle32(wave_sizze_46719, skip_threads_46728)) {
                barrier(CLK_LOCAL_MEM_FENCE);
            }
            skip_threads_46728 *= 2;
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // last thread of block 'i' writes its result to offset 'i'
    {
        if ((local_tid_46717 - squot32(local_tid_46717, 32) * 32) == 31 &&
            ltid_in_bounds_46727) {
            ((volatile __local
              float *) scan_arr_mem_46721)[sext_i32_i64(squot32(local_tid_46717,
                                                                32))] = x_34097;
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // scan the first block, after which offset 'i' contains carry-in for block 'i+1'
    // Same doubling loop as above, restricted to block 0.  Slot j now stands
    // for local id j * 32 + 31, so the segment test uses that id's flat index.
    {
        int32_t skip_threads_46730;
        
        // read input for in-block scan
        {
            if (squot32(local_tid_46717, 32) == 0 && ltid_in_bounds_46727) {
                x_46725 = ((volatile __local
                            float *) scan_arr_mem_46721)[sext_i32_i64(local_tid_46717)];
                if ((local_tid_46717 - squot32(local_tid_46717, 32) * 32) ==
                    0) {
                    x_46724 = x_46725;
                }
            }
        }
        // in-block scan (hopefully no barriers needed)
        {
            skip_threads_46730 = 1;
            while (slt32(skip_threads_46730, 32)) {
                if (sle32(skip_threads_46730, local_tid_46717 -
                          squot32(local_tid_46717, 32) * 32) &&
                    (squot32(local_tid_46717, 32) == 0 &&
                     ltid_in_bounds_46727)) {
                    // read operands
                    {
                        x_46724 = ((volatile __local
                                    float *) scan_arr_mem_46721)[sext_i32_i64(local_tid_46717) -
                                                                 sext_i32_i64(skip_threads_46730)];
                    }
                    // perform operation
                    {
                        // Segment test between the flat indices belonging to
                        // local ids (local_tid * 32 + 31) and
                        // ((local_tid - skip_threads) * 32 + 31).
                        bool inactive_46731 =
                             slt64(srem64((sext_i32_i64(local_tid_46717 * 32 +
                                           32 - 1) + (int64_t) 1) *
                                          (segscan_group_sizze_34093 *
                                           sdiv_up64(m_27772 * iota32_arg_28233,
                                                     sext_i32_i64(num_threads_46691))) -
                                          (int64_t) 1, iota32_arg_28233),
                                   (sext_i32_i64(local_tid_46717 * 32 + 32 -
                                    1) + (int64_t) 1) *
                                   (segscan_group_sizze_34093 *
                                    sdiv_up64(m_27772 * iota32_arg_28233,
                                              sext_i32_i64(num_threads_46691))) -
                                   (int64_t) 1 -
                                   ((sext_i32_i64((local_tid_46717 -
                                                   skip_threads_46730) * 32 +
                                     32 - 1) + (int64_t) 1) *
                                    (segscan_group_sizze_34093 *
                                     sdiv_up64(m_27772 * iota32_arg_28233,
                                               sext_i32_i64(num_threads_46691))) -
                                    (int64_t) 1));
                        
                        if (inactive_46731) {
                            x_46724 = x_46725;
                        }
                        if (!inactive_46731) {
                            float defunc_1_op_res_46726 = x_46724 + x_46725;
                            
                            x_46724 = defunc_1_op_res_46726;
                        }
                    }
                }
                if (sle32(wave_sizze_46719, skip_threads_46730)) {
                    barrier(CLK_LOCAL_MEM_FENCE);
                }
                if (sle32(skip_threads_46730, local_tid_46717 -
                          squot32(local_tid_46717, 32) * 32) &&
                    (squot32(local_tid_46717, 32) == 0 &&
                     ltid_in_bounds_46727)) {
                    // write result
                    {
                        ((volatile __local
                          float *) scan_arr_mem_46721)[sext_i32_i64(local_tid_46717)] =
                            x_46724;
                        x_46725 = x_46724;
                    }
                }
                if (sle32(wave_sizze_46719, skip_threads_46730)) {
                    barrier(CLK_LOCAL_MEM_FENCE);
                }
                skip_threads_46730 *= 2;
            }
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // carry-in for every block except the first
    {
        if (!(squot32(local_tid_46717, 32) == 0 || !ltid_in_bounds_46727)) {
            // read operands
            // (x_34098 keeps this thread's in-block result; x_34097 becomes
            // the scanned total stored at slot block - 1)
            {
                x_34098 = x_34097;
                x_34097 = ((__local
                            float *) scan_arr_mem_46721)[sext_i32_i64(squot32(local_tid_46717,
                                                                              32)) -
                                                         (int64_t) 1];
            }
            // perform operation
            {
                // Segment test against the flat index of the last local id of
                // the preceding block, squot32(local_tid, 32) * 32 - 1.
                bool inactive_46732 =
                     slt64(srem64((sext_i32_i64(local_tid_46717) +
                                   (int64_t) 1) * (segscan_group_sizze_34093 *
                                                   sdiv_up64(m_27772 *
                                                             iota32_arg_28233,
                                                             sext_i32_i64(num_threads_46691))) -
                                  (int64_t) 1, iota32_arg_28233),
                           (sext_i32_i64(local_tid_46717) + (int64_t) 1) *
                           (segscan_group_sizze_34093 * sdiv_up64(m_27772 *
                                                                  iota32_arg_28233,
                                                                  sext_i32_i64(num_threads_46691))) -
                           (int64_t) 1 - ((sext_i32_i64(squot32(local_tid_46717,
                                                                32) * 32 - 1) +
                                           (int64_t) 1) *
                                          (segscan_group_sizze_34093 *
                                           sdiv_up64(m_27772 * iota32_arg_28233,
                                                     sext_i32_i64(num_threads_46691))) -
                                          (int64_t) 1));
                
                if (inactive_46732) {
                    x_34097 = x_34098;
                }
                if (!inactive_46732) {
                    float defunc_1_op_res_34099 = x_34097 + x_34098;
                    
                    x_34097 = defunc_1_op_res_34099;
                }
            }
            // write final result
            {
                ((__local
                  float *) scan_arr_mem_46721)[sext_i32_i64(local_tid_46717)] =
                    x_34097;
            }
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // restore correct values for first block
    // (the first slots were overwritten by the scan of per-block totals;
    // x_34098 still holds each thread's own in-block result)
    // NOTE(review): this store is not guarded by ltid_in_bounds_46727, and
    // for local ids >= stage1_num_groups_46690 x_34098 was never assigned.
    // Such a slot only reaches global memory if the gtid bounds test below
    // passes for that thread -- confirm that cannot happen.
    {
        if (squot32(local_tid_46717, 32) == 0) {
            ((__local
              float *) scan_arr_mem_46721)[sext_i32_i64(local_tid_46717)] =
                x_34098;
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // threads in bounds write scanned carries
    {
        if (slt64(gtid_34036, m_27772) && slt64(gtid_34044, iota32_arg_28233)) {
            ((__global float *) mem_45403)[gtid_34036 * iota32_arg_28233 +
                                           gtid_34044] = ((__local
                                                           float *) scan_arr_mem_46721)[sext_i32_i64(local_tid_46717)];
        }
    }
    
  error_0:
    return;
    #undef segscan_group_sizze_34093
}
// Third stage of a scan over mem_45163, read as int64 values laid out as
// m_27772 rows of N_27771 elements.  An in-bounds element gets the value
// stored immediately before its chunk added to it, unless the element lies
// in the first chunk, is the last element of its chunk, or belongs to a row
// that starts after that preceding value.
// NOTE(review): the preceding value is presumably the carry left behind by
// the earlier scan stages; confirm against the host code that launches this.
__kernel void mainDetailedziscan_stage3_32356(__global int *global_failure,
                                              int64_t N_27771, int64_t m_27772,
                                              int64_t num_groups_32374,
                                              int32_t num_threads_46267,
                                              int32_t required_groups_46309,
                                              __global unsigned char *mem_45163)
{
    #define segscan_group_sizze_32373 (mainDetailedzisegscan_group_sizze_32350)

    // Do nothing when the failure flag already holds a non-negative value.
    if (*global_failure >= 0)
        return;

    __global int64_t *vals = (__global int64_t *) mem_45163;
    const int32_t lane = get_local_id(0);
    const int32_t phys_group = get_group_id(0);
    const int32_t group_stride = sext_i64_i32(num_groups_32374);
    // Number of virtual groups this physical group has to process.
    const int32_t passes = sdiv_up32(required_groups_46309 - phys_group,
                                     group_stride);

    for (int32_t pass = 0; pass < passes; pass++) {
        const int32_t virt_group = phys_group + pass * group_stride;
        const int64_t flat = sext_i32_i64(virt_group) *
            segscan_group_sizze_32373 + sext_i32_i64(lane);
        // Split the flat index into (row, col) of the m x N layout.
        const int64_t row = squot64(flat, N_27771);
        const int64_t col = flat - row * N_27771;
        // Number of consecutive elements that make up one chunk.
        const int64_t chunk = segscan_group_sizze_32373 *
            sdiv_up64(m_27772 * N_27771, sext_i32_i64(num_threads_46267));
        const int64_t orig_group = squot64(flat, chunk);
        // Flat index of the element right before this chunk.
        const int64_t carry_idx = orig_group * chunk - (int64_t) 1;

        if (slt64(row, m_27772) && slt64(col, N_27771) &&
            orig_group != (int64_t) 0 &&
            flat != (orig_group + (int64_t) 1) * chunk - (int64_t) 1 &&
            !slt64(srem64(flat, N_27771), flat - carry_idx)) {
            const int64_t carry_row = squot64(carry_idx, N_27771);
            const int64_t carry_col = carry_idx - carry_row * N_27771;
            const int64_t carry = vals[carry_row * N_27771 + carry_col];
            const int64_t element = vals[row * N_27771 + col];

            vals[row * N_27771 + col] = add64(carry, element);
        }
        // Sits outside the bounds check, so it runs on every pass.
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }

    #undef segscan_group_sizze_32373
}
// Third stage of a scan over mem_45403, read as float values laid out as
// m_27772 rows of iota32_arg_28233 elements.  An in-bounds element gets the
// value stored immediately before its chunk added to it, unless the element
// lies in the first chunk, is the last element of its chunk, or belongs to a
// row that starts after that preceding value.
// NOTE(review): the preceding value is presumably the carry left behind by
// the earlier scan stages; confirm against the host code that launches this.
__kernel void mainDetailedziscan_stage3_34045(__global int *global_failure,
                                              int64_t m_27772,
                                              int64_t iota32_arg_28233,
                                              int64_t num_groups_34094,
                                              int32_t num_threads_46691,
                                              int32_t required_groups_46733,
                                              __global unsigned char *mem_45403)
{
    #define segscan_group_sizze_34093 (mainDetailedzisegscan_group_sizze_34039)

    // Do nothing when the failure flag already holds a non-negative value.
    if (*global_failure >= 0)
        return;

    __global float *vals = (__global float *) mem_45403;
    const int32_t lane = get_local_id(0);
    const int32_t phys_group = get_group_id(0);
    const int32_t group_stride = sext_i64_i32(num_groups_34094);
    // Number of virtual groups this physical group has to process.
    const int32_t passes = sdiv_up32(required_groups_46733 - phys_group,
                                     group_stride);

    for (int32_t pass = 0; pass < passes; pass++) {
        const int32_t virt_group = phys_group + pass * group_stride;
        const int64_t flat = sext_i32_i64(virt_group) *
            segscan_group_sizze_34093 + sext_i32_i64(lane);
        // Split the flat index into (row, col) of the row-major layout.
        const int64_t row = squot64(flat, iota32_arg_28233);
        const int64_t col = flat - row * iota32_arg_28233;
        // Number of consecutive elements that make up one chunk.
        const int64_t chunk = segscan_group_sizze_34093 *
            sdiv_up64(m_27772 * iota32_arg_28233,
                      sext_i32_i64(num_threads_46691));
        const int64_t orig_group = squot64(flat, chunk);
        // Flat index of the element right before this chunk.
        const int64_t carry_idx = orig_group * chunk - (int64_t) 1;

        if (slt64(row, m_27772) && slt64(col, iota32_arg_28233) &&
            orig_group != (int64_t) 0 &&
            flat != (orig_group + (int64_t) 1) * chunk - (int64_t) 1 &&
            !slt64(srem64(flat, iota32_arg_28233), flat - carry_idx)) {
            const int64_t carry_row = squot64(carry_idx, iota32_arg_28233);
            const int64_t carry_col = carry_idx - carry_row * iota32_arg_28233;
            const float carry = vals[carry_row * iota32_arg_28233 + carry_col];
            const float element = vals[row * iota32_arg_28233 + col];

            // Operand order kept as carry + element.
            vals[row * iota32_arg_28233 + col] = carry + element;
        }
        // Sits outside the bounds check, so it runs on every pass.
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }

    #undef segscan_group_sizze_34093
}
// Fills mem_44385, read as floats in i32_res_27787 rows of N_27771 columns,
// one element per work-item.  With t = mappingindices[col] read as int32:
//   row 0      -> 1.0
//   row 1      -> t converted to float
//   row r >= 2 -> sin(angle) when smod32(r, 2) is 0, otherwise cos(angle),
//                 where angle = t * (6.2831855 * sdiv32(r, 2)) / freq_27776
// 6.2831855F is 2*pi rounded to single precision.
__kernel void mainDetailedzisegmap_29975(__global int *global_failure,
                                         int64_t N_27771, float freq_27776,
                                         int64_t i32_res_27787, __global
                                         unsigned char *mappingindices_mem_44380,
                                         __global unsigned char *mem_44385)
{
    #define segmap_group_sizze_30048 (mainDetailedzisegmap_group_sizze_29978)

    // Do nothing when the failure flag already holds a non-negative value.
    if (*global_failure >= 0)
        return;

    const int32_t lane = get_local_id(0);
    const int32_t group = get_group_id(0);
    const int64_t flat = sext_i32_i64(group) * segmap_group_sizze_30048 +
        sext_i32_i64(lane);
    // Split the flat work-item index into (row, col).
    const int64_t row = squot64(flat, N_27771);
    const int64_t col = flat - row * N_27771;

    // Work-items that fall outside the output have nothing to do.
    if (!slt64(row, i32_res_27787) || !slt64(col, N_27771))
        return;

    const int32_t row32 = sext_i64_i32(row);
    float value;

    if (row32 == 0) {
        value = 1.0F;
    } else {
        const int32_t t = ((__global int32_t *) mappingindices_mem_44380)[col];

        if (row32 == 1) {
            value = sitofp_i32_f32(t);
        } else {
            const float harmonic = sitofp_i32_f32(sdiv32(row32, 2));
            const float t_f = sitofp_i32_f32(t);
            // Same operation order as before: scale, multiply, divide.
            const float two_pi_k = 6.2831855F * harmonic;
            const float phase = t_f * two_pi_k;
            const float angle = phase / freq_27776;

            value = smod32(row32, 2) == 0 ? futrts_sin32(angle) :
                futrts_cos32(angle);
        }
    }
    ((__global float *) mem_44385)[row * N_27771 + col] = value;

    #undef segmap_group_sizze_30048
}
// Fills mem_44389, read as floats in i32_res_27787 rows of N_27771 columns,
// one element per work-item.  With t = mappingindices[col] read as int32 and
// i = add32(1, row):
//   row 0      -> 1.0
//   row r >= 1 -> sin(angle) when smod32(i, 2) is 0, otherwise cos(angle),
//                 where angle = t * (6.2831855 * sdiv32(i, 2)) / freq_27776
// 6.2831855F is 2*pi rounded to single precision.
__kernel void mainDetailedzisegmap_30153(__global int *global_failure,
                                         int64_t N_27771, float freq_27776,
                                         int64_t i32_res_27787, __global
                                         unsigned char *mappingindices_mem_44380,
                                         __global unsigned char *mem_44389)
{
    #define segmap_group_sizze_30222 (mainDetailedzisegmap_group_sizze_30156)

    // Do nothing when the failure flag already holds a non-negative value.
    if (*global_failure >= 0)
        return;

    const int32_t lane = get_local_id(0);
    const int32_t group = get_group_id(0);
    const int64_t flat = sext_i32_i64(group) * segmap_group_sizze_30222 +
        sext_i32_i64(lane);
    // Split the flat work-item index into (row, col).
    const int64_t row = squot64(flat, N_27771);
    const int64_t col = flat - row * N_27771;

    // Work-items that fall outside the output have nothing to do.
    if (!slt64(row, i32_res_27787) || !slt64(col, N_27771))
        return;

    const int32_t row32 = sext_i64_i32(row);
    float value;

    if (row32 == 0) {
        value = 1.0F;
    } else {
        const int32_t t = ((__global int32_t *) mappingindices_mem_44380)[col];
        const int32_t shifted = add32(1, row32);
        const float harmonic = sitofp_i32_f32(sdiv32(shifted, 2));
        const float t_f = sitofp_i32_f32(t);
        // Same operation order as before: scale, multiply, divide.
        const float two_pi_k = 6.2831855F * harmonic;
        const float phase = t_f * two_pi_k;
        const float angle = phase / freq_27776;

        value = smod32(shifted, 2) == 0 ? futrts_sin32(angle) :
            futrts_cos32(angle);
    }
    ((__global float *) mem_44389)[row * N_27771 + col] = value;

    #undef segmap_group_sizze_30222
}
// Element-wise offset: for every (row, col) with row below N_27771 and col
// below i32_res_27787, writes i32_res_27852 + mem_44393[row, col] into
// mem_44397[row, col].  Both buffers are read as floats with rows of
// i32_res_27787 elements; one element is handled per work-item.
__kernel void mainDetailedzisegmap_30281(__global int *global_failure,
                                         int64_t N_27771, int64_t i32_res_27787,
                                         float i32_res_27852, __global
                                         unsigned char *mem_44393, __global
                                         unsigned char *mem_44397)
{
    #define segmap_group_sizze_30305 (mainDetailedzisegmap_group_sizze_30284)

    // Do nothing when the failure flag already holds a non-negative value.
    if (*global_failure >= 0)
        return;

    __global float *src = (__global float *) mem_44393;
    __global float *dst = (__global float *) mem_44397;
    const int32_t lane = get_local_id(0);
    const int32_t group = get_group_id(0);
    const int64_t flat = sext_i32_i64(group) * segmap_group_sizze_30305 +
        sext_i32_i64(lane);
    // Split the flat work-item index into (row, col).
    const int64_t row = squot64(flat, i32_res_27787);
    const int64_t col = flat - row * i32_res_27787;

    // Work-items that fall outside the arrays have nothing to do.
    if (!slt64(row, N_27771) || !slt64(col, i32_res_27787))
        return;

    const float original = src[row * i32_res_27787 + col];

    dst[row * i32_res_27787 + col] = i32_res_27852 + original;

    #undef segmap_group_sizze_30305
}
// For every gtid below m_27772 this kernel computes a square block of sums
// with k2p2zq_27785 rows and columns.  Entry (i, j) is the sum, over l from 0
// up to but excluding n_27775, of
//   binop_p[i * N_27771 + l] * mem_44397[l * i32_res_27787 + j] * keep(l)
// where keep(l) is 0.0 when mem_44400[l * m_27772 + gtid] is NaN and 1.0
// otherwise.  All buffers are read as floats.
// Each entry is first staged in mem_44404 in a slot addressed by the physical
// thread id, then copied to
//   mem_44446[i * (m_27772 * i32_res_27787) + j * m_27772 + gtid].
// NOTE(review): the compute loops run up to k2p2zq_27785 while the copy loops
// run up to i32_res_27787; presumably the two are equal, confirm in the host
// code.
__kernel void mainDetailedzisegmap_30314(__global int *global_failure,
                                         int64_t N_27771, int64_t m_27772,
                                         int32_t n_27775, int32_t k2p2zq_27785,
                                         int64_t i32_res_27787,
                                         int64_t num_groups_30339, __global
                                         unsigned char *binop_p_mem_44390,
                                         __global unsigned char *mem_44397,
                                         __global unsigned char *mem_44400,
                                         __global unsigned char *mem_44404,
                                         __global unsigned char *mem_44446)
{
    #define segmap_group_sizze_30338 (mainDetailedzisegmap_group_sizze_30316)
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    
    // Do nothing when the failure flag already holds a non-negative value.
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_45709;
    int32_t local_tid_45710;
    int64_t group_sizze_45713;
    int32_t wave_sizze_45712;
    int32_t group_tid_45711;
    
    global_tid_45709 = get_global_id(0);
    local_tid_45710 = get_local_id(0);
    group_sizze_45713 = get_local_size(0);
    wave_sizze_45712 = LOCKSTEP_WIDTH;
    group_tid_45711 = get_group_id(0);
    
    int32_t phys_tid_30314;
    
    phys_tid_30314 = global_tid_45709;
    
    int32_t phys_group_id_45714;
    
    phys_group_id_45714 = get_group_id(0);
    // Each physical group walks the virtual groups phys_group_id,
    // phys_group_id + num_groups, and so on, until the
    // sdiv_up64(m_27772, group size) virtual groups are covered.
    for (int32_t i_45715 = 0; i_45715 <
         sdiv_up32(sext_i64_i32(sdiv_up64(m_27772, segmap_group_sizze_30338)) -
                   phys_group_id_45714, sext_i64_i32(num_groups_30339));
         i_45715++) {
        int32_t virt_group_id_45716 = phys_group_id_45714 + i_45715 *
                sext_i64_i32(num_groups_30339);
        int64_t gtid_30313 = sext_i32_i64(virt_group_id_45716) *
                segmap_group_sizze_30338 + sext_i32_i64(local_tid_45710);
        
        if (slt64(gtid_30313, m_27772)) {
            // i_44289 selects the row of binop_p, i_44293 the column of
            // mem_44397.
            for (int32_t i_44360 = 0; i_44360 < k2p2zq_27785; i_44360++) {
                int64_t i_44289 = sext_i32_i64(i_44360);
                
                for (int32_t i_44359 = 0; i_44359 < k2p2zq_27785; i_44359++) {
                    int64_t i_44293 = sext_i32_i64(i_44359);
                    float defunc_2_reduce_res_30347;
                    float redout_44295 = 0.0F;
                    
                    // Sequential sum over the first n_27775 positions.
                    for (int32_t i_44358 = 0; i_44358 < n_27775; i_44358++) {
                        int64_t i_44296 = sext_i32_i64(i_44358);
                        float x_30351 = ((__global float *) mem_44400)[i_44296 *
                                                                       m_27772 +
                                                                       gtid_30313];
                        float x_30352 = ((__global
                                          float *) binop_p_mem_44390)[i_44289 *
                                                                      N_27771 +
                                                                      i_44296];
                        float x_30353 = ((__global float *) mem_44397)[i_44296 *
                                                                       i32_res_27787 +
                                                                       i_44293];
                        float x_30354 = x_30352 * x_30353;
                        bool isnan_res_30355;
                        
                        isnan_res_30355 = futrts_isnan32(x_30351);
                        
                        float y_30356;
                        
                        // A NaN in mem_44400 zeroes this term of the sum.
                        if (isnan_res_30355) {
                            y_30356 = 0.0F;
                        } else {
                            y_30356 = 1.0F;
                        }
                        
                        float defunc_2_f_res_30357 = x_30354 * y_30356;
                        float defunc_1_op_res_30350 = defunc_2_f_res_30357 +
                              redout_44295;
                        float redout_tmp_45719 = defunc_1_op_res_30350;
                        
                        redout_44295 = redout_tmp_45719;
                    }
                    defunc_2_reduce_res_30347 = redout_44295;
                    // Stage the entry in the scratch slot of this physical
                    // thread; consecutive entries are num_groups * group size
                    // elements apart.
                    ((__global float *) mem_44404)[phys_tid_30314 + (i_44289 *
                                                                     (num_groups_30339 *
                                                                      segmap_group_sizze_30338 *
                                                                      i32_res_27787) +
                                                                     i_44293 *
                                                                     (num_groups_30339 *
                                                                      segmap_group_sizze_30338))] =
                        defunc_2_reduce_res_30347;
                }
            }
            // Copy the staged block from mem_44404 into its final position in
            // mem_44446.
            for (int64_t i_45720 = 0; i_45720 < i32_res_27787; i_45720++) {
                for (int64_t i_45721 = 0; i_45721 < i32_res_27787; i_45721++) {
                    ((__global float *) mem_44446)[i_45720 * (m_27772 *
                                                              i32_res_27787) +
                                                   i_45721 * m_27772 +
                                                   gtid_30313] = ((__global
                                                                   float *) mem_44404)[phys_tid_30314 +
                                                                                       (i_45720 *
                                                                                        (num_groups_30339 *
                                                                                         segmap_group_sizze_30338 *
                                                                                         i32_res_27787) +
                                                                                        i_45721 *
                                                                                        (num_groups_30339 *
                                                                                         segmap_group_sizze_30338))];
                }
            }
        }
        // Sits outside the bounds check, so it runs on every iteration of the
        // virtual group loop.
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }
    
  error_0:
    return;
    #undef segmap_group_sizze_30338
}
// For every pair (gtid_30358, gtid_30359) with gtid_30358 below m_27772 and
// gtid_30359 below i32_res_27787, this kernel computes k2p2zq_27785 sums.
// Sum number i runs over l from 0 up to but excluding n_27775 and adds
//   mem_44393[l * i32_res_27787 + gtid_30359] *
//   mem_44397[l * i32_res_27787 + i] * keep(l)
// where keep(l) is 0.0 when images[gtid_30358 * N_27771 + l] is NaN and 1.0
// otherwise.  All buffers are read as floats.
// Each sum is first staged in mem_44449 in a slot addressed by the physical
// thread id, then copied to
//   mem_44465[i * (i32_res_27787 * m_27772) + gtid_30358 * i32_res_27787 +
//             gtid_30359].
// NOTE(review): the compute loop runs up to k2p2zq_27785 while the copy loop
// runs up to i32_res_27787; presumably the two are equal, confirm in the host
// code.
__kernel void mainDetailedzisegmap_30360(__global int *global_failure,
                                         int64_t N_27771, int64_t m_27772,
                                         int32_t n_27775, int32_t k2p2zq_27785,
                                         int64_t i32_res_27787,
                                         int64_t num_groups_30516, __global
                                         unsigned char *images_mem_44381,
                                         __global unsigned char *mem_44393,
                                         __global unsigned char *mem_44397,
                                         __global unsigned char *mem_44449,
                                         __global unsigned char *mem_44465)
{
    #define segmap_group_sizze_30515 (mainDetailedzisegmap_group_sizze_30363)
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    
    // Do nothing when the failure flag already holds a non-negative value.
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_45722;
    int32_t local_tid_45723;
    int64_t group_sizze_45726;
    int32_t wave_sizze_45725;
    int32_t group_tid_45724;
    
    global_tid_45722 = get_global_id(0);
    local_tid_45723 = get_local_id(0);
    group_sizze_45726 = get_local_size(0);
    wave_sizze_45725 = LOCKSTEP_WIDTH;
    group_tid_45724 = get_group_id(0);
    
    int32_t phys_tid_30360;
    
    phys_tid_30360 = global_tid_45722;
    
    int32_t phys_group_id_45727;
    
    phys_group_id_45727 = get_group_id(0);
    // Each physical group walks the virtual groups phys_group_id,
    // phys_group_id + num_groups, and so on, until the
    // sdiv_up64(m_27772 * i32_res_27787, group size) virtual groups are
    // covered.
    for (int32_t i_45728 = 0; i_45728 <
         sdiv_up32(sext_i64_i32(sdiv_up64(m_27772 * i32_res_27787,
                                          segmap_group_sizze_30515)) -
                   phys_group_id_45727, sext_i64_i32(num_groups_30516));
         i_45728++) {
        int32_t virt_group_id_45729 = phys_group_id_45727 + i_45728 *
                sext_i64_i32(num_groups_30516);
        // Split the flat index into quotient and remainder with respect to
        // i32_res_27787.
        int64_t gtid_30358 = squot64(sext_i32_i64(virt_group_id_45729) *
                                     segmap_group_sizze_30515 +
                                     sext_i32_i64(local_tid_45723),
                                     i32_res_27787);
        int64_t gtid_30359 = sext_i32_i64(virt_group_id_45729) *
                segmap_group_sizze_30515 + sext_i32_i64(local_tid_45723) -
                squot64(sext_i32_i64(virt_group_id_45729) *
                        segmap_group_sizze_30515 +
                        sext_i32_i64(local_tid_45723), i32_res_27787) *
                i32_res_27787;
        
        if (slt64(gtid_30358, m_27772) && slt64(gtid_30359, i32_res_27787)) {
            // i_44299 selects the column of mem_44397.
            for (int32_t i_44362 = 0; i_44362 < k2p2zq_27785; i_44362++) {
                int64_t i_44299 = sext_i32_i64(i_44362);
                float defunc_2_reduce_res_30527;
                float redout_44301 = 0.0F;
                
                // Sequential sum over the first n_27775 positions.
                for (int32_t i_44361 = 0; i_44361 < n_27775; i_44361++) {
                    int64_t i_44302 = sext_i32_i64(i_44361);
                    float x_30531 = ((__global
                                      float *) images_mem_44381)[gtid_30358 *
                                                                 N_27771 +
                                                                 i_44302];
                    float x_30532 = ((__global float *) mem_44393)[i_44302 *
                                                                   i32_res_27787 +
                                                                   gtid_30359];
                    float x_30533 = ((__global float *) mem_44397)[i_44302 *
                                                                   i32_res_27787 +
                                                                   i_44299];
                    float x_30534 = x_30532 * x_30533;
                    bool isnan_res_30535;
                    
                    isnan_res_30535 = futrts_isnan32(x_30531);
                    
                    float y_30536;
                    
                    // A NaN in the images buffer zeroes this term of the sum.
                    if (isnan_res_30535) {
                        y_30536 = 0.0F;
                    } else {
                        y_30536 = 1.0F;
                    }
                    
                    float defunc_2_f_res_30537 = x_30534 * y_30536;
                    float defunc_1_op_res_30530 = defunc_2_f_res_30537 +
                          redout_44301;
                    float redout_tmp_45731 = defunc_1_op_res_30530;
                    
                    redout_44301 = redout_tmp_45731;
                }
                defunc_2_reduce_res_30527 = redout_44301;
                // Stage the sum in the scratch slot of this physical thread;
                // consecutive sums are num_groups * group size elements apart.
                ((__global float *) mem_44449)[phys_tid_30360 + i_44299 *
                                               (num_groups_30516 *
                                                segmap_group_sizze_30515)] =
                    defunc_2_reduce_res_30527;
            }
            // Copy the staged sums from mem_44449 into their final position
            // in mem_44465.
            for (int64_t i_45732 = 0; i_45732 < i32_res_27787; i_45732++) {
                ((__global float *) mem_44465)[i_45732 * (i32_res_27787 *
                                                          m_27772) +
                                               gtid_30358 * i32_res_27787 +
                                               gtid_30359] = ((__global
                                                               float *) mem_44449)[phys_tid_30360 +
                                                                                   i_45732 *
                                                                                   (num_groups_30516 *
                                                                                    segmap_group_sizze_30515)];
            }
        }
        // Sits outside the bounds check, so it runs on every iteration of the
        // virtual group loop.
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }
    
  error_0:
    return;
    #undef segmap_group_sizze_30515
}
__kernel void mainDetailedzisegmap_30952(__global int *global_failure,
                                         int64_t m_27772, int64_t i32_res_27787,
                                         int64_t nm_27920,
                                         int64_t i32_res_27935, int64_t x_27936,
                                         int64_t j_m_i_27939,
                                         int64_t gauss_jordan_res_r_ixfn_44617,
                                         int64_t gauss_jordan_res_r_ixfn_44618,
                                         int64_t gauss_jordan_res_r_ixfn_44620,
                                         __global
                                         unsigned char *gauss_jordan_res_r_mem_44622,
                                         __global unsigned char *mem_44627)
{
    #define segmap_group_sizze_31699 (mainDetailedzisegmap_group_sizze_30956)
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_45889;
    int32_t local_tid_45890;
    int64_t group_sizze_45893;
    int32_t wave_sizze_45892;
    int32_t group_tid_45891;
    
    global_tid_45889 = get_global_id(0);
    local_tid_45890 = get_local_id(0);
    group_sizze_45893 = get_local_size(0);
    wave_sizze_45892 = LOCKSTEP_WIDTH;
    group_tid_45891 = get_group_id(0);
    
    int32_t phys_tid_30952;
    
    phys_tid_30952 = global_tid_45889;
    
    int64_t gtid_30949;
    
    gtid_30949 = squot64(sext_i32_i64(group_tid_45891) *
                         segmap_group_sizze_31699 +
                         sext_i32_i64(local_tid_45890), i32_res_27787 *
                         j_m_i_27939);
    
    int64_t gtid_slice_30947;
    
    gtid_slice_30947 = squot64(sext_i32_i64(group_tid_45891) *
                               segmap_group_sizze_31699 +
                               sext_i32_i64(local_tid_45890) -
                               squot64(sext_i32_i64(group_tid_45891) *
                                       segmap_group_sizze_31699 +
                                       sext_i32_i64(local_tid_45890),
                                       i32_res_27787 * j_m_i_27939) *
                               (i32_res_27787 * j_m_i_27939), j_m_i_27939);
    
    int64_t gtid_slice_30948;
    
    gtid_slice_30948 = sext_i32_i64(group_tid_45891) *
        segmap_group_sizze_31699 + sext_i32_i64(local_tid_45890) -
        squot64(sext_i32_i64(group_tid_45891) * segmap_group_sizze_31699 +
                sext_i32_i64(local_tid_45890), i32_res_27787 * j_m_i_27939) *
        (i32_res_27787 * j_m_i_27939) - squot64(sext_i32_i64(group_tid_45891) *
                                                segmap_group_sizze_31699 +
                                                sext_i32_i64(local_tid_45890) -
                                                squot64(sext_i32_i64(group_tid_45891) *
                                                        segmap_group_sizze_31699 +
                                                        sext_i32_i64(local_tid_45890),
                                                        i32_res_27787 *
                                                        j_m_i_27939) *
                                                (i32_res_27787 * j_m_i_27939),
                                                j_m_i_27939) * j_m_i_27939;
    if ((slt64(gtid_30949, m_27772) && slt64(gtid_slice_30947,
                                             i32_res_27787)) &&
        slt64(gtid_slice_30948, j_m_i_27939)) {
        int64_t slice_31703 = i32_res_27787 + gtid_slice_30948;
        int64_t binop_x_42430 = x_27936 * gtid_30949;
        int64_t binop_y_42431 = i32_res_27935 * gtid_slice_30947;
        int64_t binop_x_42432 = binop_x_42430 + binop_y_42431;
        int64_t binop_x_42433 = slice_31703 + binop_x_42432;
        int64_t new_index_42434 = squot64(binop_x_42433, nm_27920);
        int64_t binop_y_42446 = nm_27920 * new_index_42434;
        int64_t new_index_42447 = binop_x_42433 - binop_y_42446;
        float v_31704 = ((__global
                          float *) gauss_jordan_res_r_mem_44622)[gauss_jordan_res_r_ixfn_44617 +
                                                                 (new_index_42434 *
                                                                  gauss_jordan_res_r_ixfn_44618 +
                                                                  new_index_42447 *
                                                                  gauss_jordan_res_r_ixfn_44620)];
        
        ((__global float *) mem_44627)[gtid_30949 * (j_m_i_27939 *
                                                     i32_res_27787) +
                                       gtid_slice_30947 * j_m_i_27939 +
                                       gtid_slice_30948] = v_31704;
    }
    
  error_0:
    return;
    #undef segmap_group_sizze_31699
}
// Strided copy over an (m_27772 x nm_27920) index space, one work-item per
// element.  The work-item for (row, col) reads the float at
// mem_44605[row * nm_27920 + col] and stores it in mem_param_44585 at
//   ctx_param_ext_44580 + row * ctx_param_ext_44581 + col * ctx_param_ext_44583.
// Nothing is done when *global_failure is already >= 0, or when the
// work-item falls outside the index space.
__kernel void mainDetailedzisegmap_31182(__global int *global_failure,
                                         int64_t m_27772, int64_t nm_27920,
                                         int64_t ctx_param_ext_44580,
                                         int64_t ctx_param_ext_44581,
                                         int64_t ctx_param_ext_44583, __global
                                         unsigned char *mem_param_44585,
                                         __global unsigned char *mem_44605)
{
    #define segmap_group_sizze_31687 (mainDetailedzisegmap_group_sizze_31185)

    if (*global_failure >= 0)
        return;

    // The ids are narrowed to 32 bits and then sign-extended again.
    const int32_t grp_idx = get_group_id(0);
    const int32_t lane_idx = get_local_id(0);
    const int64_t flat_idx = sext_i32_i64(grp_idx) * segmap_group_sizze_31687 +
        sext_i32_i64(lane_idx);

    // Split the flat id into (row, col) with nm_27920 columns per row.
    const int64_t row_idx = squot64(flat_idx, nm_27920);
    const int64_t col_idx = flat_idx - row_idx * nm_27920;

    if (!(slt64(row_idx, m_27772) && slt64(col_idx, nm_27920)))
        return;

    // The source element is loaded before the destination range check.
    const float copied_val =
        ((__global float *) mem_44605)[row_idx * nm_27920 + col_idx];
    const bool row_in_range = sle64((int64_t) 0, row_idx) &&
        slt64(row_idx, m_27772);
    const bool col_in_range = sle64((int64_t) 0, col_idx) &&
        slt64(col_idx, nm_27920);

    if (row_in_range && col_in_range) {
        const int64_t dest_off = ctx_param_ext_44580 +
            (row_idx * ctx_param_ext_44581 + col_idx * ctx_param_ext_44583);

        ((__global float *) mem_param_44585)[dest_off] = copied_val;
    }
    #undef segmap_group_sizze_31687
}
// Segmented map over an (m_27772 x nm_27920) index space.  Work-item
// (gtid_31250, gtid_31251) computes one float and stores it at
// mem_44605[gtid_31250 * nm_27920 + gtid_31251].
//
// The column index is narrowed to 32 bits and split into
//   q = sdiv32(col, m_27918)   and   r = smod32(col, m_27918).
// Writing P[row, idx] for the float at
//   mem_param_44585[ctx_param_ext_44580 + row * ctx_param_ext_44581
//                   + idx * ctx_param_ext_44583],
// the result is:
//   flag mem_44601[row] set:
//       P[row, r + m_27918 * q]
//   flag clear and q < k2p2zq_27785 - 1:
//       P[row, r + m_27918 * (q + 1)]
//         - (P[row, r] / P[row, i32_res_31556])
//           * P[row, i_31554 + m_27918 * (q + 1)]
//   flag clear otherwise:
//       P[row, r] / P[row, i32_res_31556]
//
// Each computed index is checked against the range 0 <= idx < nm_27920.  On
// a failed check the work-item tries to swap *global_failure from -1 to a
// code (42, 43, 44 or 45); if the swap succeeds it stores the index and the
// bound in global_failure_args[0] and [1].  Either way it then returns
// without writing a result.  The kernel exits at once when *global_failure
// is already >= 0 on entry.  failure_is_an_option is not referenced in the
// body.
// NOTE(review): this looks like one row-reduction step of a Gauss-Jordan
// style elimination, with the flag marking a zero pivot - confirm against
// the Futhark source.
__kernel void mainDetailedzisegmap_31252(__global int *global_failure,
                                         int failure_is_an_option, __global
                                         int64_t *global_failure_args,
                                         int64_t m_27772, int32_t k2p2zq_27785,
                                         int32_t m_27918, int64_t nm_27920,
                                         int32_t i_31554, int64_t i32_res_31556,
                                         int64_t ctx_param_ext_44580,
                                         int64_t ctx_param_ext_44581,
                                         int64_t ctx_param_ext_44583, __global
                                         unsigned char *mem_param_44585,
                                         __global unsigned char *mem_44601,
                                         __global unsigned char *mem_44605)
{
    #define segmap_group_sizze_31637 (mainDetailedzisegmap_group_sizze_31255)
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    
    // Skip all work once a failure code has been recorded.
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_45878;
    int32_t local_tid_45879;
    int64_t group_sizze_45882;
    int32_t wave_sizze_45881;
    int32_t group_tid_45880;
    
    global_tid_45878 = get_global_id(0);
    local_tid_45879 = get_local_id(0);
    group_sizze_45882 = get_local_size(0);
    wave_sizze_45881 = LOCKSTEP_WIDTH;
    group_tid_45880 = get_group_id(0);
    
    int32_t phys_tid_31252;
    
    phys_tid_31252 = global_tid_45878;
    
    // Row index: flat id (group * group size + local id) divided by nm_27920.
    int64_t gtid_31250;
    
    gtid_31250 = squot64(sext_i32_i64(group_tid_45880) *
                         segmap_group_sizze_31637 +
                         sext_i32_i64(local_tid_45879), nm_27920);
    
    // Column index: the remainder of the same division.
    int64_t gtid_31251;
    
    gtid_31251 = sext_i32_i64(group_tid_45880) * segmap_group_sizze_31637 +
        sext_i32_i64(local_tid_45879) - squot64(sext_i32_i64(group_tid_45880) *
                                                segmap_group_sizze_31637 +
                                                sext_i32_i64(local_tid_45879),
                                                nm_27920) * nm_27920;
    if (slt64(gtid_31250, m_27772) && slt64(gtid_31251, nm_27920)) {
        // Per-row flag selecting between the plain copy and the update.
        bool cond_31642 = ((__global bool *) mem_44601)[gtid_31250];
        // q (31645) and r (31646) of the column index with respect to m_27918.
        int32_t defunc_0_f_res_31644 = sext_i64_i32(gtid_31251);
        int32_t defunc_0_f_res_31645 = sdiv32(defunc_0_f_res_31644, m_27918);
        int32_t defunc_0_f_res_31646 = smod32(defunc_0_f_res_31644, m_27918);
        float defunc_0_f_res_31647;
        
        if (cond_31642) {
            // Flag set: read the element at r + m_27918 * q of the same row.
            int32_t x_31648 = mul32(m_27918, defunc_0_f_res_31645);
            int32_t i32_arg_31649 = add32(defunc_0_f_res_31646, x_31648);
            int64_t i32_res_31650 = sext_i32_i64(i32_arg_31649);
            bool x_31651 = sle64((int64_t) 0, i32_res_31650);
            bool y_31652 = slt64(i32_res_31650, nm_27920);
            bool bounds_check_31653 = x_31651 && y_31652;
            bool index_certs_31654;
            
            // Failure code 42: index out of range.
            if (!bounds_check_31653) {
                {
                    if (atomic_cmpxchg_i32_global(global_failure, -1, 42) ==
                        -1) {
                        global_failure_args[0] = i32_res_31650;
                        global_failure_args[1] = nm_27920;
                        ;
                    }
                    return;
                }
            }
            
            float defunc_0_f_res_t_res_31655 = ((__global
                                                 float *) mem_param_44585)[ctx_param_ext_44580 +
                                                                           (gtid_31250 *
                                                                            ctx_param_ext_44581 +
                                                                            i32_res_31650 *
                                                                            ctx_param_ext_44583)];
            
            defunc_0_f_res_31647 = defunc_0_f_res_t_res_31655;
        } else {
            // Divisor: element i32_res_31556 of this row.  It is loaded
            // before the bounds check on r below.
            float v1_31641 = ((__global
                               float *) mem_param_44585)[ctx_param_ext_44580 +
                                                         (gtid_31250 *
                                                          ctx_param_ext_44581 +
                                                          i32_res_31556 *
                                                          ctx_param_ext_44583)];
            int64_t i32_res_31656 = sext_i32_i64(defunc_0_f_res_31646);
            bool x_31657 = sle64((int64_t) 0, i32_res_31656);
            bool y_31658 = slt64(i32_res_31656, nm_27920);
            bool bounds_check_31659 = x_31657 && y_31658;
            bool index_certs_31660;
            
            // Failure code 43: r out of range.
            if (!bounds_check_31659) {
                {
                    if (atomic_cmpxchg_i32_global(global_failure, -1, 43) ==
                        -1) {
                        global_failure_args[0] = i32_res_31656;
                        global_failure_args[1] = nm_27920;
                        ;
                    }
                    return;
                }
            }
            
            // x_31662 = P[row, r] / P[row, i32_res_31556].
            float x_31661 = ((__global
                              float *) mem_param_44585)[ctx_param_ext_44580 +
                                                        (gtid_31250 *
                                                         ctx_param_ext_44581 +
                                                         i32_res_31656 *
                                                         ctx_param_ext_44583)];
            float x_31662 = x_31661 / v1_31641;
            int32_t y_31663 = sub32(k2p2zq_27785, 1);
            bool cond_31664 = slt32(defunc_0_f_res_31645, y_31663);
            float defunc_0_f_res_f_res_31665;
            
            if (cond_31664) {
                // q < k2p2zq_27785 - 1: combine with block q + 1 of the row.
                int32_t x_31666 = add32(1, defunc_0_f_res_31645);
                int32_t x_31667 = mul32(m_27918, x_31666);
                int32_t i32_arg_31668 = add32(defunc_0_f_res_31646, x_31667);
                int64_t i32_res_31669 = sext_i32_i64(i32_arg_31668);
                bool x_31670 = sle64((int64_t) 0, i32_res_31669);
                bool y_31671 = slt64(i32_res_31669, nm_27920);
                bool bounds_check_31672 = x_31670 && y_31671;
                bool index_certs_31673;
                
                // Failure code 44: r + m_27918 * (q + 1) out of range.
                if (!bounds_check_31672) {
                    {
                        if (atomic_cmpxchg_i32_global(global_failure, -1, 44) ==
                            -1) {
                            global_failure_args[0] = i32_res_31669;
                            global_failure_args[1] = nm_27920;
                            ;
                        }
                        return;
                    }
                }
                
                float x_31674 = ((__global
                                  float *) mem_param_44585)[ctx_param_ext_44580 +
                                                            (gtid_31250 *
                                                             ctx_param_ext_44581 +
                                                             i32_res_31669 *
                                                             ctx_param_ext_44583)];
                int32_t i32_arg_31675 = add32(i_31554, x_31667);
                int64_t i32_res_31676 = sext_i32_i64(i32_arg_31675);
                bool x_31677 = sle64((int64_t) 0, i32_res_31676);
                bool y_31678 = slt64(i32_res_31676, nm_27920);
                bool bounds_check_31679 = x_31677 && y_31678;
                bool index_certs_31680;
                
                // Failure code 45: i_31554 + m_27918 * (q + 1) out of range.
                if (!bounds_check_31679) {
                    {
                        if (atomic_cmpxchg_i32_global(global_failure, -1, 45) ==
                            -1) {
                            global_failure_args[0] = i32_res_31676;
                            global_failure_args[1] = nm_27920;
                            ;
                        }
                        return;
                    }
                }
                
                float x_31681 = ((__global
                                  float *) mem_param_44585)[ctx_param_ext_44580 +
                                                            (gtid_31250 *
                                                             ctx_param_ext_44581 +
                                                             i32_res_31676 *
                                                             ctx_param_ext_44583)];
                // The product and the subtraction are separate statements.
                float y_31682 = x_31662 * x_31681;
                float defunc_0_f_res_f_res_t_res_31683 = x_31674 - y_31682;
                
                defunc_0_f_res_f_res_31665 = defunc_0_f_res_f_res_t_res_31683;
            } else {
                // Last block: the result is just the quotient.
                defunc_0_f_res_f_res_31665 = x_31662;
            }
            defunc_0_f_res_31647 = defunc_0_f_res_f_res_31665;
        }
        // Results go to a separate buffer, not back into mem_param_44585.
        ((__global float *) mem_44605)[gtid_31250 * nm_27920 + gtid_31251] =
            defunc_0_f_res_31647;
    }
    
  error_0:
    return;
    #undef segmap_group_sizze_31637
}
// Per-row zero test, one work-item per row index in the range
// 0 <= row < m_27772.  Each work-item loads the float stored in
// mem_param_44585 at
//   ctx_param_ext_44580 + row * ctx_param_ext_44581
//                       + i32_res_31556 * ctx_param_ext_44583
// and writes whether it compares equal to 0.0F into mem_44601[row] (as
// bool).  Nothing is done when *global_failure is already >= 0.
__kernel void mainDetailedzisegmap_31352(__global int *global_failure,
                                         int64_t m_27772, int64_t i32_res_31556,
                                         int64_t ctx_param_ext_44580,
                                         int64_t ctx_param_ext_44581,
                                         int64_t ctx_param_ext_44583, __global
                                         unsigned char *mem_param_44585,
                                         __global unsigned char *mem_44601)
{
    #define segmap_group_sizze_31621 (mainDetailedzisegmap_group_sizze_31354)

    if (*global_failure >= 0)
        return;

    // The ids are narrowed to 32 bits and then sign-extended again.
    const int32_t grp_idx = get_group_id(0);
    const int32_t lane_idx = get_local_id(0);
    const int64_t row_idx = sext_i32_i64(grp_idx) * segmap_group_sizze_31621 +
        sext_i32_i64(lane_idx);

    // Surplus work-items past the last row have nothing to do.
    if (!slt64(row_idx, m_27772))
        return;

    const int64_t elem_off = ctx_param_ext_44580 +
        (row_idx * ctx_param_ext_44581 + i32_res_31556 * ctx_param_ext_44583);
    const float elem_val = ((__global float *) mem_param_44585)[elem_off];
    const bool is_zero = elem_val == 0.0F;

    ((__global bool *) mem_44601)[row_idx] = is_zero;
    #undef segmap_group_sizze_31621
}
// Fills mem_44577, viewed as m_27772 rows of nm_27920 floats, one work-item
// per element.  The element index within a row is narrowed to 32 bits and
// split into q = sdiv32(idx, m_27918) and r = smod32(idx, m_27918):
//   r < k2p2zq_27785:  the value is copied from defunc_3_map_res_mem_44549 at
//       row * (i32_res_27787 * i32_res_27787) + q * i32_res_27787 + r,
//       after checking that q and r both lie in 0 <= x < i32_res_27787;
//   otherwise:         the value is 1.0F when r == k2p2zq_27785 + q, else 0.0F.
// On a failed check the work-item tries to swap *global_failure from -1 to
// 37, records (q, r, i32_res_27787, i32_res_27787) in global_failure_args if
// the swap succeeds, and returns without writing.
// failure_is_an_option is not referenced in the body.
// NOTE(review): the layout looks like a square block placed next to an
// identity block - confirm against the Futhark source.
__kernel void mainDetailedzisegmap_31469(__global int *global_failure,
                                         int failure_is_an_option, __global
                                         int64_t *global_failure_args,
                                         int64_t m_27772, int32_t k2p2zq_27785,
                                         int64_t i32_res_27787, int32_t m_27918,
                                         int64_t nm_27920, __global
                                         unsigned char *defunc_3_map_res_mem_44549,
                                         __global unsigned char *mem_44577)
{
    #define segmap_group_sizze_31529 (mainDetailedzisegmap_group_sizze_31472)

    if (*global_failure >= 0)
        return;

    // The ids are narrowed to 32 bits and then sign-extended again.
    const int32_t grp_idx = get_group_id(0);
    const int32_t lane_idx = get_local_id(0);
    const int64_t flat_idx = sext_i32_i64(grp_idx) * segmap_group_sizze_31529 +
        sext_i32_i64(lane_idx);

    // Split the flat id into (row, element) with nm_27920 elements per row.
    const int64_t row_idx = squot64(flat_idx, nm_27920);
    const int64_t elem_idx = flat_idx - row_idx * nm_27920;

    if (!(slt64(row_idx, m_27772) && slt64(elem_idx, nm_27920)))
        return;

    const int32_t elem_idx32 = sext_i64_i32(elem_idx);
    const int32_t quot32 = sdiv32(elem_idx32, m_27918);
    const int32_t rem32 = smod32(elem_idx32, m_27918);
    float out_val;

    if (slt32(rem32, k2p2zq_27785)) {
        const int64_t src_row = sext_i32_i64(quot32);
        const int64_t src_col = sext_i32_i64(rem32);
        const bool src_row_ok = sle64((int64_t) 0, src_row) &&
            slt64(src_row, i32_res_27787);
        const bool src_col_ok = sle64((int64_t) 0, src_col) &&
            slt64(src_col, i32_res_27787);

        if (!(src_row_ok && src_col_ok)) {
            // Only the work-item that wins the swap records the details.
            if (atomic_cmpxchg_i32_global(global_failure, -1, 37) == -1) {
                global_failure_args[0] = src_row;
                global_failure_args[1] = src_col;
                global_failure_args[2] = i32_res_27787;
                global_failure_args[3] = i32_res_27787;
            }
            return;
        }

        out_val =
            ((__global float *) defunc_3_map_res_mem_44549)[row_idx *
                                                            (i32_res_27787 *
                                                             i32_res_27787) +
                                                            src_row *
                                                            i32_res_27787 +
                                                            src_col];
    } else {
        const int32_t one_at = add32(k2p2zq_27785, quot32);

        out_val = (rem32 == one_at) ? 1.0F : 0.0F;
    }

    ((__global float *) mem_44577)[row_idx * nm_27920 + elem_idx] = out_val;
    #undef segmap_group_sizze_31529
}
// NaN-masked matrix-vector products, one work-item per gtid_31709 in the
// range 0 <= gtid < m_27772.  For each i in 0 .. k2p2zq_27785 - 1:
//   acc_i = sum over j in 0 .. n_27775 - 1 of
//             binop_p_mem_44390[i * N_27771 + j] * mem_44632[j * m_27772 + gtid]
// where a term contributes 0.0F when the mem_44632 value is NaN (as reported
// by futrts_isnan32).  Each acc_i is first stored in the scratch buffer
// mem_44635 at phys_tid + i * (num_groups_31731 * group size), then copied to
// mem_44650[i * m_27772 + gtid] for i in 0 .. i32_res_27787 - 1.
//
// The outer loop makes each physical work-group handle the virtual groups
// phys_group_id, phys_group_id + num_groups_31731, and so on.
// NOTE(review): assumes sdiv_up32 / sdiv_up64 are rounding-up divisions
// (defined elsewhere), so that all of m_27772 is covered - confirm.
// NOTE(review): the copy loop runs to i32_res_27787 while the compute loop
// runs to k2p2zq_27785; presumably these are the same value at different
// widths - confirm against the host code.
// The kernel exits at once when *global_failure is already >= 0.
__kernel void mainDetailedzisegmap_31710(__global int *global_failure,
                                         int64_t N_27771, int64_t m_27772,
                                         int32_t n_27775, int32_t k2p2zq_27785,
                                         int64_t i32_res_27787,
                                         int64_t num_groups_31731, __global
                                         unsigned char *binop_p_mem_44390,
                                         __global unsigned char *mem_44632,
                                         __global unsigned char *mem_44635,
                                         __global unsigned char *mem_44650)
{
    #define segmap_group_sizze_31730 (mainDetailedzisegmap_group_sizze_31712)
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_45897;
    int32_t local_tid_45898;
    int64_t group_sizze_45901;
    int32_t wave_sizze_45900;
    int32_t group_tid_45899;
    
    global_tid_45897 = get_global_id(0);
    local_tid_45898 = get_local_id(0);
    group_sizze_45901 = get_local_size(0);
    wave_sizze_45900 = LOCKSTEP_WIDTH;
    group_tid_45899 = get_group_id(0);
    
    // The physical global id addresses this work-item's scratch slots.
    int32_t phys_tid_31710;
    
    phys_tid_31710 = global_tid_45897;
    
    int32_t phys_group_id_45902;
    
    phys_group_id_45902 = get_group_id(0);
    // The trip count depends only on the group id, the kernel arguments and
    // the group-size macro, so it is the same for every work-item of a group.
    for (int32_t i_45903 = 0; i_45903 <
         sdiv_up32(sext_i64_i32(sdiv_up64(m_27772, segmap_group_sizze_31730)) -
                   phys_group_id_45902, sext_i64_i32(num_groups_31731));
         i_45903++) {
        int32_t virt_group_id_45904 = phys_group_id_45902 + i_45903 *
                sext_i64_i32(num_groups_31731);
        int64_t gtid_31709 = sext_i32_i64(virt_group_id_45904) *
                segmap_group_sizze_31730 + sext_i32_i64(local_tid_45898);
        
        if (slt64(gtid_31709, m_27772)) {
            for (int32_t i_44364 = 0; i_44364 < k2p2zq_27785; i_44364++) {
                int64_t i_44305 = sext_i32_i64(i_44364);
                float defunc_2_reduce_res_31737;
                // Running sum for output element i_44305.
                float redout_44307 = 0.0F;
                
                for (int32_t i_44363 = 0; i_44363 < n_27775; i_44363++) {
                    int64_t i_44308 = sext_i32_i64(i_44363);
                    float x_31742 = ((__global float *) mem_44632)[i_44308 *
                                                                   m_27772 +
                                                                   gtid_31709];
                    bool isnan_res_31743;
                    
                    isnan_res_31743 = futrts_isnan32(x_31742);
                    
                    float defunc_1_f_res_31744;
                    
                    // The binop_p_mem_44390 operand is only loaded for
                    // non-NaN inputs.
                    if (isnan_res_31743) {
                        defunc_1_f_res_31744 = 0.0F;
                    } else {
                        float x_31741 = ((__global
                                          float *) binop_p_mem_44390)[i_44305 *
                                                                      N_27771 +
                                                                      i_44308];
                        float defunc_1_f_res_f_res_31745 = x_31741 * x_31742;
                        
                        defunc_1_f_res_31744 = defunc_1_f_res_f_res_31745;
                    }
                    
                    float defunc_1_op_res_31740 = defunc_1_f_res_31744 +
                          redout_44307;
                    float redout_tmp_45906 = defunc_1_op_res_31740;
                    
                    redout_44307 = redout_tmp_45906;
                }
                defunc_2_reduce_res_31737 = redout_44307;
                // Stage the result in scratch; consecutive results of one
                // work-item are num_groups_31731 * group size floats apart.
                ((__global float *) mem_44635)[phys_tid_31710 + i_44305 *
                                               (num_groups_31731 *
                                                segmap_group_sizze_31730)] =
                    defunc_2_reduce_res_31737;
            }
            // Copy this work-item's staged results to the output, where
            // consecutive results are m_27772 floats apart.
            for (int64_t i_45907 = 0; i_45907 < i32_res_27787; i_45907++) {
                ((__global float *) mem_44650)[i_45907 * m_27772 + gtid_31709] =
                    ((__global float *) mem_44635)[phys_tid_31710 + i_45907 *
                                                   (num_groups_31731 *
                                                    segmap_group_sizze_31730)];
            }
        }
        // Outside the range test, so every work-item of the group reaches it
        // on every iteration.
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }
    
  error_0:
    return;
    #undef segmap_group_sizze_31730
}
// Batched matrix-vector products, one work-item per gtid_31850 in the range
// 0 <= gtid < m_27772.  For each i in 0 .. k2p2zq_27785 - 1:
//   acc_i = sum over j in 0 .. k2p2zq_27785 - 1 of
//             mem_44857[j * m_27772 + gtid]
//             * mem_44854[i * (m_27772 * i32_res_27787) + j * m_27772 + gtid]
// Each acc_i is first stored in the scratch buffer mem_44860 at
// phys_tid + i * (num_groups_31871 * group size), then copied to
// mem_44875[i * m_27772 + gtid] for i in 0 .. i32_res_27787 - 1.
//
// The outer loop makes each physical work-group handle the virtual groups
// phys_group_id, phys_group_id + num_groups_31871, and so on.
// NOTE(review): assumes sdiv_up32 / sdiv_up64 are rounding-up divisions
// (defined elsewhere), so that all of m_27772 is covered - confirm.
// NOTE(review): the copy loop runs to i32_res_27787 while the compute loop
// runs to k2p2zq_27785; presumably these are the same value at different
// widths - confirm against the host code.
// The kernel exits at once when *global_failure is already >= 0.
__kernel void mainDetailedzisegmap_31851(__global int *global_failure,
                                         int64_t m_27772, int32_t k2p2zq_27785,
                                         int64_t i32_res_27787,
                                         int64_t num_groups_31871, __global
                                         unsigned char *mem_44854, __global
                                         unsigned char *mem_44857, __global
                                         unsigned char *mem_44860, __global
                                         unsigned char *mem_44875)
{
    #define segmap_group_sizze_31870 (mainDetailedzisegmap_group_sizze_31853)
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_46029;
    int32_t local_tid_46030;
    int64_t group_sizze_46033;
    int32_t wave_sizze_46032;
    int32_t group_tid_46031;
    
    global_tid_46029 = get_global_id(0);
    local_tid_46030 = get_local_id(0);
    group_sizze_46033 = get_local_size(0);
    wave_sizze_46032 = LOCKSTEP_WIDTH;
    group_tid_46031 = get_group_id(0);
    
    // The physical global id addresses this work-item's scratch slots.
    int32_t phys_tid_31851;
    
    phys_tid_31851 = global_tid_46029;
    
    int32_t phys_group_id_46034;
    
    phys_group_id_46034 = get_group_id(0);
    // The trip count depends only on the group id, the kernel arguments and
    // the group-size macro, so it is the same for every work-item of a group.
    for (int32_t i_46035 = 0; i_46035 <
         sdiv_up32(sext_i64_i32(sdiv_up64(m_27772, segmap_group_sizze_31870)) -
                   phys_group_id_46034, sext_i64_i32(num_groups_31871));
         i_46035++) {
        int32_t virt_group_id_46036 = phys_group_id_46034 + i_46035 *
                sext_i64_i32(num_groups_31871);
        int64_t gtid_31850 = sext_i32_i64(virt_group_id_46036) *
                segmap_group_sizze_31870 + sext_i32_i64(local_tid_46030);
        
        if (slt64(gtid_31850, m_27772)) {
            for (int32_t i_44366 = 0; i_44366 < k2p2zq_27785; i_44366++) {
                int64_t i_44311 = sext_i32_i64(i_44366);
                float defunc_0_f_res_31878;
                // Running sum for output element i_44311.
                float redout_44313 = 0.0F;
                
                for (int32_t i_44365 = 0; i_44365 < k2p2zq_27785; i_44365++) {
                    int64_t i_44314 = sext_i32_i64(i_44365);
                    float x_31882 = ((__global float *) mem_44857)[i_44314 *
                                                                   m_27772 +
                                                                   gtid_31850];
                    float x_31883 = ((__global float *) mem_44854)[i_44311 *
                                                                   (m_27772 *
                                                                    i32_res_27787) +
                                                                   i_44314 *
                                                                   m_27772 +
                                                                   gtid_31850];
                    float defunc_1_f_res_31884 = x_31882 * x_31883;
                    float defunc_1_op_res_31881 = defunc_1_f_res_31884 +
                          redout_44313;
                    float redout_tmp_46038 = defunc_1_op_res_31881;
                    
                    redout_44313 = redout_tmp_46038;
                }
                defunc_0_f_res_31878 = redout_44313;
                // Stage the result in scratch; consecutive results of one
                // work-item are num_groups_31871 * group size floats apart.
                ((__global float *) mem_44860)[phys_tid_31851 + i_44311 *
                                               (num_groups_31871 *
                                                segmap_group_sizze_31870)] =
                    defunc_0_f_res_31878;
            }
            // Copy this work-item's staged results to the output, where
            // consecutive results are m_27772 floats apart.
            for (int64_t i_46039 = 0; i_46039 < i32_res_27787; i_46039++) {
                ((__global float *) mem_44875)[i_46039 * m_27772 + gtid_31850] =
                    ((__global float *) mem_44860)[phys_tid_31851 + i_46039 *
                                                   (num_groups_31871 *
                                                    segmap_group_sizze_31870)];
            }
        }
        // Outside the range test, so every work-item of the group reaches it
        // on every iteration.
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }
    
  error_0:
    return;
    #undef segmap_group_sizze_31870
}
// Matrix-vector products against a shared matrix, one work-item per
// gtid_31982 in the range 0 <= gtid < m_27772.  For each i in
// 0 .. N_27771 - 1:
//   acc_i = sum over j in 0 .. k2p2zq_27785 - 1 of
//             mem_44919[j * m_27772 + gtid] * mem_44397[i * i32_res_27787 + j]
// Each acc_i is first stored in the scratch buffer mem_44922 at
// phys_tid + i * (num_groups_32002 * group size), then copied to
// mem_44937[i * m_27772 + gtid] for i in 0 .. N_27771 - 1.
//
// The outer loop makes each physical work-group handle the virtual groups
// phys_group_id, phys_group_id + num_groups_32002, and so on.
// NOTE(review): assumes sdiv_up32 / sdiv_up64 are rounding-up divisions
// (defined elsewhere), so that all of m_27772 is covered - confirm.
// The kernel exits at once when *global_failure is already >= 0.
__kernel void mainDetailedzisegmap_31983(__global int *global_failure,
                                         int64_t N_27771, int64_t m_27772,
                                         int32_t k2p2zq_27785,
                                         int64_t i32_res_27787,
                                         int64_t num_groups_32002, __global
                                         unsigned char *mem_44397, __global
                                         unsigned char *mem_44919, __global
                                         unsigned char *mem_44922, __global
                                         unsigned char *mem_44937)
{
    #define segmap_group_sizze_32001 (mainDetailedzisegmap_group_sizze_31985)
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_46117;
    int32_t local_tid_46118;
    int64_t group_sizze_46121;
    int32_t wave_sizze_46120;
    int32_t group_tid_46119;
    
    global_tid_46117 = get_global_id(0);
    local_tid_46118 = get_local_id(0);
    group_sizze_46121 = get_local_size(0);
    wave_sizze_46120 = LOCKSTEP_WIDTH;
    group_tid_46119 = get_group_id(0);
    
    // The physical global id addresses this work-item's scratch slots.
    int32_t phys_tid_31983;
    
    phys_tid_31983 = global_tid_46117;
    
    int32_t phys_group_id_46122;
    
    phys_group_id_46122 = get_group_id(0);
    // The trip count depends only on the group id, the kernel arguments and
    // the group-size macro, so it is the same for every work-item of a group.
    for (int32_t i_46123 = 0; i_46123 <
         sdiv_up32(sext_i64_i32(sdiv_up64(m_27772, segmap_group_sizze_32001)) -
                   phys_group_id_46122, sext_i64_i32(num_groups_32002));
         i_46123++) {
        int32_t virt_group_id_46124 = phys_group_id_46122 + i_46123 *
                sext_i64_i32(num_groups_32002);
        int64_t gtid_31982 = sext_i32_i64(virt_group_id_46124) *
                segmap_group_sizze_32001 + sext_i32_i64(local_tid_46118);
        
        if (slt64(gtid_31982, m_27772)) {
            for (int64_t i_44321 = 0; i_44321 < N_27771; i_44321++) {
                float defunc_0_f_res_32008;
                // Running sum for output element i_44321.
                float redout_44323 = 0.0F;
                
                for (int32_t i_44369 = 0; i_44369 < k2p2zq_27785; i_44369++) {
                    int64_t i_44324 = sext_i32_i64(i_44369);
                    float x_32012 = ((__global float *) mem_44919)[i_44324 *
                                                                   m_27772 +
                                                                   gtid_31982];
                    float x_32013 = ((__global float *) mem_44397)[i_44321 *
                                                                   i32_res_27787 +
                                                                   i_44324];
                    float defunc_1_f_res_32014 = x_32012 * x_32013;
                    float defunc_1_op_res_32011 = defunc_1_f_res_32014 +
                          redout_44323;
                    float redout_tmp_46126 = defunc_1_op_res_32011;
                    
                    redout_44323 = redout_tmp_46126;
                }
                defunc_0_f_res_32008 = redout_44323;
                // Stage the result in scratch; consecutive results of one
                // work-item are num_groups_32002 * group size floats apart.
                ((__global float *) mem_44922)[phys_tid_31983 + i_44321 *
                                               (num_groups_32002 *
                                                segmap_group_sizze_32001)] =
                    defunc_0_f_res_32008;
            }
            // Copy this work-item's staged results to the output, where
            // consecutive results are m_27772 floats apart.
            for (int64_t i_46127 = 0; i_46127 < N_27771; i_46127++) {
                ((__global float *) mem_44937)[i_46127 * m_27772 + gtid_31982] =
                    ((__global float *) mem_44922)[phys_tid_31983 + i_46127 *
                                                   (num_groups_32002 *
                                                    segmap_group_sizze_32001)];
            }
        }
        // Outside the range test, so every work-item of the group reaches it
        // on every iteration.
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }
    
  error_0:
    return;
    #undef segmap_group_sizze_32001
}
// Scatter step over a flattened m_27772 x N_27771 index space, one work-item
// per element.  For element (row, col) the float stored in mem_45166 is
// inspected: when it is not NaN, the int64 found at the same position of
// mem_45163, minus one, becomes the destination column inside the same row;
// NaN elements get destination -1 and are therefore never written.  For a
// valid destination the source column index is stored in mem_45175 (int32)
// and the value itself in mem_45172 (float).
// NOTE(review): mem_45163 presumably holds 1-based target slots (for example
// a running count of non-NaN entries per row) -- confirm against the host.
__kernel void mainDetailedzisegmap_32263(__global int *global_failure,
                                         int64_t N_27771, int64_t m_27772,
                                         __global unsigned char *mem_45163,
                                         __global unsigned char *mem_45166,
                                         __global unsigned char *mem_45172,
                                         __global unsigned char *mem_45175)
{
    #define segmap_group_sizze_32429 (mainDetailedzisegmap_group_sizze_32266)
    
    // Nothing to do once *global_failure holds a non-negative value.
    if (*global_failure >= 0)
        return;
    
    const int32_t local_id = get_local_id(0);
    const int32_t group_id = get_group_id(0);
    
    // Flat element index, then its (row, col) decomposition with
    // row = flat / N and col = flat - row * N.
    const int64_t flat = sext_i32_i64(group_id) * segmap_group_sizze_32429 +
        sext_i32_i64(local_id);
    const int64_t row = squot64(flat, N_27771);
    const int64_t col = flat - row * N_27771;
    
    // Work-items past the end of the index space are idle.
    if (!(slt64(row, m_27772) && slt64(col, N_27771)))
        return;
    
    const int64_t src = row * N_27771 + col;
    const float val = ((__global float *) mem_45166)[src];
    
    // Destination column: -1 (never valid) for NaN, stored slot minus one
    // otherwise.  The slot array is only read for non-NaN values.
    int64_t slot = (int64_t) -1;
    
    if (!futrts_isnan32(val))
        slot = sub64(((__global int64_t *) mem_45163)[src], (int64_t) 1);
    
    const bool row_ok = sle64((int64_t) 0, row) && slt64(row, m_27772);
    const bool slot_ok = sle64((int64_t) 0, slot) && slt64(slot, N_27771);
    
    // Both outputs are guarded by the same in-bounds test; the index array is
    // written before the value array.
    if (row_ok && slot_ok) {
        const int64_t dst = row * N_27771 + slot;
        
        ((__global int32_t *) mem_45175)[dst] = sext_i64_i32(col);
        ((__global float *) mem_45172)[dst] = val;
    }
    #undef segmap_group_sizze_32429
}
// One work-item per row: reads element [row * N_27771 + i_28075] of the
// int64 array in mem_45163 and stores it, converted with sext_i64_i32, at
// position `row` of the int32 array in mem_45169.  Rows at or beyond m_27772
// are skipped.
// NOTE(review): the generated name of the loaded value ("last_res") suggests
// i_28075 is the last column index -- confirm against the host code.
__kernel void mainDetailedzisegmap_32339(__global int *global_failure,
                                         int64_t N_27771, int64_t m_27772,
                                         int64_t i_28075, __global
                                         unsigned char *mem_45163, __global
                                         unsigned char *mem_45169)
{
    #define segmap_group_sizze_32393 (mainDetailedzisegmap_group_sizze_32341)
    
    // Nothing to do once *global_failure holds a non-negative value.
    if (*global_failure >= 0)
        return;
    
    const int32_t local_id = get_local_id(0);
    const int32_t group_id = get_group_id(0);
    const int64_t row = sext_i32_i64(group_id) * segmap_group_sizze_32393 +
        sext_i32_i64(local_id);
    
    if (slt64(row, m_27772)) {
        const int64_t wide = ((__global int64_t *) mem_45163)[row * N_27771 +
                                                              i_28075];
        
        ((__global int32_t *) mem_45169)[row] = sext_i64_i32(wide);
    }
    #undef segmap_group_sizze_32393
}
// One work-item per index g < m_27772.  With n = mem_45232[g] (int32) and
// s = mem_45235[g] (float) it produces
//   mem_45238[g] = fptosi_f32_i32(hfrac_27777 * float(n))      (int32)
//   mem_45240[g] = futrts_sqrt32(s / float(n - k2p2_27783))    (float)
// No guard is applied to the divisor; n == k2p2_27783 yields whatever the
// float division and futrts_sqrt32 produce for that input.
__kernel void mainDetailedzisegmap_32602(__global int *global_failure,
                                         int64_t m_27772, float hfrac_27777,
                                         int32_t k2p2_27783, __global
                                         unsigned char *mem_45232, __global
                                         unsigned char *mem_45235, __global
                                         unsigned char *mem_45238, __global
                                         unsigned char *mem_45240)
{
    #define segmap_group_sizze_32695 (mainDetailedzisegmap_group_sizze_32604)
    
    // Nothing to do once *global_failure holds a non-negative value.
    if (*global_failure >= 0)
        return;
    
    const int32_t local_id = get_local_id(0);
    const int32_t group_id = get_group_id(0);
    const int64_t idx = sext_i32_i64(group_id) * segmap_group_sizze_32695 +
        sext_i32_i64(local_id);
    
    // Surplus work-items in the last group are idle.
    if (!slt64(idx, m_27772))
        return;
    
    const int32_t ival = ((__global int32_t *) mem_45232)[idx];
    const float fval = ((__global float *) mem_45235)[idx];
    
    // Square root of the float input divided by (ival - k2p2) as a float.
    const float divisor = sitofp_i32_f32(sub32(ival, k2p2_27783));
    const float root = futrts_sqrt32(fval / divisor);
    
    // hfrac-scaled integer input, converted back to int32.
    const int32_t scaled = fptosi_f32_i32(hfrac_27777 * sitofp_i32_f32(ival));
    
    ((__global int32_t *) mem_45238)[idx] = scaled;
    ((__global float *) mem_45240)[idx] = root;
    #undef segmap_group_sizze_32695
}
// One work-item per index g < iota32_arg_28203.  It reads the int32 entry
// t = mappingindices_mem_44380[n_27775 + g], forms r = float(t) /
// i32_res_28215 and stores
//   mem_45282[g] = lam_27778 * sqrt(r > 2.7182817 ? log(r) : 1.0)
// using futrts_log32 / futrts_sqrt32.  When the source index falls outside
// [0, N_27771) the kernel tries to record failure code 55 together with the
// offending index and the bound in global_failure_args, then returns without
// writing any output.
__kernel void mainDetailedzisegmap_32902(__global int *global_failure,
                                         int failure_is_an_option, __global
                                         int64_t *global_failure_args,
                                         int64_t N_27771, int32_t n_27775,
                                         float lam_27778,
                                         int64_t iota32_arg_28203,
                                         float i32_res_28215, __global
                                         unsigned char *mappingindices_mem_44380,
                                         __global unsigned char *mem_45282)
{
    #define segmap_group_sizze_32924 (mainDetailedzisegmap_group_sizze_32904)
    
    // Nothing to do once *global_failure holds a non-negative value.
    if (*global_failure >= 0)
        return;
    
    const int32_t local_id = get_local_id(0);
    const int32_t group_id = get_group_id(0);
    const int64_t out_idx = sext_i32_i64(group_id) * segmap_group_sizze_32924 +
        sext_i32_i64(local_id);
    
    // Surplus work-items in the last group are idle.
    if (!slt64(out_idx, iota32_arg_28203))
        return;
    
    // Source position: the offset is added in 32-bit arithmetic (add32) and
    // only then widened, exactly as the index is bounds-checked below.
    const int64_t src_idx = sext_i32_i64(add32(n_27775, sext_i64_i32(out_idx)));
    const bool in_range = sle64((int64_t) 0, src_idx) &&
        slt64(src_idx, N_27771);
    
    if (!in_range) {
        // Only the work-item that swaps -1 for the code fills in the
        // failure arguments.
        if (atomic_cmpxchg_i32_global(global_failure, -1, 55) == -1) {
            global_failure_args[0] = src_idx;
            global_failure_args[1] = N_27771;
        }
        return;
    }
    
    const int32_t raw = ((__global int32_t *) mappingindices_mem_44380)[src_idx];
    const float ratio = sitofp_i32_f32(raw) / i32_res_28215;
    
    // Logarithm only above the threshold; everything else (including a NaN
    // ratio, for which the comparison is false) maps to 1.0.
    const float logplus = (2.7182817F < ratio) ? futrts_log32(ratio) : 1.0F;
    const float root = futrts_sqrt32(logplus);
    
    ((__global float *) mem_45282)[out_idx] = lam_27778 * root;
    #undef segmap_group_sizze_32924
}
// One work-item per index gtid < m_27772.  Each work-item
//   1. copies iota32_arg_28233 floats (element i taken from
//      mem_45292[gtid + i * m_27772]) into its own strided scratch column in
//      mem_45303,
//   2. sorts that scratch column in place with an insertion sort driven by
//      distance_28243 outer iterations (an element is swapped towards the
//      front while !(previous <= current)),
//   3. writes one float to mem_45323[gtid]: 0.0 when the two int32 inputs
//      read from defunc_4_map_res_mem_45177 and defunc_3_map_res_mem_45245
//      are equal, otherwise the middle element (odd y) or the mean of the two
//      middle elements (even y) of the scratch column, where y is the int32
//      read from mem_45285[gtid].
// Every indexed access is preceded by a bounds check; a failing check tries
// to record a failure code (58..66) plus the offending index and bound, then
// returns without writing the result.
__kernel void mainDetailedzisegmap_33188(__global int *global_failure,
                                         int failure_is_an_option, __global
                                         int64_t *global_failure_args,
                                         int64_t m_27772,
                                         int64_t iota32_arg_28203,
                                         int64_t iota32_arg_28233,
                                         int64_t distance_28243,
                                         int64_t segmap_usable_groups_33421,
                                         __global
                                         unsigned char *defunc_4_map_res_mem_45177,
                                         __global
                                         unsigned char *defunc_3_map_res_mem_45245,
                                         __global unsigned char *mem_45285,
                                         __global unsigned char *mem_45292,
                                         __global unsigned char *mem_45303,
                                         __global unsigned char *mem_45323)
{
    #define segmap_group_sizze_33420 (mainDetailedzisegmap_group_sizze_33190)
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    
    // Nothing to do once *global_failure holds a non-negative value.
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_46622;
    int32_t local_tid_46623;
    int64_t group_sizze_46626;
    int32_t wave_sizze_46625;
    int32_t group_tid_46624;
    
    global_tid_46622 = get_global_id(0);
    local_tid_46623 = get_local_id(0);
    group_sizze_46626 = get_local_size(0);
    wave_sizze_46625 = LOCKSTEP_WIDTH;
    group_tid_46624 = get_group_id(0);
    
    // phys_tid selects this work-item's scratch column in mem_45303:
    // element k lives at phys_tid + k * (usable_groups * group_size).
    // NOTE(review): this presumably relies on the launch using exactly
    // segmap_usable_groups_33421 groups of segmap_group_sizze_33420
    // work-items so that columns do not overlap -- confirm on the host side.
    int32_t phys_tid_33188;
    
    phys_tid_33188 = global_tid_46622;
    
    int64_t gtid_33187;
    
    gtid_33187 = sext_i32_i64(group_tid_46624) * segmap_group_sizze_33420 +
        sext_i32_i64(local_tid_46623);
    if (slt64(gtid_33187, m_27772)) {
        int32_t x_33423 = ((__global
                            int32_t *) defunc_4_map_res_mem_45177)[gtid_33187];
        int32_t x_33424 = ((__global
                            int32_t *) defunc_3_map_res_mem_45245)[gtid_33187];
        int32_t y_33425 = ((__global int32_t *) mem_45285)[gtid_33187];
        
        // Step 1: copy the input values into the private scratch column.
        for (int64_t i_46627 = 0; i_46627 < iota32_arg_28233; i_46627++) {
            ((__global float *) mem_45303)[phys_tid_33188 + i_46627 *
                                           (segmap_usable_groups_33421 *
                                            segmap_group_sizze_33420)] =
                ((__global float *) mem_45292)[gtid_33187 + i_46627 * m_27772];
        }
        // Step 2: insertion sort.  Outer iteration i considers the element at
        // position i + 1 and sinks it towards the front.
        for (int64_t i_33428 = 0; i_33428 < distance_28243; i_33428++) {
            int64_t index_primexp_33430 = add64((int64_t) 1, i_33428);
            bool cond_33431 = slt64((int64_t) 0, index_primexp_33430);
            bool loop_cond_33432;
            
            // Initial loop condition: does element i + 1 need to move, i.e.
            // is !(scratch[i] <= scratch[i + 1])?
            if (cond_33431) {
                bool x_33433 = sle64((int64_t) 0, index_primexp_33430);
                bool y_33434 = slt64(index_primexp_33430, iota32_arg_28233);
                bool bounds_check_33435 = x_33433 && y_33434;
                bool index_certs_33436;
                
                // Failure 58: position i + 1 is outside the scratch column.
                if (!bounds_check_33435) {
                    {
                        if (atomic_cmpxchg_i32_global(global_failure, -1, 58) ==
                            -1) {
                            global_failure_args[0] = index_primexp_33430;
                            global_failure_args[1] = iota32_arg_28233;
                            ;
                        }
                        return;
                    }
                }
                
                float defunc_2_lifted_gt_arg_33437 = ((__global
                                                       float *) mem_45303)[phys_tid_33188 +
                                                                           index_primexp_33430 *
                                                                           (segmap_usable_groups_33421 *
                                                                            segmap_group_sizze_33420)];
                bool y_33438 = slt64(i_33428, iota32_arg_28233);
                bool index_certs_33439;
                
                // Failure 59: position i is outside the scratch column (only
                // the upper bound is tested here).
                if (!y_33438) {
                    {
                        if (atomic_cmpxchg_i32_global(global_failure, -1, 59) ==
                            -1) {
                            global_failure_args[0] = i_33428;
                            global_failure_args[1] = iota32_arg_28233;
                            ;
                        }
                        return;
                    }
                }
                
                float defunc_1_lifted_gt_arg_33440 = ((__global
                                                       float *) mem_45303)[phys_tid_33188 +
                                                                           i_33428 *
                                                                           (segmap_usable_groups_33421 *
                                                                            segmap_group_sizze_33420)];
                bool defunc_1_zlze_res_33441 = defunc_1_lifted_gt_arg_33440 <=
                     defunc_2_lifted_gt_arg_33437;
                bool defunc_2_lifted_gt_res_33442 = !defunc_1_zlze_res_33441;
                
                loop_cond_33432 = defunc_2_lifted_gt_res_33442;
            } else {
                loop_cond_33432 = 0;
            }
            
            bool xszq_33443;
            int64_t xszq_33444;
            bool loop_while_33446;
            int64_t j_33447;
            
            loop_while_33446 = loop_cond_33432;
            j_33447 = index_primexp_33430;
            // Inner loop: swap positions j and j - 1, then decide whether the
            // element (now at j - 1) has to keep moving.
            while (loop_while_33446) {
                int64_t loopres_33449 = sub64(j_33447, (int64_t) 1);
                bool x_33450 = sle64((int64_t) 0, j_33447);
                bool y_33451 = slt64(j_33447, iota32_arg_28233);
                bool bounds_check_33452 = x_33450 && y_33451;
                bool index_certs_33453;
                
                // Failure 60: position j is outside the scratch column.
                if (!bounds_check_33452) {
                    {
                        if (atomic_cmpxchg_i32_global(global_failure, -1, 60) ==
                            -1) {
                            global_failure_args[0] = j_33447;
                            global_failure_args[1] = iota32_arg_28233;
                            ;
                        }
                        return;
                    }
                }
                
                float copy_arg_33454 = ((__global
                                         float *) mem_45303)[phys_tid_33188 +
                                                             j_33447 *
                                                             (segmap_usable_groups_33421 *
                                                              segmap_group_sizze_33420)];
                bool x_33455 = sle64((int64_t) 0, loopres_33449);
                bool y_33456 = slt64(loopres_33449, iota32_arg_28233);
                bool bounds_check_33457 = x_33455 && y_33456;
                bool index_certs_33458;
                
                // Failure 61: position j - 1 is outside the scratch column.
                if (!bounds_check_33457) {
                    {
                        if (atomic_cmpxchg_i32_global(global_failure, -1, 61) ==
                            -1) {
                            global_failure_args[0] = loopres_33449;
                            global_failure_args[1] = iota32_arg_28233;
                            ;
                        }
                        return;
                    }
                }
                
                float copy_arg_33459 = ((__global
                                         float *) mem_45303)[phys_tid_33188 +
                                                             loopres_33449 *
                                                             (segmap_usable_groups_33421 *
                                                              segmap_group_sizze_33420)];
                
                // The swap itself: both values were loaded above, so the two
                // stores simply exchange them.
                ((__global float *) mem_45303)[phys_tid_33188 + j_33447 *
                                               (segmap_usable_groups_33421 *
                                                segmap_group_sizze_33420)] =
                    copy_arg_33459;
                ((__global float *) mem_45303)[phys_tid_33188 + loopres_33449 *
                                               (segmap_usable_groups_33421 *
                                                segmap_group_sizze_33420)] =
                    copy_arg_33454;
                
                bool cond_33462 = slt64((int64_t) 0, loopres_33449);
                bool loop_cond_33463;
                
                // Continue only while j - 1 > 0 and
                // !(scratch[j - 2] <= scratch[j - 1]).
                if (cond_33462) {
                    bool index_certs_33464;
                    
                    // Failure 62 re-tests bounds_check_33457, which was
                    // already enforced by the failure-61 check above, so this
                    // branch cannot be taken at this point.
                    if (!bounds_check_33457) {
                        {
                            if (atomic_cmpxchg_i32_global(global_failure, -1,
                                                          62) == -1) {
                                global_failure_args[0] = loopres_33449;
                                global_failure_args[1] = iota32_arg_28233;
                                ;
                            }
                            return;
                        }
                    }
                    
                    float defunc_2_lifted_gt_arg_33465 = ((__global
                                                           float *) mem_45303)[phys_tid_33188 +
                                                                               loopres_33449 *
                                                                               (segmap_usable_groups_33421 *
                                                                                segmap_group_sizze_33420)];
                    int64_t i_33466 = sub64(loopres_33449, (int64_t) 1);
                    bool x_33467 = sle64((int64_t) 0, i_33466);
                    bool y_33468 = slt64(i_33466, iota32_arg_28233);
                    bool bounds_check_33469 = x_33467 && y_33468;
                    bool index_certs_33470;
                    
                    // Failure 63: position j - 2 is outside the scratch
                    // column.
                    if (!bounds_check_33469) {
                        {
                            if (atomic_cmpxchg_i32_global(global_failure, -1,
                                                          63) == -1) {
                                global_failure_args[0] = i_33466;
                                global_failure_args[1] = iota32_arg_28233;
                                ;
                            }
                            return;
                        }
                    }
                    
                    float defunc_1_lifted_gt_arg_33471 = ((__global
                                                           float *) mem_45303)[phys_tid_33188 +
                                                                               i_33466 *
                                                                               (segmap_usable_groups_33421 *
                                                                                segmap_group_sizze_33420)];
                    bool defunc_1_zlze_res_33472 =
                         defunc_1_lifted_gt_arg_33471 <=
                         defunc_2_lifted_gt_arg_33465;
                    bool defunc_2_lifted_gt_res_33473 =
                         !defunc_1_zlze_res_33472;
                    
                    loop_cond_33463 = defunc_2_lifted_gt_res_33473;
                } else {
                    loop_cond_33463 = 0;
                }
                
                bool loop_while_tmp_46629 = loop_cond_33463;
                int64_t j_tmp_46630 = loopres_33449;
                
                loop_while_33446 = loop_while_tmp_46629;
                j_33447 = j_tmp_46630;
            }
            // The final loop state is stored here but not read again in this
            // kernel.
            xszq_33443 = loop_while_33446;
            xszq_33444 = j_33447;
        }
        
        // Step 3: pick the result from the sorted scratch column.
        // i = sdiv32(y, 2) and j = i - 1 are the candidate positions.
        // NOTE(review): the checks below compare against iota32_arg_28203,
        // whereas the scratch column was filled with iota32_arg_28233
        // elements; presumably both sizes are equal at run time -- confirm.
        int32_t i_33474 = sdiv32(y_33425, 2);
        int32_t j_33475 = sub32(i_33474, 1);
        bool cond_33476 = x_33423 == x_33424;
        float defunc_0_f_res_33477;
        
        if (cond_33476) {
            // Equal int32 inputs: the result is fixed to 0.0 and the scratch
            // column is not consulted.
            defunc_0_f_res_33477 = 0.0F;
        } else {
            int32_t x_33478 = smod32(y_33425, 2);
            bool cond_33479 = x_33478 == 0;
            float defunc_0_f_res_f_res_33480;
            
            if (cond_33479) {
                // Even y: mean of the elements at positions j and i.
                int64_t j_33481 = sext_i32_i64(j_33475);
                bool x_33482 = sle64((int64_t) 0, j_33481);
                bool y_33483 = slt64(j_33481, iota32_arg_28203);
                bool bounds_check_33484 = x_33482 && y_33483;
                bool index_certs_33485;
                
                // Failure 64: position j is out of range.
                if (!bounds_check_33484) {
                    {
                        if (atomic_cmpxchg_i32_global(global_failure, -1, 64) ==
                            -1) {
                            global_failure_args[0] = j_33481;
                            global_failure_args[1] = iota32_arg_28203;
                            ;
                        }
                        return;
                    }
                }
                
                float x_33486 = ((__global float *) mem_45303)[phys_tid_33188 +
                                                               j_33481 *
                                                               (segmap_usable_groups_33421 *
                                                                segmap_group_sizze_33420)];
                int64_t i_33487 = sext_i32_i64(i_33474);
                bool x_33488 = sle64((int64_t) 0, i_33487);
                bool y_33489 = slt64(i_33487, iota32_arg_28203);
                bool bounds_check_33490 = x_33488 && y_33489;
                bool index_certs_33491;
                
                // Failure 65: position i is out of range.
                if (!bounds_check_33490) {
                    {
                        if (atomic_cmpxchg_i32_global(global_failure, -1, 65) ==
                            -1) {
                            global_failure_args[0] = i_33487;
                            global_failure_args[1] = iota32_arg_28203;
                            ;
                        }
                        return;
                    }
                }
                
                float y_33492 = ((__global float *) mem_45303)[phys_tid_33188 +
                                                               i_33487 *
                                                               (segmap_usable_groups_33421 *
                                                                segmap_group_sizze_33420)];
                float x_33493 = x_33486 + y_33492;
                float defunc_0_f_res_f_res_t_res_33494 = x_33493 / 2.0F;
                
                defunc_0_f_res_f_res_33480 = defunc_0_f_res_f_res_t_res_33494;
            } else {
                // Odd y: the single element at position i.
                int64_t i_33495 = sext_i32_i64(i_33474);
                bool x_33496 = sle64((int64_t) 0, i_33495);
                bool y_33497 = slt64(i_33495, iota32_arg_28203);
                bool bounds_check_33498 = x_33496 && y_33497;
                bool index_certs_33499;
                
                // Failure 66: position i is out of range.
                if (!bounds_check_33498) {
                    {
                        if (atomic_cmpxchg_i32_global(global_failure, -1, 66) ==
                            -1) {
                            global_failure_args[0] = i_33495;
                            global_failure_args[1] = iota32_arg_28203;
                            ;
                        }
                        return;
                    }
                }
                
                float defunc_0_f_res_f_res_f_res_33500 = ((__global
                                                           float *) mem_45303)[phys_tid_33188 +
                                                                               i_33495 *
                                                                               (segmap_usable_groups_33421 *
                                                                                segmap_group_sizze_33420)];
                
                defunc_0_f_res_f_res_33480 = defunc_0_f_res_f_res_f_res_33500;
            }
            defunc_0_f_res_33477 = defunc_0_f_res_f_res_33480;
        }
        ((__global float *) mem_45323)[gtid_33187] = defunc_0_f_res_33477;
    }
    
  error_0:
    return;
    #undef segmap_group_sizze_33420
}
// Fills the float array mem_45289, laid out as m_27772 rows of
// iota32_arg_28233 entries, one work-item per entry.  For entry (row, col),
// with x = defunc_3_map_res_mem_45245[row] and y = mem_45285[row] (both
// int32), the source value is defunc_4_map_res_mem_45178[row * N_27771 +
// (x + col)].  The entry receives that value when col < y and the value is
// not NaN; otherwise it receives INFINITY.  A source index outside
// [0, N_27771) makes the kernel try to record failure code 56 (first read) or
// 57 (second read) with the offending index and bound, and return without
// writing the entry.
__kernel void mainDetailedzisegmap_33309(__global int *global_failure,
                                         int failure_is_an_option, __global
                                         int64_t *global_failure_args,
                                         int64_t N_27771, int64_t m_27772,
                                         int64_t iota32_arg_28233, __global
                                         unsigned char *defunc_4_map_res_mem_45178,
                                         __global
                                         unsigned char *defunc_3_map_res_mem_45245,
                                         __global unsigned char *mem_45285,
                                         __global unsigned char *mem_45289)
{
    #define segmap_group_sizze_33391 (mainDetailedzisegmap_group_sizze_33312)
    
    // Nothing to do once *global_failure holds a non-negative value.
    if (*global_failure >= 0)
        return;
    
    const int32_t local_id = get_local_id(0);
    const int32_t group_id = get_group_id(0);
    
    // Flat entry index, then its (row, col) decomposition with
    // row = flat / width and col = flat - row * width.
    const int64_t flat = sext_i32_i64(group_id) * segmap_group_sizze_33391 +
        sext_i32_i64(local_id);
    const int64_t row = squot64(flat, iota32_arg_28233);
    const int64_t col = flat - row * iota32_arg_28233;
    
    // Work-items past the end of the index space are idle.
    if (!(slt64(row, m_27772) && slt64(col, iota32_arg_28233)))
        return;
    
    const int32_t base = ((__global
                           int32_t *) defunc_3_map_res_mem_45245)[row];
    const int32_t limit = ((__global int32_t *) mem_45285)[row];
    const int32_t col32 = sext_i64_i32(col);
    
    // First pass: decide whether the source value is usable (inside the
    // per-row limit and not NaN).
    bool usable = 0;
    
    if (slt32(col32, limit)) {
        const int64_t probe = sext_i32_i64(add32(base, col32));
        
        if (!(sle64((int64_t) 0, probe) && slt64(probe, N_27771))) {
            if (atomic_cmpxchg_i32_global(global_failure, -1, 56) == -1) {
                global_failure_args[0] = probe;
                global_failure_args[1] = N_27771;
            }
            return;
        }
        
        const float probed = ((__global
                               float *) defunc_4_map_res_mem_45178)[row *
                                                                    N_27771 +
                                                                    probe];
        
        usable = !futrts_isnan32(probed);
    }
    
    // Second pass: fetch the value again (with its own bounds check) or fall
    // back to INFINITY.
    float result = INFINITY;
    
    if (usable) {
        const int64_t fetch = sext_i32_i64(add32(base, col32));
        
        if (!(sle64((int64_t) 0, fetch) && slt64(fetch, N_27771))) {
            if (atomic_cmpxchg_i32_global(global_failure, -1, 57) == -1) {
                global_failure_args[0] = fetch;
                global_failure_args[1] = N_27771;
            }
            return;
        }
        result = ((__global float *) defunc_4_map_res_mem_45178)[row * N_27771 +
                                                                 fetch];
    }
    ((__global float *) mem_45289)[row * iota32_arg_28233 + col] = result;
    #undef segmap_group_sizze_33391
}
// Element-wise int32 difference of two buffers.
//
// The work-item with flat index k (group id * group size + local id) handles
// element k when k < m_27772: it reads one int32 from
// defunc_4_map_res_mem_45177 and one from defunc_3_map_res_mem_45245 and
// stores sub32(first, second) at the same position of mem_45285.
// Nothing is read or written when *global_failure is already non-negative.
__kernel void mainDetailedzisegmap_33367(__global int *global_failure,
                                         int64_t m_27772,
                                         __global unsigned char *defunc_4_map_res_mem_45177,
                                         __global unsigned char *defunc_3_map_res_mem_45245,
                                         __global unsigned char *mem_45285)
{
    // Other kernels in this file store a failure code (>= 0) here; once one
    // is present this kernel does no work.
    if (*global_failure >= 0)
        return;

    const int32_t group_idx = get_group_id(0);
    const int32_t lane_idx = get_local_id(0);
    const int64_t elem = sext_i32_i64(group_idx) *
        (mainDetailedzisegmap_group_sizze_33369) + sext_i32_i64(lane_idx);

    // Work-items past the end of the arrays have nothing to do.
    if (!slt64(elem, m_27772))
        return;

    __global const int32_t *minuend =
        (__global const int32_t *) defunc_4_map_res_mem_45177;
    __global const int32_t *subtrahend =
        (__global const int32_t *) defunc_3_map_res_mem_45245;
    __global int32_t *difference = (__global int32_t *) mem_45285;

    const int32_t lhs = minuend[elem];
    const int32_t rhs = subtrahend[elem];

    difference[elem] = sub32(lhs, rhs);
}
// Per-row kernel with virtualised work-groups: each physical group walks the
// virtual group ids phys_group_id, phys_group_id + num_groups_33660, ... and
// every work-item handles row gtid_33544 (guarded by gtid_33544 < m_27772).
//
// For its row a work-item:
//   1. writes a running sum of iota32_arg_28233 terms into mem_45326,
//   2. divides each running sum by x_33668 * sqrt(x_33667), stores the
//      quotients in mem_45340 and folds them into (found flag, first index i
//      with i < y, non-NaN quotient and mem_45282[i] < |quotient|, sum of the
//      quotients with i < y),
//   3. derives the sum divided by y (mem_45376) and a "first break" value
//      (mem_45374),
//   4. scatters the quotients into a NAN-initialised column of mem_45354 and
//      copies that column and the mem_45340 column to mem_45369 / mem_45372.
//
// mem_45326, mem_45340 and mem_45354 are addressed as
// phys_tid + i * (num_groups_33660 * group size), i.e. one strided column per
// physical work-item.
//
// A failed bounds check tries to store a code (67-70) in *global_failure,
// records the offending index and N_27771 in global_failure_args when it
// wins that exchange, sets local_failure and jumps to error_0.
__kernel void mainDetailedzisegmap_33545(__global int *global_failure,
                                         int failure_is_an_option, __global
                                         int64_t *global_failure_args,
                                         int64_t N_27771, int64_t m_27772,
                                         int32_t n_27775,
                                         int64_t iota32_arg_28203,
                                         int64_t iota32_arg_28233,
                                         int64_t num_groups_33660, __global
                                         unsigned char *defunc_4_map_res_mem_45177,
                                         __global
                                         unsigned char *defunc_4_map_res_mem_45178,
                                         __global
                                         unsigned char *defunc_4_map_res_mem_45179,
                                         __global
                                         unsigned char *defunc_3_map_res_mem_45244,
                                         __global
                                         unsigned char *defunc_3_map_res_mem_45245,
                                         __global
                                         unsigned char *defunc_3_map_res_mem_45246,
                                         __global
                                         unsigned char *defunc_0_f_res_mem_45279,
                                         __global unsigned char *mem_45282,
                                         __global unsigned char *mem_45326,
                                         __global unsigned char *mem_45340,
                                         __global unsigned char *mem_45354,
                                         __global unsigned char *mem_45369,
                                         __global unsigned char *mem_45372,
                                         __global unsigned char *mem_45374,
                                         __global unsigned char *mem_45376)
{
    #define segmap_group_sizze_33659 (mainDetailedzisegmap_group_sizze_33547)
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    volatile __local bool local_failure;
    
    // *global_failure is only consulted when failure_is_an_option is set;
    // a non-negative value makes the work-item return before doing any work.
    if (failure_is_an_option) {
        int failed = *global_failure >= 0;
        
        if (failed)
            return;
    }
    // Clear the __local failure flag, then synchronise the work-group.
    local_failure = false;
    barrier(CLK_LOCAL_MEM_FENCE);
    
    int32_t global_tid_46636;
    int32_t local_tid_46637;
    int64_t group_sizze_46640;
    int32_t wave_sizze_46639;
    int32_t group_tid_46638;
    
    global_tid_46636 = get_global_id(0);
    local_tid_46637 = get_local_id(0);
    group_sizze_46640 = get_local_size(0);
    wave_sizze_46639 = LOCKSTEP_WIDTH;
    group_tid_46638 = get_group_id(0);
    
    int32_t phys_tid_33545;
    
    // phys_tid_33545 (the global id) selects this work-item's column in the
    // scratch buffers mem_45326 / mem_45340 / mem_45354.
    phys_tid_33545 = global_tid_46636;
    
    int32_t phys_group_id_46641;
    
    phys_group_id_46641 = get_group_id(0);
    // Virtualisation loop: iteration i handles virtual group
    // phys_group_id + i * num_groups_33660. The trip count is
    // sdiv_up32(sdiv_up64(m, group size) - phys_group_id, num_groups)
    // (presumably rounding-up divisions; the helpers are defined elsewhere).
    for (int32_t i_46642 = 0; i_46642 <
         sdiv_up32(sext_i64_i32(sdiv_up64(m_27772, segmap_group_sizze_33659)) -
                   phys_group_id_46641, sext_i64_i32(num_groups_33660));
         i_46642++) {
        int32_t virt_group_id_46643 = phys_group_id_46641 + i_46642 *
                sext_i64_i32(num_groups_33660);
        int64_t gtid_33544 = sext_i32_i64(virt_group_id_46643) *
                segmap_group_sizze_33659 + sext_i32_i64(local_tid_46637);
        
        if (slt64(gtid_33544, m_27772)) {
            // Per-row scalars for row gtid_33544.
            int32_t x_33666 = ((__global
                                int32_t *) defunc_4_map_res_mem_45177)[gtid_33544];
            int32_t x_33667 = ((__global
                                int32_t *) defunc_3_map_res_mem_45245)[gtid_33544];
            float x_33668 = ((__global
                              float *) defunc_3_map_res_mem_45246)[gtid_33544];
            int32_t x_33669 = ((__global
                                int32_t *) defunc_3_map_res_mem_45244)[gtid_33544];
            float x_33670 = ((__global
                              float *) defunc_0_f_res_mem_45279)[gtid_33544];
            // y_33673 = x_33666 - x_33667 bounds the "active" indices in
            // every loop below.
            int32_t y_33673 = sub32(x_33666, x_33667);
            float discard_44340;
            float scanacc_44336 = 0.0F;
            
            // Step 1: sequential running sum. Term i is 0 once i >= y_33673,
            // x_33670 for i == 0, and otherwise the difference of two entries
            // of row gtid_33544 of defunc_4_map_res_mem_45178, at offsets
            // x_33667 + i and (x_33667 - x_33669) + i. Each partial sum is
            // stored in this work-item's column of mem_45326.
            for (int64_t i_44338 = 0; i_44338 < iota32_arg_28233; i_44338++) {
                int32_t index_primexp_44371 = sext_i64_i32(i_44338);
                bool cond_33679 = sle32(y_33673, index_primexp_44371);
                float defunc_0_f_res_33680;
                
                if (cond_33679) {
                    defunc_0_f_res_33680 = 0.0F;
                } else {
                    bool cond_33681 = index_primexp_44371 == 0;
                    float defunc_0_f_res_f_res_33682;
                    
                    if (cond_33681) {
                        defunc_0_f_res_f_res_33682 = x_33670;
                    } else {
                        int32_t i_33683 = add32(x_33667, index_primexp_44371);
                        int64_t i_33684 = sext_i32_i64(i_33683);
                        bool x_33685 = sle64((int64_t) 0, i_33684);
                        bool y_33686 = slt64(i_33684, N_27771);
                        bool bounds_check_33687 = x_33685 && y_33686;
                        bool index_certs_33688;
                        
                        // Index must lie in [0, N_27771); otherwise report
                        // failure code 67 and leave the kernel.
                        // NOTE(review): the goto leaves the virtualisation
                        // loop, so this work-item never reaches the barrier
                        // at the end of the iteration - confirm this is safe
                        // on the targeted devices.
                        if (!bounds_check_33687) {
                            {
                                if (atomic_cmpxchg_i32_global(global_failure,
                                                              -1, 67) == -1) {
                                    global_failure_args[0] = i_33684;
                                    global_failure_args[1] = N_27771;
                                    ;
                                }
                                local_failure = true;
                                goto error_0;
                            }
                        }
                        
                        float x_33689 = ((__global
                                          float *) defunc_4_map_res_mem_45178)[gtid_33544 *
                                                                               N_27771 +
                                                                               i_33684];
                        int32_t x_33690 = sub32(x_33667, x_33669);
                        int32_t i_33691 = add32(x_33690, index_primexp_44371);
                        int64_t i_33692 = sext_i32_i64(i_33691);
                        bool x_33693 = sle64((int64_t) 0, i_33692);
                        bool y_33694 = slt64(i_33692, N_27771);
                        bool bounds_check_33695 = x_33693 && y_33694;
                        bool index_certs_33696;
                        
                        // Same range check for the second offset
                        // (failure code 68).
                        if (!bounds_check_33695) {
                            {
                                if (atomic_cmpxchg_i32_global(global_failure,
                                                              -1, 68) == -1) {
                                    global_failure_args[0] = i_33692;
                                    global_failure_args[1] = N_27771;
                                    ;
                                }
                                local_failure = true;
                                goto error_0;
                            }
                        }
                        
                        float y_33697 = ((__global
                                          float *) defunc_4_map_res_mem_45178)[gtid_33544 *
                                                                               N_27771 +
                                                                               i_33692];
                        float defunc_0_f_res_f_res_f_res_33698 = x_33689 -
                              y_33697;
                        
                        defunc_0_f_res_f_res_33682 =
                            defunc_0_f_res_f_res_f_res_33698;
                    }
                    defunc_0_f_res_33680 = defunc_0_f_res_f_res_33682;
                }
                
                float defunc_1_op_res_33677 = defunc_0_f_res_33680 +
                      scanacc_44336;
                
                ((__global float *) mem_45326)[phys_tid_33545 + i_44338 *
                                               (num_groups_33660 *
                                                segmap_group_sizze_33659)] =
                    defunc_1_op_res_33677;
                
                float scanacc_tmp_46644 = defunc_1_op_res_33677;
                
                scanacc_44336 = scanacc_tmp_46644;
            }
            discard_44340 = scanacc_44336;
            
            // Normaliser for step 2: x_33668 * sqrt((float) x_33667).
            float i32_res_33699 = sitofp_i32_f32(x_33667);
            float sqrt_res_33700;
            
            sqrt_res_33700 = futrts_sqrt32(i32_res_33699);
            
            float y_33701 = x_33668 * sqrt_res_33700;
            bool defunc_0_f_res_33703;
            int32_t defunc_0_f_res_33704;
            float defunc_0_f_res_33705;
            bool redout_44342;
            int32_t redout_44343;
            float redout_44344;
            
            // Reduction state: found flag, index of the first hit (-1 while
            // none has been seen), and running sum of the quotients.
            redout_44342 = 0;
            redout_44343 = -1;
            redout_44344 = 0.0F;
            // Step 2: normalise the running sums and reduce over them.
            // NOTE(review): this loop reads mem_45326 for
            // i < iota32_arg_28203, while step 1 wrote entries for
            // i < iota32_arg_28233 - presumably the two sizes agree; confirm
            // against the host code.
            for (int64_t i_44346 = 0; i_44346 < iota32_arg_28203; i_44346++) {
                float x_33721 = ((__global float *) mem_45326)[phys_tid_33545 +
                                                               i_44346 *
                                                               (num_groups_33660 *
                                                                segmap_group_sizze_33659)];
                float x_33722 = ((__global float *) mem_45282)[i_44346];
                int32_t index_primexp_44372 = sext_i64_i32(i_44346);
                int32_t x_33723 = index_primexp_44372;
                float defunc_0_f_res_33724 = x_33721 / y_33701;
                bool cond_33725 = slt32(index_primexp_44372, y_33673);
                bool isnan_res_33726;
                
                isnan_res_33726 = futrts_isnan32(defunc_0_f_res_33724);
                
                // x_33731 is the "hit" predicate: i < y_33673, the quotient
                // is not NaN, and mem_45282[i] < |quotient|.
                bool cond_t_res_33727 = !isnan_res_33726;
                bool x_33728 = cond_33725 && cond_t_res_33727;
                float abs_res_33729 = (float) fabs(defunc_0_f_res_33724);
                bool defunc_2_f_res_t_res_33730 = x_33722 < abs_res_33729;
                bool x_33731 = x_33728 && defunc_2_f_res_t_res_33730;
                float defunc_1_f_res_33732;
                
                // Only quotients with i < y_33673 contribute to the sum.
                if (cond_33725) {
                    defunc_1_f_res_33732 = defunc_0_f_res_33724;
                } else {
                    defunc_1_f_res_33732 = 0.0F;
                }
                
                bool defunc_1_op_res_33711;
                int32_t defunc_1_op_res_33712;
                
                // Keep the first hit: once the flag is set, flag and index
                // are carried through unchanged; otherwise they take the
                // current element's predicate and (on a hit) its index.
                if (redout_44342) {
                    defunc_1_op_res_33711 = redout_44342;
                    defunc_1_op_res_33712 = redout_44343;
                } else {
                    bool x_33713 = x_33731 && x_33731;
                    bool x_33714 = !x_33731;
                    bool y_33715 = x_33714 && redout_44342;
                    bool defunc_1_op_res_f_res_33716 = x_33713 || y_33715;
                    int32_t defunc_1_op_res_f_res_33717;
                    
                    if (x_33731) {
                        defunc_1_op_res_f_res_33717 = x_33723;
                    } else {
                        defunc_1_op_res_f_res_33717 = redout_44343;
                    }
                    defunc_1_op_res_33711 = defunc_1_op_res_f_res_33716;
                    defunc_1_op_res_33712 = defunc_1_op_res_f_res_33717;
                }
                
                float defunc_1_op_res_33720 = defunc_1_f_res_33732 +
                      redout_44344;
                
                // The unmasked quotient is stored for every i.
                ((__global float *) mem_45340)[phys_tid_33545 + i_44346 *
                                               (num_groups_33660 *
                                                segmap_group_sizze_33659)] =
                    defunc_0_f_res_33724;
                
                bool redout_tmp_46646 = defunc_1_op_res_33711;
                int32_t redout_tmp_46647 = defunc_1_op_res_33712;
                float redout_tmp_46648 = defunc_1_op_res_33720;
                
                redout_44342 = redout_tmp_46646;
                redout_44343 = redout_tmp_46647;
                redout_44344 = redout_tmp_46648;
            }
            defunc_0_f_res_33703 = redout_44342;
            defunc_0_f_res_33704 = redout_44343;
            defunc_0_f_res_33705 = redout_44344;
            
            // Step 3a: sum / y_33673, defined as 0 when y_33673 is 0.
            bool cond_33733 = y_33673 == 0;
            float defunc_0_f_res_33734;
            
            if (cond_33733) {
                defunc_0_f_res_33734 = 0.0F;
            } else {
                float i32_res_33735 = sitofp_i32_f32(y_33673);
                float defunc_0_f_res_f_res_33736 = defunc_0_f_res_33705 /
                      i32_res_33735;
                
                defunc_0_f_res_33734 = defunc_0_f_res_f_res_33736;
            }
            
            // Step 3b: translate the first-hit index. No hit, or a hit index
            // >= y_33673, gives -1; otherwise the result is the entry of
            // defunc_4_map_res_mem_45179 at row offset x_33667 + index,
            // minus n_27775.
            bool cond_33737 = !defunc_0_f_res_33703;
            int32_t fst_breakzq_33738;
            
            if (cond_33737) {
                fst_breakzq_33738 = -1;
            } else {
                bool cond_33739 = slt32(defunc_0_f_res_33704, y_33673);
                int32_t adjustValInds_res_33740;
                
                if (cond_33739) {
                    int32_t i_33741 = add32(x_33667, defunc_0_f_res_33704);
                    int64_t i_33742 = sext_i32_i64(i_33741);
                    bool x_33743 = sle64((int64_t) 0, i_33742);
                    bool y_33744 = slt64(i_33742, N_27771);
                    bool bounds_check_33745 = x_33743 && y_33744;
                    bool index_certs_33746;
                    
                    // Range check on the lookup index (failure code 69).
                    if (!bounds_check_33745) {
                        {
                            if (atomic_cmpxchg_i32_global(global_failure, -1,
                                                          69) == -1) {
                                global_failure_args[0] = i_33742;
                                global_failure_args[1] = N_27771;
                                ;
                            }
                            local_failure = true;
                            goto error_0;
                        }
                    }
                    
                    int32_t x_33747 = ((__global
                                        int32_t *) defunc_4_map_res_mem_45179)[gtid_33544 *
                                                                               N_27771 +
                                                                               i_33742];
                    int32_t adjustValInds_res_t_res_33748 = sub32(x_33747,
                                                                  n_27775);
                    
                    adjustValInds_res_33740 = adjustValInds_res_t_res_33748;
                } else {
                    adjustValInds_res_33740 = -1;
                }
                fst_breakzq_33738 = adjustValInds_res_33740;
            }
            
            // The result is overridden with -2 when x_33667 <= 5 or
            // y_33673 <= 5 (cond_33753 reduces to the OR of the two tests).
            bool cond_33749 = sle32(x_33667, 5);
            bool cond_f_res_33750 = sle32(y_33673, 5);
            bool x_33751 = !cond_33749;
            bool y_33752 = cond_f_res_33750 && x_33751;
            bool cond_33753 = cond_33749 || y_33752;
            int32_t fst_breakzq_33754;
            
            if (cond_33753) {
                fst_breakzq_33754 = -2;
            } else {
                fst_breakzq_33754 = fst_breakzq_33738;
            }
            // Step 4a: fill this work-item's column of mem_45354 with NAN.
            for (int64_t i_46650 = 0; i_46650 < iota32_arg_28233; i_46650++) {
                ((__global float *) mem_45354)[phys_tid_33545 + i_46650 *
                                               (num_groups_33660 *
                                                segmap_group_sizze_33659)] =
                    NAN;
            }
            // Step 4b: scatter. For source position write_iter the target is
            // the defunc_4_map_res_mem_45179 entry at row offset
            // x_33667 + write_iter minus n_27775 when write_iter < y_33673,
            // and -1 otherwise. Targets outside [0, iota32_arg_28233) are
            // dropped.
            for (int64_t write_iter_44348 = 0; write_iter_44348 <
                 iota32_arg_28233; write_iter_44348++) {
                int32_t index_primexp_44374 = sext_i64_i32(write_iter_44348);
                bool cond_33760 = slt32(index_primexp_44374, y_33673);
                int32_t defunc_0_f_res_33761;
                
                if (cond_33760) {
                    int32_t i_33762 = add32(x_33667, index_primexp_44374);
                    int64_t i_33763 = sext_i32_i64(i_33762);
                    bool x_33764 = sle64((int64_t) 0, i_33763);
                    bool y_33765 = slt64(i_33763, N_27771);
                    bool bounds_check_33766 = x_33764 && y_33765;
                    bool index_certs_33767;
                    
                    // Range check on the lookup index (failure code 70).
                    if (!bounds_check_33766) {
                        {
                            if (atomic_cmpxchg_i32_global(global_failure, -1,
                                                          70) == -1) {
                                global_failure_args[0] = i_33763;
                                global_failure_args[1] = N_27771;
                                ;
                            }
                            local_failure = true;
                            goto error_0;
                        }
                    }
                    
                    int32_t x_33768 = ((__global
                                        int32_t *) defunc_4_map_res_mem_45179)[gtid_33544 *
                                                                               N_27771 +
                                                                               i_33763];
                    int32_t defunc_0_f_res_t_res_33769 = sub32(x_33768,
                                                               n_27775);
                    
                    defunc_0_f_res_33761 = defunc_0_f_res_t_res_33769;
                } else {
                    defunc_0_f_res_33761 = -1;
                }
                
                int64_t defunc_0_f_res_33770 =
                        sext_i32_i64(defunc_0_f_res_33761);
                bool less_than_zzero_44352 = slt64(defunc_0_f_res_33770,
                                                   (int64_t) 0);
                bool greater_than_sizze_44353 = sle64(iota32_arg_28233,
                                                      defunc_0_f_res_33770);
                bool outside_bounds_dim_44354 = less_than_zzero_44352 ||
                     greater_than_sizze_44353;
                
                if (!outside_bounds_dim_44354) {
                    // Single-iteration loop: copies exactly one float from
                    // mem_45340[write_iter] to mem_45354[target].
                    for (int64_t i_46652 = 0; i_46652 < (int64_t) 1;
                         i_46652++) {
                        ((__global float *) mem_45354)[phys_tid_33545 +
                                                       (defunc_0_f_res_33770 +
                                                        i_46652) *
                                                       (num_groups_33660 *
                                                        segmap_group_sizze_33659)] =
                            ((__global float *) mem_45340)[phys_tid_33545 +
                                                           num_groups_33660 *
                                                           segmap_group_sizze_33659 *
                                                           write_iter_44348 +
                                                           i_46652 *
                                                           (num_groups_33660 *
                                                            segmap_group_sizze_33659)];
                    }
                }
            }
            // Step 4c: copy the scratch columns to the result buffers, which
            // are indexed as i * m_27772 + gtid_33544.
            // NOTE(review): these copies run to iota32_arg_28203 while the
            // NAN fill above covered iota32_arg_28233 entries - presumably
            // equal sizes; confirm.
            for (int64_t i_46653 = 0; i_46653 < iota32_arg_28203; i_46653++) {
                ((__global float *) mem_45369)[i_46653 * m_27772 + gtid_33544] =
                    ((__global float *) mem_45354)[phys_tid_33545 + i_46653 *
                                                   (num_groups_33660 *
                                                    segmap_group_sizze_33659)];
            }
            for (int64_t i_46654 = 0; i_46654 < iota32_arg_28203; i_46654++) {
                ((__global float *) mem_45372)[i_46654 * m_27772 + gtid_33544] =
                    ((__global float *) mem_45340)[phys_tid_33545 + i_46654 *
                                                   (num_groups_33660 *
                                                    segmap_group_sizze_33659)];
            }
            // Per-row scalar results.
            ((__global int32_t *) mem_45374)[gtid_33544] = fst_breakzq_33754;
            ((__global float *) mem_45376)[gtid_33544] = defunc_0_f_res_33734;
        }
        // Work-group barrier before the next virtual group is processed, so
        // the scratch columns are not reused until the group has synchronised.
        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    }
    
    // Common exit; also the target of the failure gotos above.
  error_0:
    return;
    #undef segmap_group_sizze_33659
}
// Scatter kernel over an m_27772 x iota32_arg_28233 index space.
//
// Each work-item splits its flat id into (row, col), loads one float from
// mem_45416 and, when the destination column it computes is in range, stores
// that float into mem_45424[row * iota32_arg_28233 + dest].
//
// For col < mem_45399[row] the destination is
//   defunc_4_map_res_mem_45179[row * N_27771 + start + col] - n_27775
// with start = defunc_3_map_res_mem_45245[row]; for every other col it is -1,
// which is never written. A source index outside [0, N_27771) tries to store
// failure code 78 in *global_failure (recording the index and N_27771 in
// global_failure_args when it wins the exchange) and ends the work-item.
__kernel void mainDetailedzisegmap_33893(__global int *global_failure,
                                         int failure_is_an_option,
                                         __global int64_t *global_failure_args,
                                         int64_t N_27771, int64_t m_27772,
                                         int32_t n_27775,
                                         int64_t iota32_arg_28203,
                                         int64_t iota32_arg_28233,
                                         __global unsigned char *defunc_4_map_res_mem_45179,
                                         __global unsigned char *defunc_3_map_res_mem_45245,
                                         __global unsigned char *mem_45399,
                                         __global unsigned char *mem_45416,
                                         __global unsigned char *mem_45424)
{
    // Do nothing once a failure code (>= 0) is already present.
    if (*global_failure >= 0)
        return;

    const int32_t group_idx = get_group_id(0);
    const int32_t lane_idx = get_local_id(0);
    const int64_t flat_id = sext_i32_i64(group_idx) *
        (mainDetailedzisegmap_group_sizze_33896) + sext_i32_i64(lane_idx);

    // Quotient and remainder of the flat id by the row width.
    const int64_t row = squot64(flat_id, iota32_arg_28233);
    const int64_t col = flat_id - row * iota32_arg_28233;

    if (!(slt64(row, m_27772) && slt64(col, iota32_arg_28233)))
        return;

    const int32_t active_len = ((__global int32_t *) mem_45399)[row];
    const int32_t col32 = sext_i64_i32(col);

    // mem_45416 is addressed by re-flattening (row, col) and splitting the
    // result again by iota32_arg_28203.
    const int64_t src_flat = col + iota32_arg_28233 * row;
    const int64_t src_row = squot64(src_flat, iota32_arg_28203);
    const int64_t src_col = src_flat - iota32_arg_28203 * src_row;
    const float payload =
        ((__global float *) mem_45416)[src_row * iota32_arg_28203 + src_col];

    // -1 marks "no destination"; it fails the range test before the store.
    int32_t dest32 = -1;

    if (slt32(col32, active_len)) {
        const int32_t start =
            ((__global int32_t *) defunc_3_map_res_mem_45245)[row];
        const int64_t src_idx = sext_i32_i64(add32(start, col32));
        const bool src_in_range = sle64((int64_t) 0, src_idx) &&
            slt64(src_idx, N_27771);

        if (!src_in_range) {
            // Only the work-item that wins the exchange fills in the
            // failure arguments.
            if (atomic_cmpxchg_i32_global(global_failure, -1, 78) == -1) {
                global_failure_args[0] = src_idx;
                global_failure_args[1] = N_27771;
            }
            return;
        }

        const int32_t raw_target =
            ((__global int32_t *) defunc_4_map_res_mem_45179)[row * N_27771 +
                                                              src_idx];

        dest32 = sub32(raw_target, n_27775);
    }

    const int64_t dest = sext_i32_i64(dest32);
    const bool row_in_range = sle64((int64_t) 0, row) && slt64(row, m_27772);
    const bool dest_in_range = sle64((int64_t) 0, dest) &&
        slt64(dest, iota32_arg_28233);

    if (row_in_range && dest_in_range)
        ((__global float *) mem_45424)[row * iota32_arg_28233 + dest] = payload;
}
__kernel void mainDetailedzisegmap_33950(__global int *global_failure,
                                         int failure_is_an_option, __global
                                         int64_t *global_failure_args,
                                         int64_t N_27771, int64_t m_27772,
                                         int32_t n_27775, __global
                                         unsigned char *defunc_4_map_res_mem_45179,
                                         __global
                                         unsigned char *defunc_3_map_res_mem_45245,
                                         __global unsigned char *mem_45399,
                                         __global unsigned char *mem_45409,
                                         __global unsigned char *mem_45411,
                                         __global unsigned char *mem_45413,
                                         __global unsigned char *mem_45419,
                                         __global unsigned char *mem_45421)
{
    #define segmap_group_sizze_34175 (mainDetailedzisegmap_group_sizze_33952)
    
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_46848;
    int32_t local_tid_46849;
    int64_t group_sizze_46852;
    int32_t wave_sizze_46851;
    int32_t group_tid_46850;
    
    global_tid_46848 = get_global_id(0);
    local_tid_46849 = get_local_id(0);
    group_sizze_46852 = get_local_size(0);
    wave_sizze_46851 = LOCKSTEP_WIDTH;
    group_tid_46850 = get_group_id(0);
    
    int32_t phys_tid_33950;
    
    phys_tid_33950 = global_tid_46848;
    
    int64_t gtid_33949;
    
    gtid_33949 = sext_i32_i64(group_tid_46850) * segmap_group_sizze_34175 +
        sext_i32_i64(local_tid_46849);
    if (slt64(gtid_33949, m_27772)) {
        int32_t x_34179 = ((__global
                            int32_t *) defunc_3_map_res_mem_45245)[gtid_33949];
        int32_t y_34181 = ((__global int32_t *) mem_45399)[gtid_33949];
        bool defunc_0_f_res_34182 = ((__global bool *) mem_45409)[gtid_33949];
        bool cond_34185 = y_34181 == 0;
        float defunc_0_f_res_34186;
        
        if (cond_34185) {
            defunc_0_f_res_34186 = 0.0F;
        } else {
            float defunc_0_f_res_34184 = ((__global
                                           float *) mem_45413)[gtid_33949];
            float i32_res_34187 = sitofp_i32_f32(y_34181);
            float defunc_0_f_res_f_res_34188 = defunc_0_f_res_34184 /
                  i32_res_34187;
            
            defunc_0_f_res_34186 = defunc_0_f_res_f_res_34188;
        }
        
        bool cond_34189 = !defunc_0_f_res_34182;
        int32_t fst_breakzq_34190;
        
        if (cond_34189) {
            fst_breakzq_34190 = -1;
        } else {
            int32_t defunc_0_f_res_34183 = ((__global
                                             int32_t *) mem_45411)[gtid_33949];
            bool cond_34191 = slt32(defunc_0_f_res_34183, y_34181);
            int32_t adjustValInds_res_34192;
            
            if (cond_34191) {
                int32_t i_34193 = add32(x_34179, defunc_0_f_res_34183);
                int64_t i_34194 = sext_i32_i64(i_34193);
                bool x_34195 = sle64((int64_t) 0, i_34194);
                bool y_34196 = slt64(i_34194, N_27771);
                bool bounds_check_34197 = x_34195 && y_34196;
                bool index_certs_34198;
                
                if (!bounds_check_34197) {
                    {
                        if (atomic_cmpxchg_i32_global(global_failure, -1, 77) ==
                            -1) {
                            global_failure_args[0] = i_34194;
                            global_failure_args[1] = N_27771;
                            ;
                        }
                        return;
                    }
                }
                
                int32_t x_34199 = ((__global
                                    int32_t *) defunc_4_map_res_mem_45179)[gtid_33949 *
                                                                           N_27771 +
                                                                           i_34194];
                int32_t adjustValInds_res_t_res_34200 = sub32(x_34199, n_27775);
                
                adjustValInds_res_34192 = adjustValInds_res_t_res_34200;
            } else {
                adjustValInds_res_34192 = -1;
            }
            fst_breakzq_34190 = adjustValInds_res_34192;
        }
        
        bool cond_34201 = sle32(x_34179, 5);
        bool cond_f_res_34202 = sle32(y_34181, 5);
        bool x_34203 = !cond_34201;
        bool y_34204 = cond_f_res_34202 && x_34203;
        bool cond_34205 = cond_34201 || y_34204;
        int32_t fst_breakzq_34206;
        
        if (cond_34205) {
            fst_breakzq_34206 = -2;
        } else {
            fst_breakzq_34206 = fst_breakzq_34190;
        }
        ((__global int32_t *) mem_45419)[gtid_33949] = fst_breakzq_34206;
        ((__global float *) mem_45421)[gtid_33949] = defunc_0_f_res_34186;
    }
    
  error_0:
    return;
    #undef segmap_group_sizze_34175
}
// Element-wise map over m_27772 elements, one work-item per element:
//   mem_45406[i] = defunc_3_map_res_mem_45246[i] (read as f32)
//                  * sqrt(f32(defunc_3_map_res_mem_45245[i] (read as i32)))
// The element index is group_id * mainDetailedzisegmap_group_sizze_34028 +
// local_id; work-items whose index is not below m_27772 do nothing.
// The kernel is a no-op when *global_failure already holds a failure code
// (any value >= 0).
__kernel void mainDetailedzisegmap_34026(__global int *global_failure,
                                         int64_t m_27772, __global
                                         unsigned char *defunc_3_map_res_mem_45245,
                                         __global
                                         unsigned char *defunc_3_map_res_mem_45246,
                                         __global unsigned char *mem_45406)
{
    // An earlier failure has been recorded: skip all work.
    if (*global_failure >= 0)
        return;
    
    // Narrow the ids to 32 bits first, exactly as the sign-extension helper
    // expects, then widen for the 64-bit index arithmetic.
    const int32_t group_id = get_group_id(0);
    const int32_t lane_id = get_local_id(0);
    const int64_t elem = sext_i32_i64(group_id) *
        (mainDetailedzisegmap_group_sizze_34028) + sext_i32_i64(lane_id);
    
    // Surplus work-items in the last group have nothing to do.
    if (!slt64(elem, m_27772))
        return;
    
    // Typed views of the untyped byte buffers.
    __global const int32_t *counts =
        (__global const int32_t *) defunc_3_map_res_mem_45245;
    __global const float *factors =
        (__global const float *) defunc_3_map_res_mem_45246;
    __global float *result = (__global float *) mem_45406;
    
    const float root = futrts_sqrt32(sitofp_i32_f32(counts[elem]));
    
    result[elem] = factors[elem] * root;
}
// Element-wise i32 subtraction over m_27772 elements, one work-item per
// element:
//   mem_45399[i] = sub32(defunc_4_map_res_mem_45177[i],
//                        defunc_3_map_res_mem_45245[i])
// The element index is group_id * mainDetailedzisegmap_group_sizze_34078 +
// local_id; work-items whose index is not below m_27772 do nothing.
// The kernel is a no-op when *global_failure already holds a failure code
// (any value >= 0).
__kernel void mainDetailedzisegmap_34076(__global int *global_failure,
                                         int64_t m_27772, __global
                                         unsigned char *defunc_4_map_res_mem_45177,
                                         __global
                                         unsigned char *defunc_3_map_res_mem_45245,
                                         __global unsigned char *mem_45399)
{
    // An earlier failure has been recorded: skip all work.
    if (*global_failure >= 0)
        return;
    
    // Narrow the ids to 32 bits first, exactly as the sign-extension helper
    // expects, then widen for the 64-bit index arithmetic.
    const int32_t group_id = get_group_id(0);
    const int32_t lane_id = get_local_id(0);
    const int64_t elem = sext_i32_i64(group_id) *
        (mainDetailedzisegmap_group_sizze_34078) + sext_i32_i64(lane_id);
    
    // Surplus work-items in the last group have nothing to do.
    if (!slt64(elem, m_27772))
        return;
    
    // Typed views of the untyped byte buffers.
    __global const int32_t *minuend =
        (__global const int32_t *) defunc_4_map_res_mem_45177;
    __global const int32_t *subtrahend =
        (__global const int32_t *) defunc_3_map_res_mem_45245;
    __global int32_t *result = (__global int32_t *) mem_45399;
    
    result[elem] = sub32(minuend[elem], subtrahend[elem]);
}
// Intra-group kernel: each work-group handles one outer index
// (gtid_30619 = group id) and each work-item owns the local-memory element
// at its local id.
//
// Phases, in the order they appear below:
//   1. Fill mem_44553 (local). The local id is split into
//      (row, col) = (id sdiv m_27918, id smod m_27918). When
//      col < k2p2zq_27785 the work-item loads the bounds-checked element
//      [row, col] of this group's i32_res_27787 x i32_res_27787 slice of
//      defunc_3_map_res_mem_44549; otherwise it produces 1.0F where
//      col == k2p2zq_27785 + row and 0.0F elsewhere.
//   2. Run k2p2zq_27785 sequential steps. Each step computes one new value
//      per work-item from mem_44553 into mem_44563 and then copies it back
//      into mem_44553, with barriers separating the reads from the writes.
//      NOTE(review): phases 1-2 look like Gauss-Jordan elimination on an
//      augmented [A | I] matrix -- confirm against the Futhark source.
//   3. Copy i32_res_27787 * i32_res_27787 values from mem_44553 (starting at
//      offset i32_res_27787, row stride i32_res_27935) into this group's
//      slice of mem_44573 (global), strided over the work-items by nm_27920.
//
// Failure protocol: a failed bounds check tries to swap *global_failure from
// -1 to a failure code (31..36 here), stores the offending values in
// global_failure_args when the swap returns -1, sets the group-shared
// local_failure flag and jumps to the next error label, where the whole
// group synchronises and returns.
__kernel void mainDetailedzisegmap_intragroup_30688(__global
                                                    int *global_failure,
                                                    int failure_is_an_option,
                                                    __global
                                                    int64_t *global_failure_args,
                                                    __local volatile
                                                    int64_t *mem_44563_backing_aligned_0,
                                                    __local volatile
                                                    int64_t *mem_44553_backing_aligned_1,
                                                    int32_t k2p2zq_27785,
                                                    int64_t i32_res_27787,
                                                    int32_t m_27918,
                                                    int64_t nm_27920,
                                                    int64_t i32_res_27935,
                                                    __global
                                                    unsigned char *defunc_3_map_res_mem_44549,
                                                    __global
                                                    unsigned char *mem_44573)
{
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    // Byte-addressed views of the two local-memory backing buffers.
    __local volatile char *restrict mem_44563_backing_1 = (__local volatile
                                                           char *) mem_44563_backing_aligned_0;
    __local volatile char *restrict mem_44553_backing_0 = (__local volatile
                                                           char *) mem_44553_backing_aligned_1;
    // Group-shared flag: set by any work-item that fails a bounds check.
    volatile __local bool local_failure;
    
    // Bail out early if a failure code has already been recorded; the check
    // is only performed when the host passes failure_is_an_option != 0.
    if (failure_is_an_option) {
        int failed = *global_failure >= 0;
        
        if (failed)
            return;
    }
    local_failure = false;
    barrier(CLK_LOCAL_MEM_FENCE);
    
    int32_t global_tid_45832;
    int32_t local_tid_45833;
    int64_t group_sizze_45836;
    int32_t wave_sizze_45835;
    int32_t group_tid_45834;
    
    global_tid_45832 = get_global_id(0);
    local_tid_45833 = get_local_id(0);
    group_sizze_45836 = get_local_size(0);
    wave_sizze_45835 = LOCKSTEP_WIDTH;
    group_tid_45834 = get_group_id(0);
    
    int32_t phys_tid_30688;
    
    phys_tid_30688 = group_tid_45834;
    
    int32_t ltid_pre_45837;
    
    ltid_pre_45837 = local_tid_45833;
    
    // Outer index handled by this work-group.
    int64_t gtid_30619;
    
    gtid_30619 = sext_i32_i64(group_tid_45834);
    
    __local char *mem_44553;
    
    mem_44553 = (__local char *) mem_44553_backing_0;
    
    // ---- Phase 1: fill mem_44553, one element per work-item. ----
    int64_t gtid_30622 = sext_i32_i64(ltid_pre_45837);
    int32_t phys_tid_30623 = local_tid_45833;
    int32_t index_primexp_42354 = sext_i64_i32(gtid_30622);
    // (row, col) of this work-item's element for a row length of m_27918.
    int32_t defunc_0_f_res_30869 = sdiv32(index_primexp_42354, m_27918);
    int32_t defunc_0_f_res_30870 = smod32(index_primexp_42354, m_27918);
    bool cond_30871 = slt32(defunc_0_f_res_30870, k2p2zq_27785);
    float defunc_0_f_res_30872;
    
    if (cond_30871) {
        // col < k2p2zq_27785: load input element [row, col]; both indices
        // must lie in [0, i32_res_27787).
        int64_t i_30873 = sext_i32_i64(defunc_0_f_res_30869);
        bool x_30874 = sle64((int64_t) 0, i_30873);
        bool y_30875 = slt64(i_30873, i32_res_27787);
        bool bounds_check_30876 = x_30874 && y_30875;
        int64_t j_30877 = sext_i32_i64(defunc_0_f_res_30870);
        bool x_30878 = sle64((int64_t) 0, j_30877);
        bool y_30879 = slt64(j_30877, i32_res_27787);
        bool bounds_check_30880 = x_30878 && y_30879;
        bool index_ok_30881 = bounds_check_30876 && bounds_check_30880;
        bool index_certs_30882;
        
        if (!index_ok_30881) {
            {
                // Failure code 31: record both indices and both extents.
                if (atomic_cmpxchg_i32_global(global_failure, -1, 31) == -1) {
                    global_failure_args[0] = i_30873;
                    global_failure_args[1] = j_30877;
                    global_failure_args[2] = i32_res_27787;
                    global_failure_args[3] = i32_res_27787;
                    ;
                }
                local_failure = true;
                goto error_0;
            }
        }
        
        float defunc_0_f_res_t_res_30883 = ((__global
                                             float *) defunc_3_map_res_mem_44549)[gtid_30619 *
                                                                                  (i32_res_27787 *
                                                                                   i32_res_27787) +
                                                                                  i_30873 *
                                                                                  i32_res_27787 +
                                                                                  j_30877];
        
        defunc_0_f_res_30872 = defunc_0_f_res_t_res_30883;
    } else {
        // col >= k2p2zq_27785: 1.0F where col == k2p2zq_27785 + row, else 0.0F.
        int32_t y_30884 = add32(k2p2zq_27785, defunc_0_f_res_30869);
        bool cond_30885 = defunc_0_f_res_30870 == y_30884;
        float defunc_0_f_res_f_res_30886;
        
        if (cond_30885) {
            defunc_0_f_res_f_res_30886 = 1.0F;
        } else {
            defunc_0_f_res_f_res_30886 = 0.0F;
        }
        defunc_0_f_res_30872 = defunc_0_f_res_f_res_30886;
    }
    ((__local float *) mem_44553)[gtid_30622] = defunc_0_f_res_30872;
    
  error_0:
    // Every work-item reaches this barrier (failing ones via the goto), so
    // the whole group sees local_failure and can return together.
    barrier(CLK_LOCAL_MEM_FENCE);
    if (local_failure)
        return;
    barrier(CLK_LOCAL_MEM_FENCE);
    
    __local char *mem_44563;
    
    mem_44563 = (__local char *) mem_44563_backing_1;
    // ---- Phase 2: k2p2zq_27785 sequential update steps. ----
    for (int32_t i_30888 = 0; i_30888 < k2p2zq_27785; i_30888++) {
        int64_t i32_res_30890 = sext_i32_i64(i_30888);
        bool x_30891 = sle64((int64_t) 0, i32_res_30890);
        bool y_30892 = slt64(i32_res_30890, nm_27920);
        bool bounds_check_30893 = x_30891 && y_30892;
        bool index_certs_30894;
        
        if (!bounds_check_30893) {
            {
                // Failure code 32: step index outside [0, nm_27920).
                if (atomic_cmpxchg_i32_global(global_failure, -1, 32) == -1) {
                    global_failure_args[0] = i32_res_30890;
                    global_failure_args[1] = nm_27920;
                    ;
                }
                local_failure = true;
                goto error_1;
            }
        }
        
        // v1 is element i of the current array; it is the divisor used
        // below, and a zero value selects the copy-through branch.
        float v1_30895 = ((__local float *) mem_44553)[i32_res_30890];
        bool cond_30896 = v1_30895 == 0.0F;
        int64_t gtid_30643 = sext_i32_i64(ltid_pre_45837);
        int32_t phys_tid_30644 = local_tid_45833;
        int32_t defunc_0_f_res_30899 = sext_i64_i32(gtid_30643);
        // (row, col) of this work-item's element, as in phase 1.
        int32_t defunc_0_f_res_30900 = sdiv32(defunc_0_f_res_30899, m_27918);
        int32_t defunc_0_f_res_30901 = smod32(defunc_0_f_res_30899, m_27918);
        float defunc_0_f_res_30902;
        
        if (cond_30896) {
            // v1 == 0: take the value at [row * m_27918 + col] unchanged.
            int32_t x_30903 = mul32(m_27918, defunc_0_f_res_30900);
            int32_t i32_arg_30904 = add32(defunc_0_f_res_30901, x_30903);
            int64_t i32_res_30905 = sext_i32_i64(i32_arg_30904);
            bool x_30906 = sle64((int64_t) 0, i32_res_30905);
            bool y_30907 = slt64(i32_res_30905, nm_27920);
            bool bounds_check_30908 = x_30906 && y_30907;
            bool index_certs_30909;
            
            if (!bounds_check_30908) {
                {
                    // Failure code 33.
                    if (atomic_cmpxchg_i32_global(global_failure, -1, 33) ==
                        -1) {
                        global_failure_args[0] = i32_res_30905;
                        global_failure_args[1] = nm_27920;
                        ;
                    }
                    local_failure = true;
                    goto error_1;
                }
            }
            
            float defunc_0_f_res_t_res_30910 = ((__local
                                                 float *) mem_44553)[i32_res_30905];
            
            defunc_0_f_res_30902 = defunc_0_f_res_t_res_30910;
        } else {
            // v1 != 0: x_30917 = element [col] (row 0) divided by v1.
            int64_t i32_res_30911 = sext_i32_i64(defunc_0_f_res_30901);
            bool x_30912 = sle64((int64_t) 0, i32_res_30911);
            bool y_30913 = slt64(i32_res_30911, nm_27920);
            bool bounds_check_30914 = x_30912 && y_30913;
            bool index_certs_30915;
            
            if (!bounds_check_30914) {
                {
                    // Failure code 34.
                    if (atomic_cmpxchg_i32_global(global_failure, -1, 34) ==
                        -1) {
                        global_failure_args[0] = i32_res_30911;
                        global_failure_args[1] = nm_27920;
                        ;
                    }
                    local_failure = true;
                    goto error_1;
                }
            }
            
            float x_30916 = ((__local float *) mem_44553)[i32_res_30911];
            float x_30917 = x_30916 / v1_30895;
            int32_t y_30918 = sub32(k2p2zq_27785, 1);
            bool cond_30919 = slt32(defunc_0_f_res_30900, y_30918);
            float defunc_0_f_res_f_res_30920;
            
            if (cond_30919) {
                // row < k2p2zq_27785 - 1: new value is
                //   [(row + 1) * m + col] - x_30917 * [(row + 1) * m + i].
                int32_t x_30921 = add32(1, defunc_0_f_res_30900);
                int32_t x_30922 = mul32(m_27918, x_30921);
                int32_t i32_arg_30923 = add32(defunc_0_f_res_30901, x_30922);
                int64_t i32_res_30924 = sext_i32_i64(i32_arg_30923);
                bool x_30925 = sle64((int64_t) 0, i32_res_30924);
                bool y_30926 = slt64(i32_res_30924, nm_27920);
                bool bounds_check_30927 = x_30925 && y_30926;
                bool index_certs_30928;
                
                if (!bounds_check_30927) {
                    {
                        // Failure code 35.
                        if (atomic_cmpxchg_i32_global(global_failure, -1, 35) ==
                            -1) {
                            global_failure_args[0] = i32_res_30924;
                            global_failure_args[1] = nm_27920;
                            ;
                        }
                        local_failure = true;
                        goto error_1;
                    }
                }
                
                float x_30929 = ((__local float *) mem_44553)[i32_res_30924];
                int32_t i32_arg_30930 = add32(i_30888, x_30922);
                int64_t i32_res_30931 = sext_i32_i64(i32_arg_30930);
                bool x_30932 = sle64((int64_t) 0, i32_res_30931);
                bool y_30933 = slt64(i32_res_30931, nm_27920);
                bool bounds_check_30934 = x_30932 && y_30933;
                bool index_certs_30935;
                
                if (!bounds_check_30934) {
                    {
                        // Failure code 36.
                        if (atomic_cmpxchg_i32_global(global_failure, -1, 36) ==
                            -1) {
                            global_failure_args[0] = i32_res_30931;
                            global_failure_args[1] = nm_27920;
                            ;
                        }
                        local_failure = true;
                        goto error_1;
                    }
                }
                
                float x_30936 = ((__local float *) mem_44553)[i32_res_30931];
                float y_30937 = x_30917 * x_30936;
                float defunc_0_f_res_f_res_t_res_30938 = x_30929 - y_30937;
                
                defunc_0_f_res_f_res_30920 = defunc_0_f_res_f_res_t_res_30938;
            } else {
                // Last row (row >= k2p2zq_27785 - 1): keep the scaled value.
                defunc_0_f_res_f_res_30920 = x_30917;
            }
            defunc_0_f_res_30902 = defunc_0_f_res_f_res_30920;
        }
        // Results go to the second buffer so that other work-items can still
        // read this step's inputs from mem_44553.
        ((__local float *) mem_44563)[gtid_30643] = defunc_0_f_res_30902;
        
      error_1:
        // Group-wide rendezvous: all work-items leave together on failure.
        barrier(CLK_LOCAL_MEM_FENCE);
        if (local_failure)
            return;
        barrier(CLK_LOCAL_MEM_FENCE);
        
        // Copy this work-item's new value back into mem_44553 for the next
        // step, guarded by the [0, nm_27920) range.
        int64_t write_i_30686 = sext_i32_i64(ltid_pre_45837);
        int32_t phys_tid_30687 = local_tid_45833;
        float write_value_30941 = ((__local float *) mem_44563)[write_i_30686];
        
        if (sle64((int64_t) 0, write_i_30686) && slt64(write_i_30686,
                                                       nm_27920)) {
            ((__local float *) mem_44553)[write_i_30686] = write_value_30941;
        }
        barrier(CLK_LOCAL_MEM_FENCE);
    }
    // ---- Phase 3: write the result slice to global memory. ----
    // Work-item t handles flat indices f = i * nm_27920 + t. With
    // r = squot64(f, i32_res_27787) and c = f - r * i32_res_27787, it stores
    // mem_44553[i32_res_27787 + r * i32_res_27935 + c] into element [r, c] of
    // this group's slice of mem_44573. The trip count is
    // sdiv_up64(i32_res_27787 * i32_res_27787 - t, nm_27920); sdiv_up64 is
    // defined elsewhere (presumably a rounding-up division).
    for (int64_t i_45839 = 0; i_45839 < sdiv_up64(i32_res_27787 *
                                                  i32_res_27787 -
                                                  sext_i32_i64(local_tid_45833),
                                                  nm_27920); i_45839++) {
        ((__global float *) mem_44573)[gtid_30619 * (i32_res_27787 *
                                                     i32_res_27787) +
                                       squot64(i_45839 * nm_27920 +
                                               sext_i32_i64(local_tid_45833),
                                               i32_res_27787) * i32_res_27787 +
                                       (i_45839 * nm_27920 +
                                        sext_i32_i64(local_tid_45833) -
                                        squot64(i_45839 * nm_27920 +
                                                sext_i32_i64(local_tid_45833),
                                                i32_res_27787) *
                                        i32_res_27787)] = ((__local
                                                            float *) mem_44553)[i32_res_27787 +
                                                                                (squot64(i_45839 *
                                                                                         nm_27920 +
                                                                                         sext_i32_i64(local_tid_45833),
                                                                                         i32_res_27787) *
                                                                                 i32_res_27935 +
                                                                                 (i_45839 *
                                                                                  nm_27920 +
                                                                                  sext_i32_i64(local_tid_45833) -
                                                                                  squot64(i_45839 *
                                                                                          nm_27920 +
                                                                                          sext_i32_i64(local_tid_45833),
                                                                                          i32_res_27787) *
                                                                                  i32_res_27787))];
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    
  error_3:
    return;
}
// Intra-group kernel: each work-group handles one outer index
// (gtid_30967 = group id) and each work-item produces one element.
//
// Input elements are read from mem_param_44585 (global) at
//   ctx_param_ext_44580 + gtid_30967 * ctx_param_ext_44581
//                       + idx * ctx_param_ext_44583.
// With (row, col) = (id sdiv m_27918, id smod m_27918) and
// v1 = input[i32_res_31556], each work-item computes:
//   v1 == 0                : input[row * m + col]
//   row < k2p2zq_27785 - 1 : input[(row + 1) * m + col]
//                            - (input[col] / v1) * input[(row + 1) * m + i_31554]
//   otherwise              : input[col] / v1
// NOTE(review): this looks like a single Gauss-Jordan elimination step with
// the step index supplied by the host -- confirm against the Futhark source.
//
// The value is staged in local memory (mem_44594), written to global
// mem_44590 at [gtid_30967 + id * m_27772] when id is in [0, nm_27920), and
// finally work-item 0 copies the group's nm_27920 values from mem_44590 into
// mem_44598 at [gtid_30967 * nm_27920 + i].
//
// Failure protocol: a failed bounds check tries to swap *global_failure from
// -1 to a failure code (38..41 here), stores the offending index and bound in
// global_failure_args when the swap returns -1, sets the group-shared
// local_failure flag and jumps to error_0, where the whole group
// synchronises and returns.
__kernel void mainDetailedzisegmap_intragroup_31015(__global
                                                    int *global_failure,
                                                    int failure_is_an_option,
                                                    __global
                                                    int64_t *global_failure_args,
                                                    __local volatile
                                                    int64_t *mem_44594_backing_aligned_0,
                                                    int64_t m_27772,
                                                    int32_t k2p2zq_27785,
                                                    int32_t m_27918,
                                                    int64_t nm_27920,
                                                    int32_t i_31554,
                                                    int64_t i32_res_31556,
                                                    int64_t ctx_param_ext_44580,
                                                    int64_t ctx_param_ext_44581,
                                                    int64_t ctx_param_ext_44583,
                                                    __global
                                                    unsigned char *mem_param_44585,
                                                    __global
                                                    unsigned char *mem_44590,
                                                    __global
                                                    unsigned char *mem_44598)
{
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    // Byte-addressed view of the local-memory backing buffer.
    __local volatile char *restrict mem_44594_backing_0 = (__local volatile
                                                           char *) mem_44594_backing_aligned_0;
    // Group-shared flag: set by any work-item that fails a bounds check.
    volatile __local bool local_failure;
    
    // Bail out early if a failure code has already been recorded; the check
    // is only performed when the host passes failure_is_an_option != 0.
    if (failure_is_an_option) {
        int failed = *global_failure >= 0;
        
        if (failed)
            return;
    }
    local_failure = false;
    barrier(CLK_LOCAL_MEM_FENCE);
    
    int32_t global_tid_45866;
    int32_t local_tid_45867;
    int64_t group_sizze_45870;
    int32_t wave_sizze_45869;
    int32_t group_tid_45868;
    
    global_tid_45866 = get_global_id(0);
    local_tid_45867 = get_local_id(0);
    group_sizze_45870 = get_local_size(0);
    wave_sizze_45869 = LOCKSTEP_WIDTH;
    group_tid_45868 = get_group_id(0);
    
    int32_t phys_tid_31015;
    
    phys_tid_31015 = group_tid_45868;
    
    int32_t ltid_pre_45871;
    
    ltid_pre_45871 = local_tid_45867;
    
    // Outer index handled by this work-group.
    int64_t gtid_30967;
    
    gtid_30967 = sext_i32_i64(group_tid_45868);
    
    // v1 is input element i32_res_31556; it is the divisor used below, and a
    // zero value selects the copy-through branch.
    float v1_31573 = ((__global float *) mem_param_44585)[ctx_param_ext_44580 +
                                                          (gtid_30967 *
                                                           ctx_param_ext_44581 +
                                                           i32_res_31556 *
                                                           ctx_param_ext_44583)];
    bool cond_31574 = v1_31573 == 0.0F;
    __local char *mem_44594;
    
    mem_44594 = (__local char *) mem_44594_backing_0;
    
    int64_t gtid_30970 = sext_i32_i64(ltid_pre_45871);
    int32_t phys_tid_30971 = local_tid_45867;
    int32_t defunc_0_f_res_31577 = sext_i64_i32(gtid_30970);
    // (row, col) of this work-item's element for a row length of m_27918.
    int32_t defunc_0_f_res_31578 = sdiv32(defunc_0_f_res_31577, m_27918);
    int32_t defunc_0_f_res_31579 = smod32(defunc_0_f_res_31577, m_27918);
    float defunc_0_f_res_31580;
    
    if (cond_31574) {
        // v1 == 0: take input[row * m_27918 + col] unchanged.
        int32_t x_31581 = mul32(m_27918, defunc_0_f_res_31578);
        int32_t i32_arg_31582 = add32(defunc_0_f_res_31579, x_31581);
        int64_t i32_res_31583 = sext_i32_i64(i32_arg_31582);
        bool x_31584 = sle64((int64_t) 0, i32_res_31583);
        bool y_31585 = slt64(i32_res_31583, nm_27920);
        bool bounds_check_31586 = x_31584 && y_31585;
        bool index_certs_31587;
        
        if (!bounds_check_31586) {
            {
                // Failure code 38.
                if (atomic_cmpxchg_i32_global(global_failure, -1, 38) == -1) {
                    global_failure_args[0] = i32_res_31583;
                    global_failure_args[1] = nm_27920;
                    ;
                }
                local_failure = true;
                goto error_0;
            }
        }
        
        float defunc_0_f_res_t_res_31588 = ((__global
                                             float *) mem_param_44585)[ctx_param_ext_44580 +
                                                                       (gtid_30967 *
                                                                        ctx_param_ext_44581 +
                                                                        i32_res_31583 *
                                                                        ctx_param_ext_44583)];
        
        defunc_0_f_res_31580 = defunc_0_f_res_t_res_31588;
    } else {
        // v1 != 0: x_31595 = input[col] / v1.
        int64_t i32_res_31589 = sext_i32_i64(defunc_0_f_res_31579);
        bool x_31590 = sle64((int64_t) 0, i32_res_31589);
        bool y_31591 = slt64(i32_res_31589, nm_27920);
        bool bounds_check_31592 = x_31590 && y_31591;
        bool index_certs_31593;
        
        if (!bounds_check_31592) {
            {
                // Failure code 39.
                if (atomic_cmpxchg_i32_global(global_failure, -1, 39) == -1) {
                    global_failure_args[0] = i32_res_31589;
                    global_failure_args[1] = nm_27920;
                    ;
                }
                local_failure = true;
                goto error_0;
            }
        }
        
        float x_31594 = ((__global
                          float *) mem_param_44585)[ctx_param_ext_44580 +
                                                    (gtid_30967 *
                                                     ctx_param_ext_44581 +
                                                     i32_res_31589 *
                                                     ctx_param_ext_44583)];
        float x_31595 = x_31594 / v1_31573;
        int32_t y_31596 = sub32(k2p2zq_27785, 1);
        bool cond_31597 = slt32(defunc_0_f_res_31578, y_31596);
        float defunc_0_f_res_f_res_31598;
        
        if (cond_31597) {
            // row < k2p2zq_27785 - 1: new value is
            //   input[(row + 1) * m + col] - x_31595 * input[(row + 1) * m + i_31554].
            int32_t x_31599 = add32(1, defunc_0_f_res_31578);
            int32_t x_31600 = mul32(m_27918, x_31599);
            int32_t i32_arg_31601 = add32(defunc_0_f_res_31579, x_31600);
            int64_t i32_res_31602 = sext_i32_i64(i32_arg_31601);
            bool x_31603 = sle64((int64_t) 0, i32_res_31602);
            bool y_31604 = slt64(i32_res_31602, nm_27920);
            bool bounds_check_31605 = x_31603 && y_31604;
            bool index_certs_31606;
            
            if (!bounds_check_31605) {
                {
                    // Failure code 40.
                    if (atomic_cmpxchg_i32_global(global_failure, -1, 40) ==
                        -1) {
                        global_failure_args[0] = i32_res_31602;
                        global_failure_args[1] = nm_27920;
                        ;
                    }
                    local_failure = true;
                    goto error_0;
                }
            }
            
            float x_31607 = ((__global
                              float *) mem_param_44585)[ctx_param_ext_44580 +
                                                        (gtid_30967 *
                                                         ctx_param_ext_44581 +
                                                         i32_res_31602 *
                                                         ctx_param_ext_44583)];
            int32_t i32_arg_31608 = add32(i_31554, x_31600);
            int64_t i32_res_31609 = sext_i32_i64(i32_arg_31608);
            bool x_31610 = sle64((int64_t) 0, i32_res_31609);
            bool y_31611 = slt64(i32_res_31609, nm_27920);
            bool bounds_check_31612 = x_31610 && y_31611;
            bool index_certs_31613;
            
            if (!bounds_check_31612) {
                {
                    // Failure code 41.
                    if (atomic_cmpxchg_i32_global(global_failure, -1, 41) ==
                        -1) {
                        global_failure_args[0] = i32_res_31609;
                        global_failure_args[1] = nm_27920;
                        ;
                    }
                    local_failure = true;
                    goto error_0;
                }
            }
            
            float x_31614 = ((__global
                              float *) mem_param_44585)[ctx_param_ext_44580 +
                                                        (gtid_30967 *
                                                         ctx_param_ext_44581 +
                                                         i32_res_31609 *
                                                         ctx_param_ext_44583)];
            float y_31615 = x_31595 * x_31614;
            float defunc_0_f_res_f_res_t_res_31616 = x_31607 - y_31615;
            
            defunc_0_f_res_f_res_31598 = defunc_0_f_res_f_res_t_res_31616;
        } else {
            // Last row (row >= k2p2zq_27785 - 1): keep the scaled value.
            defunc_0_f_res_f_res_31598 = x_31595;
        }
        defunc_0_f_res_31580 = defunc_0_f_res_f_res_31598;
    }
    // Stage the result in local memory.
    ((__local float *) mem_44594)[gtid_30970] = defunc_0_f_res_31580;
    
  error_0:
    // Every work-item reaches this barrier (failing ones via the goto), so
    // the whole group sees local_failure and can return together.
    barrier(CLK_LOCAL_MEM_FENCE);
    if (local_failure)
        return;
    barrier(CLK_LOCAL_MEM_FENCE);
    
    // Write this work-item's value to global memory, guarded by the
    // [0, nm_27920) range.
    int64_t write_i_31013 = sext_i32_i64(ltid_pre_45871);
    int32_t phys_tid_31014 = local_tid_45867;
    float write_value_31619 = ((__local float *) mem_44594)[write_i_31013];
    
    if (sle64((int64_t) 0, write_i_31013) && slt64(write_i_31013, nm_27920)) {
        ((__global float *) mem_44590)[gtid_30967 + write_i_31013 * m_27772] =
            write_value_31619;
    }
    // Work-item 0 reads back the global-memory values written above by the
    // other work-items of this group. A local-only fence does not order
    // global memory operations, so the barrier must also carry
    // CLK_GLOBAL_MEM_FENCE for those writes to be visible to the copy below.
    barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
    if (local_tid_45867 == 0) {
        for (int64_t i_45872 = 0; i_45872 < nm_27920; i_45872++) {
            ((__global float *) mem_44598)[gtid_30967 * nm_27920 + i_45872] =
                ((__global float *) mem_44590)[gtid_30967 + i_45872 * m_27772];
        }
    }
    
  error_2:
    return;
}
__kernel void mainDetailedzisegmap_intragroup_32146(__global
                                                    int *global_failure,
                                                    __local volatile
                                                    int64_t *mem_45150_backing_aligned_0,
                                                    __local volatile
                                                    int64_t *mem_45148_backing_aligned_1,
                                                    __local volatile
                                                    int64_t *mem_45146_backing_aligned_2,
                                                    __local volatile
                                                    int64_t *mem_45144_backing_aligned_3,
                                                    int64_t N_27771,
                                                    int64_t i_28075, __global
                                                    unsigned char *images_mem_44381,
                                                    __global
                                                    unsigned char *defunc_3_map_res_mem_45140,
                                                    __global
                                                    unsigned char *mem_45153,
                                                    __global
                                                    unsigned char *mem_45156,
                                                    __global
                                                    unsigned char *mem_45159)
{
    const int block_dim0 = 0;
    const int block_dim1 = 1;
    const int block_dim2 = 2;
    __local volatile char *restrict mem_45150_backing_3 = (__local volatile
                                                           char *) mem_45150_backing_aligned_0;
    __local volatile char *restrict mem_45148_backing_2 = (__local volatile
                                                           char *) mem_45148_backing_aligned_1;
    __local volatile char *restrict mem_45146_backing_1 = (__local volatile
                                                           char *) mem_45146_backing_aligned_2;
    __local volatile char *restrict mem_45144_backing_0 = (__local volatile
                                                           char *) mem_45144_backing_aligned_3;
    
    if (*global_failure >= 0)
        return;
    
    int32_t global_tid_46248;
    int32_t local_tid_46249;
    int64_t group_sizze_46252;
    int32_t wave_sizze_46251;
    int32_t group_tid_46250;
    
    global_tid_46248 = get_global_id(0);
    local_tid_46249 = get_local_id(0);
    group_sizze_46252 = get_local_size(0);
    wave_sizze_46251 = LOCKSTEP_WIDTH;
    group_tid_46250 = get_group_id(0);
    
    int32_t phys_tid_32146;
    
    phys_tid_32146 = group_tid_46250;
    
    int32_t ltid_pre_46253;
    
    ltid_pre_46253 = local_tid_46249;
    
    int64_t gtid_32139;
    
    gtid_32139 = sext_i32_i64(group_tid_46250);
    
    __local char *mem_45144;
    
    mem_45144 = (__local char *) mem_45144_backing_0;
    
    __local char *mem_45146;
    
    mem_45146 = (__local char *) mem_45146_backing_1;
    
    int64_t gtid_32142 = sext_i32_i64(ltid_pre_46253);
    int32_t phys_tid_32143 = local_tid_46249;
    float x_32235 = ((__global float *) images_mem_44381)[gtid_32139 * N_27771 +
                                                          gtid_32142];
    bool isnan_res_32237;
    
    isnan_res_32237 = futrts_isnan32(x_32235);
    
    bool cond_32238 = !isnan_res_32237;
    float defunc_1_f_res_32239;
    
    if (cond_32238) {
        float x_32236 = ((__global
                          float *) defunc_3_map_res_mem_45140)[gtid_32139 *
                                                               N_27771 +
                                                               gtid_32142];
        float defunc_1_f_res_t_res_32240 = x_32235 - x_32236;
        
        defunc_1_f_res_32239 = defunc_1_f_res_t_res_32240;
    } else {
        defunc_1_f_res_32239 = NAN;
    }
    
    bool isnan_res_32241;
    
    isnan_res_32241 = futrts_isnan32(defunc_1_f_res_32239);
    
    bool defunc_0_p_res_32242 = !isnan_res_32241;
    int64_t defunc_0_f_res_32243 = btoi_bool_i64(defunc_0_p_res_32242);
    
    ((__local int64_t *) mem_45144)[gtid_32142] = defunc_0_f_res_32243;
    ((__local float *) mem_45146)[gtid_32142] = defunc_1_f_res_32239;
    barrier(CLK_LOCAL_MEM_FENCE);
    
    int64_t dims_flat_46254;
    
    dims_flat_46254 = N_27771;
    
    int64_t x_32232;
    int64_t x_32233;
    int64_t x_46256;
    int64_t x_46257;
    bool ltid_in_bounds_46259;
    
    ltid_in_bounds_46259 = slt64(sext_i32_i64(local_tid_46249), N_27771);
    
    int32_t skip_threads_46260;
    
    // read input for in-block scan
    {
        if (ltid_in_bounds_46259) {
            x_32233 = ((volatile __local
                        int64_t *) mem_45144)[sext_i32_i64(local_tid_46249)];
            if ((local_tid_46249 - squot32(local_tid_46249, 32) * 32) == 0) {
                x_32232 = x_32233;
            }
        }
    }
    // in-block scan (hopefully no barriers needed)
    {
        skip_threads_46260 = 1;
        while (slt32(skip_threads_46260, 32)) {
            if (sle32(skip_threads_46260, local_tid_46249 -
                      squot32(local_tid_46249, 32) * 32) &&
                ltid_in_bounds_46259) {
                // read operands
                {
                    x_32232 = ((volatile __local
                                int64_t *) mem_45144)[sext_i32_i64(local_tid_46249) -
                                                      sext_i32_i64(skip_threads_46260)];
                }
                // perform operation
                {
                    bool inactive_46261 =
                         slt64(srem64(sext_i32_i64(local_tid_46249), N_27771),
                               sext_i32_i64(local_tid_46249) -
                               sext_i32_i64(local_tid_46249 -
                               skip_threads_46260));
                    
                    if (inactive_46261) {
                        x_32232 = x_32233;
                    }
                    if (!inactive_46261) {
                        int64_t defunc_1_op_res_32234 = add64(x_32232, x_32233);
                        
                        x_32232 = defunc_1_op_res_32234;
                    }
                }
            }
            if (sle32(wave_sizze_46251, skip_threads_46260)) {
                barrier(CLK_LOCAL_MEM_FENCE);
            }
            if (sle32(skip_threads_46260, local_tid_46249 -
                      squot32(local_tid_46249, 32) * 32) &&
                ltid_in_bounds_46259) {
                // write result
                {
                    ((volatile __local
                      int64_t *) mem_45144)[sext_i32_i64(local_tid_46249)] =
                        x_32232;
                    x_32233 = x_32232;
                }
            }
            if (sle32(wave_sizze_46251, skip_threads_46260)) {
                barrier(CLK_LOCAL_MEM_FENCE);
            }
            skip_threads_46260 *= 2;
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // last thread of block 'i' writes its result to offset 'i'
    {
        if ((local_tid_46249 - squot32(local_tid_46249, 32) * 32) == 31 &&
            ltid_in_bounds_46259) {
            ((volatile __local
              int64_t *) mem_45144)[sext_i32_i64(squot32(local_tid_46249,
                                                         32))] = x_32232;
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    // scan the first block, after which offset 'i' contains carry-in for block 'i+1'
    {
        int32_t skip_threads_46262;
        
        // read input for in-block scan
        {
            if (squot32(local_tid_46249, 32) == 0 && ltid_in_bounds_46259) {
                x_46257 = ((volatile __local
                            int64_t *) mem_45144)[sext_i32_i64