/*
 * Copyright (C) 2020 Intel Corporation
 *
 * This software and the related documents are Intel copyrighted materials, and your use of them
 * is governed by the express license under which they were provided to you ("License"). Unless
 * the License provides otherwise, you may not use, modify, copy, publish, distribute, disclose
 * or transmit this software or the related documents without Intel's prior written permission.
 *
 * This software and the related documents are provided as is, with no express or implied
 * warranties, other than those that are expressly stated in the License.
*/

// LocalAdd3: each work-item accumulates a run of 128 consecutive float4
// elements read from the __local buffer `pa`.
//
//   pa   - __local buffer of float4 elements; the kernel reads
//          pa[id] .. pa[id + 127], where id = local_id(0) % (size - 128).
//          Presumably holds at least `size` elements -- confirm against
//          the host code that sets the kernel arguments.
//   size - element count used to wrap the starting index. Must be greater
//          than 128; otherwise the kernel returns without touching `pa`,
//          because the modulus below would be zero (division by zero) or
//          negative (which converts to a huge unsigned value and lets the
//          reads run past the buffer).
__kernel void LocalAdd3(__local float4 *pa, int size)
{
    // Number of consecutive elements each work-item sums.
    const int window = 128;

    // Guard the modulo below: size - window must be strictly positive.
    if (size <= window)
    {
        return;
    }

    // Wrap the starting index so that id + window - 1 stays below size.
    const int id = (int)(get_local_id(0) % (size_t)(size - window));

    // Proper vector literal. The original "(1.f, 1.f, 1.f, 1.f)" was a comma
    // expression that only produced this value via scalar broadcast.
    float4 a = (float4)(1.f, 1.f, 1.f, 1.f);

    for (int count = 0; count < window; count++)
    {
        a += pa[id + count];
    }

    // This branch is not expected to be taken (it requires every component
    // of the sum to be exactly zero), but the store keeps the compiler from
    // optimizing away the loop above.
    if (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f && a.w == 0.0f)
    {
        pa[id] = (float4)((float)size);
    }
}
