pi.cpp
· 1.1 KiB · C++
Raw
#include <ctime>
#include <iostream>

#include <curand.h>
#include <curand_kernel.h>
// Monte Carlo sampling kernel used to estimate pi.
//
// Each launched thread initialises its own cuRAND state (shared seed,
// subsequence equal to the thread's global 1D index), draws one (x, y) pair
// with curand_uniform, and atomically increments *count when the point
// satisfies x^2 + y^2 <= 1.
//
//   count  pointer to the hit counter; updated with atomicAdd, so it must be
//          zeroed by the caller before the launch
//   seed   seed forwarded unchanged to curand_init by every thread
//
// Only the .x components of the launch configuration are used. There is no
// bounds guard: every thread in the grid contributes exactly one sample, so
// the caller must divide by the number of threads actually launched.
__global__ void calculate_pi(int *count, unsigned long seed) {
    const int globalThread = blockIdx.x * blockDim.x + threadIdx.x;

    curandState rng;
    curand_init(seed, globalThread, 0, &rng);

    // The draw order (x first, then y) is part of the generated sequence.
    const float x = curand_uniform(&rng);
    const float y = curand_uniform(&rng);

    const bool insideUnitCircle = (x * x + y * y <= 1.0f);
    if (insideUnitCircle) {
        atomicAdd(count, 1);
    }
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaSucceeded(cudaError_t status, const char *what) {
    if (status == cudaSuccess) {
        return true;
    }
    std::cerr << "CUDA error during " << what << ": "
              << cudaGetErrorString(status) << std::endl;
    return false;
}

// Estimates pi by Monte Carlo sampling on the GPU and prints the result.
//
// Launches calculate_pi with at least N threads (one sample per thread),
// reads back the number of samples that satisfied the kernel's hit test and
// prints 4 * hits / samples. Returns 0 on success and 1 if any CUDA call or
// the kernel launch fails.
int main() {
    const int N = 1000000;  // minimum number of samples requested
    int *d_count = NULL;
    int h_count = 0;

    // Allocate the device-side hit counter and zero it from the host value.
    if (!cudaSucceeded(cudaMalloc((void**)&d_count, sizeof(int)), "cudaMalloc")) {
        return 1;
    }
    bool ok = cudaSucceeded(
        cudaMemcpy(d_count, &h_count, sizeof(int), cudaMemcpyHostToDevice),
        "cudaMemcpy (host to device)");

    // Launch configuration. A moderate block size leaves headroom for the
    // kernel's per-thread resource usage; the hardware maximum of 1024 is the
    // configuration most likely to be rejected at launch time.
    const int threadsPerBlock = 256;
    const int blocks = (N + threadsPerBlock - 1) / threadsPerBlock;

    // The kernel has no bounds guard, so every launched thread adds one
    // sample. The grid is rounded up to whole blocks, so this is >= N and is
    // the correct denominator for the estimate.
    const long long totalSamples =
        static_cast<long long>(blocks) * threadsPerBlock;

    if (ok) {
        calculate_pi<<<blocks, threadsPerBlock>>>(
            d_count, static_cast<unsigned long>(time(NULL)));
        // Launch-configuration errors are only visible through the last-error
        // state, not through a return value.
        ok = cudaSucceeded(cudaGetLastError(), "calculate_pi launch");
    }

    if (ok) {
        // Blocking copy on the default stream: waits for the kernel and also
        // surfaces any error raised while it was executing.
        ok = cudaSucceeded(
            cudaMemcpy(&h_count, d_count, sizeof(int), cudaMemcpyDeviceToHost),
            "cudaMemcpy (device to host)");
    }

    if (ok) {
        // Calculate pi from the fraction of samples that were hits.
        const double pi = 4.0 * h_count / static_cast<double>(totalSamples);
        std::cout << "Estimated Pi = " << pi << std::endl;
    }

    // Free device memory on both the success and the failure path.
    cudaFree(d_count);

    return ok ? 0 : 1;
}
1 | #include <iostream> |
2 | #include <curand.h> |
3 | #include <curand_kernel.h> |
4 | |
5 | __global__ void calculate_pi(int *count, unsigned long seed) { |
6 | int idx = threadIdx.x + blockIdx.x * blockDim.x; |
7 | curandState state; |
8 | curand_init(seed, idx, 0, &state); |
9 | |
10 | float x = curand_uniform(&state); |
11 | float y = curand_uniform(&state); |
12 | if (x * x + y * y <= 1.0f) { |
13 | atomicAdd(count, 1); |
14 | } |
15 | } |
16 | |
17 | int main() { |
18 | int N = 1000000; |
19 | int *d_count, h_count = 0; |
20 | |
21 | // Allocate memory on the device |
22 | cudaMalloc((void**)&d_count, sizeof(int)); |
23 | cudaMemcpy(d_count, &h_count, sizeof(int), cudaMemcpyHostToDevice); |
24 | |
25 | // Launch kernel |
26 | int threadsPerBlock = 1024; |
27 | int blocks = (N + threadsPerBlock - 1) / threadsPerBlock; |
28 | calculate_pi<<<blocks, threadsPerBlock>>>(d_count, time(NULL)); |
29 | |
30 | // Copy result back to host |
31 | cudaMemcpy(&h_count, d_count, sizeof(int), cudaMemcpyDeviceToHost); |
32 | |
33 | // Calculate pi |
34 | float pi = 4.0f * h_count / N; |
35 | std::cout << "Estimated Pi = " << pi << std::endl; |
36 | |
37 | // Free device memory |
38 | cudaFree(d_count); |
39 | |
40 | return 0; |
41 | } |
42 |