HPC 04 Code 01:
import cupy as cp
import numpy as np
import time
# Size of large vectors
N = 10000000
# Create large vectors on CPU
a_cpu = np.random.rand(N).astype(np.float32)
b_cpu = np.random.rand(N).astype(np.float32)
# ---------- CPU ----------
start_cpu = time.time()
c_cpu = a_cpu + b_cpu
end_cpu = time.time()
print(f"CPU Vector Addition Time: {end_cpu - start_cpu:.4f} seconds")
# ---------- GPU ----------
a_gpu = cp.asarray(a_cpu)  # host -> device transfer (kept outside the timed region)
b_gpu = cp.asarray(b_cpu)
# Warm-up: the very first CuPy kernel launch includes JIT compilation and
# context-initialization overhead, which would otherwise be misattributed
# to the timed addition (it dominates the naive measurement).
_ = a_gpu + b_gpu
cp.cuda.Device(0).synchronize()  # make sure the warm-up has fully finished
start_gpu = time.time()
c_gpu = a_gpu + b_gpu
cp.cuda.Device(0).synchronize() # Ensure GPU is done
end_gpu = time.time()
print(f"GPU Vector Addition Time: {end_gpu - start_gpu:.4f} seconds")
Output:
CPU Vector Addition Time: 0.0146 seconds
GPU Vector Addition Time: 1.1832 seconds
HPC 04 Code 00:
# Size of matrices
N = 1024
# Create random matrices on CPU
A_cpu = np.random.rand(N, N).astype(np.float32)
B_cpu = np.random.rand(N, N).astype(np.float32)
# ---------- CPU ----------
start_cpu = time.time()
C_cpu = np.matmul(A_cpu, B_cpu)
end_cpu = time.time()
print(f"CPU Matrix Multiplication Time: {end_cpu - start_cpu:.4f} seconds")
# ---------- GPU ----------
A_gpu = cp.asarray(A_cpu)  # host -> device transfer (kept outside the timed region)
B_gpu = cp.asarray(B_cpu)
# Warm-up: the first cp.matmul call pays one-time cuBLAS initialization and
# kernel-selection overhead; run it once so the timed call measures only
# the actual multiplication.
_ = cp.matmul(A_gpu, B_gpu)
cp.cuda.Device(0).synchronize()  # make sure the warm-up has fully finished
start_gpu = time.time()
C_gpu = cp.matmul(A_gpu, B_gpu)
cp.cuda.Device(0).synchronize()
end_gpu = time.time()
print(f"GPU Matrix Multiplication Time: {end_gpu - start_gpu:.4f} seconds")
Output:
CPU Matrix Multiplication Time: 0.0374 seconds
GPU Matrix Multiplication Time: 0.1681 seconds
HPC 04 CUDA Commented:
import cupy as cp # Import CuPy for GPU-accelerated array computations
import numpy as np # Import NumPy for CPU-based array computations
import time # Import time module to measure execution time
# Size of large vectors
N = 10000000 # Define the number of elements in each vector (10 million)
# Create large vectors on CPU
a_cpu = np.random.rand(N).astype(np.float32) # Generate random float32 array on CPU for vector a
b_cpu = np.random.rand(N).astype(np.float32) # Generate random float32 array on CPU for vector b
# ---------- CPU ----------
start_cpu = time.time() # Record start time for CPU addition
c_cpu = a_cpu + b_cpu # Perform vector addition on CPU
end_cpu = time.time() # Record end time for CPU addition
print(f"CPU Vector Addition Time: {end_cpu - start_cpu:.4f} seconds") # Display CPU execution time
# ---------- GPU ----------
a_gpu = cp.asarray(a_cpu) # Transfer vector a from CPU to GPU
b_gpu = cp.asarray(b_cpu) # Transfer vector b from CPU to GPU
_ = a_gpu + b_gpu # Warm-up launch: first kernel call includes JIT compilation overhead
cp.cuda.Device(0).synchronize() # Wait for the warm-up so it is excluded from the timing
start_gpu = time.time() # Record start time for GPU addition
c_gpu = a_gpu + b_gpu # Perform vector addition on GPU
cp.cuda.Device(0).synchronize() # Synchronize GPU to ensure addition is completed
end_gpu = time.time() # Record end time for GPU addition
print(f"GPU Vector Addition Time: {end_gpu - start_gpu:.4f} seconds") # Display GPU execution time
# Size of matrices
N = 1024 # Define the size of NxN matrices (1024 x 1024)
# Create random matrices on CPU
A_cpu = np.random.rand(N, N).astype(np.float32) # Generate random float32 matrix A on CPU
B_cpu = np.random.rand(N, N).astype(np.float32) # Generate random float32 matrix B on CPU
# ---------- CPU ----------
start_cpu = time.time() # Record start time for CPU matrix multiplication
C_cpu = np.matmul(A_cpu, B_cpu) # Perform matrix multiplication on CPU using NumPy
end_cpu = time.time() # Record end time for CPU computation
print(f"CPU Matrix Multiplication Time: {end_cpu - start_cpu:.4f} seconds") # Display CPU execution time
# ---------- GPU ----------
A_gpu = cp.asarray(A_cpu) # Transfer matrix A to GPU
B_gpu = cp.asarray(B_cpu) # Transfer matrix B to GPU
_ = cp.matmul(A_gpu, B_gpu) # Warm-up: first matmul pays one-time cuBLAS initialization cost
cp.cuda.Device(0).synchronize() # Wait for the warm-up so it is excluded from the timing
start_gpu = time.time() # Record start time for GPU matrix multiplication
C_gpu = cp.matmul(A_gpu, B_gpu) # Perform matrix multiplication on GPU using CuPy
cp.cuda.Device(0).synchronize() # Synchronize to ensure GPU operation is completed
end_gpu = time.time() # Record end time for GPU computation
print(f"GPU Matrix Multiplication Time: {end_gpu - start_gpu:.4f} seconds") # Display GPU execution time