from concurrent.futures import ThreadPoolExecutor


def matrix_multiply_parallel(A, B, num_threads=1):
    """Return the matrix product of *A* and *B*, computed by a thread pool.

    The rows of *A* are split into contiguous, near-equal chunks (sizes
    differ by at most one row) and each chunk is multiplied against *B* in
    its own task, so every row is computed even when the row count is not
    a multiple of ``num_threads``.

    Args:
        A: Left operand, a sequence of equal-length rows (``m x k``).
        B: Right operand, a sequence of equal-length rows (``k x p``).
        num_threads: Upper bound on the number of worker threads. No more
            threads than rows of *A* are ever started.

    Returns:
        A new ``m x p`` list of lists holding the product. An empty *A*
        yields ``[]``.

    Raises:
        ValueError: If ``num_threads`` is less than 1, or if a row of *A*
            does not have exactly ``len(B)`` entries.

    Note:
        The work here is pure-Python arithmetic. On interpreter builds
        with a global interpreter lock, threads do not execute bytecode in
        parallel, so do not expect a speed-up from raising ``num_threads``.
    """
    if num_threads < 1:
        raise ValueError("num_threads must be at least 1")

    row_count = len(A)
    if row_count == 0:
        # Nothing to compute; also avoids creating a pool with zero workers.
        return []

    inner = len(B)
    if any(len(row) != inner for row in A):
        raise ValueError(
            "each row of A must have as many entries as B has rows"
        )

    # Transpose B once so each output cell is a dot product of two
    # sequences instead of repeated B[k][j] lookups.
    columns = list(zip(*B))

    def multiply_rows(start, end):
        """Compute output rows ``start`` (inclusive) to ``end`` (exclusive)."""
        return [
            [sum(a * b for a, b in zip(row, column)) for column in columns]
            for row in A[start:end]
        ]

    # Never start more workers than there are rows to hand out.
    workers = min(num_threads, row_count)
    base, extra = divmod(row_count, workers)

    # The first `extra` chunks take one additional row so the remainder is
    # spread out rather than dropped.
    bounds = []
    start = 0
    for index in range(workers):
        end = start + base + (1 if index < extra else 0)
        bounds.append((start, end))
        start = end

    with ThreadPoolExecutor(max_workers=workers) as executor:
        futures = [executor.submit(multiply_rows, lo, hi) for lo, hi in bounds]
        result = []
        # Collect in submission order so rows stay in their original order;
        # .result() re-raises any exception raised inside a worker.
        for future in futures:
            result.extend(future.result())
    return result