lab is done
This commit is contained in:
2
fedotov_ilia_lab_6/.gitignore
vendored
Normal file
2
fedotov_ilia_lab_6/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
x64
|
||||
.vs
|
||||
10
fedotov_ilia_lab_6/README.md
Normal file
10
fedotov_ilia_lab_6/README.md
Normal file
@@ -0,0 +1,10 @@
|
||||
# Лабораторная работа 6
|
||||
***
|
||||
|
||||
Видео-отчет -- https://youtu.be/RYI5FWkm-HM
|
||||
|
||||
| № | async | no-async |
|
||||
|---|-------|----------|
|
||||
| 1 | .092 | .674 |
|
||||
| 2 | .266 | 7.974 |
|
||||
| 500 | .44 | 36.092 |
|
||||
25
fedotov_ilia_lab_6/cudaMatrixDet/cudaMatrixDet.sln
Normal file
25
fedotov_ilia_lab_6/cudaMatrixDet/cudaMatrixDet.sln
Normal file
@@ -0,0 +1,25 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio Version 17
|
||||
VisualStudioVersion = 17.14.36717.8 d17.14
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cudaMatrixDet", "cudaMatrixDet.vcxproj", "{A878508F-5BC6-9D1F-BB7D-53DAA1D90114}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{A878508F-5BC6-9D1F-BB7D-53DAA1D90114}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{A878508F-5BC6-9D1F-BB7D-53DAA1D90114}.Debug|x64.Build.0 = Debug|x64
|
||||
{A878508F-5BC6-9D1F-BB7D-53DAA1D90114}.Release|x64.ActiveCfg = Release|x64
|
||||
{A878508F-5BC6-9D1F-BB7D-53DAA1D90114}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||
SolutionGuid = {4BE25B17-AC07-4F2C-9A8E-8108EB12D15E}
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
86
fedotov_ilia_lab_6/cudaMatrixDet/cudaMatrixDet.vcxproj
Normal file
86
fedotov_ilia_lab_6/cudaMatrixDet/cudaMatrixDet.vcxproj
Normal file
@@ -0,0 +1,86 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{A878508F-5BC6-9D1F-BB7D-53DAA1D90114}</ProjectGuid>
|
||||
<RootNamespace>cudaMatrixDet</RootNamespace>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<PlatformToolset>v143</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<PlatformToolset>v143</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 13.0.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<CudaCompile Include="kernel.cu" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 13.0.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
@@ -0,0 +1,4 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="Current" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<PropertyGroup />
|
||||
</Project>
|
||||
128
fedotov_ilia_lab_6/cudaMatrixDet/kernel.cu
Normal file
128
fedotov_ilia_lab_6/cudaMatrixDet/kernel.cu
Normal file
@@ -0,0 +1,128 @@
|
||||
#include <iostream>
|
||||
#include <cstdlib>
|
||||
|
||||
#include <time.h>
|
||||
|
||||
#include "cuda_runtime.h"
|
||||
#include "device_launch_parameters.h"
|
||||
|
||||
/*
 * Product of the elements on the down-right diagonal that starts at row 0,
 * column `col` of the extended matrix.
 *
 * The matrix is stored row-major with a row stride of 2*N - 1, so moving one
 * row down and one column right advances the flat index by 2*N.
 * The walk multiplies at most N elements and stops early if the next index
 * would fall outside the `size` elements of `m`.
 */
static __device__ double diagonalProduct(const double *m, size_t size, int N, int col)
{
    const size_t step = 2 * (size_t)N;
    size_t idx = (size_t)col;
    double product = m[idx];
    for (int k = 1; k < N; ++k) {
        idx += step;
        if (idx >= size) { break; }
        product *= m[idx];
    }
    return product;
}

/*
 * Product of the elements on the up-right diagonal that starts at row N - 1,
 * column `col` of the extended matrix.
 *
 * Moving one row up and one column right decreases the flat index by
 * (2*N - 1) - 1 = 2*N - 2. The walk multiplies at most N elements and stops
 * early if the index would become negative. A 64-bit signed index is used so
 * the start offset (N-1)*(2N-1) cannot overflow `int`.
 */
static __device__ double antiDiagonalProduct(const double *m, int N, int col)
{
    const long long step = 2LL * N - 2;
    long long idx = (long long)(N - 1) * (2LL * N - 1) + col;
    double product = m[idx];
    for (int k = 1; k < N; ++k) {
        idx -= step;
        if (idx < 0) { break; }
        product *= m[idx];
    }
    return product;
}

/*
 * Computes the 2*N diagonal products used by main() to form its result.
 *
 * Parameters:
 *   m      - extended matrix, row-major, N rows with a row stride of 2*N - 1
 *   size   - number of elements in m (used as an upper bound for indices)
 *   N      - matrix order
 *   result - output, at least 2*N elements:
 *              result[c]     = product of the down-right diagonal from (0, c)
 *              result[c + N] = product of the up-right diagonal from (N-1, c)
 *   async  - true:  one thread per (row, col); threads in row 0 and row N-1
 *                   each write one entry of result.
 *            false: thread (0, 0) alone computes every entry sequentially and
 *                   all other threads exit immediately.
 *
 * Launch layout: a 2D grid; x maps to columns, y to rows. In the parallel
 * mode the grid must cover at least N x N threads. In the sequential mode only
 * thread (0, 0) is required.
 *
 * Note: when N == 1, row 0 is also the last row and only result[0] is
 * written; result[1] is left untouched in both modes.
 */
__global__ void cudaMatrixDet(double *m, size_t size, int N, double *result, bool async = true)
{
    const int row = blockIdx.y * blockDim.y + threadIdx.y;
    const int col = blockIdx.x * blockDim.x + threadIdx.x;

    if (!async) {
        // Exactly one thread performs the sequential computation.
        if (row != 0 || col != 0) { return; }
        for (int j = 0; j < N; ++j) {
            result[j] = diagonalProduct(m, size, N, j);
            // Mirrors the parallel path, where row 0 takes precedence over row N-1.
            if (N > 1) { result[j + N] = antiDiagonalProduct(m, N, j); }
        }
        // Do not fall through into the parallel path.
        return;
    }

    // Threads outside the N x N area have nothing to do.
    if (row >= N || col >= N) { return; }

    if (row == 0) {
        result[col] = diagonalProduct(m, size, N, col);
    }
    else if (row == N - 1) {
        result[col + N] = antiDiagonalProduct(m, N, col);
    }
}
|
||||
|
||||
/*
 * Fills the extended matrix `m` with pseudo-random values in [0, 1].
 *
 * Layout: row-major, row stride 2*N - 1. In each row the first N columns get
 * fresh rand() values (drawn left to right, row by row) and columns
 * N .. 2*N - 2 are copies of columns 0 .. N - 2 of the same row.
 *
 * Parameters:
 *   m    - destination buffer holding at least `size` doubles
 *   size - number of elements in m; no element at index >= size is written
 *   N    - matrix order; nothing is written when N <= 0
 */
void init_matrix(double *m,size_t size, int N)
{
    // A non-positive order has no valid row layout (and would never terminate
    // with the row-walking logic below), so treat it as "nothing to fill".
    if (N <= 0) { return; }

    const size_t cols = (size_t)N;
    const size_t stride = 2 * cols - 1;

    for (size_t rowStart = 0; rowStart < size; rowStart += stride) {
        for (size_t c = 0; c < cols && rowStart + c < size; ++c) {
            const double value = (double)rand() / RAND_MAX;
            m[rowStart + c] = value;

            // Every column except the last one is duplicated N positions to the right.
            const size_t mirror = rowStart + c + cols;
            if (c + 1 < cols && mirror < size) { m[mirror] = value; }
        }
    }
}
|
||||
|
||||
/*
 * Writes the extended matrix to std::cerr, one row per line.
 * Each of the N rows holds 2*N - 1 values; every value is followed by a
 * single space and every row is terminated by '\n'.
 */
void print(double *m, int N)
{
    const double *cursor = m;
    for (int r = 0; r < N; ++r) {
        const int width = 2 * N - 1;
        for (int c = 0; c < width; ++c) {
            std::cerr << *cursor << " ";
            ++cursor;
        }
        std::cerr << '\n';
    }
}
|
||||
/*
 * Writes the first `size` elements of `m` to std::cerr on a single line,
 * each followed by one space (no trailing newline).
 */
void print(double *m, size_t size)
{
    // Index with size_t so the counter has the same type and range as `size`.
    for (size_t i = 0; i < size; ++i) { std::cerr << m[i] << " "; }
}
|
||||
|
||||
/*
 * Aborts the program with a diagnostic on std::cerr when a CUDA runtime call
 * (or a kernel launch, via cudaGetLastError) did not succeed.
 */
static void checkCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess) {
        std::cerr << what << " failed: " << cudaGetErrorString(status) << '\n';
        std::exit(EXIT_FAILURE);
    }
}

/*
 * Reads the matrix order N from stdin, builds a random extended matrix,
 * runs cudaMatrixDet in its sequential (async = false) mode and prints:
 *   - the matrix, the 2*N diagonal products and the elapsed time to stderr
 *   - the resulting value (sum of the first N products minus the sum of the
 *     last N products) to stdout
 */
int main()
{
    int N = 0;
    if (!(std::cin >> N) || N <= 0) {
        std::cerr << "expected a positive matrix order N\n";
        return EXIT_FAILURE;
    }

    // Computed in size_t so large N cannot overflow int arithmetic.
    // N * (2N - 1) == N*N + (N*N - N): N rows of 2N - 1 columns.
    const size_t order = (size_t)N;
    size_t size = order * (2 * order - 1);
    const size_t resultCount = 2 * order;

    double *m = NULL;
    double *result = NULL;
    checkCuda(cudaMallocManaged(&m, size * sizeof(double)), "cudaMallocManaged(m)");
    checkCuda(cudaMallocManaged(&result, resultCount * sizeof(double)), "cudaMallocManaged(result)");

    // The kernel does not write every entry for N == 1, so start from zeros
    // instead of reading uninitialised memory below.
    for (size_t i = 0; i < resultCount; ++i) { result[i] = 0.0; }

    init_matrix(m, size, N);
    print(m, N);

    int threads = 32;
    int blocks = (N + threads - 1) / threads;

    dim3 THREADS(threads, threads);
    dim3 BLOCKS(blocks, blocks);

    clock_t start, stop;

    start = clock();
    cudaMatrixDet<<<BLOCKS, THREADS>>>(m, size, N, result, false);
    checkCuda(cudaGetLastError(), "cudaMatrixDet launch");
    checkCuda(cudaDeviceSynchronize(), "cudaMatrixDet execution");

    double det = 0;
    for (size_t i = 0; i < resultCount; ++i) {
        if (i >= order) { det += result[i] * -1; }
        else { det += result[i]; }
    }
    stop = clock();

    // Diagnostics are printed after the clock is stopped so that stderr I/O
    // does not distort the measurement.
    for (size_t i = 0; i < resultCount; ++i) { std::cerr << result[i] << " "; }
    std::cerr << '\n';
    std::cout << det;

    double timer = ((double)(stop - start)) / CLOCKS_PER_SEC;
    std::cerr << "took " << timer << " seconds.\n";

    checkCuda(cudaFree(m), "cudaFree(m)");
    checkCuda(cudaFree(result), "cudaFree(result)");
    cudaDeviceReset();
    return 0;
}
|
||||
Reference in New Issue
Block a user