For me
[DirectX12] 20) Compute Shader 본문
GPU에게 일감(연산 작업)을 넘겨 처리하도록 하는 방법
대량 병렬 GPGPU 알고리즘 또는 게임 렌더링의 일부를 가속시키기 위해 사용할 수 있음
Effect, Particle System, Instancing...
Why GPU?
Compute Shader
더보기
#ifndef _COMPUTE_FX_
#define _COMPUTE_FX_
#include "params.fx"
// Read-write texture bound to register u0: shader code can read it
// and can also modify its contents (the textures used so far were read-only).
RWTexture2D<float4> g_rwtex_0 : register(u0);
// Number of threads per thread group.
// max : 1024 (CS_5.0)
// - A single thread group is executed on a single multiprocessor.
[numthreads(1024, 1, 1)]
void CS_Main(int3 threadIndex : SV_DispatchThreadID)
{
    // Rows with an even y index are written red, all other rows green.
    const bool isEvenRow = (threadIndex.y % 2 == 0);

    g_rwtex_0[threadIndex.xy] = isEvenRow
        ? float4(1.f, 0.f, 0.f, 1.f)
        : float4(0.f, 1.f, 0.f, 1.f);
}
#endif
Command Queue
Command Queue를 Graphics용과 Compute용, 둘로 나눔
더보기
// *********************
// *ComputeCommandQueue*
// *********************
// Bundles the D3D12 objects used to record and submit compute work:
// a compute command queue, one allocator, one command list, and a fence
// plus a Win32 event used to block the CPU until the GPU has caught up.
class ComputeCommandQueue
{
public:
	ComputeCommandQueue() = default;
	~ComputeCommandQueue();

	// The destructor closes _fenceEvent, so copying this object would make
	// two instances close the same raw handle. Copying is therefore disabled.
	ComputeCommandQueue(const ComputeCommandQueue&) = delete;
	ComputeCommandQueue& operator=(const ComputeCommandQueue&) = delete;

	// Creates the queue, allocator, command list, fence and fence event on `device`.
	void Init(ComPtr<ID3D12Device> device);

	// Signals the fence with a new value and blocks until the fence reaches it.
	void WaitSync();

	// Closes and executes the command list, waits for completion, then resets
	// the allocator and list so that recording can continue.
	void FlushComputeCommandQueue();

	ComPtr<ID3D12CommandQueue> GetCmdQueue() { return _cmdQueue; }
	ComPtr<ID3D12GraphicsCommandList> GetComputeCmdList() { return _cmdList; }

private:
	ComPtr<ID3D12CommandQueue> _cmdQueue;
	ComPtr<ID3D12CommandAllocator> _cmdAlloc;
	ComPtr<ID3D12GraphicsCommandList> _cmdList;

	ComPtr<ID3D12Fence> _fence;
	uint32 _fenceValue = 0;                      // last value passed to Signal()
	HANDLE _fenceEvent = INVALID_HANDLE_VALUE;   // set by Init() via CreateEvent
};
// *********************
// *ComputeCommandQueue*
// *********************
// Releases the fence event created in Init().
ComputeCommandQueue::~ComputeCommandQueue()
{
	// _fenceEvent is still INVALID_HANDLE_VALUE when Init() was never called,
	// and CreateEvent reports failure by returning nullptr; neither value is
	// a handle that should be passed to CloseHandle.
	if (_fenceEvent != nullptr && _fenceEvent != INVALID_HANDLE_VALUE)
	{
		::CloseHandle(_fenceEvent);
		_fenceEvent = INVALID_HANDLE_VALUE;
	}
}
// Creates every D3D12 object this class owns, all of compute type:
// the command queue, the command allocator, the command list, and the
// fence/event pair used by WaitSync().
void ComputeCommandQueue::Init(ComPtr<ID3D12Device> device)
{
	D3D12_COMMAND_QUEUE_DESC computeQueueDesc = {};
	computeQueueDesc.Type = D3D12_COMMAND_LIST_TYPE_COMPUTE;
	computeQueueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
	device->CreateCommandQueue(&computeQueueDesc, IID_PPV_ARGS(&_cmdQueue));

	device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COMPUTE, IID_PPV_ARGS(&_cmdAlloc));
	device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_COMPUTE, _cmdAlloc.Get(), nullptr, IID_PPV_ARGS(&_cmdList));

	// CreateFence
	// - Used as the synchronization primitive between CPU and GPU.
	// (Created exactly once; a second identical call would only create a
	//  fence that is discarded immediately.)
	device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&_fence));

	// Auto-reset event, initially non-signaled, that WaitSync() waits on.
	_fenceEvent = ::CreateEvent(nullptr, FALSE, FALSE, nullptr);
}
// Advances the fence value, asks the queue to signal the fence with it,
// and blocks the calling thread until the fence has reached that value.
void ComputeCommandQueue::WaitSync()
{
	++_fenceValue;
	_cmdQueue->Signal(_fence.Get(), _fenceValue);

	// Nothing to wait for when the fence has already reached the new value.
	if (_fence->GetCompletedValue() >= _fenceValue)
		return;

	// Have the fence fire the event once it reaches the value, then wait on it.
	_fence->SetEventOnCompletion(_fenceValue, _fenceEvent);
	::WaitForSingleObject(_fenceEvent, INFINITE);
}
// Submits everything recorded so far, waits for it to finish, and puts the
// command list back into a recording state with the compute root signature set.
void ComputeCommandQueue::FlushComputeCommandQueue()
{
	// A command list must be closed before it can be executed.
	_cmdList->Close();

	ID3D12CommandList* cmdListArr[] = { _cmdList.Get() };
	_cmdQueue->ExecuteCommandLists(_countof(cmdListArr), cmdListArr);

	// Block until the submitted work is done so the allocator can be reset.
	WaitSync();

	_cmdAlloc->Reset();
	_cmdList->Reset(_cmdAlloc.Get(), nullptr);

	// Re-apply the compute root signature after the reset.
	COMPUTE_CMD_LIST->SetComputeRootSignature(COMPUTE_ROOT_SIGNATURE.Get());
}
#pragma endregion
RootSignature
더보기
// Builds the compute root signature: a single root parameter holding one
// descriptor table with a CBV range, an SRV range and a UAV range, each
// starting at base register 0. The result is stored in _computeRootSignature
// and set on the compute command list.
void RootSignature::CreateComputeRootSignature()
{
	CD3DX12_DESCRIPTOR_RANGE ranges[] =
	{
		CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, CBV_REGISTER_COUNT, 0), // b0~b4
		CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, SRV_REGISTER_COUNT, 0), // t0~t9
		CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, UAV_REGISTER_COUNT, 0), // u0~u4
	};

	CD3DX12_ROOT_PARAMETER param[1];
	param[0].InitAsDescriptorTable(_countof(ranges), ranges);

	D3D12_ROOT_SIGNATURE_DESC sigDesc = CD3DX12_ROOT_SIGNATURE_DESC(_countof(param), param);
	sigDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;

	ComPtr<ID3DBlob> blobSignature;
	ComPtr<ID3DBlob> blobError;
	::D3D12SerializeRootSignature(&sigDesc, D3D_ROOT_SIGNATURE_VERSION_1, &blobSignature, &blobError);

	// If serialization failed there is no signature blob; dereferencing it
	// below would crash. blobError (when set) holds the diagnostic text.
	if (blobSignature.Get() == nullptr)
		return;

	DEVICE->CreateRootSignature(0, blobSignature->GetBufferPointer(), blobSignature->GetBufferSize(), IID_PPV_ARGS(&_computeRootSignature));

	COMPUTE_CMD_LIST->SetComputeRootSignature(_computeRootSignature.Get());
}
Descriptor Heap
더보기
// ************************
// ComputeDescriptorHeap
// ************************
// Wraps one CBV/SRV/UAV descriptor heap for compute work. Descriptors are
// copied into per-register slots with SetCBV/SetSRV/SetUAV, and CommitTable()
// binds the heap to the compute command list.
class ComputeDescriptorHeap
{
public:
	// Creates the heap and caches the descriptor increment size.
	void Init();

	// Copy one descriptor from srcHandle into the slot belonging to `reg`.
	void SetCBV(D3D12_CPU_DESCRIPTOR_HANDLE srcHandle, CBV_REGISTER reg);
	void SetSRV(D3D12_CPU_DESCRIPTOR_HANDLE srcHandle, SRV_REGISTER reg);
	void SetUAV(D3D12_CPU_DESCRIPTOR_HANDLE srcHandle, UAV_REGISTER reg);

	// Sets the heap and root descriptor table 0 on the compute command list.
	void CommitTable();

	// CPU handle of the heap slot for the given register.
	D3D12_CPU_DESCRIPTOR_HANDLE GetCPUHandle(CBV_REGISTER reg);
	D3D12_CPU_DESCRIPTOR_HANDLE GetCPUHandle(SRV_REGISTER reg);
	D3D12_CPU_DESCRIPTOR_HANDLE GetCPUHandle(UAV_REGISTER reg);

private:
	// Shared implementation: heap start + reg * _handleSize.
	D3D12_CPU_DESCRIPTOR_HANDLE GetCPUHandle(uint8 reg);

	ComPtr<ID3D12DescriptorHeap> _descHeap;
	uint64 _handleSize = 0;   // descriptor increment size, filled in by Init()
};
#pragma region ComputeDescriptorHeap
void ComputeDescriptorHeap::Init()
{
D3D12_DESCRIPTOR_HEAP_DESC desc = {};
desc.NumDescriptors = TOTAL_REGISTER_COUNT;
desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
DEVICE->CreateDescriptorHeap(&desc, IID_PPV_ARGS(&_descHeap));
_handleSize = DEVICE->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
}
// Copies the single descriptor at srcHandle into this heap's slot for `reg`.
void ComputeDescriptorHeap::SetCBV(D3D12_CPU_DESCRIPTOR_HANDLE srcHandle, CBV_REGISTER reg)
{
	const uint32 descriptorsPerRange = 1;   // one descriptor on each side
	D3D12_CPU_DESCRIPTOR_HANDLE slot = GetCPUHandle(reg);

	DEVICE->CopyDescriptors(
		1, &slot, &descriptorsPerRange,
		1, &srcHandle, &descriptorsPerRange,
		D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
}
// Copies the single descriptor at srcHandle into this heap's slot for `reg`.
void ComputeDescriptorHeap::SetSRV(D3D12_CPU_DESCRIPTOR_HANDLE srcHandle, SRV_REGISTER reg)
{
	const uint32 descriptorsPerRange = 1;   // one descriptor on each side
	D3D12_CPU_DESCRIPTOR_HANDLE slot = GetCPUHandle(reg);

	DEVICE->CopyDescriptors(
		1, &slot, &descriptorsPerRange,
		1, &srcHandle, &descriptorsPerRange,
		D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
}
// Copies the single descriptor at srcHandle into this heap's slot for `reg`.
void ComputeDescriptorHeap::SetUAV(D3D12_CPU_DESCRIPTOR_HANDLE srcHandle, UAV_REGISTER reg)
{
	const uint32 descriptorsPerRange = 1;   // one descriptor on each side
	D3D12_CPU_DESCRIPTOR_HANDLE slot = GetCPUHandle(reg);

	DEVICE->CopyDescriptors(
		1, &slot, &descriptorsPerRange,
		1, &srcHandle, &descriptorsPerRange,
		D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);

	// TODO : change the resource state
}
void ComputeDescriptorHeap::CommitTable()
{
ID3D12DescriptorHeap* descHeap = _descHeap.Get();
COMPUTE_CMD_LIST->SetDescriptorHeaps(1, &descHeap);
D3D12_GPU_DESCRIPTOR_HANDLE handle = descHeap->GetGPUDescriptorHandleForHeapStart();
COMPUTE_CMD_LIST->SetComputeRootDescriptorTable(0, handle);
}
// CBV overload: converts the register enum to its numeric value and
// forwards to the uint8 overload.
D3D12_CPU_DESCRIPTOR_HANDLE ComputeDescriptorHeap::GetCPUHandle(CBV_REGISTER reg)
{
	const uint8 slotIndex = static_cast<uint8>(reg);
	return GetCPUHandle(slotIndex);
}
// SRV overload: converts the register enum to its numeric value and
// forwards to the uint8 overload.
D3D12_CPU_DESCRIPTOR_HANDLE ComputeDescriptorHeap::GetCPUHandle(SRV_REGISTER reg)
{
	const uint8 slotIndex = static_cast<uint8>(reg);
	return GetCPUHandle(slotIndex);
}
// UAV overload: converts the register enum to its numeric value and
// forwards to the uint8 overload.
D3D12_CPU_DESCRIPTOR_HANDLE ComputeDescriptorHeap::GetCPUHandle(UAV_REGISTER reg)
{
	const uint8 slotIndex = static_cast<uint8>(reg);
	return GetCPUHandle(slotIndex);
}
// Returns the CPU handle located `reg` descriptors past the start of the heap.
D3D12_CPU_DESCRIPTOR_HANDLE ComputeDescriptorHeap::GetCPUHandle(uint8 reg)
{
	D3D12_CPU_DESCRIPTOR_HANDLE slot = _descHeap->GetCPUDescriptorHandleForHeapStart();

	// Byte offset of the requested slot = index * descriptor increment size.
	const auto byteOffset = reg * _handleSize;
	slot.ptr += byteOffset;

	return slot;
}
#pragma endregion
'DirectX12' 카테고리의 다른 글
[DirectX12] 22) Instancing (0) | 2023.01.20 |
---|---|
[DirectX12] 21) Particle System (0) | 2023.01.19 |
[DirectX12] 19) Deferred , Forward Rendering (0) | 2023.01.19 |
[DirectX12] 18) Render Target (1) | 2023.01.19 |
[DirectX12] 17) 직교투영 (0) | 2023.01.19 |