自定义URP管线的延迟渲染

2025/7/4 技术延迟管线

在看本文章前最好先去了解下延迟渲染的基本原理，甚至修改过老管线的延迟渲染，不然可能有看不懂的地方。

Unity版本：Unity 2022.3.18f1

1.修改管线

由于我们直接修改URP的代码会被自动改回去，为了让我们修改的代码能够保存，我们要将项目文件夹下的 Library/PackageCache/com.unity.render-pipelines.universal@14.0.10 剪切到项目文件夹下的 Packages 文件夹中

从左边到右边

如果没什么意外的话，再打开Unity时修改的代码应该就不会再被改回去了。

2.设置延迟管线

想自定义延迟管线的话第一步当然是先改成延迟管线，我们依次点击Edit-ProjectSettings，在Graphics中应该能看到有个选择当前管线的地方，点一下，就能定位到当前使用的管线。

就是这里

然后再点击这个管线的RendererList，就能找到修改为延迟管线的地方

然后RenderingPath从Forward改为Deferred

3.GbufferPass

众所周知，延迟渲染主要由传递数据的GbufferPass和计算光照的LightPass组成。这里我们先讲GbufferPass。

Unity没给统一的URP shader模板，这里先给个我的URP模板方便后续更改：

// Minimal URP template shader: one pass that samples _MainTex and returns the sampled color.
Shader "Gbuffer"
{
    Properties
    {
        _MainTex ("MainTex",2D) = "white"{}
    }
    SubShader
    {
        Tags 
        {
            "RenderType"="Opaque"
        }
        // The rendering pass
        Pass
        {
            // This template pass is tagged with the UniversalForward light mode.
            Tags
            {
                "LightMode"="UniversalForward"
            }
            HLSLPROGRAM
            #pragma vertex vert
            #pragma fragment frag
            #include "Packages/com.unity.render-pipelines.universal/ShaderLibrary/Core.hlsl"
            #include "Packages/com.unity.render-pipelines.universal/ShaderLibrary/Lighting.hlsl"

            // Vertex input: object-space position and the first UV set.
            struct a2v
            {
                float4 vertex : POSITION;
                float4 uv : TEXCOORD0;
            };
            // Vertex-to-fragment data: clip-space position and the transformed UV.
            struct v2f
            {
                float4 posCS : SV_POSITION;
                float2 uv : TEXCOORD;
            };

            // Material properties declared inside the UnityPerMaterial constant buffer.
            CBUFFER_START(UnityPerMaterial)
            float4 _MainTex_ST;

        CBUFFER_END
            // Texture and sampler objects are declared outside the constant buffer.
            TEXTURE2D(_MainTex);
            SAMPLER(sampler_MainTex);


            // Vertex shader: outputs the clip-space position and the UV transformed with _MainTex_ST.
            v2f vert(a2v v)
            {
                v2f o;
                
               
                VertexPositionInputs vertexInput = GetVertexPositionInputs(v.vertex.xyz);
                o.posCS = vertexInput.positionCS;
                o.uv = TRANSFORM_TEX(v.uv,_MainTex);
                return o;
            }
            // Fragment shader: returns the sampled _MainTex color unchanged.
            float4 frag(v2f i) : SV_Target
       {
                float4 MainTex = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, i.uv);
        return float4(MainTex);
       }
            ENDHLSL
        } 
    }
}

URP会将所有LightMode为UniversalGBuffer的shader作为Gbuffer阶段的shader，所以我们首先修改LightMode

众所周知，Gbuffer有4个，分别输出不同的信息用于后续的光照计算。我们创建个结构体，用于输出4个Gbuffer：

// Pack the four G-buffer outputs into one struct; each member is bound to its own
// render target through the SV_TARGET0..SV_TARGET3 semantics.
            struct DeferredOutPut{
                float4 gBuffer0 : SV_TARGET0;
                float4 gBuffer1 : SV_TARGET1;
                float4 gBuffer2 : SV_TARGET2;
                float4 gBuffer3 : SV_TARGET3;
            };

然后我们相应地修改我们的片元着色器，这里我只是往4个Buffer中塞了一堆1：

            // Fragment shader for the G-buffer pass: fills all four G-buffer targets with
            // the placeholder value 1.0 and returns them together in one struct.
            DeferredOutPut frag(v2f i) : SV_Target
       {
        DeferredOutPut g;
                // Sample the texture (the result is not written to any target in this version)
                float4 MainTex = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, i.uv);
        g.gBuffer0 = float4(1.0,1.0,1.0,1.0);
        g.gBuffer1 = float4(1.0,1.0,1.0,1.0);
        g.gBuffer2 = float4(1.0,1.0,1.0,1.0);
        g.gBuffer3 = float4(1.0,1.0,1.0,1.0);
        return g;
     }

这四个Buffer官方给的精度各不相同，用于应对不同的数据，官方的做法是Gbuffer0：RGB漫反射，A：材质ID？（不确定，不懂是干什么用的），Gbuffer1：RGB高光，A：AO ，Gbuffer2：RGB法线，A：光滑度，Gbuffer3：RGB环境光和自发光，A：无（是个1）。

官方的输出

我们可以在FrameDebugger中看到它们各自的精度：

Gbuffer0

Gbuffer1

Gbuffer2

Gbuffer3

可以看到Gbuffer3的精度给的最高，因为他考虑到环境光和自发光可能需要HDR，并且没A通道，所以想往A通道塞东西得改代码（后面会说）。

完整的Gbuffer部分：

// G-buffer version of the template: the pass is tagged UniversalGBuffer and the fragment
// shader writes placeholder values into four render targets.
Shader "Gbuffer"
{
    Properties
    {
        _MainTex ("MainTex",2D) = "white"{}
    }
    SubShader
    {
        Tags 
        {
            "RenderType"="Opaque"
        }
        // The rendering pass
        Pass
        {
            // LightMode is UniversalGBuffer so that this pass is used for the G-buffer stage.
            Tags
            {
                "LightMode"="UniversalGBuffer"
            }
            HLSLPROGRAM
            #pragma vertex vert
            #pragma fragment frag
            #include "Packages/com.unity.render-pipelines.universal/ShaderLibrary/Core.hlsl"
            #include "Packages/com.unity.render-pipelines.universal/ShaderLibrary/Lighting.hlsl"

            // Vertex input: object-space position and the first UV set.
            struct a2v
            {
                float4 vertex : POSITION;
                float4 uv : TEXCOORD0;
            };
            // Vertex-to-fragment data: clip-space position and the transformed UV.
            struct v2f
            {
                float4 posCS : SV_POSITION;
                float2 uv : TEXCOORD;
            };
            // Pack the four G-buffer outputs into one struct (one member per render target)
            struct DeferredOutPut{
                float4 gBuffer0 : SV_TARGET0;
                float4 gBuffer1 : SV_TARGET1;
                float4 gBuffer2 : SV_TARGET2;
                float4 gBuffer3 : SV_TARGET3;
            };

            // Material properties declared inside the UnityPerMaterial constant buffer.
            CBUFFER_START(UnityPerMaterial)
            float4 _MainTex_ST;

        CBUFFER_END
            TEXTURE2D(_MainTex);// in CG this would be written as: sampler2D _MainTex;
            SAMPLER(sampler_MainTex);


            // Vertex shader: outputs the clip-space position and the UV transformed with _MainTex_ST.
            v2f vert(a2v v)
            {
                v2f o;
                
                // URP helper that provides the clip-space and world-space vertex positions
                VertexPositionInputs vertexInput = GetVertexPositionInputs(v.vertex.xyz);
                o.posCS = vertexInput.positionCS;
                o.uv = TRANSFORM_TEX(v.uv,_MainTex);
                return o;
            }
            // Fragment shader: writes the placeholder value 1.0 into every G-buffer target.
            DeferredOutPut frag(v2f i) : SV_Target
        {
        DeferredOutPut g;
                // Sample the texture (the result is not written to any target in this version)
                float4 MainTex = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, i.uv);
        g.gBuffer0 = float4(1.0,1.0,1.0,1.0);
        g.gBuffer1 = float4(1.0,1.0,1.0,1.0);
        g.gBuffer2 = float4(1.0,1.0,1.0,1.0);
        g.gBuffer3 = float4(1.0,1.0,1.0,1.0);
        return g;
         }
            ENDHLSL
        }
    }
}

4.LightPass

老的渲染管线可以自己写个LightPass然后在Unity中直接替换，但URP中却不能用这样的方式，所以我们直接去修改URP的shader（其实是有别的方式，可以用自己的shader，但我不会）。

我们去翻FrameDebugger，直接打开URP的LightPass

直接点那个shader就能定位到位置

它是一个700多行的shader，挺多，别慌，其实Unity用到的就是其中的一两个Pass，也就是第3、4个Pass，这点也可以在FrameDebugger中看到：

URP所使用的Pass

我们打开那个shader看看这俩pass分别写了什么：

关键在于红框的位置，注释部分标明了这个pass的用途，下面则写了它所使用的顶点、片元着色器。这两个pass所使用的片元着色器是相同的，这就方便了我们的修改，我们的目的正是修改片元着色器的部分。

URP的片元着色器部分

接上图

接上图

其中有各种乱七八糟的宏来定义各种情况，咱不用管，关键在于两个地方：其一是采样Gbuffer的方式，我们可以直接用URP的

这里是3个Gbuffer和一个深度

可以看到unity只采样了前3个buffer，而gbuffer3却没有采样。这是因为unity会自动将gbuffer3给加到最后的return中，这点需要注意，我们在计算的过程中别再加一遍gbuffer3的数值了，也意味着gbuffer3只能存一些能直接加的东西，一些花里胡哨的东西就别往gbuffer3里存了。

第二个关键点在于深度图重构世界坐标的方式，我们可以直接用unity的方式：

深度图重构世界坐标

其中eyeIndex是关于适配xr的，我们不去管xr，直接把eyeIndex改成0，他是我们正常使用的矩阵

剩下的地方咱删了自己写：

    // Light-pass fragment shader (debug version): samples depth and G-buffers 0-2 at the
    // current pixel, reconstructs the world-space position, and outputs gbuffer0.rgb.
    half4 DeferredShading(Varyings input) : SV_Target
    {

        // Screen-space UV: xy divided by the z component carried in screenUV.
        float2 screen_uv = (input.screenUV.xy / input.screenUV.z);
        
        float d        = SAMPLE_TEXTURE2D_X_LOD(_CameraDepthTexture, my_point_clamp_sampler, screen_uv, 0).x; // raw depth value has UNITY_REVERSED_Z applied on most platforms.
        half4 gbuffer0 = SAMPLE_TEXTURE2D_X_LOD(_GBuffer0, my_point_clamp_sampler, screen_uv, 0);
        half4 gbuffer1 = SAMPLE_TEXTURE2D_X_LOD(_GBuffer1, my_point_clamp_sampler, screen_uv, 0);
        half4 gbuffer2 = SAMPLE_TEXTURE2D_X_LOD(_GBuffer2, my_point_clamp_sampler, screen_uv, 0);
        
        // Rebuild the world-space position from pixel coordinates and raw depth.
        // Index 0 replaces the XR eyeIndex because XR is not handled here.
        float4 posWS = mul(_ScreenToWorld[0], float4(input.positionCS.xy, d, 1.0));
        // Homogeneous divide.
        posWS.xyz *= rcp(posWS.w);

        

        // gbuffer1, gbuffer2 and posWS are not used yet; only gbuffer0.rgb is shown.
        return half4(gbuffer0.rgb, 1.0);
    }

这里我直接将gbuffer0的rgb输出来看结果，我将GbufferPass的gbuffer0输出改为MainTex，并且把Gbuffer3的值改为了0（防止unity直接将gbuffer3加上去影响对结果的判断）

加了贴图的结果

5.法线

由于法线是 -1~1 的数据，而Gbuffer是 0~1 的数据，所以我们需要在Gbuffer阶段编码一下，再到LightPass阶段解码一下。

// In the G-buffer pass: remap the normalized normal from [-1, 1] to [0, 1] before storing it
float3 nDirWS = normalize(i.nDirWS)*0.5+0.5;
g.gBuffer2 = float4(nDirWS,1.0);

// In the light pass: remap the stored value from [0, 1] back to [-1, 1]
float3 nDirWS = gbuffer2.rgb*2-1;

但是这种方式所得的结果法线精度会有不够的问题，计算兰伯特的时候会导致结果很丑：

兰伯特结果

半兰伯特结果

unity中有个设置，可以提高法线精度

就是这个选项

打开它，Unity会改变Gbuffer2的rt格式，并且使用八面体映射的方式来编码法线（前提是你解码法线和编码法线用的是Unity的函数）

使用高精度法线后的rt格式

使用八面体映射编码后的法线（未解码）

这里我直接把unity八面体映射的编码和解码方式给搬过来：

// In the G-buffer pass (place this before the fragment shader)
// Compress the normal with octahedral mapping to address the normal-precision problem
half3 EyPackNormal(half3 n)
{
     float2 octNormalWS = PackNormalOctQuadEncode(n);                  // values between [-1, +1], must use fp32 on some platforms.
     float2 remappedOctNormalWS = saturate(octNormalWS * 0.5 + 0.5);   // values between [ 0, +1]
     return half3(PackFloat2To888(remappedOctNormalWS));               // values between [ 0, +1]
}
// Inside the G-buffer pass fragment shader: normalize, encode, then store in gBuffer2.rgb
float3 nDirWS = normalize(i.nDirWS);
nDirWS = EyPackNormal(nDirWS);
g.gBuffer2 = float4(nDirWS,1.0);

// In the light pass, before the fragment shader
// Decode a normal that was stored with octahedral mapping
    half3 EyUnpackNormal(half3 pn)
    {
        half2 remappedOctNormalWS = half2(Unpack888ToFloat2(pn));          // values between [ 0, +1]
        half2 octNormalWS = remappedOctNormalWS.xy * half(2.0) - half(1.0);// values between [-1, +1]
        return half3(UnpackNormalOctQuadEncode(octNormalWS));              // values between [-1, +1]
    }

// Inside the light pass fragment shader: read the packed value from gbuffer2.rgb and decode it
float3 nDirWS = gbuffer2.rgb;
nDirWS = EyUnpackNormal(nDirWS);

这样之后法线精度就差不多了：

处理法线之后的半兰伯特

GbufferPass的完整代码如下

// Complete G-buffer pass: writes the albedo texture to gBuffer0 and the octahedral-encoded
// world-space normal to gBuffer2.
Shader "Gbuffer"
{
    Properties
    {
        _MainTex ("MainTex",2D) = "white"{}
    }
    SubShader
    {
        Tags 
        {
            "RenderType"="Opaque"
        }
        // The rendering pass
        Pass
        {
            // LightMode is UniversalGBuffer so that this pass is used for the G-buffer stage.
            Tags
            {
                "LightMode"="UniversalGBuffer"
            }
            HLSLPROGRAM
            #pragma vertex vert
            #pragma fragment frag
            #include "Packages/com.unity.render-pipelines.universal/ShaderLibrary/Core.hlsl"
            #include "Packages/com.unity.render-pipelines.universal/ShaderLibrary/Lighting.hlsl"

            // Vertex input: object-space position, first UV set and object-space normal.
            struct a2v
            {
                float4 vertex : POSITION;
                float4 uv : TEXCOORD0;
                float3 normal : NORMAL;
            };
            // Vertex-to-fragment data: clip-space position, transformed UV and world-space normal.
            struct v2f
            {
                float4 posCS : SV_POSITION;
                float2 uv : TEXCOORD;
                float3 nDirWS : TEXCOORD1;
            };
            // Pack the four G-buffer outputs into one struct (one member per render target)
            struct DeferredOutPut{
                float4 gBuffer0 : SV_TARGET0;
                float4 gBuffer1 : SV_TARGET1;
                float4 gBuffer2 : SV_TARGET2;
                float4 gBuffer3 : SV_TARGET3;
            };

            // Material properties declared inside the UnityPerMaterial constant buffer.
            CBUFFER_START(UnityPerMaterial)
            float4 _MainTex_ST;

            CBUFFER_END
            TEXTURE2D(_MainTex);// in CG this would be written as: sampler2D _MainTex;
            SAMPLER(sampler_MainTex);


            // Vertex shader: outputs clip-space position, transformed UV and world-space normal.
            v2f vert(a2v v)
            {
                v2f o;
                
                // URP helper that provides the clip-space and world-space vertex positions
                VertexPositionInputs vertexInput = GetVertexPositionInputs(v.vertex.xyz);
                o.posCS = vertexInput.positionCS;
                o.uv = TRANSFORM_TEX(v.uv,_MainTex);
                o.nDirWS = TransformObjectToWorldNormal(v.normal.xyz) ;
                return o;
            }
            // Compress the normal with octahedral mapping to address the normal-precision problem
            half3 EyPackNormal(half3 n)
            {
                float2 octNormalWS = PackNormalOctQuadEncode(n);                  // values between [-1, +1], must use fp32 on some platforms.
                float2 remappedOctNormalWS = saturate(octNormalWS * 0.5 + 0.5);   // values between [ 0, +1]
                return half3(PackFloat2To888(remappedOctNormalWS));               // values between [ 0, +1]
            }
            // Fragment shader: fills the four G-buffer targets.
            DeferredOutPut frag(v2f i) : SV_Target
        {
            DeferredOutPut g;
        // Fetch interpolated data (re-normalize the normal after interpolation)
        float3 nDirWS = normalize(i.nDirWS);
                // Sample the texture
                float4 MainTex = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, i.uv);
        nDirWS = EyPackNormal(nDirWS);
        g.gBuffer0 = float4(MainTex.rgb,1.0);
        g.gBuffer1 = float4(1.0,1.0,1.0,1.0);
        g.gBuffer2 = float4(nDirWS,1.0);
        // gBuffer3 is zeroed so that it contributes nothing when it is added to the final result.
        g.gBuffer3 = float4(0.0,0.0,0.0,0.0);
        return g;
         }
            ENDHLSL
        }
    }
}

LightPass部分代码如下：

    // Decode a normal that was stored with octahedral mapping.
    // pn is the packed three-channel value as read from the G-buffer.
    half3 EyUnpackNormal(half3 pn)
    {
        half2 remappedOctNormalWS = half2(Unpack888ToFloat2(pn));          // values between [ 0, +1]
        half2 octNormalWS = remappedOctNormalWS.xy * half(2.0) - half(1.0);// values between [-1, +1]
        return half3(UnpackNormalOctQuadEncode(octNormalWS));              // values between [-1, +1]
    }
    // Light-pass fragment shader: samples depth and G-buffers 0-2, decodes the normal from
    // gbuffer2, and outputs a half-Lambert term against a hard-coded direction (1,0,0).
    half4 DeferredShading(Varyings input) : SV_Target
    {

        // Screen-space UV: xy divided by the z component carried in screenUV.
        float2 screen_uv = (input.screenUV.xy / input.screenUV.z);
        
        float d        = SAMPLE_TEXTURE2D_X_LOD(_CameraDepthTexture, my_point_clamp_sampler, screen_uv, 0).x; // raw depth value has UNITY_REVERSED_Z applied on most platforms.
        half4 gbuffer0 = SAMPLE_TEXTURE2D_X_LOD(_GBuffer0, my_point_clamp_sampler, screen_uv, 0);
        half4 gbuffer1 = SAMPLE_TEXTURE2D_X_LOD(_GBuffer1, my_point_clamp_sampler, screen_uv, 0);
        half4 gbuffer2 = SAMPLE_TEXTURE2D_X_LOD(_GBuffer2, my_point_clamp_sampler, screen_uv, 0);

        // Fetch data: decode the octahedral-packed normal stored in gbuffer2.rgb
        float3 nDirWS = gbuffer2.rgb;
        nDirWS = EyUnpackNormal(nDirWS);
        // Rebuild the world-space position from pixel coordinates and raw depth, then
        // apply the homogeneous divide. posWS is not used by the output below.
        float4 posWS = mul(_ScreenToWorld[0], float4(input.positionCS.xy, d, 1.0));
        posWS.xyz *= rcp(posWS.w);
        // Half-Lambert: N.L remapped from [-1, 1] to [0, 1]; the scalar is broadcast to rgb.
        float3 color = dot(float3(1,0,0),nDirWS)*0.5+0.5;
        return half4(color, 1.0);
    }

这里贴个八面体映射的原理，想了解的可以去看看。

6.光照

有了法线我们终于可以计算光照，首先是如何区分平行光和其他光。

URP是用一个宏来区分的：

// The _DIRECTIONAL keyword separates directional lights from every other light type.
#if defined(_DIRECTIONAL)
// Directional-light code goes here.
#else
// Code for all other light types goes here.
#endif

if后面为平行光照，else后面为其他光源

先说平行光照。平行光照主要是需要它的方向、颜色（强度被包含在了颜色里），没什么好说的，用的话直接用下面的方法拿就行

// Fetch the data of the light that is currently being shaded.
#if defined(_DIRECTIONAL)
// Each directional light (main or additional) is drawn by its own full-screen pass, and URP
// uploads that light's direction and color to _LightDirection / _LightColor before the draw.
// Reading the _MainLight* globals here would shade every additional directional light with
// the main light's direction and color.
float3 lDirWS = normalize(_LightDirection.xyz);
float3 lightColor = _LightColor.rgb; // intensity is already folded into the color
#else
// Point / spot lights are handled in this branch.
#endif

然后是其他光源，主要是点光源和聚光灯。我这里完全没考虑聚光灯，只写了点光源相关，所以如果用了聚光灯的话结果可能会有些奇怪

对于点光源考虑的就多了：点光源的位置，光照方向，光照的衰减，颜色（强度也是被包含在颜色里了）

位置和颜色可以直接拿到：

        float3 lightWS = _LightPosWS;// world-space position of the point light
        float3 lightColor = _LightColor.rgb;// light color (intensity is included in the color)

光照方向的话就是点光源位置减去模型的顶点位置（记得归一化）

        float3 lightWS = _LightPosWS;// world-space position of the point light
        float3 lightColor = _LightColor.rgb;// light color (intensity is included in the color)
        // Light direction: vector from the shaded surface position to the light, normalized
        float3 lightVector = lightWS - posWS.xyz;
        float3 lDirWS = normalize(lightVector);

光照衰减：这里直接照搬unity的光照衰减。

原理的话可以看这篇文章：Unity中URP下额外灯的距离衰减_distanceattenuation-CSDN博客

// Distance attenuation (spot lights are ignored for now)
        // Squared distance to the light, clamped to HALF_MIN so it is never zero
        float distanceSqr = max(dot(lightVector, lightVector), HALF_MIN);
        float4 attenuation = _LightAttenuation;
        // Inverse-square falloff: 1 / d^2
        float lightAtten = rcp(distanceSqr);
        // Only the .x component is read in the lines shown here.
        // NOTE(review): presumably .x encodes the light range (e.g. 1 / range^2) as in URP's
        // own distance attenuation; confirm against the URP source.
        float2 distanceAttenuationFloat = float2(attenuation.xy);
        half factor = half(distanceSqr * distanceAttenuationFloat.x);
        // Smooth window: saturate(1 - factor^2), squared, multiplied into the falloff below
        half smoothFactor = saturate(half(1.0) - factor * factor);
        smoothFactor = smoothFactor * smoothFactor;
        float distanceAttenuation = lightAtten * smoothFactor;