blob: 649883777f62a6ff9e08c7154ea8204138f05afa [file] [log] [blame]
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: AMD
*
*/
#include "../display_mode_lib.h"
#include "display_mode_vba_20.h"
#include "../dml_inline_defs.h"
/*
* NOTE:
* This file is gcc-parseable HW gospel, coming straight from HW engineers.
*
* It doesn't adhere to Linux kernel style and sometimes will do things in odd
* ways. Unless there is something clearly wrong with it the code should
* remain as-is as it provides us with a guarantee from HW that it is correct.
*/
#define BPP_INVALID 0
#define BPP_BLENDED_PIPE 0xffffffff
static double adjust_ReturnBW(
struct display_mode_lib *mode_lib,
double ReturnBW,
bool DCCEnabledAnyPlane,
double ReturnBandwidthToDCN);
static unsigned int dscceComputeDelay(
unsigned int bpc,
double bpp,
unsigned int sliceWidth,
unsigned int numSlices,
enum output_format_class pixelFormat);
static unsigned int dscComputeDelay(enum output_format_class pixelFormat);
// Super monster function with more than 60 arguments
static bool CalculatePrefetchSchedule(
struct display_mode_lib *mode_lib,
double DPPCLK,
double DISPCLK,
double PixelClock,
double DCFCLKDeepSleep,
unsigned int DSCDelay,
unsigned int DPPPerPlane,
bool ScalerEnabled,
unsigned int NumberOfCursors,
double DPPCLKDelaySubtotal,
double DPPCLKDelaySCL,
double DPPCLKDelaySCLLBOnly,
double DPPCLKDelayCNVCFormater,
double DPPCLKDelayCNVCCursor,
double DISPCLKDelaySubtotal,
unsigned int ScalerRecoutWidth,
enum output_format_class OutputFormat,
unsigned int VBlank,
unsigned int HTotal,
unsigned int MaxInterDCNTileRepeaters,
unsigned int VStartup,
unsigned int PageTableLevels,
bool GPUVMEnable,
bool DynamicMetadataEnable,
unsigned int DynamicMetadataLinesBeforeActiveRequired,
unsigned int DynamicMetadataTransmittedBytes,
bool DCCEnable,
double UrgentLatencyPixelDataOnly,
double UrgentExtraLatency,
double TCalc,
unsigned int PDEAndMetaPTEBytesFrame,
unsigned int MetaRowByte,
unsigned int PixelPTEBytesPerRow,
double PrefetchSourceLinesY,
unsigned int SwathWidthY,
double BytePerPixelDETY,
double VInitPreFillY,
unsigned int MaxNumSwathY,
double PrefetchSourceLinesC,
double BytePerPixelDETC,
double VInitPreFillC,
unsigned int MaxNumSwathC,
unsigned int SwathHeightY,
unsigned int SwathHeightC,
double TWait,
bool XFCEnabled,
double XFCRemoteSurfaceFlipDelay,
bool InterlaceEnable,
bool ProgressiveToInterlaceUnitInOPP,
double *DSTXAfterScaler,
double *DSTYAfterScaler,
double *DestinationLinesForPrefetch,
double *PrefetchBandwidth,
double *DestinationLinesToRequestVMInVBlank,
double *DestinationLinesToRequestRowInVBlank,
double *VRatioPrefetchY,
double *VRatioPrefetchC,
double *RequiredPrefetchPixDataBW,
unsigned int *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,
double *Tno_bw,
unsigned int *VUpdateOffsetPix,
double *VUpdateWidthPix,
double *VReadyOffsetPix);
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
static double CalculatePrefetchSourceLines(
struct display_mode_lib *mode_lib,
double VRatio,
double vtaps,
bool Interlace,
bool ProgressiveToInterlaceUnitInOPP,
unsigned int SwathHeight,
unsigned int ViewportYStart,
double *VInitPreFill,
unsigned int *MaxNumSwath);
static unsigned int CalculateVMAndRowBytes(
struct display_mode_lib *mode_lib,
bool DCCEnable,
unsigned int BlockHeight256Bytes,
unsigned int BlockWidth256Bytes,
enum source_format_class SourcePixelFormat,
unsigned int SurfaceTiling,
unsigned int BytePerPixel,
enum scan_direction_class ScanDirection,
unsigned int ViewportWidth,
unsigned int ViewportHeight,
unsigned int SwathWidthY,
bool GPUVMEnable,
unsigned int VMMPageSize,
unsigned int PTEBufferSizeInRequestsLuma,
unsigned int PDEProcessingBufIn64KBReqs,
unsigned int Pitch,
unsigned int DCCMetaPitch,
unsigned int *MacroTileWidth,
unsigned int *MetaRowByte,
unsigned int *PixelPTEBytesPerRow,
bool *PTEBufferSizeNotExceeded,
unsigned int *dpte_row_height,
unsigned int *meta_row_height);
static double CalculateTWait(
unsigned int PrefetchMode,
double DRAMClockChangeLatency,
double UrgentLatencyPixelDataOnly,
double SREnterPlusExitTime);
static double CalculateRemoteSurfaceFlipDelay(
struct display_mode_lib *mode_lib,
double VRatio,
double SwathWidth,
double Bpp,
double LineTime,
double XFCTSlvVupdateOffset,
double XFCTSlvVupdateWidth,
double XFCTSlvVreadyOffset,
double XFCXBUFLatencyTolerance,
double XFCFillBWOverhead,
double XFCSlvChunkSize,
double XFCBusTransportTime,
double TCalc,
double TWait,
double *SrcActiveDrainRate,
double *TInitXFill,
double *TslvChk);
static void CalculateActiveRowBandwidth(
bool GPUVMEnable,
enum source_format_class SourcePixelFormat,
double VRatio,
bool DCCEnable,
double LineTime,
unsigned int MetaRowByteLuma,
unsigned int MetaRowByteChroma,
unsigned int meta_row_height_luma,
unsigned int meta_row_height_chroma,
unsigned int PixelPTEBytesPerRowLuma,
unsigned int PixelPTEBytesPerRowChroma,
unsigned int dpte_row_height_luma,
unsigned int dpte_row_height_chroma,
double *meta_row_bw,
double *dpte_row_bw,
double *qual_row_bw);
static void CalculateFlipSchedule(
struct display_mode_lib *mode_lib,
double UrgentExtraLatency,
double UrgentLatencyPixelDataOnly,
unsigned int GPUVMMaxPageTableLevels,
bool GPUVMEnable,
double BandwidthAvailableForImmediateFlip,
unsigned int TotImmediateFlipBytes,
enum source_format_class SourcePixelFormat,
unsigned int ImmediateFlipBytes,
double LineTime,
double VRatio,
double Tno_bw,
double PDEAndMetaPTEBytesFrame,
unsigned int MetaRowByte,
unsigned int PixelPTEBytesPerRow,
bool DCCEnable,
unsigned int dpte_row_height,
unsigned int meta_row_height,
double qual_row_bw,
double *DestinationLinesToRequestVMInImmediateFlip,
double *DestinationLinesToRequestRowInImmediateFlip,
double *final_flip_bw,
bool *ImmediateFlipSupportedForPipe);
static double CalculateWriteBackDelay(
enum source_format_class WritebackPixelFormat,
double WritebackHRatio,
double WritebackVRatio,
unsigned int WritebackLumaHTaps,
unsigned int WritebackLumaVTaps,
unsigned int WritebackChromaHTaps,
unsigned int WritebackChromaVTaps,
unsigned int WritebackDestinationWidth);
static void dml20_DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
static void dml20_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
struct display_mode_lib *mode_lib);
/*
 * Re-run the DML 2.0 calculations for the inputs currently held in mode_lib.
 *
 * The steps are executed in a fixed order:
 *   1. ModeSupportAndSystemConfiguration()
 *   2. FabricAndDRAMBandwidth is set to the smaller of the DRAM-side product
 *      and the fabric-side product, divided by 1000
 *   3. PixelClockAdjustmentForProgressiveToInterlaceUnit()
 *   4. dml20_DisplayPipeConfiguration()
 *   5. the DISPCLK/DPPCLK/DCFCLK deep sleep, prefetch, watermark and
 *      performance calculation
 */
void dml20_recalculate(struct display_mode_lib *mode_lib)
{
	double dram_limit;
	double fabric_limit;

	ModeSupportAndSystemConfiguration(mode_lib);

	/* DRAM side: speed * channel count * channel width */
	dram_limit = mode_lib->vba.DRAMSpeed * mode_lib->vba.NumberOfChannels
			* mode_lib->vba.DRAMChannelWidth;
	/* Fabric side: fabric clock * datapath-to-DCN return width */
	fabric_limit = mode_lib->vba.FabricClock
			* mode_lib->vba.FabricDatapathToDCNDataReturn;
	mode_lib->vba.FabricAndDRAMBandwidth = dml_min(dram_limit, fabric_limit) / 1000.0;

	PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
	dml20_DisplayPipeConfiguration(mode_lib);
	dml20_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
}
/*
 * Apply the two DCC-related limits to a candidate return bandwidth.
 *
 * mode_lib             - supplies vba.DCFCLK, vba.ReturnBusWidth,
 *                        vba.UrgentLatencyPixelDataOnly,
 *                        vba.ROBBufferSizeInKByte and vba.PixelChunkSizeInKByte
 * ReturnBW             - candidate return bandwidth to be limited
 * DCCEnabledAnyPlane   - neither limit is applied unless this is true
 * ReturnBandwidthToDCN - return bandwidth to DCN used by both limits
 *
 * Returns ReturnBW after it has been passed through dml_min() against each
 * limit whose condition holds; returns it unchanged otherwise.
 */
static double adjust_ReturnBW(
		struct display_mode_lib *mode_lib,
		double ReturnBW,
		bool DCCEnabledAnyPlane,
		double ReturnBandwidthToDCN)
{
	const double UrgentLatency = mode_lib->vba.UrgentLatencyPixelDataOnly;
	/* ROB space left after reserving one pixel chunk, still in KBytes */
	const double ROBMinusChunkKBytes = mode_lib->vba.ROBBufferSizeInKByte
			- mode_lib->vba.PixelChunkSizeInKByte;
	const double QuarterBusBandwidth = mode_lib->vba.DCFCLK
			* mode_lib->vba.ReturnBusWidth / 4.0;
	double CriticalCompression;

	/*
	 * First limit. The guard guarantees that
	 * (ReturnBandwidthToDCN - QuarterBusBandwidth) is strictly positive, so
	 * it is the divisor of the ROB byte count. The previous form divided the
	 * ROB bytes by ReturnBandwidthToDCN alone, subtracted a bandwidth from
	 * the resulting time and added the urgent latency outside the divisor,
	 * which mixed units and let the divisor go to zero or negative.
	 */
	if (DCCEnabledAnyPlane && ReturnBandwidthToDCN > QuarterBusBandwidth)
		ReturnBW = dml_min(
				ReturnBW,
				ReturnBandwidthToDCN * 4
					* (1.0
						- UrgentLatency
							/ (ROBMinusChunkKBytes * 1024
								/ (ReturnBandwidthToDCN
									- QuarterBusBandwidth)
								+ UrgentLatency)));

	CriticalCompression = 2.0 * mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLK
			* UrgentLatency
			/ (ReturnBandwidthToDCN * UrgentLatency
				+ ROBMinusChunkKBytes * 1024);

	/* Second limit, only applied for 1 < CriticalCompression < 4. */
	if (DCCEnabledAnyPlane && CriticalCompression > 1.0 && CriticalCompression < 4.0)
		ReturnBW = dml_min(
				ReturnBW,
				4.0 * ReturnBandwidthToDCN
					* ROBMinusChunkKBytes
					* 1024
					* mode_lib->vba.ReturnBusWidth
					* mode_lib->vba.DCFCLK
					* UrgentLatency
					/ dml_pow(
						(ReturnBandwidthToDCN * UrgentLatency
							+ ROBMinusChunkKBytes * 1024),
						2));

	return ReturnBW;
}
/*
 * Compute the DSC encoder (dscce) delay, returned in pixels.
 *
 * bpc         - source bits per component, valid set {8, 10, 12}
 * bpp         - bits per pixel in increments of 1/16 of a bit;
 *               min = 6/7/8 in N420/N422/444 respectively,
 *               max = such that compression is 1:1
 * sliceWidth  - number of pixels per slice line, must be less than or equal
 *               to 5184/numSlices (or 4096/numSlices in 420 mode)
 * numSlices   - number of slices in the horizontal direction per DSC engine,
 *               valid set {1, 2, 3, 4}
 * pixelFormat - pixel/color format in the set {N444_RGB, S422, N422, N420}
 */
static unsigned int dscceComputeDelay(
		unsigned int bpc,
		double bpp,
		unsigned int sliceWidth,
		unsigned int numSlices,
		enum output_format_class pixelFormat)
{
	/* fixed value */
	const unsigned int rc_model_size = 8192;
	/* N422/N420 operate at 2 pixels per clock, all other modes at 1 */
	const unsigned int pix_per_clk =
			(pixelFormat == dm_n422 || pixelFormat == dm_420) ? 2 : 1;
	/* 422 mode has an additional cycle of delay */
	const unsigned int extra_422_cycle = (pixelFormat == dm_s422) ? 1 : 0;
	unsigned int xmit_delay, ssm_delay, slice_w;
	unsigned int init_x, groups, padding, full_lines, acc, acc_x, lines, stall;
	unsigned int cycles;

	/* initial transmit delay as per PPS */
	xmit_delay = dml_round(rc_model_size / 2.0 / bpp / pix_per_clk);

	/* ssm delay depends on the component depth */
	switch (bpc) {
	case 8:
		ssm_delay = 81;
		break;
	case 10:
		ssm_delay = 89;
		break;
	default:
		ssm_delay = 113;
		break;
	}

	/* divide by pixels per cycle to get the slice width as seen by DSC */
	slice_w = sliceWidth / pix_per_clk;

	/* main calculation for the dscce */
	init_x = xmit_delay + 45;
	groups = (slice_w + 2) / 3;
	padding = 3 * groups - slice_w;
	full_lines = init_x / slice_w;
	acc = init_x + padding * full_lines;
	acc_x = (acc + 2) / 3 + ssm_delay + 6 + 1;
	lines = (acc_x + groups - 1) / groups;
	stall = ((init_x % slice_w) == 0 && padding != 0) ? 1 : 0;

	cycles = lines * groups * (numSlices - 1) + acc_x + extra_422_cycle + stall + 22;

	/*
	 * dsc processes 3 pixel containers per cycle and a container can
	 * contain 1 or 2 pixels
	 */
	return cycles * 3 * pix_per_clk;
}
/*
 * Sum the fixed per-stage DSC latencies for the given output format.
 * dm_420 and dm_n422 have their own stage tables; every other format uses
 * the default table.
 */
static unsigned int dscComputeDelay(enum output_format_class pixelFormat)
{
	static const unsigned int stages_420[] = {
		2,	/* sfr */
		0,	/* dsccif */
		3,	/* dscc - input deserializer */
		2,	/* dscc gets pixels every other cycle */
		12,	/* dscc - input cdc fifo */
		13,	/* dscc gets pixels every other cycle */
		2,	/* dscc - cdc uncertainty */
		7,	/* dscc - output cdc fifo */
		3,	/* dscc gets pixels every other cycle */
		2,	/* dscc - cdc uncertainty */
		1,	/* dscc - output serializer */
		1,	/* sft */
	};
	static const unsigned int stages_n422[] = {
		2,	/* sfr */
		1,	/* dsccif */
		5,	/* dscc - input deserializer */
		25,	/* dscc - input cdc fifo */
		2,	/* dscc - cdc uncertainty */
		10,	/* dscc - output cdc fifo */
		2,	/* dscc - cdc uncertainty */
		1,	/* dscc - output serializer */
		1,	/* sft */
	};
	static const unsigned int stages_default[] = {
		2,	/* sfr */
		0,	/* dsccif */
		3,	/* dscc - input deserializer */
		12,	/* dscc - input cdc fifo */
		2,	/* dscc - cdc uncertainty */
		7,	/* dscc - output cdc fifo */
		1,	/* dscc - output serializer */
		2,	/* dscc - cdc uncertainty */
		1,	/* sft */
	};
	const unsigned int *stages;
	unsigned int stage_count;
	unsigned int total = 0;
	unsigned int i;

	if (pixelFormat == dm_420) {
		stages = stages_420;
		stage_count = sizeof(stages_420) / sizeof(stages_420[0]);
	} else if (pixelFormat == dm_n422) {
		stages = stages_n422;
		stage_count = sizeof(stages_n422) / sizeof(stages_n422[0]);
	} else {
		stages = stages_default;
		stage_count = sizeof(stages_default) / sizeof(stages_default[0]);
	}

	for (i = 0; i < stage_count; i++)
		total += stages[i];

	return total;
}
/*
 * Compute the prefetch schedule for one plane.
 *
 * Inputs are passed by value; results are written through the pointer
 * parameters at the end of the argument list. mode_lib is not referenced in
 * this function body.
 *
 * Return value: true means "error" (MyError), false means a schedule was found.
 *  - If DPPCLK or DISPCLK is 0 the function returns true immediately, before
 *    any output parameter has been written.
 *  - Otherwise, when an error is detected, *PrefetchBandwidth,
 *    *DestinationLinesToRequestVMInVBlank,
 *    *DestinationLinesToRequestRowInVBlank, *DestinationLinesForPrefetch,
 *    *VRatioPrefetchY, *VRatioPrefetchC and *RequiredPrefetchPixDataBW are
 *    all reset to 0 before returning. *DSTXAfterScaler, *DSTYAfterScaler,
 *    *VUpdateOffsetPix, *VUpdateWidthPix, *VReadyOffsetPix and *Tno_bw keep
 *    the values computed earlier.
 *  - *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata is only written
 *    when DynamicMetadataEnable is true.
 */
static bool CalculatePrefetchSchedule(
struct display_mode_lib *mode_lib,
double DPPCLK,
double DISPCLK,
double PixelClock,
double DCFCLKDeepSleep,
unsigned int DSCDelay,
unsigned int DPPPerPlane,
bool ScalerEnabled,
unsigned int NumberOfCursors,
double DPPCLKDelaySubtotal,
double DPPCLKDelaySCL,
double DPPCLKDelaySCLLBOnly,
double DPPCLKDelayCNVCFormater,
double DPPCLKDelayCNVCCursor,
double DISPCLKDelaySubtotal,
unsigned int ScalerRecoutWidth,
enum output_format_class OutputFormat,
unsigned int VBlank,
unsigned int HTotal,
unsigned int MaxInterDCNTileRepeaters,
unsigned int VStartup,
unsigned int PageTableLevels,
bool GPUVMEnable,
bool DynamicMetadataEnable,
unsigned int DynamicMetadataLinesBeforeActiveRequired,
unsigned int DynamicMetadataTransmittedBytes,
bool DCCEnable,
double UrgentLatencyPixelDataOnly,
double UrgentExtraLatency,
double TCalc,
unsigned int PDEAndMetaPTEBytesFrame,
unsigned int MetaRowByte,
unsigned int PixelPTEBytesPerRow,
double PrefetchSourceLinesY,
unsigned int SwathWidthY,
double BytePerPixelDETY,
double VInitPreFillY,
unsigned int MaxNumSwathY,
double PrefetchSourceLinesC,
double BytePerPixelDETC,
double VInitPreFillC,
unsigned int MaxNumSwathC,
unsigned int SwathHeightY,
unsigned int SwathHeightC,
double TWait,
bool XFCEnabled,
double XFCRemoteSurfaceFlipDelay,
bool InterlaceEnable,
bool ProgressiveToInterlaceUnitInOPP,
double *DSTXAfterScaler,
double *DSTYAfterScaler,
double *DestinationLinesForPrefetch,
double *PrefetchBandwidth,
double *DestinationLinesToRequestVMInVBlank,
double *DestinationLinesToRequestRowInVBlank,
double *VRatioPrefetchY,
double *VRatioPrefetchC,
double *RequiredPrefetchPixDataBW,
unsigned int *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,
double *Tno_bw,
unsigned int *VUpdateOffsetPix,
double *VUpdateWidthPix,
double *VReadyOffsetPix)
{
bool MyError = false;
unsigned int DPPCycles, DISPCLKCycles;
double DSTTotalPixelsAfterScaler, TotalRepeaterDelayTime;
double Tdm, LineTime, Tsetup;
double dst_y_prefetch_equ;
double Tsw_oto;
double prefetch_bw_oto;
double Tvm_oto;
double Tr0_oto;
double Tpre_oto;
double dst_y_prefetch_oto;
double TimeForFetchingMetaPTE = 0;
double TimeForFetchingRowInVBlank = 0;
double LinesToRequestPrefetchPixelData = 0;
// Pipeline delay in DPPCLK cycles: the scaler term depends on whether the
// scaler is enabled; formatter and per-cursor delays are always added.
// Note the double sums are truncated into unsigned int DPPCycles.
if (ScalerEnabled)
DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCL;
else
DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCLLBOnly;
DPPCycles = DPPCycles + DPPCLKDelayCNVCFormater + NumberOfCursors * DPPCLKDelayCNVCCursor;
DISPCLKCycles = DISPCLKDelaySubtotal;
// Both clocks are used as divisors below; report an error instead of dividing by zero.
if (DPPCLK == 0.0 || DISPCLK == 0.0)
return true;
// Convert the cycle counts to pixel-clock units and add the DSC delay.
*DSTXAfterScaler = DPPCycles * PixelClock / DPPCLK + DISPCLKCycles * PixelClock / DISPCLK
+ DSCDelay;
if (DPPPerPlane > 1)
*DSTXAfterScaler = *DSTXAfterScaler + ScalerRecoutWidth;
if (OutputFormat == dm_420 || (InterlaceEnable && ProgressiveToInterlaceUnitInOPP))
*DSTYAfterScaler = 1;
else
*DSTYAfterScaler = 0;
// Fold the total pixel delay into whole lines (Y) and a remainder in pixels (X).
DSTTotalPixelsAfterScaler = ((double) (*DSTYAfterScaler * HTotal)) + *DSTXAfterScaler;
*DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / HTotal, 1);
*DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * HTotal));
*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2.0 / DPPCLK + 3.0 / DISPCLK);
*VUpdateWidthPix = (14.0 / DCFCLKDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime)
* PixelClock;
*VReadyOffsetPix = dml_max(
150.0 / DPPCLK,
TotalRepeaterDelayTime + 20.0 / DCFCLKDeepSleep + 10.0 / DPPCLK)
* PixelClock;
// Tsetup: the three VUpdate/VReady pixel quantities converted back to time.
Tsetup = (double) (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
LineTime = (double) HTotal / PixelClock;
if (DynamicMetadataEnable) {
double Tdmbf, Tdmec, Tdmsks;
Tdm = dml_max(0.0, UrgentExtraLatency - TCalc);
Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
Tdmec = LineTime;
// With no explicit requirement, half of VBlank is used.
if (DynamicMetadataLinesBeforeActiveRequired == 0)
Tdmsks = VBlank * LineTime / 2.0;
else
Tdmsks = DynamicMetadataLinesBeforeActiveRequired * LineTime;
if (InterlaceEnable && !ProgressiveToInterlaceUnitInOPP)
Tdmsks = Tdmsks / 2;
// Not enough time between VStartup and active: flag the error and report
// the VStartup (in lines, truncated to unsigned int) that would be needed.
if (VStartup * LineTime
< Tsetup + TWait + UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) {
MyError = true;
*VStartupRequiredWhenNotEnoughTimeForDynamicMetadata = (Tsetup + TWait
+ UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) / LineTime;
} else
*VStartupRequiredWhenNotEnoughTimeForDynamicMetadata = 0.0;
} else
Tdm = 0;
// *Tno_bw: with GPUVM it depends on the page table depth; without GPUVM it
// is a full line when DCC is on and a quarter line otherwise.
if (GPUVMEnable) {
if (PageTableLevels == 4)
*Tno_bw = UrgentExtraLatency + UrgentLatencyPixelDataOnly;
else if (PageTableLevels == 3)
*Tno_bw = UrgentExtraLatency;
else
*Tno_bw = 0;
} else if (DCCEnable)
*Tno_bw = LineTime;
else
*Tno_bw = LineTime / 4;
// Candidate 1 ("equ"): lines available between VStartup and active after
// subtracting calc/wait (or XFC flip delay), setup, dynamic metadata time
// and the after-scaler delay.
dst_y_prefetch_equ = VStartup - dml_max(TCalc + TWait, XFCRemoteSurfaceFlipDelay) / LineTime
- (Tsetup + Tdm) / LineTime
- (*DSTYAfterScaler + *DSTXAfterScaler / HTotal);
// Candidate 2 ("oto"): swath time is one line time per prefetch source line
// (NOTE(review): "oto" presumably stands for one-to-one - confirm).
Tsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
prefetch_bw_oto = (MetaRowByte + PixelPTEBytesPerRow
+ PrefetchSourceLinesY * SwathWidthY * dml_ceil(BytePerPixelDETY, 1)
+ PrefetchSourceLinesC * SwathWidthY / 2 * dml_ceil(BytePerPixelDETC, 2))
/ Tsw_oto;
if (GPUVMEnable == true) {
Tvm_oto =
dml_max(
*Tno_bw + PDEAndMetaPTEBytesFrame / prefetch_bw_oto,
dml_max(
UrgentExtraLatency
+ UrgentLatencyPixelDataOnly
* (PageTableLevels
- 1),
LineTime / 4.0));
} else
Tvm_oto = LineTime / 4.0;
if ((GPUVMEnable == true || DCCEnable == true)) {
Tr0_oto = dml_max(
(MetaRowByte + PixelPTEBytesPerRow) / prefetch_bw_oto,
dml_max(UrgentLatencyPixelDataOnly, dml_max(LineTime - Tvm_oto, LineTime / 4)));
} else
Tr0_oto = LineTime - Tvm_oto;
Tpre_oto = Tvm_oto + Tr0_oto + Tsw_oto;
dst_y_prefetch_oto = Tpre_oto / LineTime;
// Use the smaller of the two candidates.
if (dst_y_prefetch_oto < dst_y_prefetch_equ)
*DestinationLinesForPrefetch = dst_y_prefetch_oto;
else
*DestinationLinesForPrefetch = dst_y_prefetch_equ;
// Quantize to quarter lines: add 1/8 line, then floor to a multiple of 1/4.
*DestinationLinesForPrefetch = dml_floor(4.0 * (*DestinationLinesForPrefetch + 0.125), 1)
/ 4;
dml_print("DML: VStartup: %d\n", VStartup);
dml_print("DML: TCalc: %f\n", TCalc);
dml_print("DML: TWait: %f\n", TWait);
dml_print("DML: XFCRemoteSurfaceFlipDelay: %f\n", XFCRemoteSurfaceFlipDelay);
dml_print("DML: LineTime: %f\n", LineTime);
dml_print("DML: Tsetup: %f\n", Tsetup);
dml_print("DML: Tdm: %f\n", Tdm);
dml_print("DML: DSTYAfterScaler: %f\n", *DSTYAfterScaler);
dml_print("DML: DSTXAfterScaler: %f\n", *DSTXAfterScaler);
dml_print("DML: HTotal: %d\n", HTotal);
// Default the remaining outputs to 0; they are filled in below only when a
// usable schedule exists.
*PrefetchBandwidth = 0;
*DestinationLinesToRequestVMInVBlank = 0;
*DestinationLinesToRequestRowInVBlank = 0;
*VRatioPrefetchY = 0;
*VRatioPrefetchC = 0;
*RequiredPrefetchPixDataBW = 0;
// More than one destination line is required; anything less is an error.
if (*DestinationLinesForPrefetch > 1) {
// Bytes to prefetch (frame PDE/meta PTE, two rows of meta and PTE data,
// luma and chroma swaths) over the prefetch time minus *Tno_bw.
*PrefetchBandwidth = (PDEAndMetaPTEBytesFrame + 2 * MetaRowByte
+ 2 * PixelPTEBytesPerRow
+ PrefetchSourceLinesY * SwathWidthY * dml_ceil(BytePerPixelDETY, 1)
+ PrefetchSourceLinesC * SwathWidthY / 2
* dml_ceil(BytePerPixelDETC, 2))
/ (*DestinationLinesForPrefetch * LineTime - *Tno_bw);
if (GPUVMEnable) {
TimeForFetchingMetaPTE =
dml_max(
*Tno_bw
+ (double) PDEAndMetaPTEBytesFrame
/ *PrefetchBandwidth,
dml_max(
UrgentExtraLatency
+ UrgentLatencyPixelDataOnly
* (PageTableLevels
- 1),
LineTime / 4));
} else {
if (NumberOfCursors > 0 || XFCEnabled)
TimeForFetchingMetaPTE = LineTime / 4;
else
TimeForFetchingMetaPTE = 0.0;
}
if ((GPUVMEnable == true || DCCEnable == true)) {
TimeForFetchingRowInVBlank =
dml_max(
(MetaRowByte + PixelPTEBytesPerRow)
/ *PrefetchBandwidth,
dml_max(
UrgentLatencyPixelDataOnly,
dml_max(
LineTime
- TimeForFetchingMetaPTE,
LineTime
/ 4.0)));
} else {
if (NumberOfCursors > 0 || XFCEnabled)
TimeForFetchingRowInVBlank = LineTime - TimeForFetchingMetaPTE;
else
TimeForFetchingRowInVBlank = 0.0;
}
// Both VBlank request times are converted to lines with the same
// quarter-line quantization used above.
*DestinationLinesToRequestVMInVBlank = dml_floor(
4.0 * (TimeForFetchingMetaPTE / LineTime + 0.125),
1) / 4.0;
*DestinationLinesToRequestRowInVBlank = dml_floor(
4.0 * (TimeForFetchingRowInVBlank / LineTime + 0.125),
1) / 4.0;
// Lines left for pixel data: the VM/row lines are subtracted only when
// cursors, GPUVM or DCC are in use.
LinesToRequestPrefetchPixelData =
*DestinationLinesForPrefetch
- ((NumberOfCursors > 0 || GPUVMEnable
|| DCCEnable) ?
(*DestinationLinesToRequestVMInVBlank
+ *DestinationLinesToRequestRowInVBlank) :
0.0);
if (LinesToRequestPrefetchPixelData > 0) {
// Prefetch vertical ratios are never allowed below 1.0.
*VRatioPrefetchY = (double) PrefetchSourceLinesY
/ LinesToRequestPrefetchPixelData;
*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
*VRatioPrefetchY =
dml_max(
(double) PrefetchSourceLinesY
/ LinesToRequestPrefetchPixelData,
(double) MaxNumSwathY
* SwathHeightY
/ (LinesToRequestPrefetchPixelData
- (VInitPreFillY
- 3.0)
/ 2.0));
*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
} else {
MyError = true;
*VRatioPrefetchY = 0;
}
}
*VRatioPrefetchC = (double) PrefetchSourceLinesC
/ LinesToRequestPrefetchPixelData;
*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
// Unlike the luma check above, the chroma check does not test VInitPreFillC > 3.
if ((SwathHeightC > 4)) {
if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
*VRatioPrefetchC =
dml_max(
*VRatioPrefetchC,
(double) MaxNumSwathC
* SwathHeightC
/ (LinesToRequestPrefetchPixelData
- (VInitPreFillC
- 3.0)
/ 2.0));
*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
} else {
MyError = true;
*VRatioPrefetchC = 0;
}
}
*RequiredPrefetchPixDataBW =
DPPPerPlane
* ((double) PrefetchSourceLinesY
/ LinesToRequestPrefetchPixelData
* dml_ceil(
BytePerPixelDETY,
1)
+ (double) PrefetchSourceLinesC
/ LinesToRequestPrefetchPixelData
* dml_ceil(
BytePerPixelDETC,
2)
/ 2)
* SwathWidthY / LineTime;
} else {
MyError = true;
*VRatioPrefetchY = 0;
*VRatioPrefetchC = 0;
*RequiredPrefetchPixDataBW = 0;
}
} else {
MyError = true;
}
// On any error, clear every schedule output so callers never see a
// partially computed schedule.
if (MyError) {
*PrefetchBandwidth = 0;
TimeForFetchingMetaPTE = 0;
TimeForFetchingRowInVBlank = 0;
*DestinationLinesToRequestVMInVBlank = 0;
*DestinationLinesToRequestRowInVBlank = 0;
*DestinationLinesForPrefetch = 0;
LinesToRequestPrefetchPixelData = 0;
*VRatioPrefetchY = 0;
*VRatioPrefetchC = 0;
*RequiredPrefetchPixDataBW = 0;
}
return MyError;
}
/*
 * Return 4 * VCOSpeed divided by an integer divider, where the divider is
 * (4 * VCOSpeed / Clock) passed through dml_floor(). A smaller divider gives
 * a result that is not below Clock.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	const double vco_x4 = VCOSpeed * 4;
	const double divider = dml_floor(vco_x4 / Clock, 1);

	return vco_x4 / divider;
}
/*
 * Return 4 * VCOSpeed divided by an integer divider, where the divider is
 * (4 * VCOSpeed / Clock) passed through dml_ceil(). A larger divider gives
 * a result that is not above Clock.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	const double vco_x4 = VCOSpeed * 4;
	const double divider = dml_ceil(vco_x4 / Clock, 1);

	return vco_x4 / divider;
}
/*
 * Compute the number of source lines to prefetch for one plane.
 *
 * mode_lib       - only vba.IgnoreViewportPositioning is read
 * VRatio, vtaps  - vertical scale ratio and vertical tap count
 * Interlace      - adds 0.5 * VRatio to the prefill sum unless
 *                  ProgressiveToInterlaceUnitInOPP is set
 * SwathHeight    - swath height used as divisor and modulus
 * ViewportYStart - only used to emit a warning when viewport positioning is
 *                  ignored but the start is non-zero
 * VInitPreFill   - out: floor of half the prefill sum
 * MaxNumSwath    - out: number of full swaths
 *
 * Returns *MaxNumSwath * SwathHeight plus the partial swath line count.
 */
static double CalculatePrefetchSourceLines(
		struct display_mode_lib *mode_lib,
		double VRatio,
		double vtaps,
		bool Interlace,
		bool ProgressiveToInterlaceUnitInOPP,
		unsigned int SwathHeight,
		unsigned int ViewportYStart,
		double *VInitPreFill,
		unsigned int *MaxNumSwath)
{
	unsigned int partial_swath;
	double prefill_sum = VRatio + vtaps + 1;

	/* The interlace term is skipped when the OPP does the P2I conversion. */
	if (!ProgressiveToInterlaceUnitInOPP)
		prefill_sum = prefill_sum + Interlace * 0.5 * VRatio;
	*VInitPreFill = dml_floor(prefill_sum / 2.0, 1);

	if (mode_lib->vba.IgnoreViewportPositioning) {
		if (ViewportYStart != 0)
			dml_print(
					"WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");

		*MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
		partial_swath = (*VInitPreFill > 1.0) ?
				(unsigned int) (*VInitPreFill - 1) % SwathHeight :
				(unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
	} else {
		*MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
		partial_swath = (*VInitPreFill > 1.0) ?
				(unsigned int) (*VInitPreFill - 2) % SwathHeight :
				(unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
		/* At least one partial line is always counted on this path. */
		partial_swath = dml_max(1U, partial_swath);
	}

	return *MaxNumSwath * SwathHeight + partial_swath;
}
/*
 * Compute DCC meta-row and page-table request sizes for one surface.
 *
 * Inputs of note:
 *   mode_lib          - only vba.GPUVMMaxPageTableLevels is read
 *   SourcePixelFormat - dm_420_8 / dm_420_10 halve the PDE processing buffer
 *   SurfaceTiling     - selects macro tile size (256B, 4KB, 64KB, else 256KB)
 *   ScanDirection     - dm_horz vs. anything else selects which dimension
 *                       the row calculations run along
 *
 * Outputs:
 *   *MacroTileWidth           - always written
 *   *MetaRowByte              - always written (0 when DCC is disabled)
 *   *meta_row_height          - written only when DCCEnable is true
 *   *PixelPTEBytesPerRow      - always written (0 when GPUVM is disabled)
 *   *dpte_row_height          - written only when GPUVMEnable is true
 *   *PTEBufferSizeNotExceeded - always written (true when GPUVM is disabled)
 *
 * Returns the per-frame byte count for PDEs plus meta PTEs
 * (MetaPTEBytesFrame + MPDEBytesFrame + DPDE0BytesFrame + ExtraDPDEBytesFrame).
 */
static unsigned int CalculateVMAndRowBytes(
		struct display_mode_lib *mode_lib,
		bool DCCEnable,
		unsigned int BlockHeight256Bytes,
		unsigned int BlockWidth256Bytes,
		enum source_format_class SourcePixelFormat,
		unsigned int SurfaceTiling,
		unsigned int BytePerPixel,
		enum scan_direction_class ScanDirection,
		unsigned int ViewportWidth,
		unsigned int ViewportHeight,
		unsigned int SwathWidth,
		bool GPUVMEnable,
		unsigned int VMMPageSize,
		unsigned int PTEBufferSizeInRequestsLuma,
		unsigned int PDEProcessingBufIn64KBReqs,
		unsigned int Pitch,
		unsigned int DCCMetaPitch,
		unsigned int *MacroTileWidth,
		unsigned int *MetaRowByte,
		unsigned int *PixelPTEBytesPerRow,
		bool *PTEBufferSizeNotExceeded,
		unsigned int *dpte_row_height,
		unsigned int *meta_row_height)
{
	unsigned int MetaRequestHeight;
	unsigned int MetaRequestWidth;
	unsigned int MetaSurfWidth;
	unsigned int MetaSurfHeight;
	unsigned int MPDEBytesFrame;
	unsigned int MetaPTEBytesFrame;
	unsigned int DCCMetaSurfaceBytes;
	unsigned int MacroTileSizeBytes;
	unsigned int MacroTileHeight;
	unsigned int DPDE0BytesFrame;
	unsigned int ExtraDPDEBytesFrame;
	unsigned int PDEAndMetaPTEBytesFrame;

	/* --- DCC metadata: row bytes and per-frame meta PTE/PDE bytes --- */
	if (DCCEnable == true) {
		MetaRequestHeight = 8 * BlockHeight256Bytes;
		MetaRequestWidth = 8 * BlockWidth256Bytes;
		if (ScanDirection == dm_horz) {
			*meta_row_height = MetaRequestHeight;
			MetaSurfWidth = dml_ceil((double) SwathWidth - 1, MetaRequestWidth)
					+ MetaRequestWidth;
			*MetaRowByte = MetaSurfWidth * MetaRequestHeight * BytePerPixel / 256.0;
		} else {
			*meta_row_height = MetaRequestWidth;
			MetaSurfHeight = dml_ceil((double) SwathWidth - 1, MetaRequestHeight)
					+ MetaRequestHeight;
			*MetaRowByte = MetaSurfHeight * MetaRequestWidth * BytePerPixel / 256.0;
		}
		/*
		 * NOTE(review): both scan directions size the meta surface from
		 * ViewportHeight; kept exactly as provided.
		 */
		if (ScanDirection == dm_horz) {
			DCCMetaSurfaceBytes = DCCMetaPitch
					* (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes)
							+ 64 * BlockHeight256Bytes) * BytePerPixel
					/ 256;
		} else {
			DCCMetaSurfaceBytes = DCCMetaPitch
					* (dml_ceil(
							(double) ViewportHeight - 1,
							64 * BlockHeight256Bytes)
							+ 64 * BlockHeight256Bytes) * BytePerPixel
					/ 256;
		}
		if (GPUVMEnable == true) {
			/*
			 * Convert to double before subtracting: both operands are
			 * unsigned, so a meta surface smaller than one page would
			 * otherwise wrap around to a value near UINT_MAX and yield
			 * an enormous PTE byte count.
			 */
			MetaPTEBytesFrame = (dml_ceil(
					((double) DCCMetaSurfaceBytes - VMMPageSize)
							/ (8 * VMMPageSize),
					1) + 1) * 64;
			MPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 1);
		} else {
			MetaPTEBytesFrame = 0;
			MPDEBytesFrame = 0;
		}
	} else {
		MetaPTEBytesFrame = 0;
		MPDEBytesFrame = 0;
		*MetaRowByte = 0;
	}

	/* --- Macro tile geometry from the tiling mode --- */
	if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_lvp) {
		MacroTileSizeBytes = 256;
		MacroTileHeight = BlockHeight256Bytes;
	} else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x
			|| SurfaceTiling == dm_sw_4kb_d || SurfaceTiling == dm_sw_4kb_d_x) {
		MacroTileSizeBytes = 4096;
		MacroTileHeight = 4 * BlockHeight256Bytes;
	} else if (SurfaceTiling == dm_sw_64kb_s || SurfaceTiling == dm_sw_64kb_s_t
			|| SurfaceTiling == dm_sw_64kb_s_x || SurfaceTiling == dm_sw_64kb_d
			|| SurfaceTiling == dm_sw_64kb_d_t || SurfaceTiling == dm_sw_64kb_d_x
			|| SurfaceTiling == dm_sw_64kb_r_x) {
		MacroTileSizeBytes = 65536;
		MacroTileHeight = 16 * BlockHeight256Bytes;
	} else {
		MacroTileSizeBytes = 262144;
		MacroTileHeight = 32 * BlockHeight256Bytes;
	}
	*MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;

	/* --- Per-frame data PDE bytes (only with more than one page table level) --- */
	if (GPUVMEnable == true && mode_lib->vba.GPUVMMaxPageTableLevels > 1) {
		if (ScanDirection == dm_horz) {
			DPDE0BytesFrame =
					64
							* (dml_ceil(
									((Pitch
											* (dml_ceil(
													ViewportHeight
															- 1,
													MacroTileHeight)
													+ MacroTileHeight)
											* BytePerPixel)
											- MacroTileSizeBytes)
											/ (8
													* 2097152),
									1) + 1);
		} else {
			DPDE0BytesFrame =
					64
							* (dml_ceil(
									((Pitch
											* (dml_ceil(
													(double) SwathWidth
															- 1,
													MacroTileHeight)
													+ MacroTileHeight)
											* BytePerPixel)
											- MacroTileSizeBytes)
											/ (8
													* 2097152),
									1) + 1);
		}
		ExtraDPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 2);
	} else {
		DPDE0BytesFrame = 0;
		ExtraDPDEBytesFrame = 0;
	}

	PDEAndMetaPTEBytesFrame = MetaPTEBytesFrame + MPDEBytesFrame + DPDE0BytesFrame
			+ ExtraDPDEBytesFrame;

	/* --- Pixel PTE bytes per row and PTE buffer check --- */
	if (GPUVMEnable == true) {
		unsigned int PTERequestSize;
		unsigned int PixelPTEReqHeight;
		unsigned int PixelPTEReqWidth;
		double FractionOfPTEReturnDrop;
		unsigned int EffectivePDEProcessingBufIn64KBReqs;

		if (SurfaceTiling == dm_sw_linear) {
			PixelPTEReqHeight = 1;
			PixelPTEReqWidth = 8.0 * VMMPageSize / BytePerPixel;
			PTERequestSize = 64;
			FractionOfPTEReturnDrop = 0;
		} else if (MacroTileSizeBytes == 4096) {
			PixelPTEReqHeight = MacroTileHeight;
			PixelPTEReqWidth = 8 * *MacroTileWidth;
			PTERequestSize = 64;
			if (ScanDirection == dm_horz)
				FractionOfPTEReturnDrop = 0;
			else
				/*
				 * Must be a floating-point division: the integer
				 * expression 7 / 8 evaluates to 0, which silently
				 * disabled the drop fraction.
				 */
				FractionOfPTEReturnDrop = 7.0 / 8;
		} else if (VMMPageSize == 4096 && MacroTileSizeBytes > 4096) {
			PixelPTEReqHeight = 16 * BlockHeight256Bytes;
			PixelPTEReqWidth = 16 * BlockWidth256Bytes;
			PTERequestSize = 128;
			FractionOfPTEReturnDrop = 0;
		} else {
			PixelPTEReqHeight = MacroTileHeight;
			PixelPTEReqWidth = 8 * *MacroTileWidth;
			PTERequestSize = 64;
			FractionOfPTEReturnDrop = 0;
		}

		/* 4:2:0 formats get half of the PDE processing buffer. */
		if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10)
			EffectivePDEProcessingBufIn64KBReqs = PDEProcessingBufIn64KBReqs / 2;
		else
			EffectivePDEProcessingBufIn64KBReqs = PDEProcessingBufIn64KBReqs;

		if (SurfaceTiling == dm_sw_linear) {
			/* Row height is a power of two, capped at 128. */
			*dpte_row_height =
					dml_min(
							128,
							1
									<< (unsigned int) dml_floor(
											dml_log2(
													dml_min(
															(double) PTEBufferSizeInRequestsLuma
																	* PixelPTEReqWidth,
															EffectivePDEProcessingBufIn64KBReqs
																	* 65536.0
																	/ BytePerPixel)
															/ Pitch),
											1));
			*PixelPTEBytesPerRow = PTERequestSize
					* (dml_ceil(
							(double) (Pitch * *dpte_row_height - 1)
									/ PixelPTEReqWidth,
							1) + 1);
		} else if (ScanDirection == dm_horz) {
			*dpte_row_height = PixelPTEReqHeight;
			*PixelPTEBytesPerRow = PTERequestSize
					* (dml_ceil(((double) SwathWidth - 1) / PixelPTEReqWidth, 1)
							+ 1);
		} else {
			*dpte_row_height = dml_min(PixelPTEReqWidth, *MacroTileWidth);
			*PixelPTEBytesPerRow = PTERequestSize
					* (dml_ceil(
							((double) SwathWidth - 1)
									/ PixelPTEReqHeight,
							1) + 1);
		}

		/* Only the PTE bytes that are not dropped must fit in the buffer. */
		if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop)
				<= 64 * PTEBufferSizeInRequestsLuma) {
			*PTEBufferSizeNotExceeded = true;
		} else {
			*PTEBufferSizeNotExceeded = false;
		}
	} else {
		*PixelPTEBytesPerRow = 0;
		*PTEBufferSizeNotExceeded = true;
	}

	return PDEAndMetaPTEBytesFrame;
}
static void dml20_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
struct display_mode_lib *mode_lib)
{
unsigned int j, k;
mode_lib->vba.WritebackDISPCLK = 0.0;
mode_lib->vba.DISPCLKWithRamping = 0;
mode_lib->vba.DISPCLKWithoutRamping = 0;
mode_lib->vba.GlobalDPPCLK = 0.0;
// dml_ml->vba.DISPCLK and dml_ml->vba.DPPCLK Calculation
//
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
if (mode_lib->vba.WritebackEnable[k]) {
mode_lib->vba.WritebackDISPCLK =
dml_max(
mode_lib->vba.WritebackDISPCLK,
CalculateWriteBackDISPCLK(
mode_lib->vba.WritebackPixelFormat[k],
mode_lib->vba.PixelClock[k],
mode_lib->vba.WritebackHRatio[k],
mode_lib->vba.WritebackVRatio[k],
mode_lib->vba.WritebackLumaHTaps[k],
mode_lib->vba.WritebackLumaVTaps[k],
mode_lib->vba.WritebackChromaHTaps[k],
mode_lib->vba.WritebackChromaVTaps[k],
mode_lib->vba.WritebackDestinationWidth[k],
mode_lib->vba.HTotal[k],
mode_lib->vba.WritebackChromaLineBufferWidth));
}
}
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
if (mode_lib->vba.HRatio[k] > 1) {
mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] = dml_min(
mode_lib->vba.MaxDCHUBToPSCLThroughput,
mode_lib->vba.MaxPSCLToLBThroughput
* mode_lib->vba.HRatio[k]
/ dml_ceil(
mode_lib->vba.htaps[k]
/ 6.0,
1));
} else {
mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] = dml_min(
mode_lib->vba.MaxDCHUBToPSCLThroughput,
mode_lib->vba.MaxPSCLToLBThroughput);
}
mode_lib->vba.DPPCLKUsingSingleDPPLuma =
mode_lib->vba.PixelClock[k]
* dml_max(
mode_lib->vba.vtaps[k] / 6.0
* dml_min(
1.0,
mode_lib->vba.HRatio[k]),
dml_max(
mode_lib->vba.HRatio[k]
* mode_lib->vba.VRatio[k]
/ mode_lib->vba.PSCL_THROUGHPUT_LUMA[k],
1.0));
if ((mode_lib->vba.htaps[k] > 6 || mode_lib->vba.vtaps[k] > 6)
&& mode_lib->vba.DPPCLKUsingSingleDPPLuma
< 2 * mode_lib->vba.PixelClock[k]) {
mode_lib->vba.DPPCLKUsingSingleDPPLuma = 2 * mode_lib->vba.PixelClock[k];
}
if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8
&& mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) {
mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] = 0.0;
mode_lib->vba.DPPCLKUsingSingleDPP[k] =
mode_lib->vba.DPPCLKUsingSingleDPPLuma;
} else {
if (mode_lib->vba.HRatio[k] > 1) {
mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] =
dml_min(
mode_lib->vba.MaxDCHUBToPSCLThroughput,
mode_lib->vba.MaxPSCLToLBThroughput
* mode_lib->vba.HRatio[k]
/ 2
/ dml_ceil(
mode_lib->vba.HTAPsChroma[k]
/ 6.0,
1.0));
} else {
mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] = dml_min(
mode_lib->vba.MaxDCHUBToPSCLThroughput,
mode_lib->vba.MaxPSCLToLBThroughput);
}
mode_lib->vba.DPPCLKUsingSingleDPPChroma =
mode_lib->vba.PixelClock[k]
* dml_max(
mode_lib->vba.VTAPsChroma[k]
/ 6.0
* dml_min(
1.0,
mode_lib->vba.HRatio[k]
/ 2),
dml_max(
mode_lib->vba.HRatio[k]
* mode_lib->vba.VRatio[k]
/ 4
/ mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k],
1.0));
if ((mode_lib->vba.HTAPsChroma[k] > 6 || mode_lib->vba.VTAPsChroma[k] > 6)
&& mode_lib->vba.DPPCLKUsingSingleDPPChroma
< 2 * mode_lib->vba.PixelClock[k]) {
mode_lib->vba.DPPCLKUsingSingleDPPChroma = 2
* mode_lib->vba.PixelClock[k];
}
mode_lib->vba.DPPCLKUsingSingleDPP[k] = dml_max(
mode_lib->vba.DPPCLKUsingSingleDPPLuma,
mode_lib->vba.DPPCLKUsingSingleDPPChroma);
}
}
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
if (mode_lib->vba.BlendingAndTiming[k] != k)
continue;
if (mode_lib->vba.ODMCombineEnabled[k]) {
mode_lib->vba.DISPCLKWithRamping =
dml_max(
mode_lib->vba.DISPCLKWithRamping,
mode_lib->vba.PixelClock[k] / 2
* (1
+ mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
/ 100)
* (1
+ mode_lib->vba.DISPCLKRampingMargin
/ 100));
mode_lib->vba.DISPCLKWithoutRamping =
dml_max(
mode_lib->vba.DISPCLKWithoutRamping,
mode_lib->vba.PixelClock[k] / 2
* (1
+ mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
/ 100));
} else if (!mode_lib->vba.ODMCombineEnabled[k]) {
mode_lib->vba.DISPCLKWithRamping =
dml_max(
mode_lib->vba.DISPCLKWithRamping,
mode_lib->vba.PixelClock[k]
* (1
+ mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
/ 100)
* (1
+ mode_lib->vba.DISPCLKRampingMargin
/ 100));
mode_lib->vba.DISPCLKWithoutRamping =
dml_max(
mode_lib->vba.DISPCLKWithoutRamping,
mode_lib->vba.PixelClock[k]
* (1
+ mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
/ 100));
}
}
mode_lib->vba.DISPCLKWithRamping = dml_max(
mode_lib->vba.DISPCLKWithRamping,
mode_lib->vba.WritebackDISPCLK);
mode_lib->vba.DISPCLKWithoutRamping = dml_max(
mode_lib->vba.DISPCLKWithoutRamping,
mode_lib->vba.WritebackDISPCLK);
ASSERT(mode_lib->vba.DISPCLKDPPCLKVCOSpeed != 0);
mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
mode_lib->vba.DISPCLKWithRamping,
mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
mode_lib->vba.DISPCLKWithoutRamping,
mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
mode_lib->vba.MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
mode_lib->vba.soc.clock_limits[mode_lib->vba.soc.num_states].dispclk_mhz,
mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
if (mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity
> mode_lib->vba.MaxDispclkRoundedToDFSGranularity) {
mode_lib->vba.DISPCLK_calculated =
mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity;
} else if (mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity
> mode_lib->vba.MaxDispclkRoundedToDFSGranularity) {
mode_lib->vba.DISPCLK_calculated = mode_lib->vba.MaxDispclkRoundedToDFSGranularity;
} else {
mode_lib->vba.DISPCLK_calculated =
mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity;
}
DTRACE(" dispclk_mhz (calculated) = %f", mode_lib->vba.DISPCLK_calculated);
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
if (mode_lib->vba.DPPPerPlane[k] == 0) {
mode_lib->vba.DPPCLK_calculated[k] = 0;
} else {
mode_lib->vba.DPPCLK_calculated[k] = mode_lib->vba.DPPCLKUsingSingleDPP[k]
/ mode_lib->vba.DPPPerPlane[k]
* (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100);
}
mode_lib->vba.GlobalDPPCLK = dml_max(
mode_lib->vba.GlobalDPPCLK,
mode_lib->vba.DPPCLK_calculated[k]);
}
mode_lib->vba.GlobalDPPCLK = RoundToDFSGranularityUp(
mode_lib->vba.GlobalDPPCLK,
mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
mode_lib->vba.DPPCLK_calculated[k] = mode_lib->vba.GlobalDPPCLK / 255
* dml_ceil(
mode_lib->vba.DPPCLK_calculated[k] * 255
/ mode_lib->vba.GlobalDPPCLK,
1);
DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, mode_lib->vba.DPPCLK_calculated[k]);
}
// Urgent Watermark
mode_lib->vba.DCCEnabledAnyPlane = false;
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k)
if (mode_lib->vba.DCCEnable[k])
mode_lib->vba.DCCEnabledAnyPlane = true;
mode_lib->vba.ReturnBandwidthToDCN = dml_min(
mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLK,
mode_lib->vba.FabricAndDRAMBandwidth * 1000)
* mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100;
mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBandwidthToDCN;
mode_lib->vba.ReturnBW = adjust_ReturnBW(
mode_lib,
mode_lib->vba.ReturnBW,
mode_lib->vba.DCCEnabledAnyPlane,
mode_lib->vba.ReturnBandwidthToDCN);
// Let's do this calculation again??
mode_lib->vba.ReturnBandwidthToDCN = dml_min(
mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLK,
mode_lib->vba.FabricAndDRAMBandwidth * 1000);
mode_lib->vba.ReturnBW = adjust_ReturnBW(
mode_lib,
mode_lib->vba.ReturnBW,
mode_lib->vba.DCCEnabledAnyPlane,
mode_lib->vba.ReturnBandwidthToDCN);
DTRACE(" dcfclk_mhz = %f", mode_lib->vba.DCFCLK);
DTRACE(" return_bw_to_dcn = %f", mode_lib->vba.ReturnBandwidthToDCN);
DTRACE(" return_bus_bw = %f", mode_lib->vba.ReturnBW);
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
bool MainPlaneDoesODMCombine = false;
if (mode_lib->vba.SourceScan[k] == dm_horz)
mode_lib->vba.SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportWidth[k];
else
mode_lib->vba.SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k];
if (mode_lib->vba.ODMCombineEnabled[k] == true)
MainPlaneDoesODMCombine = true;
for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j)
if (mode_lib->vba.BlendingAndTiming[k] == j
&& mode_lib->vba.ODMCombineEnabled[j] == true)
MainPlaneDoesODMCombine = true;
if (MainPlaneDoesODMCombine == true)
mode_lib->vba.SwathWidthY[k] = dml_min(
(double) mode_lib->vba.SwathWidthSingleDPPY[k],
dml_round(
mode_lib->vba.HActive[k] / 2.0
* mode_lib->vba.HRatio[k]));
else {
if (mode_lib->vba.DPPPerPlane[k] == 0) {
mode_lib->vba.SwathWidthY[k] = 0;
} else {
mode_lib->vba.SwathWidthY[k] = mode_lib->vba.SwathWidthSingleDPPY[k]
/ mode_lib->vba.DPPPerPlane[k];
}
}
}
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) {
mode_lib->vba.BytePerPixelDETY[k] = 8;
mode_lib->vba.BytePerPixelDETC[k] = 0;
} else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) {
mode_lib->vba.BytePerPixelDETY[k] = 4;
mode_lib->vba.BytePerPixelDETC[k] = 0;
} else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16) {
mode_lib->vba.BytePerPixelDETY[k] = 2;
mode_lib->vba.BytePerPixelDETC[k] = 0;
} else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8) {
mode_lib->vba.BytePerPixelDETY[k] = 1;
mode_lib->vba.BytePerPixelDETC[k] = 0;
} else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) {
mode_lib->vba.BytePerPixelDETY[k] = 1;
mode_lib->vba.BytePerPixelDETC[k] = 2;
} else { // dm_420_10
mode_lib->vba.BytePerPixelDETY[k] = 4.0 / 3.0;
mode_lib->vba.BytePerPixelDETC[k] = 8.0 / 3.0;
}
}
mode_lib->vba.TotalDataReadBandwidth = 0.0;
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
mode_lib->vba.ReadBandwidthPlaneLuma[k] = mode_lib->vba.SwathWidthSingleDPPY[k]
* dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1)
/ (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k])
* mode_lib->vba.VRatio[k];
mode_lib->vba.ReadBandwidthPlaneChroma[k] = mode_lib->vba.SwathWidthSingleDPPY[k]
/ 2 * dml_ceil(mode_lib->vba.BytePerPixelDETC[k], 2)
/ (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k])
* mode_lib->vba.VRatio[k] / 2;
DTRACE(
" read_bw[%i] = %fBps",
k,
mode_lib->vba.ReadBandwidthPlaneLuma[k]
+ mode_lib->vba.ReadBandwidthPlaneChroma[k]);
mode_lib->vba.TotalDataReadBandwidth += mode_lib->vba.ReadBandwidthPlaneLuma[k]
+ mode_lib->vba.ReadBandwidthPlaneChroma[k];
}
mode_lib->vba.TotalDCCActiveDPP = 0;
mode_lib->vba.TotalActiveDPP = 0;
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP
+ mode_lib->vba.DPPPerPlane[k];
if (mode_lib->vba.DCCEnable[k])
mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP
+ mode_lib->vba.DPPPerPlane[k];
}
mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency =
(mode_lib->vba.RoundTripPingLatencyCycles + 32) / mode_lib->vba.DCFCLK
+ mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly
* mode_lib->vba.NumberOfChannels
/ mode_lib->vba.ReturnBW;
mode_lib->vba.LastPixelOfLineExtraWatermark = 0;
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
double DataFabricLineDeliveryTimeLuma, DataFabricLineDeliveryTimeChroma;
if (mode_lib->vba.VRatio[k] <= 1.0)
mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k] =
(double) mode_lib->vba.SwathWidthY[k]
* mode_lib->vba.DPPPerPlane[k]
/ mode_lib->vba.HRatio[k]
/ mode_lib->vba.PixelClock[k];
else
mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k] =
(double) mode_lib->vba.SwathWidthY[k]
/ mode_lib->vba.PSCL_THROUGHPUT_LUMA[k]
/ mode_lib->vba.DPPCLK[k];
DataFabricLineDeliveryTimeLuma = mode_lib->vba.SwathWidthSingleDPPY[k]
* mode_lib->vba.SwathHeightY[k]
* dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1)
/ (mode_lib->vba.ReturnBW * mode_lib->vba.ReadBandwidthPlaneLuma[k]
/ mode_lib->vba.TotalDataReadBandwidth);
mode_lib->vba.LastPixelOfLineExtraWatermark = dml_max(
mode_lib->vba.LastPixelOfLineExtraWatermark,
DataFabricLineDeliveryTimeLuma
- mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k]);
if (mode_lib->vba.BytePerPixelDETC[k] == 0)
mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k] = 0.0;
else if (mode_lib->vba.VRatio[k] / 2.0 <= 1.0)
mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k] =
mode_lib->vba.SwathWidthY[k] / 2.0
* mode_lib->vba.DPPPerPlane[k]
/ (mode_lib->vba.HRatio[k] / 2.0)
/ mode_lib->vba.PixelClock[k];
else
mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k] =
mode_lib->vba.SwathWidthY[k] / 2.0
/ mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k]
/ mode_lib->vba.DPPCLK[k];
DataFabricLineDeliveryTimeChroma = mode_lib->vba.SwathWidthSingleDPPY[k] / 2.0
* mode_lib->vba.SwathHeightC[k]
* dml_ceil(mode_lib->vba.BytePerPixelDETC[k], 2)
/ (mode_lib->vba.ReturnBW
* mode_lib->vba.ReadBandwidthPlaneChroma[k]
/ mode_lib->vba.TotalDataReadBandwidth);
mode_lib->vba.LastPixelOfLineExtraWatermark =
dml_max(
mode_lib->vba.LastPixelOfLineExtraWatermark,
DataFabricLineDeliveryTimeChroma
- mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k]);
}
mode_lib->vba.UrgentExtraLatency = mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency
+ (mode_lib->vba.TotalActiveDPP * mode_lib->vba.PixelChunkSizeInKByte
+ mode_lib->vba.TotalDCCActiveDPP
* mode_lib->vba.MetaChunkSize) * 1024.0
/ mode_lib->vba.ReturnBW;
if (mode_lib->vba.GPUVMEnable)
mode_lib->vba.UrgentExtraLatency += mode_lib->vba.TotalActiveDPP
* mode_lib->vba.PTEGroupSize / mode_lib->vba.ReturnBW;
mode_lib->vba.UrgentWatermark = mode_lib->vba.UrgentLatencyPixelDataOnly
+ mode_lib->vba.LastPixelOfLineExtraWatermark
+ mode_lib->vba.UrgentExtraLatency;
DTRACE(" urgent_extra_latency = %fus", mode_lib->vba.UrgentExtraLatency);
DTRACE(" wm_urgent = %fus", mode_lib->vba.UrgentWatermark);
mode_lib->vba.UrgentLatency = mode_lib->vba.UrgentLatencyPixelDataOnly;
mode_lib->vba.TotalActiveWriteback = 0;
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
if (mode_lib->vba.WritebackEnable[k])
mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + mode_lib->vba.ActiveWritebacksPerPlane[k];
}
if (mode_lib->vba.TotalActiveWriteback <= 1)
mode_lib->vba.WritebackUrgentWatermark = mode_lib->vba.WritebackLatency;
else
mode_lib->vba.WritebackUrgentWatermark = mode_lib->vba.WritebackLatency
+ mode_lib->vba.WritebackChunkSize * 1024.0 / 32
/ mode_lib->vba.SOCCLK;
DTRACE(" wm_wb_urgent = %fus", mode_lib->vba.WritebackUrgentWatermark);
// NB P-State/DRAM Clock Change Watermark
mode_lib->vba.DRAMClockChangeWatermark = mode_lib->vba.DRAMClockChangeLatency
+ mode_lib->vba.UrgentWatermark;
DTRACE(" wm_pstate_change = %fus", mode_lib->vba.DRAMClockChangeWatermark);
DTRACE(" calculating wb pstate watermark");
DTRACE(" total wb outputs %d", mode_lib->vba.TotalActiveWriteback);
DTRACE(" socclk frequency %f Mhz", mode_lib->vba.SOCCLK);
if (mode_lib->vba.TotalActiveWriteback <= 1)
mode_lib->vba.WritebackDRAMClockChangeWatermark =
mode_lib->vba.DRAMClockChangeLatency
+ mode_lib->vba.WritebackLatency;
else
mode_lib->vba.WritebackDRAMClockChangeWatermark =
mode_lib->vba.DRAMClockChangeLatency
+ mode_lib->vba.WritebackLatency
+ mode_lib->vba.WritebackChunkSize * 1024.0 / 32
/ mode_lib->vba.SOCCLK;
DTRACE(" wm_wb_pstate %fus", mode_lib->vba.WritebackDRAMClockChangeWatermark);
// Stutter Efficiency
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
mode_lib->vba.LinesInDETY[k] = mode_lib->vba.DETBufferSizeY[k]
/ mode_lib->vba.BytePerPixelDETY[k] / mode_lib->vba.SwathWidthY[k];
mode_lib->vba.LinesInDETYRoundedDownToSwath[k] = dml_floor(
mode_lib->vba.LinesInDETY[k],
mode_lib->vba.SwathHeightY[k]);
mode_lib->vba.FullDETBufferingTimeY[k] =
mode_lib->vba.LinesInDETYRoundedDownToSwath[k]
* (mode_lib->vba.HTotal[k]
/ mode_lib->vba.PixelClock[k])
/ mode_lib->vba.VRatio[k];
if (mode_lib->vba.BytePerPixelDETC[k] > 0) {
mode_lib->vba.LinesInDETC[k] = mode_lib->vba.DETBufferSizeC[k]
/ mode_lib->vba.BytePerPixelDETC[k]
/ (mode_lib->vba.SwathWidthY[k] / 2);
mode_lib->vba.LinesInDETCRoundedDownToSwath[k] = dml_floor(
mode_lib->vba.LinesInDETC[k],
mode_lib->vba.SwathHeightC[k]);
mode_lib->vba.FullDETBufferingTimeC[k] =
mode_lib->vba.LinesInDETCRoundedDownToSwath[k]
* (mode_lib->vba.HTotal[k]
/ mode_lib->vba.PixelClock[k])
/ (mode_lib->vba.VRatio[k] / 2);
} else {
mode_lib->vba.LinesInDETC[k] = 0;
mode_lib->vba.LinesInDETCRoundedDownToSwath[k] = 0;
mode_lib->vba.FullDETBufferingTimeC[k] = 999999;
}
}
mode_lib->vba.MinFullDETBufferingTime = 999999.0;
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
if (mode_lib->vba.FullDETBufferingTimeY[k]
< mode_lib->vba.MinFullDETBufferingTime) {
mode_lib->vba.MinFullDETBufferingTime =
mode_lib->vba.FullDETBufferingTimeY[k];
mode_lib->vba.FrameTimeForMinFullDETBufferingTime =
(double) mode_lib->vba.VTotal[k] * mode_lib->vba.HTotal[k]
/ mode_lib->vba.PixelClock[k];
}
if (mode_lib->vba.FullDETBufferingTimeC[k]
< mode_lib->vba.MinFullDETBufferingTime) {
mode_lib->vba.MinFullDETBufferingTime =
mode_lib->vba.FullDETBufferingTimeC[k];
mode_lib->vba.FrameTimeForMinFullDETBufferingTime =
(double) mode_lib->vba.VTotal[k] * mode_lib->vba.HTotal[k]
/ mode_lib->vba.PixelClock[k];
}
}
mode_lib->vba.AverageReadBandwidthGBytePerSecond = 0.0;
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
if (mode_lib->vba.DCCEnable[k]) {
mode_lib->vba.AverageReadBandwidthGBytePerSecond =
mode_lib->vba.AverageReadBandwidthGBytePerSecond
+ mode_lib->vba.ReadBandwidthPlaneLuma[k]
/ mode_lib->vba.DCCRate[k]
/ 1000
+ mode_lib->vba.ReadBandwidthPlaneChroma[k]
/ mode_lib->vba.DCCRate[k]
/ 1000;
} else {
mode_lib->vba.AverageReadBandwidthGBytePerSecond =
mode_lib->vba.AverageReadBandwidthGBytePerSecond
+ mode_lib->vba.ReadBandwidthPlaneLuma[k]
/ 1000
+ mode_lib->vba.ReadBandwidthPlaneChroma[k]
/ 1000;
}
if (mode_lib->vba.DCCEnable[k]) {
mode_lib->vba.AverageReadBandwidthGBytePerSecond =
mode_lib->vba.AverageReadBandwidthGBytePerSecond
+ mode_lib->vba.ReadBandwidthPlaneLuma[k]
/ 1000 / 256
+ mode_lib->vba.ReadBandwidthPlaneChroma[k]
/ 1000 / 256;
}
if (mode_lib->vba.GPUVMEnable) {
mode_lib->vba.AverageReadBandwidthGBytePerSecond =
mode_lib->vba.AverageReadBandwidthGBytePerSecond
+ mode_lib->vba.ReadBandwidthPlaneLuma[k]
/ 1000 / 512
+ mode_lib->vba.ReadBandwidthPlaneChroma[k]
/ 1000 / 512;
}
}
mode_lib->vba.PartOfBurstThatFitsInROB =
dml_min(
mode_lib->vba.MinFullDETBufferingTime
* mode_lib->vba.TotalDataReadBandwidth,
mode_lib->vba.ROBBufferSizeInKByte * 1024
* mode_lib->vba.TotalDataReadBandwidth
/ (mode_lib->vba.AverageReadBandwidthGBytePerSecond
* 1000));
mode_lib->vba.StutterBurstTime = mode_lib->vba.PartOfBurstThatFitsInROB
* (mode_lib->vba.AverageReadBandwidthGBytePerSecond * 1000)
/ mode_lib->vba.TotalDataReadBandwidth / mode_lib->vba.ReturnBW
+ (mode_lib->vba.MinFullDETBufferingTime
* mode_lib->vba.TotalDataReadBandwidth
- mode_lib->vba.PartOfBurstThatFitsInROB)
/ (mode_lib->vba.DCFCLK * 64);
if (mode_lib->vba.TotalActiveWriteback == 0) {
mode_lib->vba.StutterEfficiencyNotIncludingVBlank = (1
- (mode_lib->vba.SRExitTime + mode_lib->vba.StutterBurstTime)
/ mode_lib->vba.MinFullDETBufferingTime) * 100;
} else {
mode_lib->vba.StutterEfficiencyNotIncludingVBlank = 0;
}
mode_lib->vba.SmallestVBlank = 999999;
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
if (mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) {
mode_lib->vba.VBlankTime = (double) (mode_lib->vba.VTotal[k]
- mode_lib->vba.VActive[k]) * mode_lib->vba.HTotal[k]
/ mode_lib->vba.PixelClock[k];
} else {
mode_lib->vba.VBlankTime = 0;
}
mode_lib->vba.SmallestVBlank = dml_min(
mode_lib->vba.SmallestVBlank,
mode_lib->vba.VBlankTime);
}
mode_lib->vba.StutterEfficiency = (mode_lib->vba.StutterEfficiencyNotIncludingVBlank / 100
* (mode_lib->vba.FrameTimeForMinFullDETBufferingTime
- mode_lib->vba.SmallestVBlank)
+ mode_lib->vba.SmallestVBlank)
/ mode_lib->vba.FrameTimeForMinFullDETBufferingTime * 100;
// dml_ml->vba.DCFCLK Deep Sleep
mode_lib->vba.DCFCLKDeepSleep = 8.0;
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; k++) {
if (mode_lib->vba.BytePerPixelDETC[k] > 0) {
mode_lib->vba.DCFCLKDeepSleepPerPlane[k] =
dml_max(
1.1 * mode_lib->vba.SwathWidthY[k]
* dml_ceil(
mode_lib->vba.BytePerPixelDETY[k],
1) / 32
/ mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k],
1.1 * mode_lib->vba.SwathWidthY[k] / 2.0
* dml_ceil(
mode_lib->vba.BytePerPixelDETC[k],
2) / 32
/ mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k]);
} else
mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = 1.1 * mode_lib->vba.SwathWidthY[k]
* dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1) / 64.0
/ mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k];
mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(
mode_lib->vba.DCFCLKDeepSleepPerPlane[k],
mode_lib->vba.PixelClock[k] / 16.0);
mode_lib->vba.DCFCLKDeepSleep = dml_max(
mode_lib->vba.DCFCLKDeepSleep,
mode_lib->vba.DCFCLKDeepSleepPerPlane[k]);
DTRACE(
" dcfclk_deepsleep_per_plane[%i] = %fMHz",
k,
mode_lib->vba.DCFCLKDeepSleepPerPlane[k]);
}
DTRACE(" dcfclk_deepsleep_mhz = %fMHz", mode_lib->vba.DCFCLKDeepSleep);
// Stutter Watermark
mode_lib->vba.StutterExitWatermark = mode_lib->vba.SRExitTime
+ mode_lib->vba.LastPixelOfLineExtraWatermark
+ mode_lib->vba.UrgentExtraLatency + 10 / mode_lib->vba.DCFCLKDeepSleep;
mode_lib->vba.StutterEnterPlusExitWatermark = mode_lib->vba.SREnterPlusExitTime
+ mode_lib->vba.LastPixelOfLineExtraWatermark
+ mode_lib->vba.UrgentExtraLatency;
DTRACE(" wm_cstate_exit = %fus", mode_lib->vba.StutterExitWatermark);
DTRACE(" wm_cstate_enter_exit = %fus", mode_lib->vba.StutterEnterPlusExitWatermark);
// Urgent Latency Supported
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
mode_lib->vba.EffectiveDETPlusLBLinesLuma =
dml_floor(
mode_lib->vba.LinesInDETY[k]
+ dml_min(
mode_lib->vba.LinesInDETY[k]
* mode_lib->vba.DPPCLK[k]
* mode_lib->vba.BytePerPixelDETY[k]
* mode_lib->vba.PSCL_THROUGHPUT_LUMA[k]
/ (mode_lib->vba.ReturnBW
/ mode_lib->vba.DPPPerPlane[k]),
(double) mode_lib->vba.EffectiveLBLatencyHidingSourceLinesLuma),
mode_lib->vba.SwathHeightY[k]);
mode_lib->vba.UrgentLatencySupportUsLuma = mode_lib->vba.EffectiveDETPlusLBLinesLuma
* (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k])
/ mode_lib->vba.VRatio[k]
- mode_lib->vba.EffectiveDETPlusLBLinesLuma
* mode_lib->vba.SwathWidthY[k]
* mode_lib->vba.BytePerPixelDETY[k]
/ (mode_lib->vba.ReturnBW
/ mode_lib->vba.DPPPerPlane[k]);
if (mode_lib->vba.BytePerPixelDETC[k] > 0) {
mode_lib->vba.EffectiveDETPlusLBLinesChroma =
dml_floor(
mode_lib->vba.LinesInDETC[k]
+ dml_min(
mode_lib->vba.LinesInDETC[k]
* mode_lib->vba.DPPCLK[k]
* mode_lib->vba.BytePerPixelDETC[k]
* mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k]
/ (mode_lib->vba.ReturnBW
/ mode_lib->vba.DPPPerPlane[k]),
(double) mode_lib->vba.EffectiveLBLatencyHidingSourceLinesChroma),
mode_lib->vba.SwathHeightC[k]);
mode_lib->vba.UrgentLatencySupportUsChroma =
mode_lib->vba.EffectiveDETPlusLBLinesChroma
* (mode_lib->vba.HTotal[k]
/ mode_lib->vba.PixelClock[k])
/ (mode_lib->vba.VRatio[k] / 2)
- mode_lib->vba.EffectiveDETPlusLBLinesChroma
* (mode_lib->vba.SwathWidthY[k]
/ 2)
* mode_lib->vba.BytePerPixelDETC[k]
/ (mode_lib->vba.ReturnBW
/ mode_lib->vba.DPPPerPlane[k]);
mode_lib->vba.UrgentLatencySupportUs[k] = dml_min(
mode_lib->vba.UrgentLatencySupportUsLuma,
mode_lib->vba.UrgentLatencySupportUsChroma);
} else {
mode_lib->vba.UrgentLatencySupportUs[k] =
mode_lib->vba.UrgentLatencySupportUsLuma;
}
}
mode_lib->vba.MinUrgentLatencySupportUs = 999999;
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
mode_lib->vba.MinUrgentLatencySupportUs = dml_min(
mode_lib->vba.MinUrgentLatencySupportUs,
mode_lib->vba.UrgentLatencySupportUs[k]);
}
// Non-Urgent Latency Tolerance
mode_lib->vba.NonUrgentLatencyTolerance = mode_lib->vba.MinUrgentLatencySupportUs
- mode_lib->vba.UrgentWatermark;
// DSCCLK
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
if ((mode_lib->vba.BlendingAndTiming[k] != k) || !mode_lib->vba.DSCEnabled[k]) {
mode_lib->vba.DSCCLK_calculated[k] = 0.0;
} else {
if (mode_lib->vba.OutputFormat[k] == dm_420
|| mode_lib->vba.OutputFormat[k] == dm_n422)
mode_lib->vba.DSCFormatFactor = 2;
else
mode_lib->vba.DSCFormatFactor = 1;
if (mode_lib->vba.ODMCombineEnabled[k])
mode_lib->vba.DSCCLK_calculated[k] =
mode_lib->vba.PixelClockBackEnd[k] / 6
/ mode_lib->vba.DSCFormatFactor
/ (1
- mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
/ 100);
else
mode_lib->vba.DSCCLK_calculated[k] =
mode_lib->vba.PixelClockBackEnd[k] / 3
/ mode_lib->vba.DSCFormatFactor
/ (1
- mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
/ 100);
}
}
// DSC Delay
// TODO
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
double bpp = mode_lib->vba.OutputBpp[k];
unsigned int slices = mode_lib->vba.NumberOfDSCSlices[k];
if (mode_lib->vba.DSCEnabled[k] && bpp != 0) {
if (!mode_lib->vba.ODMCombineEnabled[k]) {
mode_lib->vba.DSCDelay[k] =
dscceComputeDelay(
mode_lib->vba.DSCInputBitPerComponent[k],
bpp,
dml_ceil(
(double) mode_lib->vba.HActive[k]
/ mode_lib->vba.NumberOfDSCSlices[k],
1),
slices,
mode_lib->vba.OutputFormat[k])
+ dscComputeDelay(
mode_lib->vba.OutputFormat[k]);
} else {
mode_lib->vba.DSCDelay[k] =
2
* (dscceComputeDelay(
mode_lib->vba.DSCInputBitPerComponent[k],
bpp,
dml_ceil(
(double) mode_lib->vba.HActive[k]
/ mode_lib->vba.NumberOfDSCSlices[k],
1),
slices / 2.0,
mode_lib->vba.OutputFormat[k])
+ dscComputeDelay(
mode_lib->vba.OutputFormat[k]));
}
mode_lib->vba.DSCDelay[k] = mode_lib->vba.DSCDelay[k]
* mode_lib->vba.PixelClock[k]
/ mode_lib->vba.PixelClockBackEnd[k];
} else {
mode_lib->vba.DSCDelay[k] = 0;
}
}
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k)
for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) // NumberOfPlanes
if (j != k && mode_lib->vba.BlendingAndTiming[k] == j
&& mode_lib->vba.DSCEnabled[j])
mode_lib->vba.DSCDelay[k] = mode_lib->vba.DSCDelay[j];
// Prefetch
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
unsigned int PDEAndMetaPTEBytesFrameY;
unsigned int PixelPTEBytesPerRowY;
unsigned int MetaRowByteY;
unsigned int MetaRowByteC;
unsigned int PDEAndMetaPTEBytesFrameC;
unsigned int PixelPTEBytesPerRowC;
Calculate256BBlockSizes(
mode_lib->vba.SourcePixelFormat[k],
mode_lib->vba.SurfaceTiling[k],
dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1),
dml_ceil(mode_lib->vba.BytePerPixelDETC[k], 2),
&mode_lib->vba.BlockHeight256BytesY[k],
&mode_lib->vba.BlockHeight256BytesC[k],
&mode_lib->vba.BlockWidth256BytesY[k],
&mode_lib->vba.BlockWidth256BytesC[k]);
PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
mode_lib,
mode_lib->vba.DCCEnable[k],
mode_lib->vba.BlockHeight256BytesY[k],
mode_lib->vba.BlockWidth256BytesY[k],
mode_lib->vba.SourcePixelFormat[k],
mode_lib->vba.SurfaceTiling[k],
dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1),
mode_lib->vba.SourceScan[k],
mode_lib->vba.ViewportWidth[k],
mode_lib->vba.ViewportHeight[k],
mode_lib->vba.SwathWidthY[k],
mode_lib->vba.GPUVMEnable,
mode_lib->vba.VMMPageSize,
mode_lib->vba.PTEBufferSizeInRequestsLuma,
mode_lib->vba.PDEProcessingBufIn64KBReqs,
mode_lib->vba.PitchY[k],
mode_lib->vba.DCCMetaPitchY[k],
&mode_lib->vba.MacroTileWidthY[k],
&MetaRowByteY,
&PixelPTEBytesPerRowY,
&mode_lib->vba.PTEBufferSizeNotExceeded[mode_lib->vba.VoltageLevel][0],
&mode_lib->vba.dpte_row_height[k],
&mode_lib->vba.meta_row_height[k]);
mode_lib->vba.PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
mode_lib,
mode_lib->vba.VRatio[k],
mode_lib->vba.vtaps[k],
mode_lib->vba.Interlace[k],
mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
mode_lib->vba.SwathHeightY[k],
mode_lib->vba.ViewportYStartY[k],
&mode_lib->vba.VInitPreFillY[k],
&mode_lib->vba.MaxNumSwathY[k]);
if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64
&& mode_lib->vba.SourcePixelFormat[k] != dm_444_32
&& mode_lib->vba.SourcePixelFormat[k] != dm_444_16
&& mode_lib->vba.SourcePixelFormat[k] != dm_444_8)) {
PDEAndMetaPTEBytesFrameC =
CalculateVMAndRowBytes(
mode_lib,
mode_lib->vba.DCCEnable[k],
mode_lib->vba.BlockHeight256BytesC[k],
mode_lib->vba.BlockWidth256BytesC[k],
mode_lib->vba.SourcePixelFormat[k],
mode_lib->vba.SurfaceTiling[k],
dml_ceil(
mode_lib->vba.BytePerPixelDETC[k],
2),
mode_lib->vba.SourceScan[k],
mode_lib->vba.ViewportWidth[k] / 2,
mode_lib->vba.ViewportHeight[k] / 2,
mode_lib->vba.SwathWidthY[k] / 2,
mode_lib->vba.GPUVMEnable,
mode_lib->vba.VMMPageSize,
mode_lib->vba.PTEBufferSizeInRequestsLuma,
mode_lib->vba.PDEProcessingBufIn64KBReqs,
mode_lib->vba.PitchC[k],
0,
&mode_lib->vba.MacroTileWidthC[k],
&MetaRowByteC,
&PixelPTEBytesPerRowC,
&mode_lib->vba.PTEBufferSizeNotExceeded[mode_lib->vba.VoltageLevel][0],
&mode_lib->vba.dpte_row_height_chroma[k],
&mode_lib->vba.meta_row_height_chroma[k]);
mode_lib->vba.PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
mode_lib,
mode_lib->vba.VRatio[k] / 2,
mode_lib->vba.VTAPsChroma[k],
mode_lib->vba.Interlace[k],
mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
mode_lib->vba.SwathHeightC[k],
mode_lib->vba.ViewportYStartC[k],
&mode_lib->vba.VInitPreFillC[k],
&mode_lib->vba.MaxNumSwathC[k]);
} else {
PixelPTEBytesPerRowC = 0;
PDEAndMetaPTEBytesFrameC = 0;
MetaRowByteC = 0;
mode_lib->vba.MaxNumSwathC[k] = 0;
mode_lib->vba.PrefetchSourceLinesC[k] = 0;
}
mode_lib->vba.PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
mode_lib->vba.PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY
+ PDEAndMetaPTEBytesFrameC;
mode_lib->vba.MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
CalculateActiveRowBandwidth(
mode_lib->vba.GPUVMEnable,
mode_lib->vba.SourcePixelFormat[k],
mode_lib->vba.VRatio[k],
mode_lib->vba.DCCEnable[k],
mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
MetaRowByteY,
MetaRowByteC,
mode_lib->vba.meta_row_height[k],
mode_lib->vba.meta_row_height_chroma[k],
PixelPTEBytesPerRowY,
PixelPTEBytesPerRowC,
mode_lib->vba.dpte_row_height[k],
mode_lib->vba.dpte_row_height_chroma[k],
&mode_lib->vba.meta_row_bw[k],
&mode_lib->vba.dpte_row_bw[k],
&mode_lib->vba.qual_row_bw[k]);
}
mode_lib->vba.TCalc = 24.0 / mode_lib->vba.DCFCLKDeepSleep;
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
if (mode_lib->vba.BlendingAndTiming[k] == k) {
if (mode_lib->vba.WritebackEnable[k] == true) {
mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] =
mode_lib->vba.WritebackLatency
+ CalculateWriteBackDelay(
mode_lib->vba.WritebackPixelFormat[k],
mode_lib->vba.WritebackHRatio[k],
mode_lib->vba.WritebackVRatio[k],
mode_lib->vba.WritebackLumaHTaps[k],
mode_lib->vba.WritebackLumaVTaps[k],
mode_lib->vba.WritebackChromaHTaps[k],
mode_lib->vba.WritebackChromaVTaps[k],
mode_lib->vba.WritebackDestinationWidth[k])
/ mode_lib->vba.DISPCLK;
} else
mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] = 0;
for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) {
if (mode_lib->vba.BlendingAndTiming[j] == k
&& mode_lib->vba.WritebackEnable[j] == true) {
mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] =
dml_max(
mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k],
mode_lib->vba.WritebackLatency
+ CalculateWriteBackDelay(
mode_lib->vba.WritebackPixelFormat[j],
mode_lib->vba.WritebackHRatio[j],
mode_lib->vba.WritebackVRatio[j],
mode_lib->vba.WritebackLumaHTaps[j],
mode_lib->vba.WritebackLumaVTaps[j],
mode_lib->vba.WritebackChromaHTaps[j],
mode_lib->vba.WritebackChromaVTaps[j],
mode_lib->vba.WritebackDestinationWidth[j])
/ mode_lib->vba.DISPCLK);
}
}
}
}
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k)
for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j)
if (mode_lib->vba.BlendingAndTiming[k] == j)
mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] =
mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][j];
mode_lib->vba.VStartupLines = 13;
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
mode_lib->vba.MaxVStartupLines[k] =
mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]
- dml_max(
1.0,
dml_ceil(
mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k]
/ (mode_lib->vba.HTotal[k]
/ mode_lib->vba.PixelClock[k]),
1));
}
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k)
mode_lib->vba.MaximumMaxVStartupLines = dml_max(
mode_lib->vba.MaximumMaxVStartupLines,
mode_lib->vba.MaxVStartupLines[k]);
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
mode_lib->vba.cursor_bw[k] = 0.0;
for (j = 0; j < mode_lib->vba.NumberOfCursors[k]; ++j)
mode_lib->vba.cursor_bw[k] += mode_lib->vba.CursorWidth[k][j]
* mode_lib->vba.CursorBPP[k][j] / 8.0
/ (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k])
* mode_lib->vba.VRatio[k];
}
do {
/*
 * Locals that are re-initialised on every pass of the enclosing do-loop:
 * a bandwidth accumulator and the flags later combined into
 * PrefetchModeSupported.
 */
double MaxTotalRDBandwidth = 0;
bool DestinationLineTimesForPrefetchLessThan2 = false;
bool VRatioPrefetchMoreThan4 = false;
bool prefetch_vm_bw_valid = true;
bool prefetch_row_bw_valid = true;
/*
 * TWait is obtained from CalculateTWait() using the prefetch mode selected
 * for the current voltage level / maxMpcComb, the DRAM clock change
 * latency, the urgent latency and the self-refresh enter+exit time. It is
 * then shared by all planes in this pass.
 */
double TWait = CalculateTWait(
mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb],
mode_lib->vba.DRAMClockChangeLatency,
mode_lib->vba.UrgentLatencyPixelDataOnly,
mode_lib->vba.SREnterPlusExitTime);
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
/*
 * XFCRemoteSurfaceFlipDelay is a single (not per-plane) field: it is
 * recomputed here for plane k and consumed by the CalculatePrefetchSchedule
 * call that follows in the same iteration. With XFC enabled it comes from
 * CalculateRemoteSurfaceFlipDelay(), which also writes SrcActiveDrainRate,
 * TInitXFill and TslvChk through the pointer arguments; otherwise it is 0.
 */
if (mode_lib->vba.XFCEnabled[k] == true) {
mode_lib->vba.XFCRemoteSurfaceFlipDelay =
CalculateRemoteSurfaceFlipDelay(
mode_lib,
mode_lib->vba.VRatio[k],
mode_lib->vba.SwathWidthY[k],
dml_ceil(
mode_lib->vba.BytePerPixelDETY[k],
1),
mode_lib->vba.HTotal[k]
/ mode_lib->vba.PixelClock[k],
mode_lib->vba.XFCTSlvVupdateOffset,
mode_lib->vba.XFCTSlvVupdateWidth,
mode_lib->vba.XFCTSlvVreadyOffset,
mode_lib->vba.XFCXBUFLatencyTolerance,
mode_lib->vba.XFCFillBWOverhead,
mode_lib->vba.XFCSlvChunkSize,
mode_lib->vba.XFCBusTransportTime,
mode_lib->vba.TCalc,
TWait,
&mode_lib->vba.SrcActiveDrainRate,
&mode_lib->vba.TInitXFill,
&mode_lib->vba.TslvChk);
} else {
mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0;
}
/*
 * Compute the prefetch schedule for plane k and store the function's
 * return value in ErrorResult[k].
 *
 * Inputs of note:
 *  - the scaled swath width is SwathWidthY / HRatio truncated to
 *    unsigned int;
 *  - the vertical blank size is passed as VTotal - VActive;
 *  - the VStartup candidate is min(VStartupLines, MaxVStartupLines[k]).
 *
 * Results are returned through the trailing pointer arguments. All of
 * them are indexed by k except
 * VStartupRequiredWhenNotEnoughTimeForDynamicMetadata, which is one shared
 * field and is therefore overwritten by each plane's call.
 */
mode_lib->vba.ErrorResult[k] =
CalculatePrefetchSchedule(
mode_lib,
mode_lib->vba.DPPCLK[k],
mode_lib->vba.DISPCLK,
mode_lib->vba.PixelClock[k],
mode_lib->vba.DCFCLKDeepSleep,
mode_lib->vba.DSCDelay[k],
mode_lib->vba.DPPPerPlane[k],
mode_lib->vba.ScalerEnabled[k],
mode_lib->vba.NumberOfCursors[k],
mode_lib->vba.DPPCLKDelaySubtotal,
mode_lib->vba.DPPCLKDelaySCL,
mode_lib->vba.DPPCLKDelaySCLLBOnly,
mode_lib->vba.DPPCLKDelayCNVCFormater,
mode_lib->vba.DPPCLKDelayCNVCCursor,
mode_lib->vba.DISPCLKDelaySubtotal,
(unsigned int) (mode_lib->vba.SwathWidthY[k]
/ mode_lib->vba.HRatio[k]),
mode_lib->vba.OutputFormat[k],
mode_lib->vba.VTotal[k]
- mode_lib->vba.VActive[k],
mode_lib->vba.HTotal[k],
mode_lib->vba.MaxInterDCNTileRepeaters,
dml_min(
mode_lib->vba.VStartupLines,
mode_lib->vba.MaxVStartupLines[k]),
mode_lib->vba.GPUVMMaxPageTableLevels,
mode_lib->vba.GPUVMEnable,
mode_lib->vba.DynamicMetadataEnable[k],
mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k],
mode_lib->vba.DynamicMetadataTransmittedBytes[k],
mode_lib->vba.DCCEnable[k],
mode_lib->vba.UrgentLatencyPixelDataOnly,
mode_lib->vba.UrgentExtraLatency,
mode_lib->vba.TCalc,
mode_lib->vba.PDEAndMetaPTEBytesFrame[k],
mode_lib->vba.MetaRowByte[k],
mode_lib->vba.PixelPTEBytesPerRow[k],
mode_lib->vba.PrefetchSourceLinesY[k],
mode_lib->vba.SwathWidthY[k],
mode_lib->vba.BytePerPixelDETY[k],
mode_lib->vba.VInitPreFillY[k],
mode_lib->vba.MaxNumSwathY[k],
mode_lib->vba.PrefetchSourceLinesC[k],
mode_lib->vba.BytePerPixelDETC[k],
mode_lib->vba.VInitPreFillC[k],
mode_lib->vba.MaxNumSwathC[k],
mode_lib->vba.SwathHeightY[k],
mode_lib->vba.SwathHeightC[k],
TWait,
mode_lib->vba.XFCEnabled[k],
mode_lib->vba.XFCRemoteSurfaceFlipDelay,
mode_lib->vba.Interlace[k],
mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
&mode_lib->vba.DSTXAfterScaler[k],
&mode_lib->vba.DSTYAfterScaler[k],
&mode_lib->vba.DestinationLinesForPrefetch[k],
&mode_lib->vba.PrefetchBandwidth[k],
&mode_lib->vba.DestinationLinesToRequestVMInVBlank[k],
&mode_lib->vba.DestinationLinesToRequestRowInVBlank[k],
&mode_lib->vba.VRatioPrefetchY[k],
&mode_lib->vba.VRatioPrefetchC[k],
&mode_lib->vba.RequiredPrefetchPixDataBWLuma[k],
&mode_lib->vba.VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,
&mode_lib->vba.Tno_bw[k],
&mode_lib->vba.VUpdateOffsetPix[k],
&mode_lib->vba.VUpdateWidthPix[k],
&mode_lib->vba.VReadyOffsetPix[k]);
/*
 * Choose VStartup for plane k.
 *
 * When BlendingAndTiming[k] == k (the plane refers to itself) VStartup is
 * min(VStartupLines, MaxVStartupLines[k]), unless the preceding prefetch
 * calculation reported a non-zero
 * VStartupRequiredWhenNotEnoughTimeForDynamicMetadata, in which case that
 * value replaces it.
 *
 * Otherwise the limit is taken from the plane that BlendingAndTiming[k]
 * names, and the dynamic-metadata override is not applied.
 */
if (mode_lib->vba.BlendingAndTiming[k] == k) {
mode_lib->vba.VStartup[k] = dml_min(
mode_lib->vba.VStartupLines,
mode_lib->vba.MaxVStartupLines[k]);
if (mode_lib->vba.VStartupRequiredWhenNotEnoughTimeForDynamicMetadata
!= 0) {
mode_lib->vba.VStartup[k] =
mode_lib->vba.VStartupRequiredWhenNotEnoughTimeForDynamicMetadata;
}
} else {
mode_lib->vba.VStartup[k] =
dml_min(
mode_lib->vba.VStartupLines,
mode_lib->vba.MaxVStartupLines[mode_lib->vba.BlendingAndTiming[k]]);
}
}
/*
 * Second per-plane pass: derive the VM and row prefetch bandwidths from
 * the schedule computed above, accumulate the total read bandwidth and
 * record any condition that makes the schedule unusable.
 */
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
/*
 * prefetch_vm_bw: PDEAndMetaPTEBytesFrame spread over
 * DestinationLinesToRequestVMInVBlank line times (HTotal / PixelClock).
 * Zero bytes means zero bandwidth; non-zero bytes with a non-positive
 * line count marks the VM bandwidth as invalid.
 */
if (mode_lib->vba.PDEAndMetaPTEBytesFrame[k] == 0)
mode_lib->vba.prefetch_vm_bw[k] = 0;
else if (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k] > 0) {
mode_lib->vba.prefetch_vm_bw[k] =
(double) mode_lib->vba.PDEAndMetaPTEBytesFrame[k]
/ (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k]
* mode_lib->vba.HTotal[k]
/ mode_lib->vba.PixelClock[k]);
} else {
mode_lib->vba.prefetch_vm_bw[k] = 0;
prefetch_vm_bw_valid = false;
}
/*
 * prefetch_row_bw: same scheme for MetaRowByte + PixelPTEBytesPerRow over
 * DestinationLinesToRequestRowInVBlank line times.
 */
if (mode_lib->vba.MetaRowByte[k] + mode_lib->vba.PixelPTEBytesPerRow[k]
== 0)
mode_lib->vba.prefetch_row_bw[k] = 0;
else if (mode_lib->vba.DestinationLinesToRequestRowInVBlank[k] > 0) {
mode_lib->vba.prefetch_row_bw[k] =
(double) (mode_lib->vba.MetaRowByte[k]
+ mode_lib->vba.PixelPTEBytesPerRow[k])
/ (mode_lib->vba.DestinationLinesToRequestRowInVBlank[k]
* mode_lib->vba.HTotal[k]
/ mode_lib->vba.PixelClock[k]);
} else {
mode_lib->vba.prefetch_row_bw[k] = 0;
prefetch_row_bw_valid = false;
}
/*
 * Add this plane's contribution:
 *   cursor_bw + max(prefetch_vm_bw,
 *                   max(prefetch_row_bw,
 *                       max(luma + chroma read bw,
 *                           RequiredPrefetchPixDataBWLuma)
 *                       + meta_row_bw + dpte_row_bw))
 * Note that meta_row_bw and dpte_row_bw are added to the innermost
 * maximum before it is compared against prefetch_row_bw.
 */
MaxTotalRDBandwidth =
MaxTotalRDBandwidth + mode_lib->vba.cursor_bw[k]
+ dml_max(
mode_lib->vba.prefetch_vm_bw[k],
dml_max(
mode_lib->vba.prefetch_row_bw[k],
dml_max(
mode_lib->vba.ReadBandwidthPlaneLuma[k]
+ mode_lib->vba.ReadBandwidthPlaneChroma[k],
mode_lib->vba.RequiredPrefetchPixDataBWLuma[k])
+ mode_lib->vba.meta_row_bw[k]
+ mode_lib->vba.dpte_row_bw[k]));
/* Fewer than 2 destination lines available for prefetch. */
if (mode_lib->vba.DestinationLinesForPrefetch[k] < 2)
DestinationLineTimesForPrefetchLessThan2 = true;
/* Luma or chroma prefetch vertical ratio above 4. */
if (mode_lib->vba.VRatioPrefetchY[k] > 4
|| mode_lib->vba.VRatioPrefetchC[k] > 4)
VRatioPrefetchMoreThan4 = true;
}
/*
 * The prefetch mode is supported only when all of the following hold:
 * total read bandwidth fits within ReturnBW, both VM and row prefetch
 * bandwidths are valid, no prefetch VRatio exceeds 4, and every plane has
 * at least 2 destination lines for prefetch.
 * NOTE(review): the log text says "Bandwidth violation" but it is printed
 * when any of these conditions fails, not only the bandwidth one.
 */
if (MaxTotalRDBandwidth <= mode_lib->vba.ReturnBW && prefetch_vm_bw_valid
&& prefetch_row_bw_valid && !VRatioPrefetchMoreThan4
&& !DestinationLineTimesForPrefetchLessThan2)
mode_lib->vba.PrefetchModeSupported = true;
else {
mode_lib->vba.PrefetchModeSupported = false;
dml_print(
"DML: CalculatePrefetchSchedule ***failed***. Bandwidth violation. Results are NOT valid\n");
}
if (mode_lib->vba.PrefetchModeSupported == true) {
/* Per-plane scratch results for the immediate-flip evaluation below. */
double final_flip_bw[DC__NUM_DPP__MAX];
unsigned int ImmediateFlipBytes[DC__NUM_DPP__MAX];
double total_dcn_read_bw_with_flip = 0;
/*
 * Bandwidth left for immediate flip: start from ReturnBW and, for each
 * plane, subtract its cursor bandwidth plus the larger of
 * (luma + chroma read bandwidth + qual_row_bw) and PrefetchBandwidth.
 */
mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.ReturnBW;
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
mode_lib->vba.BandwidthAvailableForImmediateFlip =
mode_lib->vba.BandwidthAvailableForImmediateFlip
- mode_lib->vba.cursor_bw[k]
- dml_max(
mode_lib->vba.ReadBandwidthPlaneLuma[k]
+ mode_lib->vba.ReadBandwidthPlaneChroma[k]
+ mode_lib->vba.qual_row_bw[k],
mode_lib->vba.PrefetchBandwidth[k]);
}
/*
 * Bytes each plane must fetch for an immediate flip:
 * PDEAndMetaPTEBytesFrame + MetaRowByte + PixelPTEBytesPerRow.
 * Planes whose source format is dm_420_8 or dm_420_10 are left at 0.
 */
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
ImmediateFlipBytes[k] = 0;
if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8
&& mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) {
ImmediateFlipBytes[k] =
mode_lib->vba.PDEAndMetaPTEBytesFrame[k]
+ mode_lib->vba.MetaRowByte[k]
+ mode_lib->vba.PixelPTEBytesPerRow[k];
}
}
/*
 * Sum the per-plane byte counts. The same format test is repeated here;
 * the skipped (4:2:0) planes hold 0 in ImmediateFlipBytes anyway.
 */
mode_lib->vba.TotImmediateFlipBytes = 0;
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8
&& mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) {
mode_lib->vba.TotImmediateFlipBytes =
mode_lib->vba.TotImmediateFlipBytes
+ ImmediateFlipBytes[k];
}
}
/*
 * Evaluate the immediate-flip schedule for each plane. The call receives
 * the shared available bandwidth and total flip bytes together with the
 * plane's own byte count and line time (HTotal / PixelClock), and returns
 * through pointers: the destination lines needed for VM and row requests
 * during the flip, the resulting flip bandwidth (final_flip_bw[k]) and
 * whether the pipe supports immediate flip.
 */
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
CalculateFlipSchedule(
mode_lib,
mode_lib->vba.UrgentExtraLatency,
mode_lib->vba.UrgentLatencyPixelDataOnly,
mode_lib->vba.GPUVMMaxPageTableLevels,
mode_lib->vba.GPUVMEnable,
mode_lib->vba.BandwidthAvailableForImmediateFlip,
mode_lib->vba.TotImmediateFlipBytes,
mode_lib->vba.SourcePixelFormat[k],
ImmediateFlipBytes[k],
mode_lib->vba.HTotal[k]
/ mode_lib->vba.PixelClock[k],
mode_lib->vba.VRatio[k],
mode_lib->vba.Tno_bw[k],
mode_lib->vba.PDEAndMetaPTEBytesFrame[k],
mode_lib->vba.MetaRowByte[k],
mode_lib->vba.PixelPTEBytesPerRow[k],
mode_lib->vba.DCCEnable[k],
mode_lib->vba.dpte_row_height[k],
mode_lib->vba.meta_row_height[k],
mode_lib->vba.qual_row_bw[k],
&mode_lib->vba.DestinationLinesToRequestVMInImmediateFlip[k],
&mode_lib->vba.DestinationLinesToRequestRowInImmediateFlip[k],
&final_flip_bw[k],
&mode_lib->vba.ImmediateFlipSupportedForPipe[k]);
}
/*
 * Total read bandwidth with immediate flip included. Per plane:
 *   cursor_bw + max(prefetch_vm_bw,
 *                   max(prefetch_row_bw,
 *                       final_flip_bw
 *                       + max(luma + chroma read bw,
 *                             RequiredPrefetchPixDataBWLuma)))
 */
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
total_dcn_read_bw_with_flip =
total_dcn_read_bw_with_flip
+ mode_lib->vba.cursor_bw[k]
+ dml_max(
mode_lib->vba.prefetch_vm_bw[k],
dml_max(
mode_lib->vba.prefetch_row_bw[k],
final_flip_bw[k]
+ dml_max(
mode_lib->vba.ReadBandwidthPlaneLuma[k]
+ mode_lib->vba.ReadBandwidthPlaneChroma[k],
mode_lib->vba.RequiredPrefetchPixDataBWLuma[k])));
}
/*
 * Immediate flip is supported only if that total fits within ReturnBW
 * and every pipe individually reported support.
 */
mode_lib->vba.ImmediateFlipSupported = true;
if (total_dcn_read_bw_with_flip > mode_lib->vba.ReturnBW) {
mode_lib->vba.ImmediateFlipSupported = false;
}
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
if (mode_lib->vba.ImmediateFlipSupportedForPipe[k] == false) {
mode_lib->vba.ImmediateFlipSupported = false;
}
}
} else {
mode_lib->vba.ImmediateFlipSupported