| /* |
| * Copyright 2018 Advanced Micro Devices, Inc. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included in |
| * all copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
| * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
| * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
| * OTHER DEALINGS IN THE SOFTWARE. |
| * |
| * Authors: AMD |
| * |
| */ |
| |
| #include "../display_mode_lib.h" |
| #include "display_mode_vba_20.h" |
| #include "../dml_inline_defs.h" |
| |
| /* |
| * NOTE: |
| * This file is gcc-parseable HW gospel, coming straight from HW engineers. |
| * |
| * It doesn't adhere to Linux kernel style and sometimes will do things in odd |
| * ways. Unless there is something clearly wrong with it the code should |
| * remain as-is as it provides us with a guarantee from HW that it is correct. |
| */ |
| |
| #define BPP_INVALID 0 |
| #define BPP_BLENDED_PIPE 0xffffffff |
| |
| static double adjust_ReturnBW( |
| struct display_mode_lib *mode_lib, |
| double ReturnBW, |
| bool DCCEnabledAnyPlane, |
| double ReturnBandwidthToDCN); |
| static unsigned int dscceComputeDelay( |
| unsigned int bpc, |
| double bpp, |
| unsigned int sliceWidth, |
| unsigned int numSlices, |
| enum output_format_class pixelFormat); |
| static unsigned int dscComputeDelay(enum output_format_class pixelFormat); |
| // Super monster function with more than 60 arguments |
| static bool CalculatePrefetchSchedule( |
| struct display_mode_lib *mode_lib, |
| double DPPCLK, |
| double DISPCLK, |
| double PixelClock, |
| double DCFCLKDeepSleep, |
| unsigned int DSCDelay, |
| unsigned int DPPPerPlane, |
| bool ScalerEnabled, |
| unsigned int NumberOfCursors, |
| double DPPCLKDelaySubtotal, |
| double DPPCLKDelaySCL, |
| double DPPCLKDelaySCLLBOnly, |
| double DPPCLKDelayCNVCFormater, |
| double DPPCLKDelayCNVCCursor, |
| double DISPCLKDelaySubtotal, |
| unsigned int ScalerRecoutWidth, |
| enum output_format_class OutputFormat, |
| unsigned int VBlank, |
| unsigned int HTotal, |
| unsigned int MaxInterDCNTileRepeaters, |
| unsigned int VStartup, |
| unsigned int PageTableLevels, |
| bool GPUVMEnable, |
| bool DynamicMetadataEnable, |
| unsigned int DynamicMetadataLinesBeforeActiveRequired, |
| unsigned int DynamicMetadataTransmittedBytes, |
| bool DCCEnable, |
| double UrgentLatencyPixelDataOnly, |
| double UrgentExtraLatency, |
| double TCalc, |
| unsigned int PDEAndMetaPTEBytesFrame, |
| unsigned int MetaRowByte, |
| unsigned int PixelPTEBytesPerRow, |
| double PrefetchSourceLinesY, |
| unsigned int SwathWidthY, |
| double BytePerPixelDETY, |
| double VInitPreFillY, |
| unsigned int MaxNumSwathY, |
| double PrefetchSourceLinesC, |
| double BytePerPixelDETC, |
| double VInitPreFillC, |
| unsigned int MaxNumSwathC, |
| unsigned int SwathHeightY, |
| unsigned int SwathHeightC, |
| double TWait, |
| bool XFCEnabled, |
| double XFCRemoteSurfaceFlipDelay, |
| bool InterlaceEnable, |
| bool ProgressiveToInterlaceUnitInOPP, |
| double *DSTXAfterScaler, |
| double *DSTYAfterScaler, |
| double *DestinationLinesForPrefetch, |
| double *PrefetchBandwidth, |
| double *DestinationLinesToRequestVMInVBlank, |
| double *DestinationLinesToRequestRowInVBlank, |
| double *VRatioPrefetchY, |
| double *VRatioPrefetchC, |
| double *RequiredPrefetchPixDataBW, |
| unsigned int *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata, |
| double *Tno_bw, |
| unsigned int *VUpdateOffsetPix, |
| double *VUpdateWidthPix, |
| double *VReadyOffsetPix); |
| static double RoundToDFSGranularityUp(double Clock, double VCOSpeed); |
| static double RoundToDFSGranularityDown(double Clock, double VCOSpeed); |
| static double CalculatePrefetchSourceLines( |
| struct display_mode_lib *mode_lib, |
| double VRatio, |
| double vtaps, |
| bool Interlace, |
| bool ProgressiveToInterlaceUnitInOPP, |
| unsigned int SwathHeight, |
| unsigned int ViewportYStart, |
| double *VInitPreFill, |
| unsigned int *MaxNumSwath); |
| static unsigned int CalculateVMAndRowBytes( |
| struct display_mode_lib *mode_lib, |
| bool DCCEnable, |
| unsigned int BlockHeight256Bytes, |
| unsigned int BlockWidth256Bytes, |
| enum source_format_class SourcePixelFormat, |
| unsigned int SurfaceTiling, |
| unsigned int BytePerPixel, |
| enum scan_direction_class ScanDirection, |
| unsigned int ViewportWidth, |
| unsigned int ViewportHeight, |
| unsigned int SwathWidthY, |
| bool GPUVMEnable, |
| unsigned int VMMPageSize, |
| unsigned int PTEBufferSizeInRequestsLuma, |
| unsigned int PDEProcessingBufIn64KBReqs, |
| unsigned int Pitch, |
| unsigned int DCCMetaPitch, |
| unsigned int *MacroTileWidth, |
| unsigned int *MetaRowByte, |
| unsigned int *PixelPTEBytesPerRow, |
| bool *PTEBufferSizeNotExceeded, |
| unsigned int *dpte_row_height, |
| unsigned int *meta_row_height); |
| static double CalculateTWait( |
| unsigned int PrefetchMode, |
| double DRAMClockChangeLatency, |
| double UrgentLatencyPixelDataOnly, |
| double SREnterPlusExitTime); |
| static double CalculateRemoteSurfaceFlipDelay( |
| struct display_mode_lib *mode_lib, |
| double VRatio, |
| double SwathWidth, |
| double Bpp, |
| double LineTime, |
| double XFCTSlvVupdateOffset, |
| double XFCTSlvVupdateWidth, |
| double XFCTSlvVreadyOffset, |
| double XFCXBUFLatencyTolerance, |
| double XFCFillBWOverhead, |
| double XFCSlvChunkSize, |
| double XFCBusTransportTime, |
| double TCalc, |
| double TWait, |
| double *SrcActiveDrainRate, |
| double *TInitXFill, |
| double *TslvChk); |
| static void CalculateActiveRowBandwidth( |
| bool GPUVMEnable, |
| enum source_format_class SourcePixelFormat, |
| double VRatio, |
| bool DCCEnable, |
| double LineTime, |
| unsigned int MetaRowByteLuma, |
| unsigned int MetaRowByteChroma, |
| unsigned int meta_row_height_luma, |
| unsigned int meta_row_height_chroma, |
| unsigned int PixelPTEBytesPerRowLuma, |
| unsigned int PixelPTEBytesPerRowChroma, |
| unsigned int dpte_row_height_luma, |
| unsigned int dpte_row_height_chroma, |
| double *meta_row_bw, |
| double *dpte_row_bw, |
| double *qual_row_bw); |
| static void CalculateFlipSchedule( |
| struct display_mode_lib *mode_lib, |
| double UrgentExtraLatency, |
| double UrgentLatencyPixelDataOnly, |
| unsigned int GPUVMMaxPageTableLevels, |
| bool GPUVMEnable, |
| double BandwidthAvailableForImmediateFlip, |
| unsigned int TotImmediateFlipBytes, |
| enum source_format_class SourcePixelFormat, |
| unsigned int ImmediateFlipBytes, |
| double LineTime, |
| double VRatio, |
| double Tno_bw, |
| double PDEAndMetaPTEBytesFrame, |
| unsigned int MetaRowByte, |
| unsigned int PixelPTEBytesPerRow, |
| bool DCCEnable, |
| unsigned int dpte_row_height, |
| unsigned int meta_row_height, |
| double qual_row_bw, |
| double *DestinationLinesToRequestVMInImmediateFlip, |
| double *DestinationLinesToRequestRowInImmediateFlip, |
| double *final_flip_bw, |
| bool *ImmediateFlipSupportedForPipe); |
| static double CalculateWriteBackDelay( |
| enum source_format_class WritebackPixelFormat, |
| double WritebackHRatio, |
| double WritebackVRatio, |
| unsigned int WritebackLumaHTaps, |
| unsigned int WritebackLumaVTaps, |
| unsigned int WritebackChromaHTaps, |
| unsigned int WritebackChromaVTaps, |
| unsigned int WritebackDestinationWidth); |
| |
| static void dml20_DisplayPipeConfiguration(struct display_mode_lib *mode_lib); |
| static void dml20_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( |
| struct display_mode_lib *mode_lib); |
| |
| void dml20_recalculate(struct display_mode_lib *mode_lib) |
| { |
| ModeSupportAndSystemConfiguration(mode_lib); |
| mode_lib->vba.FabricAndDRAMBandwidth = dml_min( |
| mode_lib->vba.DRAMSpeed * mode_lib->vba.NumberOfChannels * mode_lib->vba.DRAMChannelWidth, |
| mode_lib->vba.FabricClock * mode_lib->vba.FabricDatapathToDCNDataReturn) / 1000.0; |
| PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib); |
| dml20_DisplayPipeConfiguration(mode_lib); |
| dml20_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib); |
| } |
| |
| static double adjust_ReturnBW( |
| struct display_mode_lib *mode_lib, |
| double ReturnBW, |
| bool DCCEnabledAnyPlane, |
| double ReturnBandwidthToDCN) |
| { |
| double CriticalCompression; |
| |
| if (DCCEnabledAnyPlane |
| && ReturnBandwidthToDCN |
| > mode_lib->vba.DCFCLK * mode_lib->vba.ReturnBusWidth / 4.0) |
| ReturnBW = |
| dml_min( |
| ReturnBW, |
| ReturnBandwidthToDCN * 4 |
| * (1.0 |
| - mode_lib->vba.UrgentLatencyPixelDataOnly |
| / ((mode_lib->vba.ROBBufferSizeInKByte |
| - mode_lib->vba.PixelChunkSizeInKByte) |
| * 1024 |
| / ReturnBandwidthToDCN |
| - mode_lib->vba.DCFCLK |
| * mode_lib->vba.ReturnBusWidth |
| / 4) |
| + mode_lib->vba.UrgentLatencyPixelDataOnly)); |
| |
| CriticalCompression = 2.0 * mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLK |
| * mode_lib->vba.UrgentLatencyPixelDataOnly |
| / (ReturnBandwidthToDCN * mode_lib->vba.UrgentLatencyPixelDataOnly |
| + (mode_lib->vba.ROBBufferSizeInKByte |
| - mode_lib->vba.PixelChunkSizeInKByte) |
| * 1024); |
| |
| if (DCCEnabledAnyPlane && CriticalCompression > 1.0 && CriticalCompression < 4.0) |
| ReturnBW = |
| dml_min( |
| ReturnBW, |
| 4.0 * ReturnBandwidthToDCN |
| * (mode_lib->vba.ROBBufferSizeInKByte |
| - mode_lib->vba.PixelChunkSizeInKByte) |
| * 1024 |
| * mode_lib->vba.ReturnBusWidth |
| * mode_lib->vba.DCFCLK |
| * mode_lib->vba.UrgentLatencyPixelDataOnly |
| / dml_pow( |
| (ReturnBandwidthToDCN |
| * mode_lib->vba.UrgentLatencyPixelDataOnly |
| + (mode_lib->vba.ROBBufferSizeInKByte |
| - mode_lib->vba.PixelChunkSizeInKByte) |
| * 1024), |
| 2)); |
| |
| return ReturnBW; |
| } |
| |
| static unsigned int dscceComputeDelay( |
| unsigned int bpc, |
| double bpp, |
| unsigned int sliceWidth, |
| unsigned int numSlices, |
| enum output_format_class pixelFormat) |
| { |
| // valid bpc = source bits per component in the set of {8, 10, 12} |
| // valid bpp = increments of 1/16 of a bit |
| // min = 6/7/8 in N420/N422/444, respectively |
| // max = such that compression is 1:1 |
| //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) |
| //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} |
| //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} |
| |
| // fixed value |
| unsigned int rcModelSize = 8192; |
| |
| // N422/N420 operate at 2 pixels per clock |
| unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, l, |
| Delay, pixels; |
| |
| if (pixelFormat == dm_n422 || pixelFormat == dm_420) |
| pixelsPerClock = 2; |
| // #all other modes operate at 1 pixel per clock |
| else |
| pixelsPerClock = 1; |
| |
| //initial transmit delay as per PPS |
| initalXmitDelay = dml_round(rcModelSize / 2.0 / bpp / pixelsPerClock); |
| |
| //compute ssm delay |
| if (bpc == 8) |
| D = 81; |
| else if (bpc == 10) |
| D = 89; |
| else |
| D = 113; |
| |
| //divide by pixel per cycle to compute slice width as seen by DSC |
| w = sliceWidth / pixelsPerClock; |
| |
| //422 mode has an additional cycle of delay |
| if (pixelFormat == dm_s422) |
| s = 1; |
| else |
| s = 0; |
| |
| //main calculation for the dscce |
| ix = initalXmitDelay + 45; |
| wx = (w + 2) / 3; |
| p = 3 * wx - w; |
| l0 = ix / w; |
| a = ix + p * l0; |
| ax = (a + 2) / 3 + D + 6 + 1; |
| l = (ax + wx - 1) / wx; |
| if ((ix % w) == 0 && p != 0) |
| lstall = 1; |
| else |
| lstall = 0; |
| Delay = l * wx * (numSlices - 1) + ax + s + lstall + 22; |
| |
| //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels |
| pixels = Delay * 3 * pixelsPerClock; |
| return pixels; |
| } |
| |
| static unsigned int dscComputeDelay(enum output_format_class pixelFormat) |
| { |
| unsigned int Delay = 0; |
| |
| if (pixelFormat == dm_420) { |
| // sfr |
| Delay = Delay + 2; |
| // dsccif |
| Delay = Delay + 0; |
| // dscc - input deserializer |
| Delay = Delay + 3; |
| // dscc gets pixels every other cycle |
| Delay = Delay + 2; |
| // dscc - input cdc fifo |
| Delay = Delay + 12; |
| // dscc gets pixels every other cycle |
| Delay = Delay + 13; |
| // dscc - cdc uncertainty |
| Delay = Delay + 2; |
| // dscc - output cdc fifo |
| Delay = Delay + 7; |
| // dscc gets pixels every other cycle |
| Delay = Delay + 3; |
| // dscc - cdc uncertainty |
| Delay = Delay + 2; |
| // dscc - output serializer |
| Delay = Delay + 1; |
| // sft |
| Delay = Delay + 1; |
| } else if (pixelFormat == dm_n422) { |
| // sfr |
| Delay = Delay + 2; |
| // dsccif |
| Delay = Delay + 1; |
| // dscc - input deserializer |
| Delay = Delay + 5; |
| // dscc - input cdc fifo |
| Delay = Delay + 25; |
| // dscc - cdc uncertainty |
| Delay = Delay + 2; |
| // dscc - output cdc fifo |
| Delay = Delay + 10; |
| // dscc - cdc uncertainty |
| Delay = Delay + 2; |
| // dscc - output serializer |
| Delay = Delay + 1; |
| // sft |
| Delay = Delay + 1; |
| } else { |
| // sfr |
| Delay = Delay + 2; |
| // dsccif |
| Delay = Delay + 0; |
| // dscc - input deserializer |
| Delay = Delay + 3; |
| // dscc - input cdc fifo |
| Delay = Delay + 12; |
| // dscc - cdc uncertainty |
| Delay = Delay + 2; |
| // dscc - output cdc fifo |
| Delay = Delay + 7; |
| // dscc - output serializer |
| Delay = Delay + 1; |
| // dscc - cdc uncertainty |
| Delay = Delay + 2; |
| // sft |
| Delay = Delay + 1; |
| } |
| |
| return Delay; |
| } |
| |
| static bool CalculatePrefetchSchedule( |
| struct display_mode_lib *mode_lib, |
| double DPPCLK, |
| double DISPCLK, |
| double PixelClock, |
| double DCFCLKDeepSleep, |
| unsigned int DSCDelay, |
| unsigned int DPPPerPlane, |
| bool ScalerEnabled, |
| unsigned int NumberOfCursors, |
| double DPPCLKDelaySubtotal, |
| double DPPCLKDelaySCL, |
| double DPPCLKDelaySCLLBOnly, |
| double DPPCLKDelayCNVCFormater, |
| double DPPCLKDelayCNVCCursor, |
| double DISPCLKDelaySubtotal, |
| unsigned int ScalerRecoutWidth, |
| enum output_format_class OutputFormat, |
| unsigned int VBlank, |
| unsigned int HTotal, |
| unsigned int MaxInterDCNTileRepeaters, |
| unsigned int VStartup, |
| unsigned int PageTableLevels, |
| bool GPUVMEnable, |
| bool DynamicMetadataEnable, |
| unsigned int DynamicMetadataLinesBeforeActiveRequired, |
| unsigned int DynamicMetadataTransmittedBytes, |
| bool DCCEnable, |
| double UrgentLatencyPixelDataOnly, |
| double UrgentExtraLatency, |
| double TCalc, |
| unsigned int PDEAndMetaPTEBytesFrame, |
| unsigned int MetaRowByte, |
| unsigned int PixelPTEBytesPerRow, |
| double PrefetchSourceLinesY, |
| unsigned int SwathWidthY, |
| double BytePerPixelDETY, |
| double VInitPreFillY, |
| unsigned int MaxNumSwathY, |
| double PrefetchSourceLinesC, |
| double BytePerPixelDETC, |
| double VInitPreFillC, |
| unsigned int MaxNumSwathC, |
| unsigned int SwathHeightY, |
| unsigned int SwathHeightC, |
| double TWait, |
| bool XFCEnabled, |
| double XFCRemoteSurfaceFlipDelay, |
| bool InterlaceEnable, |
| bool ProgressiveToInterlaceUnitInOPP, |
| double *DSTXAfterScaler, |
| double *DSTYAfterScaler, |
| double *DestinationLinesForPrefetch, |
| double *PrefetchBandwidth, |
| double *DestinationLinesToRequestVMInVBlank, |
| double *DestinationLinesToRequestRowInVBlank, |
| double *VRatioPrefetchY, |
| double *VRatioPrefetchC, |
| double *RequiredPrefetchPixDataBW, |
| unsigned int *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata, |
| double *Tno_bw, |
| unsigned int *VUpdateOffsetPix, |
| double *VUpdateWidthPix, |
| double *VReadyOffsetPix) |
| { |
| bool MyError = false; |
| unsigned int DPPCycles, DISPCLKCycles; |
| double DSTTotalPixelsAfterScaler, TotalRepeaterDelayTime; |
| double Tdm, LineTime, Tsetup; |
| double dst_y_prefetch_equ; |
| double Tsw_oto; |
| double prefetch_bw_oto; |
| double Tvm_oto; |
| double Tr0_oto; |
| double Tpre_oto; |
| double dst_y_prefetch_oto; |
| double TimeForFetchingMetaPTE = 0; |
| double TimeForFetchingRowInVBlank = 0; |
| double LinesToRequestPrefetchPixelData = 0; |
| |
| if (ScalerEnabled) |
| DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCL; |
| else |
| DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCLLBOnly; |
| |
| DPPCycles = DPPCycles + DPPCLKDelayCNVCFormater + NumberOfCursors * DPPCLKDelayCNVCCursor; |
| |
| DISPCLKCycles = DISPCLKDelaySubtotal; |
| |
| if (DPPCLK == 0.0 || DISPCLK == 0.0) |
| return true; |
| |
| *DSTXAfterScaler = DPPCycles * PixelClock / DPPCLK + DISPCLKCycles * PixelClock / DISPCLK |
| + DSCDelay; |
| |
| if (DPPPerPlane > 1) |
| *DSTXAfterScaler = *DSTXAfterScaler + ScalerRecoutWidth; |
| |
| if (OutputFormat == dm_420 || (InterlaceEnable && ProgressiveToInterlaceUnitInOPP)) |
| *DSTYAfterScaler = 1; |
| else |
| *DSTYAfterScaler = 0; |
| |
| DSTTotalPixelsAfterScaler = ((double) (*DSTYAfterScaler * HTotal)) + *DSTXAfterScaler; |
| *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / HTotal, 1); |
| *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * HTotal)); |
| |
| *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1); |
| TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2.0 / DPPCLK + 3.0 / DISPCLK); |
| *VUpdateWidthPix = (14.0 / DCFCLKDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) |
| * PixelClock; |
| |
| *VReadyOffsetPix = dml_max( |
| 150.0 / DPPCLK, |
| TotalRepeaterDelayTime + 20.0 / DCFCLKDeepSleep + 10.0 / DPPCLK) |
| * PixelClock; |
| |
| Tsetup = (double) (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; |
| |
| LineTime = (double) HTotal / PixelClock; |
| |
| if (DynamicMetadataEnable) { |
| double Tdmbf, Tdmec, Tdmsks; |
| |
| Tdm = dml_max(0.0, UrgentExtraLatency - TCalc); |
| Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK; |
| Tdmec = LineTime; |
| if (DynamicMetadataLinesBeforeActiveRequired == 0) |
| Tdmsks = VBlank * LineTime / 2.0; |
| else |
| Tdmsks = DynamicMetadataLinesBeforeActiveRequired * LineTime; |
| if (InterlaceEnable && !ProgressiveToInterlaceUnitInOPP) |
| Tdmsks = Tdmsks / 2; |
| if (VStartup * LineTime |
| < Tsetup + TWait + UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) { |
| MyError = true; |
| *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata = (Tsetup + TWait |
| + UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) / LineTime; |
| } else |
| *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata = 0.0; |
| } else |
| Tdm = 0; |
| |
| if (GPUVMEnable) { |
| if (PageTableLevels == 4) |
| *Tno_bw = UrgentExtraLatency + UrgentLatencyPixelDataOnly; |
| else if (PageTableLevels == 3) |
| *Tno_bw = UrgentExtraLatency; |
| else |
| *Tno_bw = 0; |
| } else if (DCCEnable) |
| *Tno_bw = LineTime; |
| else |
| *Tno_bw = LineTime / 4; |
| |
| dst_y_prefetch_equ = VStartup - dml_max(TCalc + TWait, XFCRemoteSurfaceFlipDelay) / LineTime |
| - (Tsetup + Tdm) / LineTime |
| - (*DSTYAfterScaler + *DSTXAfterScaler / HTotal); |
| |
| Tsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime; |
| |
| prefetch_bw_oto = (MetaRowByte + PixelPTEBytesPerRow |
| + PrefetchSourceLinesY * SwathWidthY * dml_ceil(BytePerPixelDETY, 1) |
| + PrefetchSourceLinesC * SwathWidthY / 2 * dml_ceil(BytePerPixelDETC, 2)) |
| / Tsw_oto; |
| |
| if (GPUVMEnable == true) { |
| Tvm_oto = |
| dml_max( |
| *Tno_bw + PDEAndMetaPTEBytesFrame / prefetch_bw_oto, |
| dml_max( |
| UrgentExtraLatency |
| + UrgentLatencyPixelDataOnly |
| * (PageTableLevels |
| - 1), |
| LineTime / 4.0)); |
| } else |
| Tvm_oto = LineTime / 4.0; |
| |
| if ((GPUVMEnable == true || DCCEnable == true)) { |
| Tr0_oto = dml_max( |
| (MetaRowByte + PixelPTEBytesPerRow) / prefetch_bw_oto, |
| dml_max(UrgentLatencyPixelDataOnly, dml_max(LineTime - Tvm_oto, LineTime / 4))); |
| } else |
| Tr0_oto = LineTime - Tvm_oto; |
| |
| Tpre_oto = Tvm_oto + Tr0_oto + Tsw_oto; |
| |
| dst_y_prefetch_oto = Tpre_oto / LineTime; |
| |
| if (dst_y_prefetch_oto < dst_y_prefetch_equ) |
| *DestinationLinesForPrefetch = dst_y_prefetch_oto; |
| else |
| *DestinationLinesForPrefetch = dst_y_prefetch_equ; |
| |
| *DestinationLinesForPrefetch = dml_floor(4.0 * (*DestinationLinesForPrefetch + 0.125), 1) |
| / 4; |
| |
| dml_print("DML: VStartup: %d\n", VStartup); |
| dml_print("DML: TCalc: %f\n", TCalc); |
| dml_print("DML: TWait: %f\n", TWait); |
| dml_print("DML: XFCRemoteSurfaceFlipDelay: %f\n", XFCRemoteSurfaceFlipDelay); |
| dml_print("DML: LineTime: %f\n", LineTime); |
| dml_print("DML: Tsetup: %f\n", Tsetup); |
| dml_print("DML: Tdm: %f\n", Tdm); |
| dml_print("DML: DSTYAfterScaler: %f\n", *DSTYAfterScaler); |
| dml_print("DML: DSTXAfterScaler: %f\n", *DSTXAfterScaler); |
| dml_print("DML: HTotal: %d\n", HTotal); |
| |
| *PrefetchBandwidth = 0; |
| *DestinationLinesToRequestVMInVBlank = 0; |
| *DestinationLinesToRequestRowInVBlank = 0; |
| *VRatioPrefetchY = 0; |
| *VRatioPrefetchC = 0; |
| *RequiredPrefetchPixDataBW = 0; |
| if (*DestinationLinesForPrefetch > 1) { |
| *PrefetchBandwidth = (PDEAndMetaPTEBytesFrame + 2 * MetaRowByte |
| + 2 * PixelPTEBytesPerRow |
| + PrefetchSourceLinesY * SwathWidthY * dml_ceil(BytePerPixelDETY, 1) |
| + PrefetchSourceLinesC * SwathWidthY / 2 |
| * dml_ceil(BytePerPixelDETC, 2)) |
| / (*DestinationLinesForPrefetch * LineTime - *Tno_bw); |
| if (GPUVMEnable) { |
| TimeForFetchingMetaPTE = |
| dml_max( |
| *Tno_bw |
| + (double) PDEAndMetaPTEBytesFrame |
| / *PrefetchBandwidth, |
| dml_max( |
| UrgentExtraLatency |
| + UrgentLatencyPixelDataOnly |
| * (PageTableLevels |
| - 1), |
| LineTime / 4)); |
| } else { |
| if (NumberOfCursors > 0 || XFCEnabled) |
| TimeForFetchingMetaPTE = LineTime / 4; |
| else |
| TimeForFetchingMetaPTE = 0.0; |
| } |
| |
| if ((GPUVMEnable == true || DCCEnable == true)) { |
| TimeForFetchingRowInVBlank = |
| dml_max( |
| (MetaRowByte + PixelPTEBytesPerRow) |
| / *PrefetchBandwidth, |
| dml_max( |
| UrgentLatencyPixelDataOnly, |
| dml_max( |
| LineTime |
| - TimeForFetchingMetaPTE, |
| LineTime |
| / 4.0))); |
| } else { |
| if (NumberOfCursors > 0 || XFCEnabled) |
| TimeForFetchingRowInVBlank = LineTime - TimeForFetchingMetaPTE; |
| else |
| TimeForFetchingRowInVBlank = 0.0; |
| } |
| |
| *DestinationLinesToRequestVMInVBlank = dml_floor( |
| 4.0 * (TimeForFetchingMetaPTE / LineTime + 0.125), |
| 1) / 4.0; |
| |
| *DestinationLinesToRequestRowInVBlank = dml_floor( |
| 4.0 * (TimeForFetchingRowInVBlank / LineTime + 0.125), |
| 1) / 4.0; |
| |
| LinesToRequestPrefetchPixelData = |
| *DestinationLinesForPrefetch |
| - ((NumberOfCursors > 0 || GPUVMEnable |
| || DCCEnable) ? |
| (*DestinationLinesToRequestVMInVBlank |
| + *DestinationLinesToRequestRowInVBlank) : |
| 0.0); |
| |
| if (LinesToRequestPrefetchPixelData > 0) { |
| |
| *VRatioPrefetchY = (double) PrefetchSourceLinesY |
| / LinesToRequestPrefetchPixelData; |
| *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); |
| if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { |
| if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { |
| *VRatioPrefetchY = |
| dml_max( |
| (double) PrefetchSourceLinesY |
| / LinesToRequestPrefetchPixelData, |
| (double) MaxNumSwathY |
| * SwathHeightY |
| / (LinesToRequestPrefetchPixelData |
| - (VInitPreFillY |
| - 3.0) |
| / 2.0)); |
| *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); |
| } else { |
| MyError = true; |
| *VRatioPrefetchY = 0; |
| } |
| } |
| |
| *VRatioPrefetchC = (double) PrefetchSourceLinesC |
| / LinesToRequestPrefetchPixelData; |
| *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); |
| |
| if ((SwathHeightC > 4)) { |
| if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { |
| *VRatioPrefetchC = |
| dml_max( |
| *VRatioPrefetchC, |
| (double) MaxNumSwathC |
| * SwathHeightC |
| / (LinesToRequestPrefetchPixelData |
| - (VInitPreFillC |
| - 3.0) |
| / 2.0)); |
| *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); |
| } else { |
| MyError = true; |
| *VRatioPrefetchC = 0; |
| } |
| } |
| |
| *RequiredPrefetchPixDataBW = |
| DPPPerPlane |
| * ((double) PrefetchSourceLinesY |
| / LinesToRequestPrefetchPixelData |
| * dml_ceil( |
| BytePerPixelDETY, |
| 1) |
| + (double) PrefetchSourceLinesC |
| / LinesToRequestPrefetchPixelData |
| * dml_ceil( |
| BytePerPixelDETC, |
| 2) |
| / 2) |
| * SwathWidthY / LineTime; |
| } else { |
| MyError = true; |
| *VRatioPrefetchY = 0; |
| *VRatioPrefetchC = 0; |
| *RequiredPrefetchPixDataBW = 0; |
| } |
| |
| } else { |
| MyError = true; |
| } |
| |
| if (MyError) { |
| *PrefetchBandwidth = 0; |
| TimeForFetchingMetaPTE = 0; |
| TimeForFetchingRowInVBlank = 0; |
| *DestinationLinesToRequestVMInVBlank = 0; |
| *DestinationLinesToRequestRowInVBlank = 0; |
| *DestinationLinesForPrefetch = 0; |
| LinesToRequestPrefetchPixelData = 0; |
| *VRatioPrefetchY = 0; |
| *VRatioPrefetchC = 0; |
| *RequiredPrefetchPixDataBW = 0; |
| } |
| |
| return MyError; |
| } |
| |
/*
 * Round @Clock up to a frequency of the form (VCOSpeed * 4) / N.
 *
 * N is the ratio (VCOSpeed * 4) / Clock passed through dml_floor(), so the
 * result is not below @Clock whenever N is at least 1.
 * NOTE(review): N is 0 when Clock > VCOSpeed * 4, which makes this a
 * floating-point division by zero -- confirm callers never ask for that.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	double divider = dml_floor(VCOSpeed * 4 / Clock, 1);

	return VCOSpeed * 4 / divider;
}
| |
/*
 * Round @Clock down to a frequency of the form (VCOSpeed * 4) / N.
 *
 * N is the ratio (VCOSpeed * 4) / Clock passed through dml_ceil(), so the
 * result is not above @Clock for a positive ratio.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	double divider = dml_ceil(VCOSpeed * 4 / Clock, 1);

	return VCOSpeed * 4 / divider;
}
| |
| static double CalculatePrefetchSourceLines( |
| struct display_mode_lib *mode_lib, |
| double VRatio, |
| double vtaps, |
| bool Interlace, |
| bool ProgressiveToInterlaceUnitInOPP, |
| unsigned int SwathHeight, |
| unsigned int ViewportYStart, |
| double *VInitPreFill, |
| unsigned int *MaxNumSwath) |
| { |
| unsigned int MaxPartialSwath; |
| |
| if (ProgressiveToInterlaceUnitInOPP) |
| *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1); |
| else |
| *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1); |
| |
| if (!mode_lib->vba.IgnoreViewportPositioning) { |
| |
| *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0; |
| |
| if (*VInitPreFill > 1.0) |
| MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight; |
| else |
| MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) |
| % SwathHeight; |
| MaxPartialSwath = dml_max(1U, MaxPartialSwath); |
| |
| } else { |
| |
| if (ViewportYStart != 0) |
| dml_print( |
| "WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n"); |
| |
| *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1); |
| |
| if (*VInitPreFill > 1.0) |
| MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight; |
| else |
| MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) |
| % SwathHeight; |
| } |
| |
| return *MaxNumSwath * SwathHeight + MaxPartialSwath; |
| } |
| |
| static unsigned int CalculateVMAndRowBytes( |
| struct display_mode_lib *mode_lib, |
| bool DCCEnable, |
| unsigned int BlockHeight256Bytes, |
| unsigned int BlockWidth256Bytes, |
| enum source_format_class SourcePixelFormat, |
| unsigned int SurfaceTiling, |
| unsigned int BytePerPixel, |
| enum scan_direction_class ScanDirection, |
| unsigned int ViewportWidth, |
| unsigned int ViewportHeight, |
| unsigned int SwathWidth, |
| bool GPUVMEnable, |
| unsigned int VMMPageSize, |
| unsigned int PTEBufferSizeInRequestsLuma, |
| unsigned int PDEProcessingBufIn64KBReqs, |
| unsigned int Pitch, |
| unsigned int DCCMetaPitch, |
| unsigned int *MacroTileWidth, |
| unsigned int *MetaRowByte, |
| unsigned int *PixelPTEBytesPerRow, |
| bool *PTEBufferSizeNotExceeded, |
| unsigned int *dpte_row_height, |
| unsigned int *meta_row_height) |
| { |
| unsigned int MetaRequestHeight; |
| unsigned int MetaRequestWidth; |
| unsigned int MetaSurfWidth; |
| unsigned int MetaSurfHeight; |
| unsigned int MPDEBytesFrame; |
| unsigned int MetaPTEBytesFrame; |
| unsigned int DCCMetaSurfaceBytes; |
| |
| unsigned int MacroTileSizeBytes; |
| unsigned int MacroTileHeight; |
| unsigned int DPDE0BytesFrame; |
| unsigned int ExtraDPDEBytesFrame; |
| unsigned int PDEAndMetaPTEBytesFrame; |
| |
| if (DCCEnable == true) { |
| MetaRequestHeight = 8 * BlockHeight256Bytes; |
| MetaRequestWidth = 8 * BlockWidth256Bytes; |
| if (ScanDirection == dm_horz) { |
| *meta_row_height = MetaRequestHeight; |
| MetaSurfWidth = dml_ceil((double) SwathWidth - 1, MetaRequestWidth) |
| + MetaRequestWidth; |
| *MetaRowByte = MetaSurfWidth * MetaRequestHeight * BytePerPixel / 256.0; |
| } else { |
| *meta_row_height = MetaRequestWidth; |
| MetaSurfHeight = dml_ceil((double) SwathWidth - 1, MetaRequestHeight) |
| + MetaRequestHeight; |
| *MetaRowByte = MetaSurfHeight * MetaRequestWidth * BytePerPixel / 256.0; |
| } |
| if (ScanDirection == dm_horz) { |
| DCCMetaSurfaceBytes = DCCMetaPitch |
| * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) |
| + 64 * BlockHeight256Bytes) * BytePerPixel |
| / 256; |
| } else { |
| DCCMetaSurfaceBytes = DCCMetaPitch |
| * (dml_ceil( |
| (double) ViewportHeight - 1, |
| 64 * BlockHeight256Bytes) |
| + 64 * BlockHeight256Bytes) * BytePerPixel |
| / 256; |
| } |
| if (GPUVMEnable == true) { |
| MetaPTEBytesFrame = (dml_ceil( |
| (double) (DCCMetaSurfaceBytes - VMMPageSize) |
| / (8 * VMMPageSize), |
| 1) + 1) * 64; |
| MPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 1); |
| } else { |
| MetaPTEBytesFrame = 0; |
| MPDEBytesFrame = 0; |
| } |
| } else { |
| MetaPTEBytesFrame = 0; |
| MPDEBytesFrame = 0; |
| *MetaRowByte = 0; |
| } |
| |
| if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_lvp) { |
| MacroTileSizeBytes = 256; |
| MacroTileHeight = BlockHeight256Bytes; |
| } else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x |
| || SurfaceTiling == dm_sw_4kb_d || SurfaceTiling == dm_sw_4kb_d_x) { |
| MacroTileSizeBytes = 4096; |
| MacroTileHeight = 4 * BlockHeight256Bytes; |
| } else if (SurfaceTiling == dm_sw_64kb_s || SurfaceTiling == dm_sw_64kb_s_t |
| || SurfaceTiling == dm_sw_64kb_s_x || SurfaceTiling == dm_sw_64kb_d |
| || SurfaceTiling == dm_sw_64kb_d_t || SurfaceTiling == dm_sw_64kb_d_x |
| || SurfaceTiling == dm_sw_64kb_r_x) { |
| MacroTileSizeBytes = 65536; |
| MacroTileHeight = 16 * BlockHeight256Bytes; |
| } else { |
| MacroTileSizeBytes = 262144; |
| MacroTileHeight = 32 * BlockHeight256Bytes; |
| } |
| *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight; |
| |
| if (GPUVMEnable == true && mode_lib->vba.GPUVMMaxPageTableLevels > 1) { |
| if (ScanDirection == dm_horz) { |
| DPDE0BytesFrame = |
| 64 |
| * (dml_ceil( |
| ((Pitch |
| * (dml_ceil( |
| ViewportHeight |
| - 1, |
| MacroTileHeight) |
| + MacroTileHeight) |
| * BytePerPixel) |
| - MacroTileSizeBytes) |
| / (8 |
| * 2097152), |
| 1) + 1); |
| } else { |
| DPDE0BytesFrame = |
| 64 |
| * (dml_ceil( |
| ((Pitch |
| * (dml_ceil( |
| (double) SwathWidth |
| - 1, |
| MacroTileHeight) |
| + MacroTileHeight) |
| * BytePerPixel) |
| - MacroTileSizeBytes) |
| / (8 |
| * 2097152), |
| 1) + 1); |
| } |
| ExtraDPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 2); |
| } else { |
| DPDE0BytesFrame = 0; |
| ExtraDPDEBytesFrame = 0; |
| } |
| |
| PDEAndMetaPTEBytesFrame = MetaPTEBytesFrame + MPDEBytesFrame + DPDE0BytesFrame |
| + ExtraDPDEBytesFrame; |
| |
| if (GPUVMEnable == true) { |
| unsigned int PTERequestSize; |
| unsigned int PixelPTEReqHeight; |
| unsigned int PixelPTEReqWidth; |
| double FractionOfPTEReturnDrop; |
| unsigned int EffectivePDEProcessingBufIn64KBReqs; |
| |
| if (SurfaceTiling == dm_sw_linear) { |
| PixelPTEReqHeight = 1; |
| PixelPTEReqWidth = 8.0 * VMMPageSize / BytePerPixel; |
| PTERequestSize = 64; |
| FractionOfPTEReturnDrop = 0; |
| } else if (MacroTileSizeBytes == 4096) { |
| PixelPTEReqHeight = MacroTileHeight; |
| PixelPTEReqWidth = 8 * *MacroTileWidth; |
| PTERequestSize = 64; |
| if (ScanDirection == dm_horz) |
| FractionOfPTEReturnDrop = 0; |
| else |
| FractionOfPTEReturnDrop = 7 / 8; |
| } else if (VMMPageSize == 4096 && MacroTileSizeBytes > 4096) { |
| PixelPTEReqHeight = 16 * BlockHeight256Bytes; |
| PixelPTEReqWidth = 16 * BlockWidth256Bytes; |
| PTERequestSize = 128; |
| FractionOfPTEReturnDrop = 0; |
| } else { |
| PixelPTEReqHeight = MacroTileHeight; |
| PixelPTEReqWidth = 8 * *MacroTileWidth; |
| PTERequestSize = 64; |
| FractionOfPTEReturnDrop = 0; |
| } |
| |
| if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) |
| EffectivePDEProcessingBufIn64KBReqs = PDEProcessingBufIn64KBReqs / 2; |
| else |
| EffectivePDEProcessingBufIn64KBReqs = PDEProcessingBufIn64KBReqs; |
| |
| if (SurfaceTiling == dm_sw_linear) { |
| *dpte_row_height = |
| dml_min( |
| 128, |
| 1 |
| << (unsigned int) dml_floor( |
| dml_log2( |
| dml_min( |
| (double) PTEBufferSizeInRequestsLuma |
| * PixelPTEReqWidth, |
| EffectivePDEProcessingBufIn64KBReqs |
| * 65536.0 |
| / BytePerPixel) |
| / Pitch), |
| 1)); |
| *PixelPTEBytesPerRow = PTERequestSize |
| * (dml_ceil( |
| (double) (Pitch * *dpte_row_height - 1) |
| / PixelPTEReqWidth, |
| 1) + 1); |
| } else if (ScanDirection == dm_horz) { |
| *dpte_row_height = PixelPTEReqHeight; |
| *PixelPTEBytesPerRow = PTERequestSize |
| * (dml_ceil(((double) SwathWidth - 1) / PixelPTEReqWidth, 1) |
| + 1); |
| } else { |
| *dpte_row_height = dml_min(PixelPTEReqWidth, *MacroTileWidth); |
| *PixelPTEBytesPerRow = PTERequestSize |
| * (dml_ceil( |
| ((double) SwathWidth - 1) |
| / PixelPTEReqHeight, |
| 1) + 1); |
| } |
| if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) |
| <= 64 * PTEBufferSizeInRequestsLuma) { |
| *PTEBufferSizeNotExceeded = true; |
| } else { |
| *PTEBufferSizeNotExceeded = false; |
| } |
| } else { |
| *PixelPTEBytesPerRow = 0; |
| *PTEBufferSizeNotExceeded = true; |
| } |
| |
| return PDEAndMetaPTEBytesFrame; |
| } |
| |
| static void dml20_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( |
| struct display_mode_lib *mode_lib) |
| { |
| unsigned int j, k; |
| |
| mode_lib->vba.WritebackDISPCLK = 0.0; |
| mode_lib->vba.DISPCLKWithRamping = 0; |
| mode_lib->vba.DISPCLKWithoutRamping = 0; |
| mode_lib->vba.GlobalDPPCLK = 0.0; |
| |
| // mode_lib->vba.DISPCLK and mode_lib->vba.DPPCLK Calculation |
| // |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| if (mode_lib->vba.WritebackEnable[k]) { |
| mode_lib->vba.WritebackDISPCLK = |
| dml_max( |
| mode_lib->vba.WritebackDISPCLK, |
| CalculateWriteBackDISPCLK( |
| mode_lib->vba.WritebackPixelFormat[k], |
| mode_lib->vba.PixelClock[k], |
| mode_lib->vba.WritebackHRatio[k], |
| mode_lib->vba.WritebackVRatio[k], |
| mode_lib->vba.WritebackLumaHTaps[k], |
| mode_lib->vba.WritebackLumaVTaps[k], |
| mode_lib->vba.WritebackChromaHTaps[k], |
| mode_lib->vba.WritebackChromaVTaps[k], |
| mode_lib->vba.WritebackDestinationWidth[k], |
| mode_lib->vba.HTotal[k], |
| mode_lib->vba.WritebackChromaLineBufferWidth)); |
| } |
| } |
| |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| if (mode_lib->vba.HRatio[k] > 1) { |
| mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] = dml_min( |
| mode_lib->vba.MaxDCHUBToPSCLThroughput, |
| mode_lib->vba.MaxPSCLToLBThroughput |
| * mode_lib->vba.HRatio[k] |
| / dml_ceil( |
| mode_lib->vba.htaps[k] |
| / 6.0, |
| 1)); |
| } else { |
| mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] = dml_min( |
| mode_lib->vba.MaxDCHUBToPSCLThroughput, |
| mode_lib->vba.MaxPSCLToLBThroughput); |
| } |
| |
| mode_lib->vba.DPPCLKUsingSingleDPPLuma = |
| mode_lib->vba.PixelClock[k] |
| * dml_max( |
| mode_lib->vba.vtaps[k] / 6.0 |
| * dml_min( |
| 1.0, |
| mode_lib->vba.HRatio[k]), |
| dml_max( |
| mode_lib->vba.HRatio[k] |
| * mode_lib->vba.VRatio[k] |
| / mode_lib->vba.PSCL_THROUGHPUT_LUMA[k], |
| 1.0)); |
| |
| if ((mode_lib->vba.htaps[k] > 6 || mode_lib->vba.vtaps[k] > 6) |
| && mode_lib->vba.DPPCLKUsingSingleDPPLuma |
| < 2 * mode_lib->vba.PixelClock[k]) { |
| mode_lib->vba.DPPCLKUsingSingleDPPLuma = 2 * mode_lib->vba.PixelClock[k]; |
| } |
| |
| if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 |
| && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { |
| mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] = 0.0; |
| mode_lib->vba.DPPCLKUsingSingleDPP[k] = |
| mode_lib->vba.DPPCLKUsingSingleDPPLuma; |
| } else { |
| if (mode_lib->vba.HRatio[k] > 1) { |
| mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] = |
| dml_min( |
| mode_lib->vba.MaxDCHUBToPSCLThroughput, |
| mode_lib->vba.MaxPSCLToLBThroughput |
| * mode_lib->vba.HRatio[k] |
| / 2 |
| / dml_ceil( |
| mode_lib->vba.HTAPsChroma[k] |
| / 6.0, |
| 1.0)); |
| } else { |
| mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] = dml_min( |
| mode_lib->vba.MaxDCHUBToPSCLThroughput, |
| mode_lib->vba.MaxPSCLToLBThroughput); |
| } |
| mode_lib->vba.DPPCLKUsingSingleDPPChroma = |
| mode_lib->vba.PixelClock[k] |
| * dml_max( |
| mode_lib->vba.VTAPsChroma[k] |
| / 6.0 |
| * dml_min( |
| 1.0, |
| mode_lib->vba.HRatio[k] |
| / 2), |
| dml_max( |
| mode_lib->vba.HRatio[k] |
| * mode_lib->vba.VRatio[k] |
| / 4 |
| / mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k], |
| 1.0)); |
| |
| if ((mode_lib->vba.HTAPsChroma[k] > 6 || mode_lib->vba.VTAPsChroma[k] > 6) |
| && mode_lib->vba.DPPCLKUsingSingleDPPChroma |
| < 2 * mode_lib->vba.PixelClock[k]) { |
| mode_lib->vba.DPPCLKUsingSingleDPPChroma = 2 |
| * mode_lib->vba.PixelClock[k]; |
| } |
| |
| mode_lib->vba.DPPCLKUsingSingleDPP[k] = dml_max( |
| mode_lib->vba.DPPCLKUsingSingleDPPLuma, |
| mode_lib->vba.DPPCLKUsingSingleDPPChroma); |
| } |
| } |
| |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| if (mode_lib->vba.BlendingAndTiming[k] != k) |
| continue; |
| if (mode_lib->vba.ODMCombineEnabled[k]) { |
| mode_lib->vba.DISPCLKWithRamping = |
| dml_max( |
| mode_lib->vba.DISPCLKWithRamping, |
| mode_lib->vba.PixelClock[k] / 2 |
| * (1 |
| + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading |
| / 100) |
| * (1 |
| + mode_lib->vba.DISPCLKRampingMargin |
| / 100)); |
| mode_lib->vba.DISPCLKWithoutRamping = |
| dml_max( |
| mode_lib->vba.DISPCLKWithoutRamping, |
| mode_lib->vba.PixelClock[k] / 2 |
| * (1 |
| + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading |
| / 100)); |
| } else if (!mode_lib->vba.ODMCombineEnabled[k]) { |
| mode_lib->vba.DISPCLKWithRamping = |
| dml_max( |
| mode_lib->vba.DISPCLKWithRamping, |
| mode_lib->vba.PixelClock[k] |
| * (1 |
| + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading |
| / 100) |
| * (1 |
| + mode_lib->vba.DISPCLKRampingMargin |
| / 100)); |
| mode_lib->vba.DISPCLKWithoutRamping = |
| dml_max( |
| mode_lib->vba.DISPCLKWithoutRamping, |
| mode_lib->vba.PixelClock[k] |
| * (1 |
| + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading |
| / 100)); |
| } |
| } |
| |
| mode_lib->vba.DISPCLKWithRamping = dml_max( |
| mode_lib->vba.DISPCLKWithRamping, |
| mode_lib->vba.WritebackDISPCLK); |
| mode_lib->vba.DISPCLKWithoutRamping = dml_max( |
| mode_lib->vba.DISPCLKWithoutRamping, |
| mode_lib->vba.WritebackDISPCLK); |
| |
| ASSERT(mode_lib->vba.DISPCLKDPPCLKVCOSpeed != 0); |
| mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp( |
| mode_lib->vba.DISPCLKWithRamping, |
| mode_lib->vba.DISPCLKDPPCLKVCOSpeed); |
| mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp( |
| mode_lib->vba.DISPCLKWithoutRamping, |
| mode_lib->vba.DISPCLKDPPCLKVCOSpeed); |
| mode_lib->vba.MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown( |
| mode_lib->vba.soc.clock_limits[mode_lib->vba.soc.num_states].dispclk_mhz, |
| mode_lib->vba.DISPCLKDPPCLKVCOSpeed); |
| if (mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity |
| > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) { |
| mode_lib->vba.DISPCLK_calculated = |
| mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity; |
| } else if (mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity |
| > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) { |
| mode_lib->vba.DISPCLK_calculated = mode_lib->vba.MaxDispclkRoundedToDFSGranularity; |
| } else { |
| mode_lib->vba.DISPCLK_calculated = |
| mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity; |
| } |
| DTRACE(" dispclk_mhz (calculated) = %f", mode_lib->vba.DISPCLK_calculated); |
| |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| if (mode_lib->vba.DPPPerPlane[k] == 0) { |
| mode_lib->vba.DPPCLK_calculated[k] = 0; |
| } else { |
| mode_lib->vba.DPPCLK_calculated[k] = mode_lib->vba.DPPCLKUsingSingleDPP[k] |
| / mode_lib->vba.DPPPerPlane[k] |
| * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100); |
| } |
| mode_lib->vba.GlobalDPPCLK = dml_max( |
| mode_lib->vba.GlobalDPPCLK, |
| mode_lib->vba.DPPCLK_calculated[k]); |
| } |
| mode_lib->vba.GlobalDPPCLK = RoundToDFSGranularityUp( |
| mode_lib->vba.GlobalDPPCLK, |
| mode_lib->vba.DISPCLKDPPCLKVCOSpeed); |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| mode_lib->vba.DPPCLK_calculated[k] = mode_lib->vba.GlobalDPPCLK / 255 |
| * dml_ceil( |
| mode_lib->vba.DPPCLK_calculated[k] * 255 |
| / mode_lib->vba.GlobalDPPCLK, |
| 1); |
| DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, mode_lib->vba.DPPCLK_calculated[k]); |
| } |
| |
| // Urgent Watermark |
| mode_lib->vba.DCCEnabledAnyPlane = false; |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) |
| if (mode_lib->vba.DCCEnable[k]) |
| mode_lib->vba.DCCEnabledAnyPlane = true; |
| |
| mode_lib->vba.ReturnBandwidthToDCN = dml_min( |
| mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLK, |
| mode_lib->vba.FabricAndDRAMBandwidth * 1000) |
| * mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100; |
| |
| mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBandwidthToDCN; |
| mode_lib->vba.ReturnBW = adjust_ReturnBW( |
| mode_lib, |
| mode_lib->vba.ReturnBW, |
| mode_lib->vba.DCCEnabledAnyPlane, |
| mode_lib->vba.ReturnBandwidthToDCN); |
| |
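| // Second pass: ReturnBandwidthToDCN is recomputed here without the |
| // PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly |
| // scaling used above, and adjust_ReturnBW() is applied again to the |
| // already-adjusted ReturnBW. |
| // NOTE(review): unclear whether repeating the calculation is intentional. |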
| mode_lib->vba.ReturnBandwidthToDCN = dml_min( |
| mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLK, |
| mode_lib->vba.FabricAndDRAMBandwidth * 1000); |
| mode_lib->vba.ReturnBW = adjust_ReturnBW( |
| mode_lib, |
| mode_lib->vba.ReturnBW, |
| mode_lib->vba.DCCEnabledAnyPlane, |
| mode_lib->vba.ReturnBandwidthToDCN); |
| |
| DTRACE(" dcfclk_mhz = %f", mode_lib->vba.DCFCLK); |
| DTRACE(" return_bw_to_dcn = %f", mode_lib->vba.ReturnBandwidthToDCN); |
| DTRACE(" return_bus_bw = %f", mode_lib->vba.ReturnBW); |
| |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| bool MainPlaneDoesODMCombine = false; |
| |
| if (mode_lib->vba.SourceScan[k] == dm_horz) |
| mode_lib->vba.SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportWidth[k]; |
| else |
| mode_lib->vba.SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k]; |
| |
| if (mode_lib->vba.ODMCombineEnabled[k] == true) |
| MainPlaneDoesODMCombine = true; |
| for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) |
| if (mode_lib->vba.BlendingAndTiming[k] == j |
| && mode_lib->vba.ODMCombineEnabled[j] == true) |
| MainPlaneDoesODMCombine = true; |
| |
| if (MainPlaneDoesODMCombine == true) |
| mode_lib->vba.SwathWidthY[k] = dml_min( |
| (double) mode_lib->vba.SwathWidthSingleDPPY[k], |
| dml_round( |
| mode_lib->vba.HActive[k] / 2.0 |
| * mode_lib->vba.HRatio[k])); |
| else { |
| if (mode_lib->vba.DPPPerPlane[k] == 0) { |
| mode_lib->vba.SwathWidthY[k] = 0; |
| } else { |
| mode_lib->vba.SwathWidthY[k] = mode_lib->vba.SwathWidthSingleDPPY[k] |
| / mode_lib->vba.DPPPerPlane[k]; |
| } |
| } |
| } |
| |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { |
| mode_lib->vba.BytePerPixelDETY[k] = 8; |
| mode_lib->vba.BytePerPixelDETC[k] = 0; |
| } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) { |
| mode_lib->vba.BytePerPixelDETY[k] = 4; |
| mode_lib->vba.BytePerPixelDETC[k] = 0; |
| } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16) { |
| mode_lib->vba.BytePerPixelDETY[k] = 2; |
| mode_lib->vba.BytePerPixelDETC[k] = 0; |
| } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8) { |
| mode_lib->vba.BytePerPixelDETY[k] = 1; |
| mode_lib->vba.BytePerPixelDETC[k] = 0; |
| } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { |
| mode_lib->vba.BytePerPixelDETY[k] = 1; |
| mode_lib->vba.BytePerPixelDETC[k] = 2; |
| } else { // dm_420_10 |
| mode_lib->vba.BytePerPixelDETY[k] = 4.0 / 3.0; |
| mode_lib->vba.BytePerPixelDETC[k] = 8.0 / 3.0; |
| } |
| } |
| |
| mode_lib->vba.TotalDataReadBandwidth = 0.0; |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| mode_lib->vba.ReadBandwidthPlaneLuma[k] = mode_lib->vba.SwathWidthSingleDPPY[k] |
| * dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1) |
| / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) |
| * mode_lib->vba.VRatio[k]; |
| mode_lib->vba.ReadBandwidthPlaneChroma[k] = mode_lib->vba.SwathWidthSingleDPPY[k] |
| / 2 * dml_ceil(mode_lib->vba.BytePerPixelDETC[k], 2) |
| / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) |
| * mode_lib->vba.VRatio[k] / 2; |
| DTRACE( |
| " read_bw[%i] = %fBps", |
| k, |
| mode_lib->vba.ReadBandwidthPlaneLuma[k] |
| + mode_lib->vba.ReadBandwidthPlaneChroma[k]); |
| mode_lib->vba.TotalDataReadBandwidth += mode_lib->vba.ReadBandwidthPlaneLuma[k] |
| + mode_lib->vba.ReadBandwidthPlaneChroma[k]; |
| } |
| |
| mode_lib->vba.TotalDCCActiveDPP = 0; |
| mode_lib->vba.TotalActiveDPP = 0; |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP |
| + mode_lib->vba.DPPPerPlane[k]; |
| if (mode_lib->vba.DCCEnable[k]) |
| mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP |
| + mode_lib->vba.DPPPerPlane[k]; |
| } |
| |
| mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency = |
| (mode_lib->vba.RoundTripPingLatencyCycles + 32) / mode_lib->vba.DCFCLK |
| + mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly |
| * mode_lib->vba.NumberOfChannels |
| / mode_lib->vba.ReturnBW; |
| |
| mode_lib->vba.LastPixelOfLineExtraWatermark = 0; |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| double DataFabricLineDeliveryTimeLuma, DataFabricLineDeliveryTimeChroma; |
| |
| if (mode_lib->vba.VRatio[k] <= 1.0) |
| mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k] = |
| (double) mode_lib->vba.SwathWidthY[k] |
| * mode_lib->vba.DPPPerPlane[k] |
| / mode_lib->vba.HRatio[k] |
| / mode_lib->vba.PixelClock[k]; |
| else |
| mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k] = |
| (double) mode_lib->vba.SwathWidthY[k] |
| / mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] |
| / mode_lib->vba.DPPCLK[k]; |
| |
| DataFabricLineDeliveryTimeLuma = mode_lib->vba.SwathWidthSingleDPPY[k] |
| * mode_lib->vba.SwathHeightY[k] |
| * dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1) |
| / (mode_lib->vba.ReturnBW * mode_lib->vba.ReadBandwidthPlaneLuma[k] |
| / mode_lib->vba.TotalDataReadBandwidth); |
| mode_lib->vba.LastPixelOfLineExtraWatermark = dml_max( |
| mode_lib->vba.LastPixelOfLineExtraWatermark, |
| DataFabricLineDeliveryTimeLuma |
| - mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k]); |
| |
| if (mode_lib->vba.BytePerPixelDETC[k] == 0) |
| mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k] = 0.0; |
| else if (mode_lib->vba.VRatio[k] / 2.0 <= 1.0) |
| mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k] = |
| mode_lib->vba.SwathWidthY[k] / 2.0 |
| * mode_lib->vba.DPPPerPlane[k] |
| / (mode_lib->vba.HRatio[k] / 2.0) |
| / mode_lib->vba.PixelClock[k]; |
| else |
| mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k] = |
| mode_lib->vba.SwathWidthY[k] / 2.0 |
| / mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] |
| / mode_lib->vba.DPPCLK[k]; |
| |
| DataFabricLineDeliveryTimeChroma = mode_lib->vba.SwathWidthSingleDPPY[k] / 2.0 |
| * mode_lib->vba.SwathHeightC[k] |
| * dml_ceil(mode_lib->vba.BytePerPixelDETC[k], 2) |
| / (mode_lib->vba.ReturnBW |
| * mode_lib->vba.ReadBandwidthPlaneChroma[k] |
| / mode_lib->vba.TotalDataReadBandwidth); |
| mode_lib->vba.LastPixelOfLineExtraWatermark = |
| dml_max( |
| mode_lib->vba.LastPixelOfLineExtraWatermark, |
| DataFabricLineDeliveryTimeChroma |
| - mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k]); |
| } |
| |
| mode_lib->vba.UrgentExtraLatency = mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency |
| + (mode_lib->vba.TotalActiveDPP * mode_lib->vba.PixelChunkSizeInKByte |
| + mode_lib->vba.TotalDCCActiveDPP |
| * mode_lib->vba.MetaChunkSize) * 1024.0 |
| / mode_lib->vba.ReturnBW; |
| |
| if (mode_lib->vba.GPUVMEnable) |
| mode_lib->vba.UrgentExtraLatency += mode_lib->vba.TotalActiveDPP |
| * mode_lib->vba.PTEGroupSize / mode_lib->vba.ReturnBW; |
| |
| mode_lib->vba.UrgentWatermark = mode_lib->vba.UrgentLatencyPixelDataOnly |
| + mode_lib->vba.LastPixelOfLineExtraWatermark |
| + mode_lib->vba.UrgentExtraLatency; |
| |
| DTRACE(" urgent_extra_latency = %fus", mode_lib->vba.UrgentExtraLatency); |
| DTRACE(" wm_urgent = %fus", mode_lib->vba.UrgentWatermark); |
| |
| mode_lib->vba.UrgentLatency = mode_lib->vba.UrgentLatencyPixelDataOnly; |
| |
| mode_lib->vba.TotalActiveWriteback = 0; |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| if (mode_lib->vba.WritebackEnable[k]) |
| mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + mode_lib->vba.ActiveWritebacksPerPlane[k]; |
| } |
| |
| if (mode_lib->vba.TotalActiveWriteback <= 1) |
| mode_lib->vba.WritebackUrgentWatermark = mode_lib->vba.WritebackLatency; |
| else |
| mode_lib->vba.WritebackUrgentWatermark = mode_lib->vba.WritebackLatency |
| + mode_lib->vba.WritebackChunkSize * 1024.0 / 32 |
| / mode_lib->vba.SOCCLK; |
| |
| DTRACE(" wm_wb_urgent = %fus", mode_lib->vba.WritebackUrgentWatermark); |
| |
| // NB P-State/DRAM Clock Change Watermark |
| mode_lib->vba.DRAMClockChangeWatermark = mode_lib->vba.DRAMClockChangeLatency |
| + mode_lib->vba.UrgentWatermark; |
| |
| DTRACE(" wm_pstate_change = %fus", mode_lib->vba.DRAMClockChangeWatermark); |
| |
| DTRACE(" calculating wb pstate watermark"); |
| DTRACE(" total wb outputs %d", mode_lib->vba.TotalActiveWriteback); |
| DTRACE(" socclk frequency %f Mhz", mode_lib->vba.SOCCLK); |
| |
| if (mode_lib->vba.TotalActiveWriteback <= 1) |
| mode_lib->vba.WritebackDRAMClockChangeWatermark = |
| mode_lib->vba.DRAMClockChangeLatency |
| + mode_lib->vba.WritebackLatency; |
| else |
| mode_lib->vba.WritebackDRAMClockChangeWatermark = |
| mode_lib->vba.DRAMClockChangeLatency |
| + mode_lib->vba.WritebackLatency |
| + mode_lib->vba.WritebackChunkSize * 1024.0 / 32 |
| / mode_lib->vba.SOCCLK; |
| |
| DTRACE(" wm_wb_pstate %fus", mode_lib->vba.WritebackDRAMClockChangeWatermark); |
| |
| // Stutter Efficiency |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| mode_lib->vba.LinesInDETY[k] = mode_lib->vba.DETBufferSizeY[k] |
| / mode_lib->vba.BytePerPixelDETY[k] / mode_lib->vba.SwathWidthY[k]; |
| mode_lib->vba.LinesInDETYRoundedDownToSwath[k] = dml_floor( |
| mode_lib->vba.LinesInDETY[k], |
| mode_lib->vba.SwathHeightY[k]); |
| mode_lib->vba.FullDETBufferingTimeY[k] = |
| mode_lib->vba.LinesInDETYRoundedDownToSwath[k] |
| * (mode_lib->vba.HTotal[k] |
| / mode_lib->vba.PixelClock[k]) |
| / mode_lib->vba.VRatio[k]; |
| if (mode_lib->vba.BytePerPixelDETC[k] > 0) { |
| mode_lib->vba.LinesInDETC[k] = mode_lib->vba.DETBufferSizeC[k] |
| / mode_lib->vba.BytePerPixelDETC[k] |
| / (mode_lib->vba.SwathWidthY[k] / 2); |
| mode_lib->vba.LinesInDETCRoundedDownToSwath[k] = dml_floor( |
| mode_lib->vba.LinesInDETC[k], |
| mode_lib->vba.SwathHeightC[k]); |
| mode_lib->vba.FullDETBufferingTimeC[k] = |
| mode_lib->vba.LinesInDETCRoundedDownToSwath[k] |
| * (mode_lib->vba.HTotal[k] |
| / mode_lib->vba.PixelClock[k]) |
| / (mode_lib->vba.VRatio[k] / 2); |
| } else { |
| mode_lib->vba.LinesInDETC[k] = 0; |
| mode_lib->vba.LinesInDETCRoundedDownToSwath[k] = 0; |
| mode_lib->vba.FullDETBufferingTimeC[k] = 999999; |
| } |
| } |
| |
| mode_lib->vba.MinFullDETBufferingTime = 999999.0; |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| if (mode_lib->vba.FullDETBufferingTimeY[k] |
| < mode_lib->vba.MinFullDETBufferingTime) { |
| mode_lib->vba.MinFullDETBufferingTime = |
| mode_lib->vba.FullDETBufferingTimeY[k]; |
| mode_lib->vba.FrameTimeForMinFullDETBufferingTime = |
| (double) mode_lib->vba.VTotal[k] * mode_lib->vba.HTotal[k] |
| / mode_lib->vba.PixelClock[k]; |
| } |
| if (mode_lib->vba.FullDETBufferingTimeC[k] |
| < mode_lib->vba.MinFullDETBufferingTime) { |
| mode_lib->vba.MinFullDETBufferingTime = |
| mode_lib->vba.FullDETBufferingTimeC[k]; |
| mode_lib->vba.FrameTimeForMinFullDETBufferingTime = |
| (double) mode_lib->vba.VTotal[k] * mode_lib->vba.HTotal[k] |
| / mode_lib->vba.PixelClock[k]; |
| } |
| } |
| |
| mode_lib->vba.AverageReadBandwidthGBytePerSecond = 0.0; |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| if (mode_lib->vba.DCCEnable[k]) { |
| mode_lib->vba.AverageReadBandwidthGBytePerSecond = |
| mode_lib->vba.AverageReadBandwidthGBytePerSecond |
| + mode_lib->vba.ReadBandwidthPlaneLuma[k] |
| / mode_lib->vba.DCCRate[k] |
| / 1000 |
| + mode_lib->vba.ReadBandwidthPlaneChroma[k] |
| / mode_lib->vba.DCCRate[k] |
| / 1000; |
| } else { |
| mode_lib->vba.AverageReadBandwidthGBytePerSecond = |
| mode_lib->vba.AverageReadBandwidthGBytePerSecond |
| + mode_lib->vba.ReadBandwidthPlaneLuma[k] |
| / 1000 |
| + mode_lib->vba.ReadBandwidthPlaneChroma[k] |
| / 1000; |
| } |
| if (mode_lib->vba.DCCEnable[k]) { |
| mode_lib->vba.AverageReadBandwidthGBytePerSecond = |
| mode_lib->vba.AverageReadBandwidthGBytePerSecond |
| + mode_lib->vba.ReadBandwidthPlaneLuma[k] |
| / 1000 / 256 |
| + mode_lib->vba.ReadBandwidthPlaneChroma[k] |
| / 1000 / 256; |
| } |
| if (mode_lib->vba.GPUVMEnable) { |
| mode_lib->vba.AverageReadBandwidthGBytePerSecond = |
| mode_lib->vba.AverageReadBandwidthGBytePerSecond |
| + mode_lib->vba.ReadBandwidthPlaneLuma[k] |
| / 1000 / 512 |
| + mode_lib->vba.ReadBandwidthPlaneChroma[k] |
| / 1000 / 512; |
| } |
| } |
| |
| mode_lib->vba.PartOfBurstThatFitsInROB = |
| dml_min( |
| mode_lib->vba.MinFullDETBufferingTime |
| * mode_lib->vba.TotalDataReadBandwidth, |
| mode_lib->vba.ROBBufferSizeInKByte * 1024 |
| * mode_lib->vba.TotalDataReadBandwidth |
| / (mode_lib->vba.AverageReadBandwidthGBytePerSecond |
| * 1000)); |
| mode_lib->vba.StutterBurstTime = mode_lib->vba.PartOfBurstThatFitsInROB |
| * (mode_lib->vba.AverageReadBandwidthGBytePerSecond * 1000) |
| / mode_lib->vba.TotalDataReadBandwidth / mode_lib->vba.ReturnBW |
| + (mode_lib->vba.MinFullDETBufferingTime |
| * mode_lib->vba.TotalDataReadBandwidth |
| - mode_lib->vba.PartOfBurstThatFitsInROB) |
| / (mode_lib->vba.DCFCLK * 64); |
| if (mode_lib->vba.TotalActiveWriteback == 0) { |
| mode_lib->vba.StutterEfficiencyNotIncludingVBlank = (1 |
| - (mode_lib->vba.SRExitTime + mode_lib->vba.StutterBurstTime) |
| / mode_lib->vba.MinFullDETBufferingTime) * 100; |
| } else { |
| mode_lib->vba.StutterEfficiencyNotIncludingVBlank = 0; |
| } |
| |
| mode_lib->vba.SmallestVBlank = 999999; |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| if (mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) { |
| mode_lib->vba.VBlankTime = (double) (mode_lib->vba.VTotal[k] |
| - mode_lib->vba.VActive[k]) * mode_lib->vba.HTotal[k] |
| / mode_lib->vba.PixelClock[k]; |
| } else { |
| mode_lib->vba.VBlankTime = 0; |
| } |
| mode_lib->vba.SmallestVBlank = dml_min( |
| mode_lib->vba.SmallestVBlank, |
| mode_lib->vba.VBlankTime); |
| } |
| |
| mode_lib->vba.StutterEfficiency = (mode_lib->vba.StutterEfficiencyNotIncludingVBlank / 100 |
| * (mode_lib->vba.FrameTimeForMinFullDETBufferingTime |
| - mode_lib->vba.SmallestVBlank) |
| + mode_lib->vba.SmallestVBlank) |
| / mode_lib->vba.FrameTimeForMinFullDETBufferingTime * 100; |
| |
| // mode_lib->vba.DCFCLK Deep Sleep |
| mode_lib->vba.DCFCLKDeepSleep = 8.0; |
| |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; k++) { |
| if (mode_lib->vba.BytePerPixelDETC[k] > 0) { |
| mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = |
| dml_max( |
| 1.1 * mode_lib->vba.SwathWidthY[k] |
| * dml_ceil( |
| mode_lib->vba.BytePerPixelDETY[k], |
| 1) / 32 |
| / mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k], |
| 1.1 * mode_lib->vba.SwathWidthY[k] / 2.0 |
| * dml_ceil( |
| mode_lib->vba.BytePerPixelDETC[k], |
| 2) / 32 |
| / mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k]); |
| } else |
| mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = 1.1 * mode_lib->vba.SwathWidthY[k] |
| * dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1) / 64.0 |
| / mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k]; |
| mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max( |
| mode_lib->vba.DCFCLKDeepSleepPerPlane[k], |
| mode_lib->vba.PixelClock[k] / 16.0); |
| mode_lib->vba.DCFCLKDeepSleep = dml_max( |
| mode_lib->vba.DCFCLKDeepSleep, |
| mode_lib->vba.DCFCLKDeepSleepPerPlane[k]); |
| |
| DTRACE( |
| " dcfclk_deepsleep_per_plane[%i] = %fMHz", |
| k, |
| mode_lib->vba.DCFCLKDeepSleepPerPlane[k]); |
| } |
| |
| DTRACE(" dcfclk_deepsleep_mhz = %fMHz", mode_lib->vba.DCFCLKDeepSleep); |
| |
| // Stutter Watermark |
| mode_lib->vba.StutterExitWatermark = mode_lib->vba.SRExitTime |
| + mode_lib->vba.LastPixelOfLineExtraWatermark |
| + mode_lib->vba.UrgentExtraLatency + 10 / mode_lib->vba.DCFCLKDeepSleep; |
| mode_lib->vba.StutterEnterPlusExitWatermark = mode_lib->vba.SREnterPlusExitTime |
| + mode_lib->vba.LastPixelOfLineExtraWatermark |
| + mode_lib->vba.UrgentExtraLatency; |
| |
| DTRACE(" wm_cstate_exit = %fus", mode_lib->vba.StutterExitWatermark); |
| DTRACE(" wm_cstate_enter_exit = %fus", mode_lib->vba.StutterEnterPlusExitWatermark); |
| |
| // Urgent Latency Supported |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| mode_lib->vba.EffectiveDETPlusLBLinesLuma = |
| dml_floor( |
| mode_lib->vba.LinesInDETY[k] |
| + dml_min( |
| mode_lib->vba.LinesInDETY[k] |
| * mode_lib->vba.DPPCLK[k] |
| * mode_lib->vba.BytePerPixelDETY[k] |
| * mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] |
| / (mode_lib->vba.ReturnBW |
| / mode_lib->vba.DPPPerPlane[k]), |
| (double) mode_lib->vba.EffectiveLBLatencyHidingSourceLinesLuma), |
| mode_lib->vba.SwathHeightY[k]); |
| |
| mode_lib->vba.UrgentLatencySupportUsLuma = mode_lib->vba.EffectiveDETPlusLBLinesLuma |
| * (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) |
| / mode_lib->vba.VRatio[k] |
| - mode_lib->vba.EffectiveDETPlusLBLinesLuma |
| * mode_lib->vba.SwathWidthY[k] |
| * mode_lib->vba.BytePerPixelDETY[k] |
| / (mode_lib->vba.ReturnBW |
| / mode_lib->vba.DPPPerPlane[k]); |
| |
| if (mode_lib->vba.BytePerPixelDETC[k] > 0) { |
| mode_lib->vba.EffectiveDETPlusLBLinesChroma = |
| dml_floor( |
| mode_lib->vba.LinesInDETC[k] |
| + dml_min( |
| mode_lib->vba.LinesInDETC[k] |
| * mode_lib->vba.DPPCLK[k] |
| * mode_lib->vba.BytePerPixelDETC[k] |
| * mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] |
| / (mode_lib->vba.ReturnBW |
| / mode_lib->vba.DPPPerPlane[k]), |
| (double) mode_lib->vba.EffectiveLBLatencyHidingSourceLinesChroma), |
| mode_lib->vba.SwathHeightC[k]); |
| mode_lib->vba.UrgentLatencySupportUsChroma = |
| mode_lib->vba.EffectiveDETPlusLBLinesChroma |
| * (mode_lib->vba.HTotal[k] |
| / mode_lib->vba.PixelClock[k]) |
| / (mode_lib->vba.VRatio[k] / 2) |
| - mode_lib->vba.EffectiveDETPlusLBLinesChroma |
| * (mode_lib->vba.SwathWidthY[k] |
| / 2) |
| * mode_lib->vba.BytePerPixelDETC[k] |
| / (mode_lib->vba.ReturnBW |
| / mode_lib->vba.DPPPerPlane[k]); |
| mode_lib->vba.UrgentLatencySupportUs[k] = dml_min( |
| mode_lib->vba.UrgentLatencySupportUsLuma, |
| mode_lib->vba.UrgentLatencySupportUsChroma); |
| } else { |
| mode_lib->vba.UrgentLatencySupportUs[k] = |
| mode_lib->vba.UrgentLatencySupportUsLuma; |
| } |
| } |
| |
| mode_lib->vba.MinUrgentLatencySupportUs = 999999; |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| mode_lib->vba.MinUrgentLatencySupportUs = dml_min( |
| mode_lib->vba.MinUrgentLatencySupportUs, |
| mode_lib->vba.UrgentLatencySupportUs[k]); |
| } |
| |
| // Non-Urgent Latency Tolerance |
| mode_lib->vba.NonUrgentLatencyTolerance = mode_lib->vba.MinUrgentLatencySupportUs |
| - mode_lib->vba.UrgentWatermark; |
| |
| // DSCCLK |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| if ((mode_lib->vba.BlendingAndTiming[k] != k) || !mode_lib->vba.DSCEnabled[k]) { |
| mode_lib->vba.DSCCLK_calculated[k] = 0.0; |
| } else { |
| if (mode_lib->vba.OutputFormat[k] == dm_420 |
| || mode_lib->vba.OutputFormat[k] == dm_n422) |
| mode_lib->vba.DSCFormatFactor = 2; |
| else |
| mode_lib->vba.DSCFormatFactor = 1; |
| if (mode_lib->vba.ODMCombineEnabled[k]) |
| mode_lib->vba.DSCCLK_calculated[k] = |
| mode_lib->vba.PixelClockBackEnd[k] / 6 |
| / mode_lib->vba.DSCFormatFactor |
| / (1 |
| - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading |
| / 100); |
| else |
| mode_lib->vba.DSCCLK_calculated[k] = |
| mode_lib->vba.PixelClockBackEnd[k] / 3 |
| / mode_lib->vba.DSCFormatFactor |
| / (1 |
| - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading |
| / 100); |
| } |
| } |
| |
| // DSC Delay |
| // TODO |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| double bpp = mode_lib->vba.OutputBpp[k]; |
| unsigned int slices = mode_lib->vba.NumberOfDSCSlices[k]; |
| |
| if (mode_lib->vba.DSCEnabled[k] && bpp != 0) { |
| if (!mode_lib->vba.ODMCombineEnabled[k]) { |
| mode_lib->vba.DSCDelay[k] = |
| dscceComputeDelay( |
| mode_lib->vba.DSCInputBitPerComponent[k], |
| bpp, |
| dml_ceil( |
| (double) mode_lib->vba.HActive[k] |
| / mode_lib->vba.NumberOfDSCSlices[k], |
| 1), |
| slices, |
| mode_lib->vba.OutputFormat[k]) |
| + dscComputeDelay( |
| mode_lib->vba.OutputFormat[k]); |
| } else { |
| mode_lib->vba.DSCDelay[k] = |
| 2 |
| * (dscceComputeDelay( |
| mode_lib->vba.DSCInputBitPerComponent[k], |
| bpp, |
| dml_ceil( |
| (double) mode_lib->vba.HActive[k] |
| / mode_lib->vba.NumberOfDSCSlices[k], |
| 1), |
| slices / 2.0, |
| mode_lib->vba.OutputFormat[k]) |
| + dscComputeDelay( |
| mode_lib->vba.OutputFormat[k])); |
| } |
| mode_lib->vba.DSCDelay[k] = mode_lib->vba.DSCDelay[k] |
| * mode_lib->vba.PixelClock[k] |
| / mode_lib->vba.PixelClockBackEnd[k]; |
| } else { |
| mode_lib->vba.DSCDelay[k] = 0; |
| } |
| } |
| |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) |
| for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) // NumberOfPlanes |
| if (j != k && mode_lib->vba.BlendingAndTiming[k] == j |
| && mode_lib->vba.DSCEnabled[j]) |
| mode_lib->vba.DSCDelay[k] = mode_lib->vba.DSCDelay[j]; |
| |
| // Prefetch |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| unsigned int PDEAndMetaPTEBytesFrameY; |
| unsigned int PixelPTEBytesPerRowY; |
| unsigned int MetaRowByteY; |
| unsigned int MetaRowByteC; |
| unsigned int PDEAndMetaPTEBytesFrameC; |
| unsigned int PixelPTEBytesPerRowC; |
| |
| Calculate256BBlockSizes( |
| mode_lib->vba.SourcePixelFormat[k], |
| mode_lib->vba.SurfaceTiling[k], |
| dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1), |
| dml_ceil(mode_lib->vba.BytePerPixelDETC[k], 2), |
| &mode_lib->vba.BlockHeight256BytesY[k], |
| &mode_lib->vba.BlockHeight256BytesC[k], |
| &mode_lib->vba.BlockWidth256BytesY[k], |
| &mode_lib->vba.BlockWidth256BytesC[k]); |
| PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes( |
| mode_lib, |
| mode_lib->vba.DCCEnable[k], |
| mode_lib->vba.BlockHeight256BytesY[k], |
| mode_lib->vba.BlockWidth256BytesY[k], |
| mode_lib->vba.SourcePixelFormat[k], |
| mode_lib->vba.SurfaceTiling[k], |
| dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1), |
| mode_lib->vba.SourceScan[k], |
| mode_lib->vba.ViewportWidth[k], |
| mode_lib->vba.ViewportHeight[k], |
| mode_lib->vba.SwathWidthY[k], |
| mode_lib->vba.GPUVMEnable, |
| mode_lib->vba.VMMPageSize, |
| mode_lib->vba.PTEBufferSizeInRequestsLuma, |
| mode_lib->vba.PDEProcessingBufIn64KBReqs, |
| mode_lib->vba.PitchY[k], |
| mode_lib->vba.DCCMetaPitchY[k], |
| &mode_lib->vba.MacroTileWidthY[k], |
| &MetaRowByteY, |
| &PixelPTEBytesPerRowY, |
| &mode_lib->vba.PTEBufferSizeNotExceeded[mode_lib->vba.VoltageLevel][0], |
| &mode_lib->vba.dpte_row_height[k], |
| &mode_lib->vba.meta_row_height[k]); |
| mode_lib->vba.PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( |
| mode_lib, |
| mode_lib->vba.VRatio[k], |
| mode_lib->vba.vtaps[k], |
| mode_lib->vba.Interlace[k], |
| mode_lib->vba.ProgressiveToInterlaceUnitInOPP, |
| mode_lib->vba.SwathHeightY[k], |
| mode_lib->vba.ViewportYStartY[k], |
| &mode_lib->vba.VInitPreFillY[k], |
| &mode_lib->vba.MaxNumSwathY[k]); |
| |
| if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 |
| && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 |
| && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 |
| && mode_lib->vba.SourcePixelFormat[k] != dm_444_8)) { |
| PDEAndMetaPTEBytesFrameC = |
| CalculateVMAndRowBytes( |
| mode_lib, |
| mode_lib->vba.DCCEnable[k], |
| mode_lib->vba.BlockHeight256BytesC[k], |
| mode_lib->vba.BlockWidth256BytesC[k], |
| mode_lib->vba.SourcePixelFormat[k], |
| mode_lib->vba.SurfaceTiling[k], |
| dml_ceil( |
| mode_lib->vba.BytePerPixelDETC[k], |
| 2), |
| mode_lib->vba.SourceScan[k], |
| mode_lib->vba.ViewportWidth[k] / 2, |
| mode_lib->vba.ViewportHeight[k] / 2, |
| mode_lib->vba.SwathWidthY[k] / 2, |
| mode_lib->vba.GPUVMEnable, |
| mode_lib->vba.VMMPageSize, |
| mode_lib->vba.PTEBufferSizeInRequestsLuma, |
| mode_lib->vba.PDEProcessingBufIn64KBReqs, |
| mode_lib->vba.PitchC[k], |
| 0, |
| &mode_lib->vba.MacroTileWidthC[k], |
| &MetaRowByteC, |
| &PixelPTEBytesPerRowC, |
| &mode_lib->vba.PTEBufferSizeNotExceeded[mode_lib->vba.VoltageLevel][0], |
| &mode_lib->vba.dpte_row_height_chroma[k], |
| &mode_lib->vba.meta_row_height_chroma[k]); |
| mode_lib->vba.PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines( |
| mode_lib, |
| mode_lib->vba.VRatio[k] / 2, |
| mode_lib->vba.VTAPsChroma[k], |
| mode_lib->vba.Interlace[k], |
| mode_lib->vba.ProgressiveToInterlaceUnitInOPP, |
| mode_lib->vba.SwathHeightC[k], |
| mode_lib->vba.ViewportYStartC[k], |
| &mode_lib->vba.VInitPreFillC[k], |
| &mode_lib->vba.MaxNumSwathC[k]); |
| } else { |
| PixelPTEBytesPerRowC = 0; |
| PDEAndMetaPTEBytesFrameC = 0; |
| MetaRowByteC = 0; |
| mode_lib->vba.MaxNumSwathC[k] = 0; |
| mode_lib->vba.PrefetchSourceLinesC[k] = 0; |
| } |
| |
| mode_lib->vba.PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC; |
| mode_lib->vba.PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY |
| + PDEAndMetaPTEBytesFrameC; |
| mode_lib->vba.MetaRowByte[k] = MetaRowByteY + MetaRowByteC; |
| |
| CalculateActiveRowBandwidth( |
| mode_lib->vba.GPUVMEnable, |
| mode_lib->vba.SourcePixelFormat[k], |
| mode_lib->vba.VRatio[k], |
| mode_lib->vba.DCCEnable[k], |
| mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], |
| MetaRowByteY, |
| MetaRowByteC, |
| mode_lib->vba.meta_row_height[k], |
| mode_lib->vba.meta_row_height_chroma[k], |
| PixelPTEBytesPerRowY, |
| PixelPTEBytesPerRowC, |
| mode_lib->vba.dpte_row_height[k], |
| mode_lib->vba.dpte_row_height_chroma[k], |
| &mode_lib->vba.meta_row_bw[k], |
| &mode_lib->vba.dpte_row_bw[k], |
| &mode_lib->vba.qual_row_bw[k]); |
| } |
| |
| mode_lib->vba.TCalc = 24.0 / mode_lib->vba.DCFCLKDeepSleep; |
| |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| if (mode_lib->vba.BlendingAndTiming[k] == k) { |
| if (mode_lib->vba.WritebackEnable[k] == true) { |
| mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] = |
| mode_lib->vba.WritebackLatency |
| + CalculateWriteBackDelay( |
| mode_lib->vba.WritebackPixelFormat[k], |
| mode_lib->vba.WritebackHRatio[k], |
| mode_lib->vba.WritebackVRatio[k], |
| mode_lib->vba.WritebackLumaHTaps[k], |
| mode_lib->vba.WritebackLumaVTaps[k], |
| mode_lib->vba.WritebackChromaHTaps[k], |
| mode_lib->vba.WritebackChromaVTaps[k], |
| mode_lib->vba.WritebackDestinationWidth[k]) |
| / mode_lib->vba.DISPCLK; |
| } else |
| mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] = 0; |
| for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { |
| if (mode_lib->vba.BlendingAndTiming[j] == k |
| && mode_lib->vba.WritebackEnable[j] == true) { |
| mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] = |
| dml_max( |
| mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k], |
| mode_lib->vba.WritebackLatency |
| + CalculateWriteBackDelay( |
| mode_lib->vba.WritebackPixelFormat[j], |
| mode_lib->vba.WritebackHRatio[j], |
| mode_lib->vba.WritebackVRatio[j], |
| mode_lib->vba.WritebackLumaHTaps[j], |
| mode_lib->vba.WritebackLumaVTaps[j], |
| mode_lib->vba.WritebackChromaHTaps[j], |
| mode_lib->vba.WritebackChromaVTaps[j], |
| mode_lib->vba.WritebackDestinationWidth[j]) |
| / mode_lib->vba.DISPCLK); |
| } |
| } |
| } |
| } |
| |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) |
| for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) |
| if (mode_lib->vba.BlendingAndTiming[k] == j) |
| mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] = |
| mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][j]; |
| |
| mode_lib->vba.VStartupLines = 13; |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| mode_lib->vba.MaxVStartupLines[k] = |
| mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] |
| - dml_max( |
| 1.0, |
| dml_ceil( |
| mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] |
| / (mode_lib->vba.HTotal[k] |
| / mode_lib->vba.PixelClock[k]), |
| 1)); |
| } |
| |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) |
| mode_lib->vba.MaximumMaxVStartupLines = dml_max( |
| mode_lib->vba.MaximumMaxVStartupLines, |
| mode_lib->vba.MaxVStartupLines[k]); |
| |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| mode_lib->vba.cursor_bw[k] = 0.0; |
| for (j = 0; j < mode_lib->vba.NumberOfCursors[k]; ++j) |
| mode_lib->vba.cursor_bw[k] += mode_lib->vba.CursorWidth[k][j] |
| * mode_lib->vba.CursorBPP[k][j] / 8.0 |
| / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) |
| * mode_lib->vba.VRatio[k]; |
| } |
| |
| do { |
| double MaxTotalRDBandwidth = 0; |
| bool DestinationLineTimesForPrefetchLessThan2 = false; |
| bool VRatioPrefetchMoreThan4 = false; |
| bool prefetch_vm_bw_valid = true; |
| bool prefetch_row_bw_valid = true; |
| double TWait = CalculateTWait( |
| mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], |
| mode_lib->vba.DRAMClockChangeLatency, |
| mode_lib->vba.UrgentLatencyPixelDataOnly, |
| mode_lib->vba.SREnterPlusExitTime); |
| |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| if (mode_lib->vba.XFCEnabled[k] == true) { |
| mode_lib->vba.XFCRemoteSurfaceFlipDelay = |
| CalculateRemoteSurfaceFlipDelay( |
| mode_lib, |
| mode_lib->vba.VRatio[k], |
| mode_lib->vba.SwathWidthY[k], |
| dml_ceil( |
| mode_lib->vba.BytePerPixelDETY[k], |
| 1), |
| mode_lib->vba.HTotal[k] |
| / mode_lib->vba.PixelClock[k], |
| mode_lib->vba.XFCTSlvVupdateOffset, |
| mode_lib->vba.XFCTSlvVupdateWidth, |
| mode_lib->vba.XFCTSlvVreadyOffset, |
| mode_lib->vba.XFCXBUFLatencyTolerance, |
| mode_lib->vba.XFCFillBWOverhead, |
| mode_lib->vba.XFCSlvChunkSize, |
| mode_lib->vba.XFCBusTransportTime, |
| mode_lib->vba.TCalc, |
| TWait, |
| &mode_lib->vba.SrcActiveDrainRate, |
| &mode_lib->vba.TInitXFill, |
| &mode_lib->vba.TslvChk); |
| } else { |
| mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0; |
| } |
| mode_lib->vba.ErrorResult[k] = |
| CalculatePrefetchSchedule( |
| mode_lib, |
| mode_lib->vba.DPPCLK[k], |
| mode_lib->vba.DISPCLK, |
| mode_lib->vba.PixelClock[k], |
| mode_lib->vba.DCFCLKDeepSleep, |
| mode_lib->vba.DSCDelay[k], |
| mode_lib->vba.DPPPerPlane[k], |
| mode_lib->vba.ScalerEnabled[k], |
| mode_lib->vba.NumberOfCursors[k], |
| mode_lib->vba.DPPCLKDelaySubtotal, |
| mode_lib->vba.DPPCLKDelaySCL, |
| mode_lib->vba.DPPCLKDelaySCLLBOnly, |
| mode_lib->vba.DPPCLKDelayCNVCFormater, |
| mode_lib->vba.DPPCLKDelayCNVCCursor, |
| mode_lib->vba.DISPCLKDelaySubtotal, |
| (unsigned int) (mode_lib->vba.SwathWidthY[k] |
| / mode_lib->vba.HRatio[k]), |
| mode_lib->vba.OutputFormat[k], |
| mode_lib->vba.VTotal[k] |
| - mode_lib->vba.VActive[k], |
| mode_lib->vba.HTotal[k], |
| mode_lib->vba.MaxInterDCNTileRepeaters, |
| dml_min( |
| mode_lib->vba.VStartupLines, |
| mode_lib->vba.MaxVStartupLines[k]), |
| mode_lib->vba.GPUVMMaxPageTableLevels, |
| mode_lib->vba.GPUVMEnable, |
| mode_lib->vba.DynamicMetadataEnable[k], |
| mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k], |
| mode_lib->vba.DynamicMetadataTransmittedBytes[k], |
| mode_lib->vba.DCCEnable[k], |
| mode_lib->vba.UrgentLatencyPixelDataOnly, |
| mode_lib->vba.UrgentExtraLatency, |
| mode_lib->vba.TCalc, |
| mode_lib->vba.PDEAndMetaPTEBytesFrame[k], |
| mode_lib->vba.MetaRowByte[k], |
| mode_lib->vba.PixelPTEBytesPerRow[k], |
| mode_lib->vba.PrefetchSourceLinesY[k], |
| mode_lib->vba.SwathWidthY[k], |
| mode_lib->vba.BytePerPixelDETY[k], |
| mode_lib->vba.VInitPreFillY[k], |
| mode_lib->vba.MaxNumSwathY[k], |
| mode_lib->vba.PrefetchSourceLinesC[k], |
| mode_lib->vba.BytePerPixelDETC[k], |
| mode_lib->vba.VInitPreFillC[k], |
| mode_lib->vba.MaxNumSwathC[k], |
| mode_lib->vba.SwathHeightY[k], |
| mode_lib->vba.SwathHeightC[k], |
| TWait, |
| mode_lib->vba.XFCEnabled[k], |
| mode_lib->vba.XFCRemoteSurfaceFlipDelay, |
| mode_lib->vba.Interlace[k], |
| mode_lib->vba.ProgressiveToInterlaceUnitInOPP, |
| &mode_lib->vba.DSTXAfterScaler[k], |
| &mode_lib->vba.DSTYAfterScaler[k], |
| &mode_lib->vba.DestinationLinesForPrefetch[k], |
| &mode_lib->vba.PrefetchBandwidth[k], |
| &mode_lib->vba.DestinationLinesToRequestVMInVBlank[k], |
| &mode_lib->vba.DestinationLinesToRequestRowInVBlank[k], |
| &mode_lib->vba.VRatioPrefetchY[k], |
| &mode_lib->vba.VRatioPrefetchC[k], |
| &mode_lib->vba.RequiredPrefetchPixDataBWLuma[k], |
| &mode_lib->vba.VStartupRequiredWhenNotEnoughTimeForDynamicMetadata, |
| &mode_lib->vba.Tno_bw[k], |
| &mode_lib->vba.VUpdateOffsetPix[k], |
| &mode_lib->vba.VUpdateWidthPix[k], |
| &mode_lib->vba.VReadyOffsetPix[k]); |
| if (mode_lib->vba.BlendingAndTiming[k] == k) { |
| mode_lib->vba.VStartup[k] = dml_min( |
| mode_lib->vba.VStartupLines, |
| mode_lib->vba.MaxVStartupLines[k]); |
| if (mode_lib->vba.VStartupRequiredWhenNotEnoughTimeForDynamicMetadata |
| != 0) { |
| mode_lib->vba.VStartup[k] = |
| mode_lib->vba.VStartupRequiredWhenNotEnoughTimeForDynamicMetadata; |
| } |
| } else { |
| mode_lib->vba.VStartup[k] = |
| dml_min( |
| mode_lib->vba.VStartupLines, |
| mode_lib->vba.MaxVStartupLines[mode_lib->vba.BlendingAndTiming[k]]); |
| } |
| } |
| |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| |
| if (mode_lib->vba.PDEAndMetaPTEBytesFrame[k] == 0) |
| mode_lib->vba.prefetch_vm_bw[k] = 0; |
| else if (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k] > 0) { |
| mode_lib->vba.prefetch_vm_bw[k] = |
| (double) mode_lib->vba.PDEAndMetaPTEBytesFrame[k] |
| / (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k] |
| * mode_lib->vba.HTotal[k] |
| / mode_lib->vba.PixelClock[k]); |
| } else { |
| mode_lib->vba.prefetch_vm_bw[k] = 0; |
| prefetch_vm_bw_valid = false; |
| } |
| if (mode_lib->vba.MetaRowByte[k] + mode_lib->vba.PixelPTEBytesPerRow[k] |
| == 0) |
| mode_lib->vba.prefetch_row_bw[k] = 0; |
| else if (mode_lib->vba.DestinationLinesToRequestRowInVBlank[k] > 0) { |
| mode_lib->vba.prefetch_row_bw[k] = |
| (double) (mode_lib->vba.MetaRowByte[k] |
| + mode_lib->vba.PixelPTEBytesPerRow[k]) |
| / (mode_lib->vba.DestinationLinesToRequestRowInVBlank[k] |
| * mode_lib->vba.HTotal[k] |
| / mode_lib->vba.PixelClock[k]); |
| } else { |
| mode_lib->vba.prefetch_row_bw[k] = 0; |
| prefetch_row_bw_valid = false; |
| } |
| |
| MaxTotalRDBandwidth = |
| MaxTotalRDBandwidth + mode_lib->vba.cursor_bw[k] |
| + dml_max( |
| mode_lib->vba.prefetch_vm_bw[k], |
| dml_max( |
| mode_lib->vba.prefetch_row_bw[k], |
| dml_max( |
| mode_lib->vba.ReadBandwidthPlaneLuma[k] |
| + mode_lib->vba.ReadBandwidthPlaneChroma[k], |
| mode_lib->vba.RequiredPrefetchPixDataBWLuma[k]) |
| + mode_lib->vba.meta_row_bw[k] |
| + mode_lib->vba.dpte_row_bw[k])); |
| |
| if (mode_lib->vba.DestinationLinesForPrefetch[k] < 2) |
| DestinationLineTimesForPrefetchLessThan2 = true; |
| if (mode_lib->vba.VRatioPrefetchY[k] > 4 |
| || mode_lib->vba.VRatioPrefetchC[k] > 4) |
| VRatioPrefetchMoreThan4 = true; |
| } |
| |
| if (MaxTotalRDBandwidth <= mode_lib->vba.ReturnBW && prefetch_vm_bw_valid |
| && prefetch_row_bw_valid && !VRatioPrefetchMoreThan4 |
| && !DestinationLineTimesForPrefetchLessThan2) |
| mode_lib->vba.PrefetchModeSupported = true; |
| else { |
| mode_lib->vba.PrefetchModeSupported = false; |
| dml_print( |
| "DML: CalculatePrefetchSchedule ***failed***. Bandwidth violation. Results are NOT valid\n"); |
| } |
| |
| if (mode_lib->vba.PrefetchModeSupported == true) { |
| double final_flip_bw[DC__NUM_DPP__MAX]; |
| unsigned int ImmediateFlipBytes[DC__NUM_DPP__MAX]; |
| double total_dcn_read_bw_with_flip = 0; |
| |
| mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.ReturnBW; |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| mode_lib->vba.BandwidthAvailableForImmediateFlip = |
| mode_lib->vba.BandwidthAvailableForImmediateFlip |
| - mode_lib->vba.cursor_bw[k] |
| - dml_max( |
| mode_lib->vba.ReadBandwidthPlaneLuma[k] |
| + mode_lib->vba.ReadBandwidthPlaneChroma[k] |
| + mode_lib->vba.qual_row_bw[k], |
| mode_lib->vba.PrefetchBandwidth[k]); |
| } |
| |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| ImmediateFlipBytes[k] = 0; |
| if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 |
| && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { |
| ImmediateFlipBytes[k] = |
| mode_lib->vba.PDEAndMetaPTEBytesFrame[k] |
| + mode_lib->vba.MetaRowByte[k] |
| + mode_lib->vba.PixelPTEBytesPerRow[k]; |
| } |
| } |
| mode_lib->vba.TotImmediateFlipBytes = 0; |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 |
| && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { |
| mode_lib->vba.TotImmediateFlipBytes = |
| mode_lib->vba.TotImmediateFlipBytes |
| + ImmediateFlipBytes[k]; |
| } |
| } |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| CalculateFlipSchedule( |
| mode_lib, |
| mode_lib->vba.UrgentExtraLatency, |
| mode_lib->vba.UrgentLatencyPixelDataOnly, |
| mode_lib->vba.GPUVMMaxPageTableLevels, |
| mode_lib->vba.GPUVMEnable, |
| mode_lib->vba.BandwidthAvailableForImmediateFlip, |
| mode_lib->vba.TotImmediateFlipBytes, |
| mode_lib->vba.SourcePixelFormat[k], |
| ImmediateFlipBytes[k], |
| mode_lib->vba.HTotal[k] |
| / mode_lib->vba.PixelClock[k], |
| mode_lib->vba.VRatio[k], |
| mode_lib->vba.Tno_bw[k], |
| mode_lib->vba.PDEAndMetaPTEBytesFrame[k], |
| mode_lib->vba.MetaRowByte[k], |
| mode_lib->vba.PixelPTEBytesPerRow[k], |
| mode_lib->vba.DCCEnable[k], |
| mode_lib->vba.dpte_row_height[k], |
| mode_lib->vba.meta_row_height[k], |
| mode_lib->vba.qual_row_bw[k], |
| &mode_lib->vba.DestinationLinesToRequestVMInImmediateFlip[k], |
| &mode_lib->vba.DestinationLinesToRequestRowInImmediateFlip[k], |
| &final_flip_bw[k], |
| &mode_lib->vba.ImmediateFlipSupportedForPipe[k]); |
| } |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| total_dcn_read_bw_with_flip = |
| total_dcn_read_bw_with_flip |
| + mode_lib->vba.cursor_bw[k] |
| + dml_max( |
| mode_lib->vba.prefetch_vm_bw[k], |
| dml_max( |
| mode_lib->vba.prefetch_row_bw[k], |
| final_flip_bw[k] |
| + dml_max( |
| mode_lib->vba.ReadBandwidthPlaneLuma[k] |
| + mode_lib->vba.ReadBandwidthPlaneChroma[k], |
| mode_lib->vba.RequiredPrefetchPixDataBWLuma[k]))); |
| } |
| mode_lib->vba.ImmediateFlipSupported = true; |
| if (total_dcn_read_bw_with_flip > mode_lib->vba.ReturnBW) { |
| mode_lib->vba.ImmediateFlipSupported = false; |
| } |
| for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { |
| if (mode_lib->vba.ImmediateFlipSupportedForPipe[k] == false) { |
| mode_lib->vba.ImmediateFlipSupported = false; |
| } |
| } |
| } else { |
| mode_lib->vba.ImmediateFlipSupported
|