These changes are the raw update to linux-4.4.6-rt14. Kernel sources
[kvmfornfv.git] / kernel / drivers / gpu / drm / i915 / intel_pm.c
index 555b896..f091ad1 100644 (file)
 #define INTEL_RC6p_ENABLE                      (1<<1)
 #define INTEL_RC6pp_ENABLE                     (1<<2)
 
-static void gen9_init_clock_gating(struct drm_device *dev)
+static void bxt_init_clock_gating(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
 
-       /* WaEnableLbsSlaRetryTimerDecrement:skl */
-       I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
-                  GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
-}
-
-static void skl_init_clock_gating(struct drm_device *dev)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
-
-       gen9_init_clock_gating(dev);
-
-       if (INTEL_REVID(dev) == SKL_REVID_A0) {
-               /*
-                * WaDisableSDEUnitClockGating:skl
-                * WaSetGAPSunitClckGateDisable:skl
-                */
-               I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
-                          GEN8_GAPSUNIT_CLOCK_GATE_DISABLE |
-                          GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
-       }
-
-       if (INTEL_REVID(dev) <= SKL_REVID_D0) {
-               /* WaDisableHDCInvalidation:skl */
-               I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
-                          BDW_DISABLE_HDC_INVALIDATION);
-
-               /* WaDisableChickenBitTSGBarrierAckForFFSliceCS:skl */
-               I915_WRITE(FF_SLICE_CS_CHICKEN2,
-                          I915_READ(FF_SLICE_CS_CHICKEN2) |
-                          GEN9_TSG_BARRIER_ACK_DISABLE);
-       }
+       /* WaDisableSDEUnitClockGating:bxt */
+       I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
+                  GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
 
-       if (INTEL_REVID(dev) <= SKL_REVID_E0)
-               /* WaDisableLSQCROPERFforOCL:skl */
-               I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
-                          GEN8_LQSC_RO_PERF_DIS);
+       /*
+        * FIXME:
+        * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
+        */
+       I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
+                  GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);
 }
 
 static void i915_pineview_get_mem_freq(struct drm_device *dev)
@@ -311,22 +285,26 @@ void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
 
        if (IS_VALLEYVIEW(dev)) {
                I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
-               if (IS_CHERRYVIEW(dev))
-                       chv_set_memory_pm5(dev_priv, enable);
+               POSTING_READ(FW_BLC_SELF_VLV);
+               dev_priv->wm.vlv.cxsr = enable;
        } else if (IS_G4X(dev) || IS_CRESTLINE(dev)) {
                I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
+               POSTING_READ(FW_BLC_SELF);
        } else if (IS_PINEVIEW(dev)) {
                val = I915_READ(DSPFW3) & ~PINEVIEW_SELF_REFRESH_EN;
                val |= enable ? PINEVIEW_SELF_REFRESH_EN : 0;
                I915_WRITE(DSPFW3, val);
+               POSTING_READ(DSPFW3);
        } else if (IS_I945G(dev) || IS_I945GM(dev)) {
                val = enable ? _MASKED_BIT_ENABLE(FW_BLC_SELF_EN) :
                               _MASKED_BIT_DISABLE(FW_BLC_SELF_EN);
                I915_WRITE(FW_BLC_SELF, val);
+               POSTING_READ(FW_BLC_SELF);
        } else if (IS_I915GM(dev)) {
                val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) :
                               _MASKED_BIT_DISABLE(INSTPM_SELF_EN);
                I915_WRITE(INSTPM, val);
+               POSTING_READ(INSTPM);
        } else {
                return;
        }
@@ -651,12 +629,9 @@ static void pineview_update_wm(struct drm_crtc *unused_crtc)
 
        crtc = single_enabled_crtc(dev);
        if (crtc) {
-               const struct drm_display_mode *adjusted_mode;
+               const struct drm_display_mode *adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
                int pixel_size = crtc->primary->state->fb->bits_per_pixel / 8;
-               int clock;
-
-               adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
-               clock = adjusted_mode->crtc_clock;
+               int clock = adjusted_mode->crtc_clock;
 
                /* Display SR */
                wm = intel_calculate_wm(clock, &pineview_display_wm,
@@ -900,223 +875,480 @@ static void vlv_write_wm_values(struct intel_crtc *crtc,
                           FW_WM(wm->pipe[PIPE_A].primary >> 8, PLANEA_HI));
        }
 
-       POSTING_READ(DSPFW1);
+       /* zero (unused) WM1 watermarks */
+       I915_WRITE(DSPFW4, 0);
+       I915_WRITE(DSPFW5, 0);
+       I915_WRITE(DSPFW6, 0);
+       I915_WRITE(DSPHOWM1, 0);
 
-       dev_priv->wm.vlv = *wm;
+       POSTING_READ(DSPFW1);
 }
 
 #undef FW_WM_VLV
 
-static uint8_t vlv_compute_drain_latency(struct drm_crtc *crtc,
-                                        struct drm_plane *plane)
+enum vlv_wm_level {
+       VLV_WM_LEVEL_PM2,
+       VLV_WM_LEVEL_PM5,
+       VLV_WM_LEVEL_DDR_DVFS,
+};
+
+/* latency must be in 0.1us units. */
+static unsigned int vlv_wm_method2(unsigned int pixel_rate,
+                                  unsigned int pipe_htotal,
+                                  unsigned int horiz_pixels,
+                                  unsigned int bytes_per_pixel,
+                                  unsigned int latency)
 {
-       struct drm_device *dev = crtc->dev;
-       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-       int entries, prec_mult, drain_latency, pixel_size;
-       int clock = intel_crtc->config->base.adjusted_mode.crtc_clock;
-       const int high_precision = IS_CHERRYVIEW(dev) ? 16 : 64;
+       unsigned int ret;
 
-       /*
-        * FIXME the plane might have an fb
-        * but be invisible (eg. due to clipping)
-        */
-       if (!intel_crtc->active || !plane->state->fb)
-               return 0;
+       ret = (latency * pixel_rate) / (pipe_htotal * 10000);
+       ret = (ret + 1) * horiz_pixels * bytes_per_pixel;
+       ret = DIV_ROUND_UP(ret, 64);
 
-       if (WARN(clock == 0, "Pixel clock is zero!\n"))
-               return 0;
+       return ret;
+}
 
-       pixel_size = drm_format_plane_cpp(plane->state->fb->pixel_format, 0);
+static void vlv_setup_wm_latency(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
 
-       if (WARN(pixel_size == 0, "Pixel size is zero!\n"))
-               return 0;
+       /* all latencies in usec */
+       dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;
 
-       entries = DIV_ROUND_UP(clock, 1000) * pixel_size;
+       dev_priv->wm.max_level = VLV_WM_LEVEL_PM2;
 
-       prec_mult = high_precision;
-       drain_latency = 64 * prec_mult * 4 / entries;
+       if (IS_CHERRYVIEW(dev_priv)) {
+               dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
+               dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;
 
-       if (drain_latency > DRAIN_LATENCY_MASK) {
-               prec_mult /= 2;
-               drain_latency = 64 * prec_mult * 4 / entries;
+               dev_priv->wm.max_level = VLV_WM_LEVEL_DDR_DVFS;
        }
-
-       if (drain_latency > DRAIN_LATENCY_MASK)
-               drain_latency = DRAIN_LATENCY_MASK;
-
-       return drain_latency | (prec_mult == high_precision ?
-                               DDL_PRECISION_HIGH : DDL_PRECISION_LOW);
 }
 
-static int vlv_compute_wm(struct intel_crtc *crtc,
-                         struct intel_plane *plane,
-                         int fifo_size)
+static uint16_t vlv_compute_wm_level(struct intel_plane *plane,
+                                    struct intel_crtc *crtc,
+                                    const struct intel_plane_state *state,
+                                    int level)
 {
-       int clock, entries, pixel_size;
+       struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+       int clock, htotal, pixel_size, width, wm;
 
-       /*
-        * FIXME the plane might have an fb
-        * but be invisible (eg. due to clipping)
-        */
-       if (!crtc->active || !plane->base.state->fb)
+       if (dev_priv->wm.pri_latency[level] == 0)
+               return USHRT_MAX;
+
+       if (!state->visible)
                return 0;
 
-       pixel_size = drm_format_plane_cpp(plane->base.state->fb->pixel_format, 0);
+       pixel_size = drm_format_plane_cpp(state->base.fb->pixel_format, 0);
        clock = crtc->config->base.adjusted_mode.crtc_clock;
+       htotal = crtc->config->base.adjusted_mode.crtc_htotal;
+       width = crtc->config->pipe_src_w;
+       if (WARN_ON(htotal == 0))
+               htotal = 1;
 
-       entries = DIV_ROUND_UP(clock, 1000) * pixel_size;
+       if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
+               /*
+                * FIXME the formula gives values that are
+                * too big for the cursor FIFO, and hence we
+                * would never be able to use cursors. For
+                * now just hardcode the watermark.
+                */
+               wm = 63;
+       } else {
+               wm = vlv_wm_method2(clock, htotal, width, pixel_size,
+                                   dev_priv->wm.pri_latency[level] * 10);
+       }
 
-       /*
-        * Set up the watermark such that we don't start issuing memory
-        * requests until we are within PND's max deadline value (256us).
-        * Idea being to be idle as long as possible while still taking
-        * advatange of PND's deadline scheduling. The limit of 8
-        * cachelines (used when the FIFO will anyway drain in less time
-        * than 256us) should match what we would be done if trickle
-        * feed were enabled.
-        */
-       return fifo_size - clamp(DIV_ROUND_UP(256 * entries, 64), 0, fifo_size - 8);
+       return min_t(int, wm, USHRT_MAX);
 }
 
-static bool vlv_compute_sr_wm(struct drm_device *dev,
-                             struct vlv_wm_values *wm)
+static void vlv_compute_fifo(struct intel_crtc *crtc)
 {
-       struct drm_i915_private *dev_priv = to_i915(dev);
-       struct drm_crtc *crtc;
-       enum pipe pipe = INVALID_PIPE;
-       int num_planes = 0;
-       int fifo_size = 0;
+       struct drm_device *dev = crtc->base.dev;
+       struct vlv_wm_state *wm_state = &crtc->wm_state;
        struct intel_plane *plane;
+       unsigned int total_rate = 0;
+       const int fifo_size = 512 - 1;
+       int fifo_extra, fifo_left = fifo_size;
 
-       wm->sr.cursor = wm->sr.plane = 0;
+       for_each_intel_plane_on_crtc(dev, crtc, plane) {
+               struct intel_plane_state *state =
+                       to_intel_plane_state(plane->base.state);
 
-       crtc = single_enabled_crtc(dev);
-       /* maxfifo not supported on pipe C */
-       if (crtc && to_intel_crtc(crtc)->pipe != PIPE_C) {
-               pipe = to_intel_crtc(crtc)->pipe;
-               num_planes = !!wm->pipe[pipe].primary +
-                       !!wm->pipe[pipe].sprite[0] +
-                       !!wm->pipe[pipe].sprite[1];
-               fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1;
+               if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
+                       continue;
+
+               if (state->visible) {
+                       wm_state->num_active_planes++;
+                       total_rate += drm_format_plane_cpp(state->base.fb->pixel_format, 0);
+               }
        }
 
-       if (fifo_size == 0 || num_planes > 1)
-               return false;
+       for_each_intel_plane_on_crtc(dev, crtc, plane) {
+               struct intel_plane_state *state =
+                       to_intel_plane_state(plane->base.state);
+               unsigned int rate;
 
-       wm->sr.cursor = vlv_compute_wm(to_intel_crtc(crtc),
-                                      to_intel_plane(crtc->cursor), 0x3f);
+               if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
+                       plane->wm.fifo_size = 63;
+                       continue;
+               }
+
+               if (!state->visible) {
+                       plane->wm.fifo_size = 0;
+                       continue;
+               }
+
+               rate = drm_format_plane_cpp(state->base.fb->pixel_format, 0);
+               plane->wm.fifo_size = fifo_size * rate / total_rate;
+               fifo_left -= plane->wm.fifo_size;
+       }
+
+       fifo_extra = DIV_ROUND_UP(fifo_left, wm_state->num_active_planes ?: 1);
+
+       /* spread the remainder evenly */
+       for_each_intel_plane_on_crtc(dev, crtc, plane) {
+               int plane_extra;
+
+               if (fifo_left == 0)
+                       break;
 
-       list_for_each_entry(plane, &dev->mode_config.plane_list, base.head) {
                if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
                        continue;
 
-               if (plane->pipe != pipe)
+               /* give it all to the first plane if none are active */
+               if (plane->wm.fifo_size == 0 &&
+                   wm_state->num_active_planes)
+                       continue;
+
+               plane_extra = min(fifo_extra, fifo_left);
+               plane->wm.fifo_size += plane_extra;
+               fifo_left -= plane_extra;
+       }
+
+       WARN_ON(fifo_left != 0);
+}
+
+static void vlv_invert_wms(struct intel_crtc *crtc)
+{
+       struct vlv_wm_state *wm_state = &crtc->wm_state;
+       int level;
+
+       for (level = 0; level < wm_state->num_levels; level++) {
+               struct drm_device *dev = crtc->base.dev;
+               const int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
+               struct intel_plane *plane;
+
+               wm_state->sr[level].plane = sr_fifo_size - wm_state->sr[level].plane;
+               wm_state->sr[level].cursor = 63 - wm_state->sr[level].cursor;
+
+               for_each_intel_plane_on_crtc(dev, crtc, plane) {
+                       switch (plane->base.type) {
+                               int sprite;
+                       case DRM_PLANE_TYPE_CURSOR:
+                               wm_state->wm[level].cursor = plane->wm.fifo_size -
+                                       wm_state->wm[level].cursor;
+                               break;
+                       case DRM_PLANE_TYPE_PRIMARY:
+                               wm_state->wm[level].primary = plane->wm.fifo_size -
+                                       wm_state->wm[level].primary;
+                               break;
+                       case DRM_PLANE_TYPE_OVERLAY:
+                               sprite = plane->plane;
+                               wm_state->wm[level].sprite[sprite] = plane->wm.fifo_size -
+                                       wm_state->wm[level].sprite[sprite];
+                               break;
+                       }
+               }
+       }
+}
+
+static void vlv_compute_wm(struct intel_crtc *crtc)
+{
+       struct drm_device *dev = crtc->base.dev;
+       struct vlv_wm_state *wm_state = &crtc->wm_state;
+       struct intel_plane *plane;
+       int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
+       int level;
+
+       memset(wm_state, 0, sizeof(*wm_state));
+
+       wm_state->cxsr = crtc->pipe != PIPE_C && crtc->wm.cxsr_allowed;
+       wm_state->num_levels = to_i915(dev)->wm.max_level + 1;
+
+       wm_state->num_active_planes = 0;
+
+       vlv_compute_fifo(crtc);
+
+       if (wm_state->num_active_planes != 1)
+               wm_state->cxsr = false;
+
+       if (wm_state->cxsr) {
+               for (level = 0; level < wm_state->num_levels; level++) {
+                       wm_state->sr[level].plane = sr_fifo_size;
+                       wm_state->sr[level].cursor = 63;
+               }
+       }
+
+       for_each_intel_plane_on_crtc(dev, crtc, plane) {
+               struct intel_plane_state *state =
+                       to_intel_plane_state(plane->base.state);
+
+               if (!state->visible)
+                       continue;
+
+               /* normal watermarks */
+               for (level = 0; level < wm_state->num_levels; level++) {
+                       int wm = vlv_compute_wm_level(plane, crtc, state, level);
+                       int max_wm = plane->base.type == DRM_PLANE_TYPE_CURSOR ? 63 : 511;
+
+                       /* hack */
+                       if (WARN_ON(level == 0 && wm > max_wm))
+                               wm = max_wm;
+
+                       if (wm > plane->wm.fifo_size)
+                               break;
+
+                       switch (plane->base.type) {
+                               int sprite;
+                       case DRM_PLANE_TYPE_CURSOR:
+                               wm_state->wm[level].cursor = wm;
+                               break;
+                       case DRM_PLANE_TYPE_PRIMARY:
+                               wm_state->wm[level].primary = wm;
+                               break;
+                       case DRM_PLANE_TYPE_OVERLAY:
+                               sprite = plane->plane;
+                               wm_state->wm[level].sprite[sprite] = wm;
+                               break;
+                       }
+               }
+
+               wm_state->num_levels = level;
+
+               if (!wm_state->cxsr)
                        continue;
 
-               wm->sr.plane = vlv_compute_wm(to_intel_crtc(crtc),
-                                             plane, fifo_size);
-               if (wm->sr.plane != 0)
+               /* maxfifo watermarks */
+               switch (plane->base.type) {
+                       int sprite, level;
+               case DRM_PLANE_TYPE_CURSOR:
+                       for (level = 0; level < wm_state->num_levels; level++)
+                               wm_state->sr[level].cursor =
+                                       wm_state->wm[level].cursor;
+                       break;
+               case DRM_PLANE_TYPE_PRIMARY:
+                       for (level = 0; level < wm_state->num_levels; level++)
+                               wm_state->sr[level].plane =
+                                       min(wm_state->sr[level].plane,
+                                           wm_state->wm[level].primary);
                        break;
+               case DRM_PLANE_TYPE_OVERLAY:
+                       sprite = plane->plane;
+                       for (level = 0; level < wm_state->num_levels; level++)
+                               wm_state->sr[level].plane =
+                                       min(wm_state->sr[level].plane,
+                                           wm_state->wm[level].sprite[sprite]);
+                       break;
+               }
        }
 
-       return true;
+       /* clear any (partially) filled invalid levels */
+       for (level = wm_state->num_levels; level < to_i915(dev)->wm.max_level + 1; level++) {
+               memset(&wm_state->wm[level], 0, sizeof(wm_state->wm[level]));
+               memset(&wm_state->sr[level], 0, sizeof(wm_state->sr[level]));
+       }
+
+       vlv_invert_wms(crtc);
 }
 
-static void valleyview_update_wm(struct drm_crtc *crtc)
+#define VLV_FIFO(plane, value) \
+       (((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV)
+
+static void vlv_pipe_set_fifo_size(struct intel_crtc *crtc)
 {
-       struct drm_device *dev = crtc->dev;
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-       enum pipe pipe = intel_crtc->pipe;
-       bool cxsr_enabled;
-       struct vlv_wm_values wm = dev_priv->wm.vlv;
+       struct drm_device *dev = crtc->base.dev;
+       struct drm_i915_private *dev_priv = to_i915(dev);
+       struct intel_plane *plane;
+       int sprite0_start = 0, sprite1_start = 0, fifo_size = 0;
 
-       wm.ddl[pipe].primary = vlv_compute_drain_latency(crtc, crtc->primary);
-       wm.pipe[pipe].primary = vlv_compute_wm(intel_crtc,
-                                              to_intel_plane(crtc->primary),
-                                              vlv_get_fifo_size(dev, pipe, 0));
+       for_each_intel_plane_on_crtc(dev, crtc, plane) {
+               if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
+                       WARN_ON(plane->wm.fifo_size != 63);
+                       continue;
+               }
 
-       wm.ddl[pipe].cursor = vlv_compute_drain_latency(crtc, crtc->cursor);
-       wm.pipe[pipe].cursor = vlv_compute_wm(intel_crtc,
-                                             to_intel_plane(crtc->cursor),
-                                             0x3f);
+               if (plane->base.type == DRM_PLANE_TYPE_PRIMARY)
+                       sprite0_start = plane->wm.fifo_size;
+               else if (plane->plane == 0)
+                       sprite1_start = sprite0_start + plane->wm.fifo_size;
+               else
+                       fifo_size = sprite1_start + plane->wm.fifo_size;
+       }
 
-       cxsr_enabled = vlv_compute_sr_wm(dev, &wm);
+       WARN_ON(fifo_size != 512 - 1);
 
-       if (memcmp(&wm, &dev_priv->wm.vlv, sizeof(wm)) == 0)
-               return;
+       DRM_DEBUG_KMS("Pipe %c FIFO split %d / %d / %d\n",
+                     pipe_name(crtc->pipe), sprite0_start,
+                     sprite1_start, fifo_size);
 
-       DRM_DEBUG_KMS("Setting FIFO watermarks - %c: plane=%d, cursor=%d, "
-                     "SR: plane=%d, cursor=%d\n", pipe_name(pipe),
-                     wm.pipe[pipe].primary, wm.pipe[pipe].cursor,
-                     wm.sr.plane, wm.sr.cursor);
+       switch (crtc->pipe) {
+               uint32_t dsparb, dsparb2, dsparb3;
+       case PIPE_A:
+               dsparb = I915_READ(DSPARB);
+               dsparb2 = I915_READ(DSPARB2);
 
-       /*
-        * FIXME DDR DVFS introduces massive memory latencies which
-        * are not known to system agent so any deadline specified
-        * by the display may not be respected. To support DDR DVFS
-        * the watermark code needs to be rewritten to essentially
-        * bypass deadline mechanism and rely solely on the
-        * watermarks. For now disable DDR DVFS.
-        */
-       if (IS_CHERRYVIEW(dev_priv))
-               chv_set_memory_dvfs(dev_priv, false);
+               dsparb &= ~(VLV_FIFO(SPRITEA, 0xff) |
+                           VLV_FIFO(SPRITEB, 0xff));
+               dsparb |= (VLV_FIFO(SPRITEA, sprite0_start) |
+                          VLV_FIFO(SPRITEB, sprite1_start));
 
-       if (!cxsr_enabled)
-               intel_set_memory_cxsr(dev_priv, false);
+               dsparb2 &= ~(VLV_FIFO(SPRITEA_HI, 0x1) |
+                            VLV_FIFO(SPRITEB_HI, 0x1));
+               dsparb2 |= (VLV_FIFO(SPRITEA_HI, sprite0_start >> 8) |
+                          VLV_FIFO(SPRITEB_HI, sprite1_start >> 8));
 
-       vlv_write_wm_values(intel_crtc, &wm);
+               I915_WRITE(DSPARB, dsparb);
+               I915_WRITE(DSPARB2, dsparb2);
+               break;
+       case PIPE_B:
+               dsparb = I915_READ(DSPARB);
+               dsparb2 = I915_READ(DSPARB2);
 
-       if (cxsr_enabled)
-               intel_set_memory_cxsr(dev_priv, true);
+               dsparb &= ~(VLV_FIFO(SPRITEC, 0xff) |
+                           VLV_FIFO(SPRITED, 0xff));
+               dsparb |= (VLV_FIFO(SPRITEC, sprite0_start) |
+                          VLV_FIFO(SPRITED, sprite1_start));
+
+               dsparb2 &= ~(VLV_FIFO(SPRITEC_HI, 0xff) |
+                            VLV_FIFO(SPRITED_HI, 0xff));
+               dsparb2 |= (VLV_FIFO(SPRITEC_HI, sprite0_start >> 8) |
+                          VLV_FIFO(SPRITED_HI, sprite1_start >> 8));
+
+               I915_WRITE(DSPARB, dsparb);
+               I915_WRITE(DSPARB2, dsparb2);
+               break;
+       case PIPE_C:
+               dsparb3 = I915_READ(DSPARB3);
+               dsparb2 = I915_READ(DSPARB2);
+
+               dsparb3 &= ~(VLV_FIFO(SPRITEE, 0xff) |
+                            VLV_FIFO(SPRITEF, 0xff));
+               dsparb3 |= (VLV_FIFO(SPRITEE, sprite0_start) |
+                           VLV_FIFO(SPRITEF, sprite1_start));
+
+               dsparb2 &= ~(VLV_FIFO(SPRITEE_HI, 0xff) |
+                            VLV_FIFO(SPRITEF_HI, 0xff));
+               dsparb2 |= (VLV_FIFO(SPRITEE_HI, sprite0_start >> 8) |
+                          VLV_FIFO(SPRITEF_HI, sprite1_start >> 8));
+
+               I915_WRITE(DSPARB3, dsparb3);
+               I915_WRITE(DSPARB2, dsparb2);
+               break;
+       default:
+               break;
+       }
 }
 
-static void valleyview_update_sprite_wm(struct drm_plane *plane,
-                                       struct drm_crtc *crtc,
-                                       uint32_t sprite_width,
-                                       uint32_t sprite_height,
-                                       int pixel_size,
-                                       bool enabled, bool scaled)
+#undef VLV_FIFO
+
+static void vlv_merge_wm(struct drm_device *dev,
+                        struct vlv_wm_values *wm)
+{
+       struct intel_crtc *crtc;
+       int num_active_crtcs = 0;
+
+       wm->level = to_i915(dev)->wm.max_level;
+       wm->cxsr = true;
+
+       for_each_intel_crtc(dev, crtc) {
+               const struct vlv_wm_state *wm_state = &crtc->wm_state;
+
+               if (!crtc->active)
+                       continue;
+
+               if (!wm_state->cxsr)
+                       wm->cxsr = false;
+
+               num_active_crtcs++;
+               wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
+       }
+
+       if (num_active_crtcs != 1)
+               wm->cxsr = false;
+
+       if (num_active_crtcs > 1)
+               wm->level = VLV_WM_LEVEL_PM2;
+
+       for_each_intel_crtc(dev, crtc) {
+               struct vlv_wm_state *wm_state = &crtc->wm_state;
+               enum pipe pipe = crtc->pipe;
+
+               if (!crtc->active)
+                       continue;
+
+               wm->pipe[pipe] = wm_state->wm[wm->level];
+               if (wm->cxsr)
+                       wm->sr = wm_state->sr[wm->level];
+
+               wm->ddl[pipe].primary = DDL_PRECISION_HIGH | 2;
+               wm->ddl[pipe].sprite[0] = DDL_PRECISION_HIGH | 2;
+               wm->ddl[pipe].sprite[1] = DDL_PRECISION_HIGH | 2;
+               wm->ddl[pipe].cursor = DDL_PRECISION_HIGH | 2;
+       }
+}
+
+static void vlv_update_wm(struct drm_crtc *crtc)
 {
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        enum pipe pipe = intel_crtc->pipe;
-       int sprite = to_intel_plane(plane)->plane;
-       bool cxsr_enabled;
-       struct vlv_wm_values wm = dev_priv->wm.vlv;
+       struct vlv_wm_values wm = {};
 
-       if (enabled) {
-               wm.ddl[pipe].sprite[sprite] =
-                       vlv_compute_drain_latency(crtc, plane);
+       vlv_compute_wm(intel_crtc);
+       vlv_merge_wm(dev, &wm);
 
-               wm.pipe[pipe].sprite[sprite] =
-                       vlv_compute_wm(intel_crtc,
-                                      to_intel_plane(plane),
-                                      vlv_get_fifo_size(dev, pipe, sprite+1));
-       } else {
-               wm.ddl[pipe].sprite[sprite] = 0;
-               wm.pipe[pipe].sprite[sprite] = 0;
+       if (memcmp(&dev_priv->wm.vlv, &wm, sizeof(wm)) == 0) {
+               /* FIXME should be part of crtc atomic commit */
+               vlv_pipe_set_fifo_size(intel_crtc);
+               return;
        }
 
-       cxsr_enabled = vlv_compute_sr_wm(dev, &wm);
-
-       if (memcmp(&wm, &dev_priv->wm.vlv, sizeof(wm)) == 0)
-               return;
+       if (wm.level < VLV_WM_LEVEL_DDR_DVFS &&
+           dev_priv->wm.vlv.level >= VLV_WM_LEVEL_DDR_DVFS)
+               chv_set_memory_dvfs(dev_priv, false);
 
-       DRM_DEBUG_KMS("Setting FIFO watermarks - %c: sprite %c=%d, "
-                     "SR: plane=%d, cursor=%d\n", pipe_name(pipe),
-                     sprite_name(pipe, sprite),
-                     wm.pipe[pipe].sprite[sprite],
-                     wm.sr.plane, wm.sr.cursor);
+       if (wm.level < VLV_WM_LEVEL_PM5 &&
+           dev_priv->wm.vlv.level >= VLV_WM_LEVEL_PM5)
+               chv_set_memory_pm5(dev_priv, false);
 
-       if (!cxsr_enabled)
+       if (!wm.cxsr && dev_priv->wm.vlv.cxsr)
                intel_set_memory_cxsr(dev_priv, false);
 
+       /* FIXME should be part of crtc atomic commit */
+       vlv_pipe_set_fifo_size(intel_crtc);
+
        vlv_write_wm_values(intel_crtc, &wm);
 
-       if (cxsr_enabled)
+       DRM_DEBUG_KMS("Setting FIFO watermarks - %c: plane=%d, cursor=%d, "
+                     "sprite0=%d, sprite1=%d, SR: plane=%d, cursor=%d level=%d cxsr=%d\n",
+                     pipe_name(pipe), wm.pipe[pipe].primary, wm.pipe[pipe].cursor,
+                     wm.pipe[pipe].sprite[0], wm.pipe[pipe].sprite[1],
+                     wm.sr.plane, wm.sr.cursor, wm.level, wm.cxsr);
+
+       if (wm.cxsr && !dev_priv->wm.vlv.cxsr)
                intel_set_memory_cxsr(dev_priv, true);
+
+       if (wm.level >= VLV_WM_LEVEL_PM5 &&
+           dev_priv->wm.vlv.level < VLV_WM_LEVEL_PM5)
+               chv_set_memory_pm5(dev_priv, true);
+
+       if (wm.level >= VLV_WM_LEVEL_DDR_DVFS &&
+           dev_priv->wm.vlv.level < VLV_WM_LEVEL_DDR_DVFS)
+               chv_set_memory_dvfs(dev_priv, true);
+
+       dev_priv->wm.vlv = wm;
 }
 
 #define single_plane_enabled(mask) is_power_of_2(mask)
@@ -1193,8 +1425,7 @@ static void i965_update_wm(struct drm_crtc *unused_crtc)
        if (crtc) {
                /* self-refresh has much higher latency */
                static const int sr_latency_ns = 12000;
-               const struct drm_display_mode *adjusted_mode =
-                       &to_intel_crtc(crtc)->config->base.adjusted_mode;
+               const struct drm_display_mode *adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
                int clock = adjusted_mode->crtc_clock;
                int htotal = adjusted_mode->crtc_htotal;
                int hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
@@ -1341,8 +1572,7 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc)
        if (HAS_FW_BLC(dev) && enabled) {
                /* self-refresh has much higher latency */
                static const int sr_latency_ns = 6000;
-               const struct drm_display_mode *adjusted_mode =
-                       &to_intel_crtc(enabled)->config->base.adjusted_mode;
+               const struct drm_display_mode *adjusted_mode = &to_intel_crtc(enabled)->config->base.adjusted_mode;
                int clock = adjusted_mode->crtc_clock;
                int htotal = adjusted_mode->crtc_htotal;
                int hdisplay = to_intel_crtc(enabled)->config->pipe_src_w;
@@ -1411,23 +1641,22 @@ static void i845_update_wm(struct drm_crtc *unused_crtc)
        I915_WRITE(FW_BLC, fwater_lo);
 }
 
-static uint32_t ilk_pipe_pixel_rate(struct drm_device *dev,
-                                   struct drm_crtc *crtc)
+uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config)
 {
-       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        uint32_t pixel_rate;
 
-       pixel_rate = intel_crtc->config->base.adjusted_mode.crtc_clock;
+       pixel_rate = pipe_config->base.adjusted_mode.crtc_clock;
 
        /* We only use IF-ID interlacing. If we ever use PF-ID we'll need to
         * adjust the pixel_rate here. */
 
-       if (intel_crtc->config->pch_pfit.enabled) {
+       if (pipe_config->pch_pfit.enabled) {
                uint64_t pipe_w, pipe_h, pfit_w, pfit_h;
-               uint32_t pfit_size = intel_crtc->config->pch_pfit.size;
+               uint32_t pfit_size = pipe_config->pch_pfit.size;
+
+               pipe_w = pipe_config->pipe_src_w;
+               pipe_h = pipe_config->pipe_src_h;
 
-               pipe_w = intel_crtc->config->pipe_src_w;
-               pipe_h = intel_crtc->config->pipe_src_h;
                pfit_w = (pfit_size >> 16) & 0xFFFF;
                pfit_h = pfit_size & 0xFFFF;
                if (pipe_w < pfit_w)
@@ -1484,16 +1713,6 @@ struct skl_pipe_wm_parameters {
        uint32_t pipe_htotal;
        uint32_t pixel_rate; /* in KHz */
        struct intel_plane_wm_parameters plane[I915_MAX_PLANES];
-       struct intel_plane_wm_parameters cursor;
-};
-
-struct ilk_pipe_wm_parameters {
-       bool active;
-       uint32_t pipe_htotal;
-       uint32_t pixel_rate;
-       struct intel_plane_wm_parameters pri;
-       struct intel_plane_wm_parameters spr;
-       struct intel_plane_wm_parameters cur;
 };
 
 struct ilk_wm_maximums {
@@ -1514,26 +1733,26 @@ struct intel_wm_config {
  * For both WM_PIPE and WM_LP.
  * mem_value must be in 0.1us units.
  */
-static uint32_t ilk_compute_pri_wm(const struct ilk_pipe_wm_parameters *params,
+static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate,
+                                  const struct intel_plane_state *pstate,
                                   uint32_t mem_value,
                                   bool is_lp)
 {
+       int bpp = pstate->base.fb ? pstate->base.fb->bits_per_pixel / 8 : 0;
        uint32_t method1, method2;
 
-       if (!params->active || !params->pri.enabled)
+       if (!cstate->base.active || !pstate->visible)
                return 0;
 
-       method1 = ilk_wm_method1(params->pixel_rate,
-                                params->pri.bytes_per_pixel,
-                                mem_value);
+       method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), bpp, mem_value);
 
        if (!is_lp)
                return method1;
 
-       method2 = ilk_wm_method2(params->pixel_rate,
-                                params->pipe_htotal,
-                                params->pri.horiz_pixels,
-                                params->pri.bytes_per_pixel,
+       method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
+                                cstate->base.adjusted_mode.crtc_htotal,
+                                drm_rect_width(&pstate->dst),
+                                bpp,
                                 mem_value);
 
        return min(method1, method2);
@@ -1543,21 +1762,21 @@ static uint32_t ilk_compute_pri_wm(const struct ilk_pipe_wm_parameters *params,
  * For both WM_PIPE and WM_LP.
  * mem_value must be in 0.1us units.
  */
-static uint32_t ilk_compute_spr_wm(const struct ilk_pipe_wm_parameters *params,
+static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate,
+                                  const struct intel_plane_state *pstate,
                                   uint32_t mem_value)
 {
+       int bpp = pstate->base.fb ? pstate->base.fb->bits_per_pixel / 8 : 0;
        uint32_t method1, method2;
 
-       if (!params->active || !params->spr.enabled)
+       if (!cstate->base.active || !pstate->visible)
                return 0;
 
-       method1 = ilk_wm_method1(params->pixel_rate,
-                                params->spr.bytes_per_pixel,
-                                mem_value);
-       method2 = ilk_wm_method2(params->pixel_rate,
-                                params->pipe_htotal,
-                                params->spr.horiz_pixels,
-                                params->spr.bytes_per_pixel,
+       method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), bpp, mem_value);
+       method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
+                                cstate->base.adjusted_mode.crtc_htotal,
+                                drm_rect_width(&pstate->dst),
+                                bpp,
                                 mem_value);
        return min(method1, method2);
 }
@@ -1566,29 +1785,33 @@ static uint32_t ilk_compute_spr_wm(const struct ilk_pipe_wm_parameters *params,
  * For both WM_PIPE and WM_LP.
  * mem_value must be in 0.1us units.
  */
-static uint32_t ilk_compute_cur_wm(const struct ilk_pipe_wm_parameters *params,
+static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate,
+                                  const struct intel_plane_state *pstate,
                                   uint32_t mem_value)
 {
-       if (!params->active || !params->cur.enabled)
+       int bpp = pstate->base.fb ? pstate->base.fb->bits_per_pixel / 8 : 0;
+
+       if (!cstate->base.active || !pstate->visible)
                return 0;
 
-       return ilk_wm_method2(params->pixel_rate,
-                             params->pipe_htotal,
-                             params->cur.horiz_pixels,
-                             params->cur.bytes_per_pixel,
+       return ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
+                             cstate->base.adjusted_mode.crtc_htotal,
+                             drm_rect_width(&pstate->dst),
+                             bpp,
                              mem_value);
 }
 
 /* Only for WM_LP. */
-static uint32_t ilk_compute_fbc_wm(const struct ilk_pipe_wm_parameters *params,
+static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
+                                  const struct intel_plane_state *pstate,
                                   uint32_t pri_val)
 {
-       if (!params->active || !params->pri.enabled)
+       int bpp = pstate->base.fb ? pstate->base.fb->bits_per_pixel / 8 : 0;
+
+       if (!cstate->base.active || !pstate->visible)
                return 0;
 
-       return ilk_wm_fbc(pri_val,
-                         params->pri.horiz_pixels,
-                         params->pri.bytes_per_pixel);
+       return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->dst), bpp);
 }
 
 static unsigned int ilk_display_fifo_size(const struct drm_device *dev)
@@ -1753,10 +1976,12 @@ static bool ilk_validate_wm_level(int level,
 }
 
 static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv,
+                                const struct intel_crtc *intel_crtc,
                                 int level,
-                                const struct ilk_pipe_wm_parameters *p,
+                                struct intel_crtc_state *cstate,
                                 struct intel_wm_level *result)
 {
+       struct intel_plane *intel_plane;
        uint16_t pri_latency = dev_priv->wm.pri_latency[level];
        uint16_t spr_latency = dev_priv->wm.spr_latency[level];
        uint16_t cur_latency = dev_priv->wm.cur_latency[level];
@@ -1768,10 +1993,29 @@ static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv,
                cur_latency *= 5;
        }
 
-       result->pri_val = ilk_compute_pri_wm(p, pri_latency, level);
-       result->spr_val = ilk_compute_spr_wm(p, spr_latency);
-       result->cur_val = ilk_compute_cur_wm(p, cur_latency);
-       result->fbc_val = ilk_compute_fbc_wm(p, result->pri_val);
+       for_each_intel_plane_on_crtc(dev_priv->dev, intel_crtc, intel_plane) {
+               struct intel_plane_state *pstate =
+                       to_intel_plane_state(intel_plane->base.state);
+
+               switch (intel_plane->base.type) {
+               case DRM_PLANE_TYPE_PRIMARY:
+                       result->pri_val = ilk_compute_pri_wm(cstate, pstate,
+                                                            pri_latency,
+                                                            level);
+                       result->fbc_val = ilk_compute_fbc_wm(cstate, pstate,
+                                                            result->pri_val);
+                       break;
+               case DRM_PLANE_TYPE_OVERLAY:
+                       result->spr_val = ilk_compute_spr_wm(cstate, pstate,
+                                                            spr_latency);
+                       break;
+               case DRM_PLANE_TYPE_CURSOR:
+                       result->cur_val = ilk_compute_cur_wm(cstate, pstate,
+                                                            cur_latency);
+                       break;
+               }
+       }
+
        result->enable = true;
 }
 
@@ -1780,7 +2024,7 @@ hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-       struct drm_display_mode *mode = &intel_crtc->config->base.adjusted_mode;
+       const struct drm_display_mode *adjusted_mode = &intel_crtc->config->base.adjusted_mode;
        u32 linetime, ips_linetime;
 
        if (!intel_crtc->active)
@@ -1789,10 +2033,10 @@ hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
        /* The WM are computed with base on how long it takes to fill a single
         * row at the given clock rate, multiplied by 8.
         * */
-       linetime = DIV_ROUND_CLOSEST(mode->crtc_htotal * 1000 * 8,
-                                    mode->crtc_clock);
-       ips_linetime = DIV_ROUND_CLOSEST(mode->crtc_htotal * 1000 * 8,
-                                        intel_ddi_get_cdclk_freq(dev_priv));
+       linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
+                                    adjusted_mode->crtc_clock);
+       ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
+                                        dev_priv->cdclk_freq);
 
        return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
               PIPE_WM_LINETIME_TIME(linetime);
@@ -1923,7 +2167,7 @@ static void intel_fixup_cur_wm_latency(struct drm_device *dev, uint16_t wm[5])
 int ilk_wm_max_level(const struct drm_device *dev)
 {
        /* how many WM levels are we expecting */
-       if (IS_GEN9(dev))
+       if (INTEL_INFO(dev)->gen >= 9)
                return 7;
        else if (IS_HASWELL(dev) || IS_BROADWELL(dev))
                return 4;
@@ -2030,48 +2274,6 @@ static void skl_setup_wm_latency(struct drm_device *dev)
        intel_print_wm_latency(dev, "Gen9 Plane", dev_priv->wm.skl_latency);
 }
 
-static void ilk_compute_wm_parameters(struct drm_crtc *crtc,
-                                     struct ilk_pipe_wm_parameters *p)
-{
-       struct drm_device *dev = crtc->dev;
-       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-       enum pipe pipe = intel_crtc->pipe;
-       struct drm_plane *plane;
-
-       if (!intel_crtc->active)
-               return;
-
-       p->active = true;
-       p->pipe_htotal = intel_crtc->config->base.adjusted_mode.crtc_htotal;
-       p->pixel_rate = ilk_pipe_pixel_rate(dev, crtc);
-
-       if (crtc->primary->state->fb)
-               p->pri.bytes_per_pixel =
-                       crtc->primary->state->fb->bits_per_pixel / 8;
-       else
-               p->pri.bytes_per_pixel = 4;
-
-       p->cur.bytes_per_pixel = 4;
-       /*
-        * TODO: for now, assume primary and cursor planes are always enabled.
-        * Setting them to false makes the screen flicker.
-        */
-       p->pri.enabled = true;
-       p->cur.enabled = true;
-
-       p->pri.horiz_pixels = intel_crtc->config->pipe_src_w;
-       p->cur.horiz_pixels = intel_crtc->base.cursor->state->crtc_w;
-
-       drm_for_each_legacy_plane(plane, &dev->mode_config.plane_list) {
-               struct intel_plane *intel_plane = to_intel_plane(plane);
-
-               if (intel_plane->pipe == pipe) {
-                       p->spr = intel_plane->wm;
-                       break;
-               }
-       }
-}
-
 static void ilk_compute_wm_config(struct drm_device *dev,
                                  struct intel_wm_config *config)
 {
@@ -2091,34 +2293,47 @@ static void ilk_compute_wm_config(struct drm_device *dev,
 }
 
 /* Compute new watermarks for the pipe */
-static bool intel_compute_pipe_wm(struct drm_crtc *crtc,
-                                 const struct ilk_pipe_wm_parameters *params,
+static bool intel_compute_pipe_wm(struct intel_crtc_state *cstate,
                                  struct intel_pipe_wm *pipe_wm)
 {
+       struct drm_crtc *crtc = cstate->base.crtc;
        struct drm_device *dev = crtc->dev;
        const struct drm_i915_private *dev_priv = dev->dev_private;
+       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+       struct intel_plane *intel_plane;
+       struct intel_plane_state *sprstate = NULL;
        int level, max_level = ilk_wm_max_level(dev);
        /* LP0 watermark maximums depend on this pipe alone */
        struct intel_wm_config config = {
                .num_pipes_active = 1,
-               .sprites_enabled = params->spr.enabled,
-               .sprites_scaled = params->spr.scaled,
        };
        struct ilk_wm_maximums max;
 
-       pipe_wm->pipe_enabled = params->active;
-       pipe_wm->sprites_enabled = params->spr.enabled;
-       pipe_wm->sprites_scaled = params->spr.scaled;
+       for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
+               if (intel_plane->base.type == DRM_PLANE_TYPE_OVERLAY) {
+                       sprstate = to_intel_plane_state(intel_plane->base.state);
+                       break;
+               }
+       }
+
+       config.sprites_enabled = sprstate->visible;
+       config.sprites_scaled = sprstate->visible &&
+               (drm_rect_width(&sprstate->dst) != drm_rect_width(&sprstate->src) >> 16 ||
+               drm_rect_height(&sprstate->dst) != drm_rect_height(&sprstate->src) >> 16);
+
+       pipe_wm->pipe_enabled = cstate->base.active;
+       pipe_wm->sprites_enabled = sprstate->visible;
+       pipe_wm->sprites_scaled = config.sprites_scaled;
 
        /* ILK/SNB: LP2+ watermarks only w/o sprites */
-       if (INTEL_INFO(dev)->gen <= 6 && params->spr.enabled)
+       if (INTEL_INFO(dev)->gen <= 6 && sprstate->visible)
                max_level = 1;
 
        /* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */
-       if (params->spr.scaled)
+       if (config.sprites_scaled)
                max_level = 0;
 
-       ilk_compute_wm_level(dev_priv, 0, params, &pipe_wm->wm[0]);
+       ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate, &pipe_wm->wm[0]);
 
        if (IS_HASWELL(dev) || IS_BROADWELL(dev))
                pipe_wm->linetime = hsw_compute_linetime_wm(dev, crtc);
@@ -2135,7 +2350,7 @@ static bool intel_compute_pipe_wm(struct drm_crtc *crtc,
        for (level = 1; level <= max_level; level++) {
                struct intel_wm_level wm = {};
 
-               ilk_compute_wm_level(dev_priv, level, params, &wm);
+               ilk_compute_wm_level(dev_priv, intel_crtc, level, cstate, &wm);
 
                /*
                 * Disable any watermark level that exceeds the
@@ -2192,6 +2407,7 @@ static void ilk_wm_merge(struct drm_device *dev,
                         const struct ilk_wm_maximums *max,
                         struct intel_pipe_wm *merged)
 {
+       struct drm_i915_private *dev_priv = dev->dev_private;
        int level, max_level = ilk_wm_max_level(dev);
        int last_enabled_level = max_level;
 
@@ -2232,7 +2448,8 @@ static void ilk_wm_merge(struct drm_device *dev,
         * What we should check here is whether FBC can be
         * enabled sometime later.
         */
-       if (IS_GEN5(dev) && !merged->fbc_wm_enabled && intel_fbc_enabled(dev)) {
+       if (IS_GEN5(dev) && !merged->fbc_wm_enabled &&
+           intel_fbc_enabled(dev_priv)) {
                for (level = 2; level <= max_level; level++) {
                        struct intel_wm_level *wm = &merged->wm[level];
 
@@ -2536,6 +2753,7 @@ static bool ilk_disable_lp_wm(struct drm_device *dev)
  */
 
 #define SKL_DDB_SIZE           896     /* in blocks */
+#define BXT_DDB_SIZE           512
 
 static void
 skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
@@ -2554,7 +2772,10 @@ skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
                return;
        }
 
-       ddb_size = SKL_DDB_SIZE;
+       if (IS_BROXTON(dev))
+               ddb_size = BXT_DDB_SIZE;
+       else
+               ddb_size = SKL_DDB_SIZE;
 
        ddb_size -= 4; /* 4 blocks for bypass path allocation */
 
@@ -2597,7 +2818,12 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
        int plane;
        u32 val;
 
+       memset(ddb, 0, sizeof(*ddb));
+
        for_each_pipe(dev_priv, pipe) {
+               if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PIPE(pipe)))
+                       continue;
+
                for_each_plane(dev_priv, pipe, plane) {
                        val = I915_READ(PLANE_BUF_CFG(pipe, plane));
                        skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane],
@@ -2605,13 +2831,24 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
                }
 
                val = I915_READ(CUR_BUF_CFG(pipe));
-               skl_ddb_entry_init_from_hw(&ddb->cursor[pipe], val);
+               skl_ddb_entry_init_from_hw(&ddb->plane[pipe][PLANE_CURSOR],
+                                          val);
        }
 }
 
 static unsigned int
-skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p)
+skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p, int y)
 {
+
+       /* for planar format */
+       if (p->y_bytes_per_pixel) {
+               if (y)  /* y-plane data rate */
+                       return p->horiz_pixels * p->vert_pixels * p->y_bytes_per_pixel;
+               else    /* uv-plane data rate */
+                       return (p->horiz_pixels/2) * (p->vert_pixels/2) * p->bytes_per_pixel;
+       }
+
+       /* for packed formats */
        return p->horiz_pixels * p->vert_pixels * p->bytes_per_pixel;
 }
 
@@ -2634,7 +2871,10 @@ skl_get_total_relative_data_rate(struct intel_crtc *intel_crtc,
                if (!p->enabled)
                        continue;
 
-               total_data_rate += skl_plane_relative_data_rate(p);
+               total_data_rate += skl_plane_relative_data_rate(p, 0); /* packed/uv */
+               if (p->y_bytes_per_pixel) {
+                       total_data_rate += skl_plane_relative_data_rate(p, 1); /* y-plane */
+               }
        }
 
        return total_data_rate;
@@ -2653,6 +2893,7 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,
        struct skl_ddb_entry *alloc = &ddb->pipe[pipe];
        uint16_t alloc_size, start, cursor_blocks;
        uint16_t minimum[I915_MAX_PLANES];
+       uint16_t y_minimum[I915_MAX_PLANES];
        unsigned int total_data_rate;
        int plane;
 
@@ -2660,13 +2901,14 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,
        alloc_size = skl_ddb_entry_size(alloc);
        if (alloc_size == 0) {
                memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
-               memset(&ddb->cursor[pipe], 0, sizeof(ddb->cursor[pipe]));
+               memset(&ddb->plane[pipe][PLANE_CURSOR], 0,
+                      sizeof(ddb->plane[pipe][PLANE_CURSOR]));
                return;
        }
 
        cursor_blocks = skl_cursor_allocation(config);
-       ddb->cursor[pipe].start = alloc->end - cursor_blocks;
-       ddb->cursor[pipe].end = alloc->end;
+       ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - cursor_blocks;
+       ddb->plane[pipe][PLANE_CURSOR].end = alloc->end;
 
        alloc_size -= cursor_blocks;
        alloc->end -= cursor_blocks;
@@ -2681,6 +2923,8 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,
 
                minimum[plane] = 8;
                alloc_size -= minimum[plane];
+               y_minimum[plane] = p->y_bytes_per_pixel ? 8 : 0;
+               alloc_size -= y_minimum[plane];
        }
 
        /*
@@ -2694,16 +2938,17 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,
        start = alloc->start;
        for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) {
                const struct intel_plane_wm_parameters *p;
-               unsigned int data_rate;
-               uint16_t plane_blocks;
+               unsigned int data_rate, y_data_rate;
+               uint16_t plane_blocks, y_plane_blocks = 0;
 
                p = &params->plane[plane];
                if (!p->enabled)
                        continue;
 
-               data_rate = skl_plane_relative_data_rate(p);
+               data_rate = skl_plane_relative_data_rate(p, 0);
 
                /*
+                * allocation for (packed formats) or (uv-plane part of planar format):
                 * promote the expression to 64 bits to avoid overflowing, the
                 * result is < available as data_rate / total_data_rate < 1
                 */
@@ -2715,6 +2960,22 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,
                ddb->plane[pipe][plane].end = start + plane_blocks;
 
                start += plane_blocks;
+
+               /*
+                * allocation for y_plane part of planar format:
+                */
+               if (p->y_bytes_per_pixel) {
+                       y_data_rate = skl_plane_relative_data_rate(p, 1);
+                       y_plane_blocks = y_minimum[plane];
+                       y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate,
+                                               total_data_rate);
+
+                       ddb->y_plane[pipe][plane].start = start;
+                       ddb->y_plane[pipe][plane].end = start + y_plane_blocks;
+
+                       start += y_plane_blocks;
+               }
+
        }
 
 }
@@ -2786,8 +3047,8 @@ static bool skl_ddb_allocation_changed(const struct skl_ddb_allocation *new_ddb,
                   sizeof(new_ddb->plane[pipe])))
                return true;
 
-       if (memcmp(&new_ddb->cursor[pipe], &cur_ddb->cursor[pipe],
-                   sizeof(new_ddb->cursor[pipe])))
+       if (memcmp(&new_ddb->plane[pipe][PLANE_CURSOR], &cur_ddb->plane[pipe][PLANE_CURSOR],
+                   sizeof(new_ddb->plane[pipe][PLANE_CURSOR])))
                return true;
 
        return false;
@@ -2827,13 +3088,19 @@ static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc,
                p->pixel_rate = skl_pipe_pixel_rate(intel_crtc->config);
 
                fb = crtc->primary->state->fb;
+               /* For planar: Bpp is for uv plane, y_Bpp is for y plane */
                if (fb) {
                        p->plane[0].enabled = true;
-                       p->plane[0].bytes_per_pixel = fb->bits_per_pixel / 8;
+                       p->plane[0].bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ?
+                               drm_format_plane_cpp(fb->pixel_format, 1) :
+                               drm_format_plane_cpp(fb->pixel_format, 0);
+                       p->plane[0].y_bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ?
+                               drm_format_plane_cpp(fb->pixel_format, 0) : 0;
                        p->plane[0].tiling = fb->modifier[0];
                } else {
                        p->plane[0].enabled = false;
                        p->plane[0].bytes_per_pixel = 0;
+                       p->plane[0].y_bytes_per_pixel = 0;
                        p->plane[0].tiling = DRM_FORMAT_MOD_NONE;
                }
                p->plane[0].horiz_pixels = intel_crtc->config->pipe_src_w;
@@ -2841,16 +3108,17 @@ static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc,
                p->plane[0].rotation = crtc->primary->state->rotation;
 
                fb = crtc->cursor->state->fb;
+               p->plane[PLANE_CURSOR].y_bytes_per_pixel = 0;
                if (fb) {
-                       p->cursor.enabled = true;
-                       p->cursor.bytes_per_pixel = fb->bits_per_pixel / 8;
-                       p->cursor.horiz_pixels = crtc->cursor->state->crtc_w;
-                       p->cursor.vert_pixels = crtc->cursor->state->crtc_h;
+                       p->plane[PLANE_CURSOR].enabled = true;
+                       p->plane[PLANE_CURSOR].bytes_per_pixel = fb->bits_per_pixel / 8;
+                       p->plane[PLANE_CURSOR].horiz_pixels = crtc->cursor->state->crtc_w;
+                       p->plane[PLANE_CURSOR].vert_pixels = crtc->cursor->state->crtc_h;
                } else {
-                       p->cursor.enabled = false;
-                       p->cursor.bytes_per_pixel = 0;
-                       p->cursor.horiz_pixels = 64;
-                       p->cursor.vert_pixels = 64;
+                       p->plane[PLANE_CURSOR].enabled = false;
+                       p->plane[PLANE_CURSOR].bytes_per_pixel = 0;
+                       p->plane[PLANE_CURSOR].horiz_pixels = 64;
+                       p->plane[PLANE_CURSOR].vert_pixels = 64;
                }
        }
 
@@ -2876,22 +3144,25 @@ static bool skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
        uint32_t plane_bytes_per_line, plane_blocks_per_line;
        uint32_t res_blocks, res_lines;
        uint32_t selected_result;
+       uint8_t bytes_per_pixel;
 
        if (latency == 0 || !p->active || !p_params->enabled)
                return false;
 
+       bytes_per_pixel = p_params->y_bytes_per_pixel ?
+               p_params->y_bytes_per_pixel :
+               p_params->bytes_per_pixel;
        method1 = skl_wm_method1(p->pixel_rate,
-                                p_params->bytes_per_pixel,
+                                bytes_per_pixel,
                                 latency);
        method2 = skl_wm_method2(p->pixel_rate,
                                 p->pipe_htotal,
                                 p_params->horiz_pixels,
-                                p_params->bytes_per_pixel,
+                                bytes_per_pixel,
                                 p_params->tiling,
                                 latency);
 
-       plane_bytes_per_line = p_params->horiz_pixels *
-                                       p_params->bytes_per_pixel;
+       plane_bytes_per_line = p_params->horiz_pixels * bytes_per_pixel;
        plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
 
        if (p_params->tiling == I915_FORMAT_MOD_Y_TILED ||
@@ -2961,11 +3232,12 @@ static void skl_compute_wm_level(const struct drm_i915_private *dev_priv,
                                                &result->plane_res_l[i]);
        }
 
-       ddb_blocks = skl_ddb_entry_size(&ddb->cursor[pipe]);
-       result->cursor_en = skl_compute_plane_wm(dev_priv, p, &p->cursor,
+       ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][PLANE_CURSOR]);
+       result->plane_en[PLANE_CURSOR] = skl_compute_plane_wm(dev_priv, p,
+                                                &p->plane[PLANE_CURSOR],
                                                 ddb_blocks, level,
-                                                &result->cursor_res_b,
-                                                &result->cursor_res_l);
+                                                &result->plane_res_b[PLANE_CURSOR],
+                                                &result->plane_res_l[PLANE_CURSOR]);
 }
 
 static uint32_t
@@ -2974,8 +3246,10 @@ skl_compute_linetime_wm(struct drm_crtc *crtc, struct skl_pipe_wm_parameters *p)
        if (!to_intel_crtc(crtc)->active)
                return 0;
 
-       return DIV_ROUND_UP(8 * p->pipe_htotal * 1000, p->pixel_rate);
+       if (WARN_ON(p->pixel_rate == 0))
+               return 0;
 
+       return DIV_ROUND_UP(8 * p->pipe_htotal * 1000, p->pixel_rate);
 }
 
 static void skl_compute_transition_wm(struct drm_crtc *crtc,
@@ -2991,7 +3265,7 @@ static void skl_compute_transition_wm(struct drm_crtc *crtc,
        /* Until we know more, just disable transition WMs */
        for (i = 0; i < intel_num_planes(intel_crtc); i++)
                trans_wm->plane_en[i] = false;
-       trans_wm->cursor_en = false;
+       trans_wm->plane_en[PLANE_CURSOR] = false;
 }
 
 static void skl_compute_pipe_wm(struct drm_crtc *crtc,
@@ -3040,13 +3314,13 @@ static void skl_compute_wm_results(struct drm_device *dev,
 
                temp = 0;
 
-               temp |= p_wm->wm[level].cursor_res_l << PLANE_WM_LINES_SHIFT;
-               temp |= p_wm->wm[level].cursor_res_b;
+               temp |= p_wm->wm[level].plane_res_l[PLANE_CURSOR] << PLANE_WM_LINES_SHIFT;
+               temp |= p_wm->wm[level].plane_res_b[PLANE_CURSOR];
 
-               if (p_wm->wm[level].cursor_en)
+               if (p_wm->wm[level].plane_en[PLANE_CURSOR])
                        temp |= PLANE_WM_EN;
 
-               r->cursor[pipe][level] = temp;
+               r->plane[pipe][PLANE_CURSOR][level] = temp;
 
        }
 
@@ -3062,12 +3336,12 @@ static void skl_compute_wm_results(struct drm_device *dev,
        }
 
        temp = 0;
-       temp |= p_wm->trans_wm.cursor_res_l << PLANE_WM_LINES_SHIFT;
-       temp |= p_wm->trans_wm.cursor_res_b;
-       if (p_wm->trans_wm.cursor_en)
+       temp |= p_wm->trans_wm.plane_res_l[PLANE_CURSOR] << PLANE_WM_LINES_SHIFT;
+       temp |= p_wm->trans_wm.plane_res_b[PLANE_CURSOR];
+       if (p_wm->trans_wm.plane_en[PLANE_CURSOR])
                temp |= PLANE_WM_EN;
 
-       r->cursor_trans[pipe] = temp;
+       r->plane_trans[pipe][PLANE_CURSOR] = temp;
 
        r->wm_linetime[pipe] = p_wm->linetime;
 }
@@ -3101,20 +3375,25 @@ static void skl_write_wm_values(struct drm_i915_private *dev_priv,
                                I915_WRITE(PLANE_WM(pipe, i, level),
                                           new->plane[pipe][i][level]);
                        I915_WRITE(CUR_WM(pipe, level),
-                                  new->cursor[pipe][level]);
+                                  new->plane[pipe][PLANE_CURSOR][level]);
                }
                for (i = 0; i < intel_num_planes(crtc); i++)
                        I915_WRITE(PLANE_WM_TRANS(pipe, i),
                                   new->plane_trans[pipe][i]);
-               I915_WRITE(CUR_WM_TRANS(pipe), new->cursor_trans[pipe]);
+               I915_WRITE(CUR_WM_TRANS(pipe),
+                          new->plane_trans[pipe][PLANE_CURSOR]);
 
-               for (i = 0; i < intel_num_planes(crtc); i++)
+               for (i = 0; i < intel_num_planes(crtc); i++) {
                        skl_ddb_entry_write(dev_priv,
                                            PLANE_BUF_CFG(pipe, i),
                                            &new->ddb.plane[pipe][i]);
+                       skl_ddb_entry_write(dev_priv,
+                                           PLANE_NV12_BUF_CFG(pipe, i),
+                                           &new->ddb.y_plane[pipe][i]);
+               }
 
                skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe),
-                                   &new->ddb.cursor[pipe]);
+                                   &new->ddb.plane[pipe][PLANE_CURSOR]);
        }
 }
 
@@ -3176,7 +3455,7 @@ static void skl_flush_wm_values(struct drm_i915_private *dev_priv,
 {
        struct drm_device *dev = dev_priv->dev;
        struct skl_ddb_allocation *cur_ddb, *new_ddb;
-       bool reallocated[I915_MAX_PIPES] = {false, false, false};
+       bool reallocated[I915_MAX_PIPES] = {};
        struct intel_crtc *crtc;
        enum pipe pipe;
 
@@ -3269,6 +3548,7 @@ static bool skl_update_pipe_wm(struct drm_crtc *crtc,
                return false;
 
        intel_crtc->wm.skl_active = *pipe_wm;
+
        return true;
 }
 
@@ -3321,6 +3601,26 @@ static void skl_update_other_pipe_wm(struct drm_device *dev,
        }
 }
 
+static void skl_clear_wm(struct skl_wm_values *watermarks, enum pipe pipe)
+{
+       watermarks->wm_linetime[pipe] = 0;
+       memset(watermarks->plane[pipe], 0,
+              sizeof(uint32_t) * 8 * I915_MAX_PLANES);
+       memset(watermarks->plane_trans[pipe],
+              0, sizeof(uint32_t) * I915_MAX_PLANES);
+       watermarks->plane_trans[pipe][PLANE_CURSOR] = 0;
+
+       /* Clear ddb entries for pipe */
+       memset(&watermarks->ddb.pipe[pipe], 0, sizeof(struct skl_ddb_entry));
+       memset(&watermarks->ddb.plane[pipe], 0,
+              sizeof(struct skl_ddb_entry) * I915_MAX_PLANES);
+       memset(&watermarks->ddb.y_plane[pipe], 0,
+              sizeof(struct skl_ddb_entry) * I915_MAX_PLANES);
+       memset(&watermarks->ddb.plane[pipe][PLANE_CURSOR], 0,
+              sizeof(struct skl_ddb_entry));
+
+}
+
 static void skl_update_wm(struct drm_crtc *crtc)
 {
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -3331,7 +3631,11 @@ static void skl_update_wm(struct drm_crtc *crtc)
        struct skl_pipe_wm pipe_wm = {};
        struct intel_wm_config config = {};
 
-       memset(results, 0, sizeof(*results));
+
+       /* Clear all dirty flags */
+       memset(results->dirty, 0, sizeof(bool) * I915_MAX_PIPES);
+
+       skl_clear_wm(results, intel_crtc->pipe);
 
        skl_compute_wm_global_parameters(dev, &config);
 
@@ -3362,8 +3666,16 @@ skl_update_sprite_wm(struct drm_plane *plane, struct drm_crtc *crtc,
        intel_plane->wm.scaled = scaled;
        intel_plane->wm.horiz_pixels = sprite_width;
        intel_plane->wm.vert_pixels = sprite_height;
-       intel_plane->wm.bytes_per_pixel = pixel_size;
        intel_plane->wm.tiling = DRM_FORMAT_MOD_NONE;
+
+       /* For planar: Bpp is for UV plane, y_Bpp is for Y plane */
+       intel_plane->wm.bytes_per_pixel =
+               (fb && fb->pixel_format == DRM_FORMAT_NV12) ?
+               drm_format_plane_cpp(plane->state->fb->pixel_format, 1) : pixel_size;
+       intel_plane->wm.y_bytes_per_pixel =
+               (fb && fb->pixel_format == DRM_FORMAT_NV12) ?
+               drm_format_plane_cpp(plane->state->fb->pixel_format, 0) : 0;
+
        /*
         * Framebuffer can be NULL on plane disable, but it does not
         * matter for watermarks if we assume no tiling in that case.
@@ -3378,19 +3690,19 @@ skl_update_sprite_wm(struct drm_plane *plane, struct drm_crtc *crtc,
 static void ilk_update_wm(struct drm_crtc *crtc)
 {
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+       struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct ilk_wm_maximums max;
-       struct ilk_pipe_wm_parameters params = {};
        struct ilk_wm_values results = {};
        enum intel_ddb_partitioning partitioning;
        struct intel_pipe_wm pipe_wm = {};
        struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm;
        struct intel_wm_config config = {};
 
-       ilk_compute_wm_parameters(crtc, &params);
+       WARN_ON(cstate->base.active != intel_crtc->active);
 
-       intel_compute_pipe_wm(crtc, &params, &pipe_wm);
+       intel_compute_pipe_wm(cstate, &pipe_wm);
 
        if (!memcmp(&intel_crtc->wm.active, &pipe_wm, sizeof(pipe_wm)))
                return;
@@ -3430,12 +3742,6 @@ ilk_update_sprite_wm(struct drm_plane *plane,
        struct drm_device *dev = plane->dev;
        struct intel_plane *intel_plane = to_intel_plane(plane);
 
-       intel_plane->wm.enabled = enabled;
-       intel_plane->wm.scaled = scaled;
-       intel_plane->wm.horiz_pixels = sprite_width;
-       intel_plane->wm.vert_pixels = sprite_width;
-       intel_plane->wm.bytes_per_pixel = pixel_size;
-
        /*
         * IVB workaround: must disable low power watermarks for at least
         * one frame before enabling scaling.  LP watermarks can be re-enabled
@@ -3467,10 +3773,10 @@ static void skl_pipe_wm_active_state(uint32_t val,
                                        (val >> PLANE_WM_LINES_SHIFT) &
                                                PLANE_WM_LINES_MASK;
                } else {
-                       active->wm[level].cursor_en = is_enabled;
-                       active->wm[level].cursor_res_b =
+                       active->wm[level].plane_en[PLANE_CURSOR] = is_enabled;
+                       active->wm[level].plane_res_b[PLANE_CURSOR] =
                                        val & PLANE_WM_BLOCKS_MASK;
-                       active->wm[level].cursor_res_l =
+                       active->wm[level].plane_res_l[PLANE_CURSOR] =
                                        (val >> PLANE_WM_LINES_SHIFT) &
                                                PLANE_WM_LINES_MASK;
                }
@@ -3483,10 +3789,10 @@ static void skl_pipe_wm_active_state(uint32_t val,
                                        (val >> PLANE_WM_LINES_SHIFT) &
                                                PLANE_WM_LINES_MASK;
                } else {
-                       active->trans_wm.cursor_en = is_enabled;
-                       active->trans_wm.cursor_res_b =
+                       active->trans_wm.plane_en[PLANE_CURSOR] = is_enabled;
+                       active->trans_wm.plane_res_b[PLANE_CURSOR] =
                                        val & PLANE_WM_BLOCKS_MASK;
-                       active->trans_wm.cursor_res_l =
+                       active->trans_wm.plane_res_l[PLANE_CURSOR] =
                                        (val >> PLANE_WM_LINES_SHIFT) &
                                                PLANE_WM_LINES_MASK;
                }
@@ -3512,12 +3818,12 @@ static void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc)
                for (i = 0; i < intel_num_planes(intel_crtc); i++)
                        hw->plane[pipe][i][level] =
                                        I915_READ(PLANE_WM(pipe, i, level));
-               hw->cursor[pipe][level] = I915_READ(CUR_WM(pipe, level));
+               hw->plane[pipe][PLANE_CURSOR][level] = I915_READ(CUR_WM(pipe, level));
        }
 
        for (i = 0; i < intel_num_planes(intel_crtc); i++)
                hw->plane_trans[pipe][i] = I915_READ(PLANE_WM_TRANS(pipe, i));
-       hw->cursor_trans[pipe] = I915_READ(CUR_WM_TRANS(pipe));
+       hw->plane_trans[pipe][PLANE_CURSOR] = I915_READ(CUR_WM_TRANS(pipe));
 
        if (!intel_crtc->active)
                return;
@@ -3532,7 +3838,7 @@ static void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc)
                        skl_pipe_wm_active_state(temp, active, false,
                                                false, i, level);
                }
-               temp = hw->cursor[pipe][level];
+               temp = hw->plane[pipe][PLANE_CURSOR][level];
                skl_pipe_wm_active_state(temp, active, false, true, i, level);
        }
 
@@ -3541,7 +3847,7 @@ static void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc)
                skl_pipe_wm_active_state(temp, active, true, false, i, 0);
        }
 
-       temp = hw->cursor_trans[pipe];
+       temp = hw->plane_trans[pipe][PLANE_CURSOR];
        skl_pipe_wm_active_state(temp, active, true, true, i, 0);
 }
 
@@ -3603,6 +3909,159 @@ static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc)
        }
 }
 
+#define _FW_WM(value, plane) \
+       (((value) & DSPFW_ ## plane ## _MASK) >> DSPFW_ ## plane ## _SHIFT)
+#define _FW_WM_VLV(value, plane) \
+       (((value) & DSPFW_ ## plane ## _MASK_VLV) >> DSPFW_ ## plane ## _SHIFT)
+
+static void vlv_read_wm_values(struct drm_i915_private *dev_priv,
+                              struct vlv_wm_values *wm)
+{
+       enum pipe pipe;
+       uint32_t tmp;
+
+       for_each_pipe(dev_priv, pipe) {
+               tmp = I915_READ(VLV_DDL(pipe));
+
+               wm->ddl[pipe].primary =
+                       (tmp >> DDL_PLANE_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
+               wm->ddl[pipe].cursor =
+                       (tmp >> DDL_CURSOR_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
+               wm->ddl[pipe].sprite[0] =
+                       (tmp >> DDL_SPRITE_SHIFT(0)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
+               wm->ddl[pipe].sprite[1] =
+                       (tmp >> DDL_SPRITE_SHIFT(1)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
+       }
+
+       tmp = I915_READ(DSPFW1);
+       wm->sr.plane = _FW_WM(tmp, SR);
+       wm->pipe[PIPE_B].cursor = _FW_WM(tmp, CURSORB);
+       wm->pipe[PIPE_B].primary = _FW_WM_VLV(tmp, PLANEB);
+       wm->pipe[PIPE_A].primary = _FW_WM_VLV(tmp, PLANEA);
+
+       tmp = I915_READ(DSPFW2);
+       wm->pipe[PIPE_A].sprite[1] = _FW_WM_VLV(tmp, SPRITEB);
+       wm->pipe[PIPE_A].cursor = _FW_WM(tmp, CURSORA);
+       wm->pipe[PIPE_A].sprite[0] = _FW_WM_VLV(tmp, SPRITEA);
+
+       tmp = I915_READ(DSPFW3);
+       wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
+
+       if (IS_CHERRYVIEW(dev_priv)) {
+               tmp = I915_READ(DSPFW7_CHV);
+               wm->pipe[PIPE_B].sprite[1] = _FW_WM_VLV(tmp, SPRITED);
+               wm->pipe[PIPE_B].sprite[0] = _FW_WM_VLV(tmp, SPRITEC);
+
+               tmp = I915_READ(DSPFW8_CHV);
+               wm->pipe[PIPE_C].sprite[1] = _FW_WM_VLV(tmp, SPRITEF);
+               wm->pipe[PIPE_C].sprite[0] = _FW_WM_VLV(tmp, SPRITEE);
+
+               tmp = I915_READ(DSPFW9_CHV);
+               wm->pipe[PIPE_C].primary = _FW_WM_VLV(tmp, PLANEC);
+               wm->pipe[PIPE_C].cursor = _FW_WM(tmp, CURSORC);
+
+               tmp = I915_READ(DSPHOWM);
+               wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
+               wm->pipe[PIPE_C].sprite[1] |= _FW_WM(tmp, SPRITEF_HI) << 8;
+               wm->pipe[PIPE_C].sprite[0] |= _FW_WM(tmp, SPRITEE_HI) << 8;
+               wm->pipe[PIPE_C].primary |= _FW_WM(tmp, PLANEC_HI) << 8;
+               wm->pipe[PIPE_B].sprite[1] |= _FW_WM(tmp, SPRITED_HI) << 8;
+               wm->pipe[PIPE_B].sprite[0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
+               wm->pipe[PIPE_B].primary |= _FW_WM(tmp, PLANEB_HI) << 8;
+               wm->pipe[PIPE_A].sprite[1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
+               wm->pipe[PIPE_A].sprite[0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
+               wm->pipe[PIPE_A].primary |= _FW_WM(tmp, PLANEA_HI) << 8;
+       } else {
+               tmp = I915_READ(DSPFW7);
+               wm->pipe[PIPE_B].sprite[1] = _FW_WM_VLV(tmp, SPRITED);
+               wm->pipe[PIPE_B].sprite[0] = _FW_WM_VLV(tmp, SPRITEC);
+
+               tmp = I915_READ(DSPHOWM);
+               wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
+               wm->pipe[PIPE_B].sprite[1] |= _FW_WM(tmp, SPRITED_HI) << 8;
+               wm->pipe[PIPE_B].sprite[0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
+               wm->pipe[PIPE_B].primary |= _FW_WM(tmp, PLANEB_HI) << 8;
+               wm->pipe[PIPE_A].sprite[1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
+               wm->pipe[PIPE_A].sprite[0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
+               wm->pipe[PIPE_A].primary |= _FW_WM(tmp, PLANEA_HI) << 8;
+       }
+}
+
+#undef _FW_WM
+#undef _FW_WM_VLV
+
+void vlv_wm_get_hw_state(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = to_i915(dev);
+       struct vlv_wm_values *wm = &dev_priv->wm.vlv;
+       struct intel_plane *plane;
+       enum pipe pipe;
+       u32 val;
+
+       vlv_read_wm_values(dev_priv, wm);
+
+       for_each_intel_plane(dev, plane) {
+               switch (plane->base.type) {
+                       int sprite;
+               case DRM_PLANE_TYPE_CURSOR:
+                       plane->wm.fifo_size = 63;
+                       break;
+               case DRM_PLANE_TYPE_PRIMARY:
+                       plane->wm.fifo_size = vlv_get_fifo_size(dev, plane->pipe, 0);
+                       break;
+               case DRM_PLANE_TYPE_OVERLAY:
+                       sprite = plane->plane;
+                       plane->wm.fifo_size = vlv_get_fifo_size(dev, plane->pipe, sprite + 1);
+                       break;
+               }
+       }
+
+       wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
+       wm->level = VLV_WM_LEVEL_PM2;
+
+       if (IS_CHERRYVIEW(dev_priv)) {
+               mutex_lock(&dev_priv->rps.hw_lock);
+
+               val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
+               if (val & DSP_MAXFIFO_PM5_ENABLE)
+                       wm->level = VLV_WM_LEVEL_PM5;
+
+               /*
+                * If DDR DVFS is disabled in the BIOS, Punit
+                * will never ack the request. So if that happens
+                * assume we don't have to enable/disable DDR DVFS
+                * dynamically. To test that just set the REQ_ACK
+                * bit to poke the Punit, but don't change the
+                * HIGH/LOW bits so that we don't actually change
+                * the current state.
+                */
+               val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
+               val |= FORCE_DDR_FREQ_REQ_ACK;
+               vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
+
+               if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
+                             FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) {
+                       DRM_DEBUG_KMS("Punit not acking DDR DVFS request, "
+                                     "assuming DDR DVFS is disabled\n");
+                       dev_priv->wm.max_level = VLV_WM_LEVEL_PM5;
+               } else {
+                       val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
+                       if ((val & FORCE_DDR_HIGH_FREQ) == 0)
+                               wm->level = VLV_WM_LEVEL_DDR_DVFS;
+               }
+
+               mutex_unlock(&dev_priv->rps.hw_lock);
+       }
+
+       for_each_pipe(dev_priv, pipe)
+               DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n",
+                             pipe_name(pipe), wm->pipe[pipe].primary, wm->pipe[pipe].cursor,
+                             wm->pipe[pipe].sprite[0], wm->pipe[pipe].sprite[1]);
+
+       DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n",
+                     wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr);
+}
+
 void ilk_wm_get_hw_state(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3749,7 +4208,7 @@ static void ironlake_enable_drps(struct drm_device *dev)
        fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
                MEMMODE_FSTART_SHIFT;
 
-       vstart = (I915_READ(PXVFREQ_BASE + (fstart * 4)) & PXVFREQ_PX_MASK) >>
+       vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >>
                PXVFREQ_PX_SHIFT;
 
        dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
@@ -3780,10 +4239,10 @@ static void ironlake_enable_drps(struct drm_device *dev)
 
        ironlake_set_drps(dev, fstart);
 
-       dev_priv->ips.last_count1 = I915_READ(0x112e4) + I915_READ(0x112e8) +
-               I915_READ(0x112e0);
+       dev_priv->ips.last_count1 = I915_READ(DMIEC) +
+               I915_READ(DDREC) + I915_READ(CSIEC);
        dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
-       dev_priv->ips.last_count2 = I915_READ(0x112f4);
+       dev_priv->ips.last_count2 = I915_READ(GFXEC);
        dev_priv->ips.last_time2 = ktime_get_raw_ns();
 
        spin_unlock_irq(&mchdev_lock);
@@ -3928,6 +4387,8 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
                    GEN6_RP_DOWN_IDLE_AVG);
 
        dev_priv->rps.power = new_power;
+       dev_priv->rps.up_threshold = threshold_up;
+       dev_priv->rps.down_threshold = threshold_down;
        dev_priv->rps.last_adj = 0;
 }
 
@@ -3952,6 +4413,10 @@ static void gen6_set_rps(struct drm_device *dev, u8 val)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
 
+       /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */
+       if (IS_BROXTON(dev) && (INTEL_REVID(dev) < BXT_REVID_B0))
+               return;
+
        WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
        WARN_ON(val > dev_priv->rps.max_freq);
        WARN_ON(val < dev_priv->rps.min_freq);
@@ -3984,7 +4449,7 @@ static void gen6_set_rps(struct drm_device *dev, u8 val)
        POSTING_READ(GEN6_RPNSWREQ);
 
        dev_priv->rps.cur_freq = val;
-       trace_intel_gpu_freq_change(val * 50);
+       trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
 }
 
 static void valleyview_set_rps(struct drm_device *dev, u8 val)
@@ -3999,59 +4464,37 @@ static void valleyview_set_rps(struct drm_device *dev, u8 val)
                      "Odd GPU freq value\n"))
                val &= ~1;
 
-       if (val != dev_priv->rps.cur_freq)
-               vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
-
        I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
 
+       if (val != dev_priv->rps.cur_freq) {
+               vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
+               if (!IS_CHERRYVIEW(dev_priv))
+                       gen6_set_rps_thresholds(dev_priv, val);
+       }
+
        dev_priv->rps.cur_freq = val;
        trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
 }
 
-/* vlv_set_rps_idle: Set the frequency to Rpn if Gfx clocks are down
+/* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
  *
  * * If Gfx is Idle, then
- * 1. Mask Turbo interrupts
- * 2. Bring up Gfx clock
- * 3. Change the freq to Rpn and wait till P-Unit updates freq
- * 4. Clear the Force GFX CLK ON bit so that Gfx can down
- * 5. Unmask Turbo interrupts
+ * 1. Forcewake Media well.
+ * 2. Request idle freq.
+ * 3. Release Forcewake of Media well.
 */
 static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
 {
-       struct drm_device *dev = dev_priv->dev;
        u32 val = dev_priv->rps.idle_freq;
 
-       /* CHV and latest VLV don't need to force the gfx clock */
-       if (IS_CHERRYVIEW(dev) || dev->pdev->revision >= 0xd) {
-               valleyview_set_rps(dev_priv->dev, val);
-               return;
-       }
-
-       /*
-        * When we are idle.  Drop to min voltage state.
-        */
-
        if (dev_priv->rps.cur_freq <= val)
                return;
 
-       /* Mask turbo interrupt so that they will not come in between */
-       I915_WRITE(GEN6_PMINTRMSK,
-                  gen6_sanitize_rps_pm_mask(dev_priv, ~0));
-
-       vlv_force_gfx_clock(dev_priv, true);
-
-       dev_priv->rps.cur_freq = val;
-
-       vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
-
-       if (wait_for(((vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS))
-                               & GENFREQSTATUS) == 0, 100))
-               DRM_ERROR("timed out waiting for Punit\n");
-
-       vlv_force_gfx_clock(dev_priv, false);
-
-       I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
+       /* Wake up the media well, as that takes a lot less
+        * power than the Render well. */
+       intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
+       valleyview_set_rps(dev_priv->dev, val);
+       intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
 }
 
 void gen6_rps_busy(struct drm_i915_private *dev_priv)
@@ -4080,21 +4523,47 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
                I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
        }
        mutex_unlock(&dev_priv->rps.hw_lock);
+
+       spin_lock(&dev_priv->rps.client_lock);
+       while (!list_empty(&dev_priv->rps.clients))
+               list_del_init(dev_priv->rps.clients.next);
+       spin_unlock(&dev_priv->rps.client_lock);
 }
 
-void gen6_rps_boost(struct drm_i915_private *dev_priv)
+void gen6_rps_boost(struct drm_i915_private *dev_priv,
+                   struct intel_rps_client *rps,
+                   unsigned long submitted)
 {
-       u32 val;
+       /* This is intentionally racy! We peek at the state here, then
+        * validate inside the RPS worker.
+        */
+       if (!(dev_priv->mm.busy &&
+             dev_priv->rps.enabled &&
+             dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit))
+               return;
 
-       mutex_lock(&dev_priv->rps.hw_lock);
-       val = dev_priv->rps.max_freq_softlimit;
-       if (dev_priv->rps.enabled &&
-           dev_priv->mm.busy &&
-           dev_priv->rps.cur_freq < val) {
-               intel_set_rps(dev_priv->dev, val);
-               dev_priv->rps.last_adj = 0;
+       /* Force a RPS boost (and don't count it against the client) if
+        * the GPU is severely congested.
+        */
+       if (rps && time_after(jiffies, submitted + DRM_I915_THROTTLE_JIFFIES))
+               rps = NULL;
+
+       spin_lock(&dev_priv->rps.client_lock);
+       if (rps == NULL || list_empty(&rps->link)) {
+               spin_lock_irq(&dev_priv->irq_lock);
+               if (dev_priv->rps.interrupts_enabled) {
+                       dev_priv->rps.client_boost = true;
+                       queue_work(dev_priv->wq, &dev_priv->rps.work);
+               }
+               spin_unlock_irq(&dev_priv->irq_lock);
+
+               if (rps != NULL) {
+                       list_add(&rps->link, &dev_priv->rps.clients);
+                       rps->boosts++;
+               } else
+                       dev_priv->rps.boosts++;
        }
-       mutex_unlock(&dev_priv->rps.hw_lock);
+       spin_unlock(&dev_priv->rps.client_lock);
 }
 
 void intel_set_rps(struct drm_device *dev, u8 val)
@@ -4162,12 +4631,8 @@ static void intel_print_rc6_info(struct drm_device *dev, u32 mode)
 
 static int sanitize_rc6_option(const struct drm_device *dev, int enable_rc6)
 {
-       /* No RC6 before Ironlake */
-       if (INTEL_INFO(dev)->gen < 5)
-               return 0;
-
-       /* RC6 is only on Ironlake mobile not on desktop */
-       if (INTEL_INFO(dev)->gen == 5 && !IS_IRONLAKE_M(dev))
+       /* No RC6 before Ironlake and code is gone for ilk. */
+       if (INTEL_INFO(dev)->gen < 6)
                return 0;
 
        /* Respect the kernel parameter if it is set */
@@ -4187,10 +4652,6 @@ static int sanitize_rc6_option(const struct drm_device *dev, int enable_rc6)
                return enable_rc6 & mask;
        }
 
-       /* Disable RC6 on Ironlake */
-       if (INTEL_INFO(dev)->gen == 5)
-               return 0;
-
        if (IS_IVYBRIDGE(dev))
                return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE);
 
@@ -4209,25 +4670,26 @@ static void gen6_init_rps_frequencies(struct drm_device *dev)
        u32 ddcc_status = 0;
        int ret;
 
-       rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
        /* All of these values are in units of 50MHz */
        dev_priv->rps.cur_freq          = 0;
        /* static values from HW: RP0 > RP1 > RPn (min_freq) */
-       dev_priv->rps.rp0_freq          = (rp_state_cap >>  0) & 0xff;
-       dev_priv->rps.rp1_freq          = (rp_state_cap >>  8) & 0xff;
-       dev_priv->rps.min_freq          = (rp_state_cap >> 16) & 0xff;
-       if (IS_SKYLAKE(dev)) {
-               /* Store the frequency values in 16.66 MHZ units, which is
-                  the natural hardware unit for SKL */
-               dev_priv->rps.rp0_freq *= GEN9_FREQ_SCALER;
-               dev_priv->rps.rp1_freq *= GEN9_FREQ_SCALER;
-               dev_priv->rps.min_freq *= GEN9_FREQ_SCALER;
+       if (IS_BROXTON(dev)) {
+               rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
+               dev_priv->rps.rp0_freq = (rp_state_cap >> 16) & 0xff;
+               dev_priv->rps.rp1_freq = (rp_state_cap >>  8) & 0xff;
+               dev_priv->rps.min_freq = (rp_state_cap >>  0) & 0xff;
+       } else {
+               rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
+               dev_priv->rps.rp0_freq = (rp_state_cap >>  0) & 0xff;
+               dev_priv->rps.rp1_freq = (rp_state_cap >>  8) & 0xff;
+               dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff;
        }
+
        /* hw_max = RP0 until we check for overclocking */
        dev_priv->rps.max_freq          = dev_priv->rps.rp0_freq;
 
        dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq;
-       if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
+       if (IS_HASWELL(dev) || IS_BROADWELL(dev) || IS_SKYLAKE(dev)) {
                ret = sandybridge_pcode_read(dev_priv,
                                        HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
                                        &ddcc_status);
@@ -4239,6 +4701,16 @@ static void gen6_init_rps_frequencies(struct drm_device *dev)
                                        dev_priv->rps.max_freq);
        }
 
+       if (IS_SKYLAKE(dev)) {
+               /* Store the frequency values in 16.66 MHZ units, which is
+                  the natural hardware unit for SKL */
+               dev_priv->rps.rp0_freq *= GEN9_FREQ_SCALER;
+               dev_priv->rps.rp1_freq *= GEN9_FREQ_SCALER;
+               dev_priv->rps.min_freq *= GEN9_FREQ_SCALER;
+               dev_priv->rps.max_freq *= GEN9_FREQ_SCALER;
+               dev_priv->rps.efficient_freq *= GEN9_FREQ_SCALER;
+       }
+
        dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
 
        /* Preserve min/max settings in case of re-init */
@@ -4248,8 +4720,8 @@ static void gen6_init_rps_frequencies(struct drm_device *dev)
        if (dev_priv->rps.min_freq_softlimit == 0) {
                if (IS_HASWELL(dev) || IS_BROADWELL(dev))
                        dev_priv->rps.min_freq_softlimit =
-                               /* max(RPe, 450 MHz) */
-                               max(dev_priv->rps.efficient_freq, (u8) 9);
+                               max_t(int, dev_priv->rps.efficient_freq,
+                                     intel_freq_opcode(dev_priv, 450));
                else
                        dev_priv->rps.min_freq_softlimit =
                                dev_priv->rps.min_freq;
@@ -4265,6 +4737,12 @@ static void gen9_enable_rps(struct drm_device *dev)
 
        gen6_init_rps_frequencies(dev);
 
+       /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */
+       if (IS_BROXTON(dev) && (INTEL_REVID(dev) < BXT_REVID_B0)) {
+               intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+               return;
+       }
+
        /* Program defaults and thresholds for RPS*/
        I915_WRITE(GEN6_RC_VIDEO_FREQ,
                GEN9_FREQUENCY(dev_priv->rps.rp1_freq));
@@ -4302,13 +4780,21 @@ static void gen9_enable_rc6(struct drm_device *dev)
        I915_WRITE(GEN6_RC_CONTROL, 0);
 
        /* 2b: Program RC6 thresholds.*/
-       I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
+
+       /* WaRsDoubleRc6WrlWithCoarsePowerGating: Doubling WRL only when CPG is enabled */
+       if (IS_SKYLAKE(dev))
+               I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
+       else
+               I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
        I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
        I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
        for_each_ring(ring, dev_priv, unused)
                I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
+
+       if (HAS_GUC_UCODE(dev))
+               I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA);
+
        I915_WRITE(GEN6_RC_SLEEP, 0);
-       I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
 
        /* 2c: Program Coarse Power Gating Policies. */
        I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 25);
@@ -4319,12 +4805,30 @@ static void gen9_enable_rc6(struct drm_device *dev)
                rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
        DRM_INFO("RC6 %s\n", (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
                        "on" : "off");
-       I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
-                                  GEN6_RC_CTL_EI_MODE(1) |
-                                  rc6_mask);
+       /* WaRsUseTimeoutMode */
+       if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_D0) ||
+           (IS_BROXTON(dev) && INTEL_REVID(dev) <= BXT_REVID_A0)) {
+               I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us */
+               I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
+                          GEN7_RC_CTL_TO_MODE |
+                          rc6_mask);
+       } else {
+               I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
+               I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
+                          GEN6_RC_CTL_EI_MODE(1) |
+                          rc6_mask);
+       }
 
-       /* 3b: Enable Coarse Power Gating only when RC6 is enabled */
-       I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? 3 : 0);
+       /*
+        * 3b: Enable Coarse Power Gating only when RC6 is enabled.
+        * WaRsDisableCoarsePowerGating:skl,bxt - Render/Media PG need to be disabled with RC6.
+        */
+       if ((IS_BROXTON(dev) && (INTEL_REVID(dev) < BXT_REVID_B0)) ||
+           ((IS_SKL_GT3(dev) || IS_SKL_GT4(dev)) && (INTEL_REVID(dev) <= SKL_REVID_F0)))
+               I915_WRITE(GEN9_PG_ENABLE, 0);
+       else
+               I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
+                               (GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE) : 0);
 
        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 
@@ -4526,6 +5030,7 @@ static void __gen6_update_ring_freq(struct drm_device *dev)
        int min_freq = 15;
        unsigned int gpu_freq;
        unsigned int max_ia_freq, min_ring_freq;
+       unsigned int max_gpu_freq, min_gpu_freq;
        int scaling_factor = 180;
        struct cpufreq_policy *policy;
 
@@ -4550,17 +5055,31 @@ static void __gen6_update_ring_freq(struct drm_device *dev)
        /* convert DDR frequency from units of 266.6MHz to bandwidth */
        min_ring_freq = mult_frac(min_ring_freq, 8, 3);
 
+       if (IS_SKYLAKE(dev)) {
+               /* Convert GT frequency to 50 HZ units */
+               min_gpu_freq = dev_priv->rps.min_freq / GEN9_FREQ_SCALER;
+               max_gpu_freq = dev_priv->rps.max_freq / GEN9_FREQ_SCALER;
+       } else {
+               min_gpu_freq = dev_priv->rps.min_freq;
+               max_gpu_freq = dev_priv->rps.max_freq;
+       }
+
        /*
         * For each potential GPU frequency, load a ring frequency we'd like
         * to use for memory access.  We do this by specifying the IA frequency
         * the PCU should use as a reference to determine the ring frequency.
         */
-       for (gpu_freq = dev_priv->rps.max_freq; gpu_freq >= dev_priv->rps.min_freq;
-            gpu_freq--) {
-               int diff = dev_priv->rps.max_freq - gpu_freq;
+       for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) {
+               int diff = max_gpu_freq - gpu_freq;
                unsigned int ia_freq = 0, ring_freq = 0;
 
-               if (INTEL_INFO(dev)->gen >= 8) {
+               if (IS_SKYLAKE(dev)) {
+                       /*
+                        * ring_freq = 2 * GT. ring_freq is in 100MHz units
+                        * No floor required for ring frequency on SKL.
+                        */
+                       ring_freq = gpu_freq;
+               } else if (INTEL_INFO(dev)->gen >= 8) {
                        /* max(2 * GT, DDR). NB: GT is 50MHz units */
                        ring_freq = max(min_ring_freq, gpu_freq);
                } else if (IS_HASWELL(dev)) {
@@ -4594,7 +5113,7 @@ void gen6_update_ring_freq(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
 
-       if (INTEL_INFO(dev)->gen < 6 || IS_VALLEYVIEW(dev))
+       if (!HAS_CORE_RING_FREQ(dev))
                return;
 
        mutex_lock(&dev_priv->rps.hw_lock);
@@ -4607,32 +5126,27 @@ static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
        struct drm_device *dev = dev_priv->dev;
        u32 val, rp0;
 
-       if (dev->pdev->revision >= 0x20) {
-               val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
+       val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
 
-               switch (INTEL_INFO(dev)->eu_total) {
-               case 8:
-                               /* (2 * 4) config */
-                               rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
-                               break;
-               case 12:
-                               /* (2 * 6) config */
-                               rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
-                               break;
-               case 16:
-                               /* (2 * 8) config */
-               default:
-                               /* Setting (2 * 8) Min RP0 for any other combination */
-                               rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
-                               break;
-               }
-               rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
-       } else {
-               /* For pre-production hardware */
-               val = vlv_punit_read(dev_priv, PUNIT_GPU_STATUS_REG);
-               rp0 = (val >> PUNIT_GPU_STATUS_MAX_FREQ_SHIFT) &
-                      PUNIT_GPU_STATUS_MAX_FREQ_MASK;
+       switch (INTEL_INFO(dev)->eu_total) {
+       case 8:
+               /* (2 * 4) config */
+               rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
+               break;
+       case 12:
+               /* (2 * 6) config */
+               rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
+               break;
+       case 16:
+               /* (2 * 8) config */
+       default:
+               /* Setting (2 * 8) Min RP0 for any other combination */
+               rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
+               break;
        }
+
+       rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
+
        return rp0;
 }
 
@@ -4648,37 +5162,12 @@ static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
 
 static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
 {
-       struct drm_device *dev = dev_priv->dev;
        u32 val, rp1;
 
-       if (dev->pdev->revision >= 0x20) {
-               val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
-               rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
-       } else {
-               /* For pre-production hardware */
-               val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
-               rp1 = ((val >> PUNIT_GPU_STATUS_MAX_FREQ_SHIFT) &
-                      PUNIT_GPU_STATUS_MAX_FREQ_MASK);
-       }
-       return rp1;
-}
+       val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
+       rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
 
-static int cherryview_rps_min_freq(struct drm_i915_private *dev_priv)
-{
-       struct drm_device *dev = dev_priv->dev;
-       u32 val, rpn;
-
-       if (dev->pdev->revision >= 0x20) {
-               val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE);
-               rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) &
-                      FB_GFX_FREQ_FUSE_MASK);
-       } else { /* For pre-production hardware */
-               val = vlv_punit_read(dev_priv, PUNIT_GPU_STATUS_REG);
-               rpn = ((val >> PUNIT_GPU_STATIS_GFX_MIN_FREQ_SHIFT) &
-                      PUNIT_GPU_STATUS_GFX_MIN_FREQ_MASK);
-       }
-
-       return rpn;
+       return rp1;
 }
 
 static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
@@ -4887,30 +5376,15 @@ static void cherryview_init_gt_powersave(struct drm_device *dev)
 
        mutex_lock(&dev_priv->rps.hw_lock);
 
-       mutex_lock(&dev_priv->dpio_lock);
+       mutex_lock(&dev_priv->sb_lock);
        val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
-       mutex_unlock(&dev_priv->dpio_lock);
+       mutex_unlock(&dev_priv->sb_lock);
 
        switch ((val >> 2) & 0x7) {
-       case 0:
-       case 1:
-               dev_priv->rps.cz_freq = 200;
-               dev_priv->mem_freq = 1600;
-               break;
-       case 2:
-               dev_priv->rps.cz_freq = 267;
-               dev_priv->mem_freq = 1600;
-               break;
        case 3:
-               dev_priv->rps.cz_freq = 333;
                dev_priv->mem_freq = 2000;
                break;
-       case 4:
-               dev_priv->rps.cz_freq = 320;
-               dev_priv->mem_freq = 1600;
-               break;
-       case 5:
-               dev_priv->rps.cz_freq = 400;
+       default:
                dev_priv->mem_freq = 1600;
                break;
        }
@@ -4932,7 +5406,8 @@ static void cherryview_init_gt_powersave(struct drm_device *dev)
                         intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
                         dev_priv->rps.rp1_freq);
 
-       dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv);
+       /* PUnit validated range is only [RPe, RP0] */
+       dev_priv->rps.min_freq = dev_priv->rps.efficient_freq;
        DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
                         intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
                         dev_priv->rps.min_freq);
@@ -4994,8 +5469,8 @@ static void cherryview_enable_rps(struct drm_device *dev)
                I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
        I915_WRITE(GEN6_RC_SLEEP, 0);
 
-       /* TO threshold set to 1750 us ( 0x557 * 1.28 us) */
-       I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
+       /* TO threshold set to 500 us ( 0x186 * 1.28 us) */
+       I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
 
        /* allows RC6 residency counter to work */
        I915_WRITE(VLV_COUNTER_CONTROL,
@@ -5030,12 +5505,18 @@ static void cherryview_enable_rps(struct drm_device *dev)
                   GEN6_RP_UP_BUSY_AVG |
                   GEN6_RP_DOWN_IDLE_AVG);
 
+       /* Setting Fixed Bias */
+       val = VLV_OVERRIDE_EN |
+                 VLV_SOC_TDP_EN |
+                 CHV_BIAS_CPU_50_SOC_50;
+       vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
+
        val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
 
        /* RPS code assumes GPLL is used */
        WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
 
-       DRM_DEBUG_DRIVER("GPLL enabled? %s\n", val & GPLLENABLE ? "yes" : "no");
+       DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
        DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
 
        dev_priv->rps.cur_freq = (val >> 8) & 0xff;
@@ -5114,12 +5595,18 @@ static void valleyview_enable_rps(struct drm_device *dev)
 
        I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
 
+       /* Setting Fixed Bias */
+       val = VLV_OVERRIDE_EN |
+                 VLV_SOC_TDP_EN |
+                 VLV_BIAS_CPU_125_SOC_875;
+       vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
+
        val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
 
        /* RPS code assumes GPLL is used */
        WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
 
-       DRM_DEBUG_DRIVER("GPLL enabled? %s\n", val & GPLLENABLE ? "yes" : "no");
+       DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
        DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
 
        dev_priv->rps.cur_freq = (val >> 8) & 0xff;
@@ -5328,7 +5815,7 @@ static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
 
        assert_spin_locked(&mchdev_lock);
 
-       pxvid = I915_READ(PXVFREQ_BASE + (dev_priv->rps.cur_freq * 4));
+       pxvid = I915_READ(PXVFREQ(dev_priv->rps.cur_freq));
        pxvid = (pxvid >> 24) & 0x7f;
        ext_v = pvid_to_extvid(dev_priv, pxvid);
 
@@ -5571,13 +6058,13 @@ static void intel_init_emon(struct drm_device *dev)
        I915_WRITE(CSIEW2, 0x04000004);
 
        for (i = 0; i < 5; i++)
-               I915_WRITE(PEW + (i * 4), 0);
+               I915_WRITE(PEW(i), 0);
        for (i = 0; i < 3; i++)
-               I915_WRITE(DEW + (i * 4), 0);
+               I915_WRITE(DEW(i), 0);
 
        /* Program P-state weights to account for frequency power adjustment */
        for (i = 0; i < 16; i++) {
-               u32 pxvidfreq = I915_READ(PXVFREQ_BASE + (i * 4));
+               u32 pxvidfreq = I915_READ(PXVFREQ(i));
                unsigned long freq = intel_pxfreq(pxvidfreq);
                unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
                        PXVFREQ_PX_SHIFT;
@@ -5598,7 +6085,7 @@ static void intel_init_emon(struct drm_device *dev)
        for (i = 0; i < 4; i++) {
                u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
                        (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
-               I915_WRITE(PXW + (i * 4), val);
+               I915_WRITE(PXW(i), val);
        }
 
        /* Adjust magic regs to magic values (more experimental results) */
@@ -5614,7 +6101,7 @@ static void intel_init_emon(struct drm_device *dev)
        I915_WRITE(EG7, 0);
 
        for (i = 0; i < 8; i++)
-               I915_WRITE(PXWL + (i * 4), 0);
+               I915_WRITE(PXWL(i), 0);
 
        /* Enable PMON + select events */
        I915_WRITE(ECR, 0x80000019);
@@ -5714,7 +6201,8 @@ static void intel_gen6_powersave_work(struct work_struct *work)
        } else if (INTEL_INFO(dev)->gen >= 9) {
                gen9_enable_rc6(dev);
                gen9_enable_rps(dev);
-               __gen6_update_ring_freq(dev);
+               if (IS_SKYLAKE(dev))
+                       __gen6_update_ring_freq(dev);
        } else if (IS_BROADWELL(dev)) {
                gen8_enable_rps(dev);
                __gen6_update_ring_freq(dev);
@@ -5796,13 +6284,15 @@ static void ibx_init_clock_gating(struct drm_device *dev)
 static void g4x_disable_trickle_feed(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
-       int pipe;
+       enum pipe pipe;
 
        for_each_pipe(dev_priv, pipe) {
                I915_WRITE(DSPCNTR(pipe),
                           I915_READ(DSPCNTR(pipe)) |
                           DISPPLANE_TRICKLE_FEED_DISABLE);
-               intel_flush_primary_plane(dev_priv, pipe);
+
+               I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
+               POSTING_READ(DSPSURF(pipe));
        }
 }
 
@@ -6065,14 +6555,14 @@ static void lpt_init_clock_gating(struct drm_device *dev)
         * TODO: this bit should only be enabled when really needed, then
         * disabled when not needed anymore in order to save power.
         */
-       if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE)
+       if (HAS_PCH_LPT_LP(dev))
                I915_WRITE(SOUTH_DSPCLK_GATE_D,
                           I915_READ(SOUTH_DSPCLK_GATE_D) |
                           PCH_LP_PARTITION_LEVEL_DISABLE);
 
        /* WADPOClockGatingDisable:hsw */
-       I915_WRITE(_TRANSA_CHICKEN1,
-                  I915_READ(_TRANSA_CHICKEN1) |
+       I915_WRITE(TRANS_CHICKEN1(PIPE_A),
+                  I915_READ(TRANS_CHICKEN1(PIPE_A)) |
                   TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
 }
 
@@ -6080,7 +6570,7 @@ static void lpt_suspend_hw(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
 
-       if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) {
+       if (HAS_PCH_LPT_LP(dev)) {
                uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D);
 
                val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
@@ -6092,10 +6582,9 @@ static void broadwell_init_clock_gating(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        enum pipe pipe;
+       uint32_t misccpctl;
 
-       I915_WRITE(WM3_LP_ILK, 0);
-       I915_WRITE(WM2_LP_ILK, 0);
-       I915_WRITE(WM1_LP_ILK, 0);
+       ilk_init_lp_watermarks(dev);
 
        /* WaSwitchSolVfFArbitrationPriority:bdw */
        I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
@@ -6124,6 +6613,22 @@ static void broadwell_init_clock_gating(struct drm_device *dev)
        I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
                   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
 
+       /*
+        * WaProgramL3SqcReg1Default:bdw
+        * WaTempDisableDOPClkGating:bdw
+        */
+       misccpctl = I915_READ(GEN7_MISCCPCTL);
+       I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
+       I915_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT);
+       I915_WRITE(GEN7_MISCCPCTL, misccpctl);
+
+       /*
+        * WaGttCachingOffByDefault:bdw
+        * GTT cache may not work with big pages, so if those
+        * are ever enabled GTT cache may need to be disabled.
+        */
+       I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
+
        lpt_init_clock_gating(dev);
 }
 
@@ -6399,6 +6904,12 @@ static void cherryview_init_clock_gating(struct drm_device *dev)
        /* WaDisableSDEUnitClockGating:chv */
        I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
                   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+
+       /*
+        * GTT cache may not work with big pages, so if those
+        * are ever enabled GTT cache may need to be disabled.
+        */
+       I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
 }
 
 static void g4x_init_clock_gating(struct drm_device *dev)
@@ -6542,7 +7053,9 @@ void intel_init_pm(struct drm_device *dev)
        if (INTEL_INFO(dev)->gen >= 9) {
                skl_setup_wm_latency(dev);
 
-               dev_priv->display.init_clock_gating = skl_init_clock_gating;
+               if (IS_BROXTON(dev))
+                       dev_priv->display.init_clock_gating =
+                               bxt_init_clock_gating;
                dev_priv->display.update_wm = skl_update_wm;
                dev_priv->display.update_sprite_wm = skl_update_sprite_wm;
        } else if (HAS_PCH_SPLIT(dev)) {
@@ -6570,13 +7083,15 @@ void intel_init_pm(struct drm_device *dev)
                else if (INTEL_INFO(dev)->gen == 8)
                        dev_priv->display.init_clock_gating = broadwell_init_clock_gating;
        } else if (IS_CHERRYVIEW(dev)) {
-               dev_priv->display.update_wm = valleyview_update_wm;
-               dev_priv->display.update_sprite_wm = valleyview_update_sprite_wm;
+               vlv_setup_wm_latency(dev);
+
+               dev_priv->display.update_wm = vlv_update_wm;
                dev_priv->display.init_clock_gating =
                        cherryview_init_clock_gating;
        } else if (IS_VALLEYVIEW(dev)) {
-               dev_priv->display.update_wm = valleyview_update_wm;
-               dev_priv->display.update_sprite_wm = valleyview_update_sprite_wm;
+               vlv_setup_wm_latency(dev);
+
+               dev_priv->display.update_wm = vlv_update_wm;
                dev_priv->display.init_clock_gating =
                        valleyview_init_clock_gating;
        } else if (IS_PINEVIEW(dev)) {
@@ -6693,7 +7208,7 @@ static int vlv_gpu_freq_div(unsigned int czclk_freq)
 
 static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
 {
-       int div, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->mem_freq, 4);
+       int div, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->czclk_freq, 1000);
 
        div = vlv_gpu_freq_div(czclk_freq);
        if (div < 0)
@@ -6704,7 +7219,7 @@ static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
 
 static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
 {
-       int mul, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->mem_freq, 4);
+       int mul, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->czclk_freq, 1000);
 
        mul = vlv_gpu_freq_div(czclk_freq);
        if (mul < 0)
@@ -6715,7 +7230,7 @@ static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
 
 static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
 {
-       int div, czclk_freq = dev_priv->rps.cz_freq;
+       int div, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->czclk_freq, 1000);
 
        div = vlv_gpu_freq_div(czclk_freq) / 2;
        if (div < 0)
@@ -6726,7 +7241,7 @@ static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
 
 static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
 {
-       int mul, czclk_freq = dev_priv->rps.cz_freq;
+       int mul, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->czclk_freq, 1000);
 
        mul = vlv_gpu_freq_div(czclk_freq) / 2;
        if (mul < 0)
@@ -6739,7 +7254,8 @@ static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
 int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
 {
        if (IS_GEN9(dev_priv->dev))
-               return (val * GT_FREQUENCY_MULTIPLIER) / GEN9_FREQ_SCALER;
+               return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
+                                        GEN9_FREQ_SCALER);
        else if (IS_CHERRYVIEW(dev_priv->dev))
                return chv_gpu_freq(dev_priv, val);
        else if (IS_VALLEYVIEW(dev_priv->dev))
@@ -6751,13 +7267,54 @@ int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
 int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
 {
        if (IS_GEN9(dev_priv->dev))
-               return (val * GEN9_FREQ_SCALER) / GT_FREQUENCY_MULTIPLIER;
+               return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
+                                        GT_FREQUENCY_MULTIPLIER);
        else if (IS_CHERRYVIEW(dev_priv->dev))
                return chv_freq_opcode(dev_priv, val);
        else if (IS_VALLEYVIEW(dev_priv->dev))
                return byt_freq_opcode(dev_priv, val);
        else
-               return val / GT_FREQUENCY_MULTIPLIER;
+               return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
+}
+
+struct request_boost {
+       struct work_struct work;
+       struct drm_i915_gem_request *req;
+};
+
+static void __intel_rps_boost_work(struct work_struct *work)
+{
+       struct request_boost *boost = container_of(work, struct request_boost, work);
+       struct drm_i915_gem_request *req = boost->req;
+
+       if (!i915_gem_request_completed(req, true))
+               gen6_rps_boost(to_i915(req->ring->dev), NULL,
+                              req->emitted_jiffies);
+
+       i915_gem_request_unreference__unlocked(req);
+       kfree(boost);
+}
+
+void intel_queue_rps_boost_for_request(struct drm_device *dev,
+                                      struct drm_i915_gem_request *req)
+{
+       struct request_boost *boost;
+
+       if (req == NULL || INTEL_INFO(dev)->gen < 6)
+               return;
+
+       if (i915_gem_request_completed(req, true))
+               return;
+
+       boost = kmalloc(sizeof(*boost), GFP_ATOMIC);
+       if (boost == NULL)
+               return;
+
+       i915_gem_request_reference(req);
+       boost->req = req;
+
+       INIT_WORK(&boost->work, __intel_rps_boost_work);
+       queue_work(to_i915(dev)->wq, &boost->work);
 }
 
 void intel_pm_setup(struct drm_device *dev)
@@ -6765,9 +7322,13 @@ void intel_pm_setup(struct drm_device *dev)
        struct drm_i915_private *dev_priv = dev->dev_private;
 
        mutex_init(&dev_priv->rps.hw_lock);
+       spin_lock_init(&dev_priv->rps.client_lock);
 
        INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
                          intel_gen6_powersave_work);
+       INIT_LIST_HEAD(&dev_priv->rps.clients);
+       INIT_LIST_HEAD(&dev_priv->rps.semaphores.link);
+       INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link);
 
        dev_priv->pm.suspended = false;
 }